2 Revisions 14ecc47ac1 ... c1936d7203

Author SHA1 Message Date
  Harlan Iverson c1936d7203 moved LRU cache example to subdir 1 year ago
  Harlan Iverson 67562610ae added LRU cache example 1 year ago
2 files changed with 280 additions and 0 deletions
  1. +43 −0
      2023-09-14/lru_cache/README.md
  2. +237 −0
      2023-09-14/lru_cache/lru_cache.py

+ 43 - 0
2023-09-14/lru_cache/README.md

@@ -0,0 +1,43 @@
+# Cache: Evict Least Recently Used (LRU) Files
+
+The cache holds files that have previously been read, saved so that future reads are more efficient.
+
+There is a quota that defines the amount of data that can be cached.
+
+When adding a new entry to the cache would exceed the quota,
+the LRU items are deleted until there is space to store the new item.
+
+Individual items can have a max age as well, determined by the cache server and a local option.
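+
+In terms of the implementation below, a caller only ever asks `fetch_file`
+for a URL and gets back a local path; caching and eviction happen behind
+that call. A minimal sketch (the URL is illustrative):
+
+```python
+from lru_cache import fetch_file
+
+# First call downloads and caches; later calls are served from disk
+# until the entry is evicted.
+local_path = fetch_file('https://example.com/videos/intro.mp4')
+with open(local_path, 'rb') as f:
+    data = f.read()
+```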
+
+## Use Cases
+
+* Sync videos (and sets) for offline use without having to worry about cleaning up.
+
+A video may be watched once and forgotten about as new videos are added the next time online,
+while sets may be played every day. The former would be automatically deleted soon,
+while the latter would remain on disk day after day.
+
+
+## Implementation
+
+The cached file is stored with a hashed name that won't be overwritten except by
+a file from the same location. It contains the response body as it would be
+downloaded by a browser.
+
+Adjacent, there is a file that contains a JSON dict of HTTP response headers, as
+well as some X- headers added by the caching proxy.
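+
+Concretely (mirroring `filename_for_url` and `write_response` below), the cache
+filename is the MD5 hex digest of the URL, and the sidecar appends a
+`.headers` suffix. The URL here is illustrative:
+
+```python
+import hashlib
+
+url = 'https://example.com/videos/intro.mp4'
+cache_name = hashlib.md5(url.encode('utf-8')).hexdigest()
+body_path = '.data/' + cache_name        # the response body
+headers_path = body_path + '.headers'    # JSON dict of response + X- headers
+```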
+
+When a new file is to be written, we purge the cache. The purge method takes
+a parameter with the amount of space we need freed: this could be the
+Content-Length of a new item to be cached, or additional bytes received if
+the Content-Length header was not accurate.
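+
+A rough sketch of the arithmetic involved (the function name is illustrative,
+not part of the implementation): only the shortfall beyond the quota's
+remaining headroom actually has to be evicted.
+
+```python
+def bytes_to_free(incoming_size: int, usage: int, quota: int) -> int:
+    """How many bytes eviction must reclaim before caching a new item."""
+    headroom = quota - usage  # space still available under the quota
+    return max(0, incoming_size - headroom)
+
+# e.g. a 10 KB file with 35 KB used of a 40 KB quota needs 5 KB evicted
+assert bytes_to_free(10_240, 35_840, 40_960) == 5_120
+```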
+
+
+## Related Work
+
+There is a Requests-Cache plugin that seems to cover many of our bases, but has
+no concept of quota or least recently used purging. We could add that onto the
+existing code base for our purposes.
+
+https://requests-cache.readthedocs.io/en/stable/user_guide/expiration.html#precedence
+

+ 237 - 0
2023-09-14/lru_cache/lru_cache.py

@@ -0,0 +1,237 @@
+import os
+import json
+import hashlib
+
+import time
+import shutil
+
+import requests
+
+
+CACHE_PATH = '.data'
+CACHE_QUOTA = 5 * 8192  # 40 KB; matches the 8 KB test files below
+
+def find_cache_usage ():
+    usage = 0
+    with os.scandir(CACHE_PATH) as sd:
+        for entry in sd:
+            if entry.name.endswith('.headers'):
+                continue  # header sidecars do not count against the quota
+            usage = usage + entry.stat().st_size
+    
+    return usage
+
+def find_oldest_file ():
+    """
+    Returns a DirEntry for the least recently accessed file in the cache path.
+    """
+    oldest_file = None
+    with os.scandir(CACHE_PATH) as sd:
+        for entry in sd:
+            if entry.name.endswith('.headers'):
+                continue
+            # os.path.getatime reads a fresh access time from the filesystem;
+            # DirEntry.stat() may return a cached value.
+            osatime = os.path.getatime(entry.path)
+            if not oldest_file or osatime < oldest_file[1]:
+                oldest_file = (entry, osatime)
+    
+    if oldest_file:
+        return oldest_file[0]
+
+def purge_cache (request_free_bytes=0):
+    """
+    Deletes the least recently used files from the cache until enough bytes
+    have been freed to fit request_free_bytes under the quota.
+    
+    Returns False if not enough bytes could be freed.
+    
+    Deletes nothing if the quota already has enough headroom.
+    """
+    usage = find_cache_usage()
+    
+    # Only the shortfall must be freed: subtract the headroom remaining
+    # under the quota from the requested number of bytes.
+    request_free_bytes = request_free_bytes - (CACHE_QUOTA - usage)
+
+    bytes_freed = 0
+    oldest_file = find_oldest_file()
+    while oldest_file and bytes_freed < request_free_bytes:
+        file_size = oldest_file.stat().st_size
+        os.remove(oldest_file.path)
+        # delete the header sidecar along with the body
+        if os.path.exists(oldest_file.path + '.headers'):
+            os.remove(oldest_file.path + '.headers')
+        bytes_freed = bytes_freed + file_size
+        
+        oldest_file = find_oldest_file()
+    
+    if bytes_freed < request_free_bytes:
+        return False
+    
+    return True
+
+def filename_for_url (url):
+    # MD5 is used only as a stable, filesystem-safe name, not for security;
+    # the same URL always maps to the same cache file.
+    filename = hashlib.md5(url.encode('utf-8')).hexdigest()
+    
+    return filename
+
+def write_response (url, cache_filename, resp):
+    # header sidecar: response headers plus X- metadata added by the caching proxy
+    with open(CACHE_PATH + '/' + cache_filename + '.headers', 'wt', encoding='utf-8') as f:
+        headers = dict(resp.headers)
+        headers['X-Request-URL'] = url
+        headers['X-Cache-Filename'] = cache_filename
+        json.dump(headers, f, indent=2)
+    
+    with open(CACHE_PATH + '/' + cache_filename, 'wb') as f:
+        f.write(resp.content)
+
+def request_url (url):
+    # add auth like S3
+    # idea: credentials like .netrc
+    return requests.get(url)
+
+def fetch_file (url):
+    cache_filename = filename_for_url(url)
+    
+    if os.path.exists(CACHE_PATH + '/' + cache_filename):
+        # check expiration
+        return CACHE_PATH + '/' + cache_filename
+    
+    resp = request_url(url)
+    
+    # Content-Length arrives as a string (or is absent); normalize to int
+    content_length = int(resp.headers.get('Content-Length', 0))
+    
+    if content_length == 0:
+        content_length = len(resp.content)
+        print(f'WARNING: Content-Length = 0, url = {url}, content len = {content_length}')
+    
+    purge_cache(request_free_bytes=content_length)
+    
+    write_response(url, cache_filename, resp)
+    
+    return CACHE_PATH + '/' + cache_filename
+
+def main ():
+    # run each test against a fresh, empty cache directory
+    if os.path.exists(CACHE_PATH):
+        shutil.rmtree(CACHE_PATH)
+        
+    os.mkdir(CACHE_PATH)
+    
+    test_purge_cache()
+    
+    shutil.rmtree(CACHE_PATH)
+    os.mkdir(CACHE_PATH)
+    
+    test_write_quota()
+    
+    shutil.rmtree(CACHE_PATH)
+
+
+def test_purge_cache ():
+    # create 5 files of 8 KB each, one second apart so access times differ
+    for i in range(0, 5):
+        with open(f'{CACHE_PATH}/{i}.txt', 'wb') as f:
+            buf = os.urandom(8192)
+            f.write(buf)
+        time.sleep(1)
+    
+    # reading 0.txt refreshes its access time, so 1.txt becomes the LRU entry
+    # (note: filesystems mounted with noatime may not update atime on read)
+    with open(f'{CACHE_PATH}/0.txt', 'rb') as f:
+        f.read()
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '1.txt'
+    
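+    # the cache is exactly at quota, so freeing 8 KB must evict one file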
+    purge_cache(request_free_bytes=8192)
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '2.txt'
+    
+    purge_cache(request_free_bytes=0)
+    
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '2.txt'
+    
+    purge_cache(request_free_bytes=2*8192)
+    
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '3.txt'
+    
+    purge_cache(request_free_bytes=3*8192)
+    
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '4.txt'
+    
+    purge_cache(request_free_bytes=4*8192)
+    
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '0.txt'
+    
+    purge_cache(request_free_bytes=5*8192)
+    
+    
+    oldest_file = find_oldest_file()
+    
+    assert oldest_file is None
+    
+    # To recap the scenario above: touch the first file, then purge
+    # repeatedly and confirm files are evicted in least recently used order.
+    
+    return True
+
+
+def test_write_quota ():
+    # fill the cache exactly to quota: 5 files of 8 KB against the 40 KB quota,
+    # written one second apart so access times differ
+    for i in range(0, 5):
+        with open(f'{CACHE_PATH}/{i}.txt', 'wb') as f:
+            buf = os.urandom(8192)
+            f.write(buf)
+        time.sleep(1)
+    
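+    # no headroom remains, so freeing 2*8192 must evict the two LRU files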
+    purge_cache(request_free_bytes=2*8192)
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '2.txt'
+    
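+    # evicting two files left 16 KB of headroom, so nothing more is deleted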
+    purge_cache(request_free_bytes=2*8192)
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '2.txt'
+    
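+    # one byte beyond the available headroom forces one more eviction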
+    purge_cache(request_free_bytes=2*8192 + 1)
+    
+    oldest_file = find_oldest_file()
+    
+    print(f'oldest_file: {oldest_file.name}')
+    assert oldest_file.name == '3.txt'
+
+if __name__ == '__main__':
+    main()