lru_cache.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. import os
  2. import json
  3. import hashlib
  4. import time
  5. import shutil
  6. import requests
  7. CACHE_PATH='.data'
  8. CACHE_QUOTA=5*8192
  9. def find_cache_usage ():
  10. usage = 0
  11. with os.scandir(CACHE_PATH) as sd:
  12. for entry in sd:
  13. if entry.name.endswith('.headers'):
  14. continue
  15. usage = usage + entry.stat().st_size
  16. return usage
  17. def find_oldest_file ():
  18. """
  19. Returns a DirEntry for the oldest file in the cache path.
  20. """
  21. oldest_file = None
  22. with os.scandir(CACHE_PATH) as sd:
  23. for entry in sd:
  24. if entry.name.endswith('.headers'):
  25. continue
  26. ## print(entry.stat())
  27. osatime = os.path.getatime(entry.path)
  28. ## print(f'{entry.name} atime = {entry.stat().st_atime}, osatime = {osatime}')
  29. #if not oldest_file or entry.stat().st_atime < oldest_file.stat().st_atime:
  30. if not oldest_file or osatime < oldest_file[1]:
  31. oldest_file = [entry, osatime]
  32. if oldest_file:
  33. return oldest_file[0]
  34. def purge_cache (request_free_bytes=0):
  35. """
  36. Deletes the oldest files from the cache until enough bytes have been freed.
  37. Returns false if not enough bytes could be freed.
  38. Deletes nothing if request bytes freed is 0
  39. """
  40. usage = find_cache_usage()
  41. request_free_bytes = request_free_bytes - (CACHE_QUOTA - usage)
  42. bytes_freed = 0
  43. oldest_file = find_oldest_file()
  44. while oldest_file and bytes_freed < request_free_bytes:
  45. file_size = oldest_file.stat().st_size
  46. ## print(f'purge_cache: deleting {oldest_file.name}')
  47. os.remove(oldest_file.path)
  48. if os.path.exists(oldest_file.path + '.headers'):
  49. os.remove(oldest_file.path + '.headers')
  50. bytes_freed = bytes_freed + file_size
  51. oldest_file = find_oldest_file()
  52. if request_free_bytes < bytes_freed:
  53. return False
  54. return True
  55. def filename_for_url (url):
  56. filename = hashlib.md5(url.encode('utf-8')).hexdigest()
  57. return filename
  58. def write_response (url, cache_filename, resp):
  59. with open(CACHE_PATH + '/' + cache_filename + '.headers', 'wt', encoding='utf-8') as f:
  60. headers = dict(resp.headers)
  61. headers['X-Request-URL'] = url
  62. headers['X-Cache-Filename'] = cache_filename
  63. json.dump(headers, f, indent=2)
  64. with open(CACHE_PATH + '/' + cache_filename, 'wb') as f:
  65. f.write(resp.content)
  66. def request_url (url):
  67. # add auth like S3
  68. # idea: credentials like .netrc
  69. return requests.get(url)
  70. def fetch_file (url):
  71. cache_filename = filename_for_url(url)
  72. if os.path.exists(CACHE_PATH + '/' + cache_filename):
  73. # check expiration
  74. return CACHE_PATH + '/' + cache_filename
  75. resp = request_url(url)
  76. content_length = resp.headers.get('Content-Length', 0)
  77. if content_length == 0:
  78. content_length = len(resp.content)
  79. print(f'WARNING: Content-Length = 0, url = {url}, content len = {content_length}')
  80. purge_cache(request_free_bytes=content_length)
  81. write_response(url, cache_filename, resp)
  82. return CACHE_PATH + '/' + cache_filename
  83. def main ():
  84. if os.path.exists(CACHE_PATH):
  85. shutil.rmtree(CACHE_PATH)
  86. os.mkdir(CACHE_PATH)
  87. test_purge_cache()
  88. shutil.rmtree(CACHE_PATH)
  89. os.mkdir(CACHE_PATH)
  90. test_write_quota()
  91. shutil.rmtree(CACHE_PATH)
  92. def test_purge_cache ():
  93. # create 3 files, each 8KB
  94. for i in range(0, 5):
  95. with open(f'{CACHE_PATH}/{i}.txt', 'wb') as f:
  96. buf = os.urandom(8192)
  97. f.write(buf)
  98. time.sleep(1)
  99. with open(f'{CACHE_PATH}/0.txt', 'rb') as f:
  100. f.read()
  101. oldest_file = find_oldest_file()
  102. print(f'oldest_file: {oldest_file.name}')
  103. assert oldest_file.name == '1.txt'
  104. purge_cache(request_free_bytes=8192)
  105. oldest_file = find_oldest_file()
  106. print(f'oldest_file: {oldest_file.name}')
  107. assert oldest_file.name == '2.txt'
  108. purge_cache(request_free_bytes=0)
  109. oldest_file = find_oldest_file()
  110. print(f'oldest_file: {oldest_file.name}')
  111. assert oldest_file.name == '2.txt'
  112. purge_cache(request_free_bytes=2*8192)
  113. oldest_file = find_oldest_file()
  114. print(f'oldest_file: {oldest_file.name}')
  115. assert oldest_file.name == '3.txt'
  116. purge_cache(request_free_bytes=3*8192)
  117. oldest_file = find_oldest_file()
  118. print(f'oldest_file: {oldest_file.name}')
  119. assert oldest_file.name == '4.txt'
  120. purge_cache(request_free_bytes=4*8192)
  121. oldest_file = find_oldest_file()
  122. print(f'oldest_file: {oldest_file.name}')
  123. assert oldest_file.name == '0.txt'
  124. purge_cache(request_free_bytes=5*8192)
  125. oldest_file = find_oldest_file()
  126. assert oldest_file == None
  127. # open the first file
  128. # purge cache
  129. # assert first 2 files exists
  130. # purge cache
  131. # assert only second file exists
  132. # clear file
  133. return True
  134. def test_write_quota ():
  135. for i in range(0, 5):
  136. with open(f'{CACHE_PATH}/{i}.txt', 'wb') as f:
  137. buf = os.urandom(8192)
  138. f.write(buf)
  139. time.sleep(1)
  140. purge_cache(request_free_bytes=2*8192)
  141. oldest_file = find_oldest_file()
  142. print(f'oldest_file: {oldest_file.name}')
  143. assert oldest_file.name == '2.txt'
  144. purge_cache(request_free_bytes=2*8192)
  145. oldest_file = find_oldest_file()
  146. print(f'oldest_file: {oldest_file.name}')
  147. assert oldest_file.name == '2.txt'
  148. purge_cache(request_free_bytes=2*8192 + 1)
  149. oldest_file = find_oldest_file()
  150. print(f'oldest_file: {oldest_file.name}')
  151. assert oldest_file.name == '3.txt'
  152. if __name__ == '__main__':
  153. main()