# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
# Copyright (C) 2022 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  21. """A simple least-recently-used (LRU) cache."""
  22. from typing import Generic, TypeVar, Optional, Callable, Dict, Iterable, Iterator
  23. _null_key = object()
  24. K = TypeVar('K')
  25. V = TypeVar('V')
  26. class _LRUNode(Generic[K, V]):
  27. """This maintains the linked-list which is the lru internals."""
  28. __slots__ = ("prev", "next_key", "key", "value", "cleanup", "size")
  29. prev: Optional["_LRUNode[K, V]"]
  30. next_key: K
  31. size: Optional[int]
  32. def __init__(self, key: K, value: V, cleanup=None):
  33. self.prev = None
  34. self.next_key = _null_key # type: ignore
  35. self.key = key
  36. self.value = value
  37. self.cleanup = cleanup
  38. # TODO: We could compute this 'on-the-fly' like we used to, and remove
  39. # one pointer from this object, we just need to decide if it
  40. # actually costs us much of anything in normal usage
  41. self.size = None
  42. def __repr__(self):
  43. if self.prev is None:
  44. prev_key = None
  45. else:
  46. prev_key = self.prev.key
  47. return "{}({!r} n:{!r} p:{!r})".format(
  48. self.__class__.__name__,
  49. self.key,
  50. self.next_key,
  51. prev_key,
  52. )
  53. def run_cleanup(self) -> None:
  54. if self.cleanup is not None:
  55. self.cleanup(self.key, self.value)
  56. self.cleanup = None
  57. # Just make sure to break any refcycles, etc
  58. del self.value
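
# A note on the node layout above (editorial sketch): each node keeps a direct
# reference to its 'prev' neighbour, but only the *key* of its 'next'
# neighbour (next_key), resolved through the cache dict; _null_key marks the
# tail. With "b" accessed more recently than "a":
#
#     head (MRU)                                  tail (LRU)
#     node("b") --next_key="a"-->    node("a") --next_key=_null_key
#     node("b") <------prev-------   node("a")   (node("b").prev is None)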


class LRUCache(Generic[K, V]):
    """A class which manages a cache of entries, removing unused ones."""

    _least_recently_used: Optional[_LRUNode[K, V]]
    _most_recently_used: Optional[_LRUNode[K, V]]

    def __init__(
        self, max_cache: int = 100, after_cleanup_count: Optional[int] = None
    ) -> None:
        self._cache: Dict[K, _LRUNode[K, V]] = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key: K) -> bool:
        return key in self._cache

    def __getitem__(self, key: K) -> V:
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__.
        # We also know more about the structure here: if __getitem__ is
        # succeeding, then self._most_recently_used must not be None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        assert node_prev
        assert mru
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self) -> int:
        return len(self._cache)

    def _walk_lru(self) -> Iterator[_LRUNode[K, V]]:
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError(
                    "the _most_recently_used entry is not"
                    " supposed to have a previous entry %s" % (node,)
                )
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError(
                        "only the last node should have no next value: %s" % (node,)
                    )
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError(
                        "inconsistency found, node.next.prev != node: %s" % (node,)
                    )
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError(
                        "only the _most_recently_used should"
                        " not have a previous node: %s" % (node,)
                    )
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError(
                        "inconsistency found, node.prev.next != node: %s" % (node,)
                    )
            yield node
            node = node_next

    def add(
        self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()
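
    # For illustration (hypothetical helper, not part of this module): a
    # cleanup callback lets the cache release resources it is dropping, e.g.
    #
    #   cache.add("conn", open_connection(), cleanup=lambda k, v: v.close())
    #
    # where open_connection() stands in for any resource constructor; its
    # close() is called when the entry is evicted or replaced.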

    def cache_size(self) -> int:
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self) -> Iterable[K]:
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        Returns: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self) -> Dict[K, V]:
        """Get the key:value pairs as a dict."""
        return {k: n.value for k, n in self._cache.items()}

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key: K, value: V) -> None:
        """Add a value to the cache; there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node: _LRUNode[K, V]) -> None:
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer; remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key  # type: ignore

    def _remove_lru(self) -> None:
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        assert self._least_recently_used
        self._remove_node(self._least_recently_used)

    def clear(self) -> None:
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(
        self, max_cache: int, after_cleanup_count: Optional[int] = None
    ) -> None:
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache, after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Default to 8/10ths of max_cache; use floor division so the
            # count stays an integer (mirrors _update_max_size below).
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
        self.cleanup()
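

# Usage sketch for LRUCache (illustrative only):
#
#   cache = LRUCache(max_cache=2)   # after_cleanup_count == 2 * 8 // 10 == 1
#   cache["a"] = 1
#   cache["b"] = 2
#   _ = cache["a"]                  # touch "a"; "b" is now least recently used
#   cache["c"] = 3                  # len exceeds max_cache, so cleanup() runs,
#                                   # evicting "b" then "a" until 1 entry remains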


class LRUSizeCache(LRUCache[K, V]):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is
    stored.

    The size of items added will be computed using compute_size(value),
    which defaults to len() if not supplied.
    """

    _compute_size: Callable[[V], int]

    def __init__(
        self,
        max_size: int = 1024 * 1024,
        after_cleanup_size: Optional[int] = None,
        compute_size: Optional[Callable[[V], int]] = None,
    ) -> None:
        """Create a new LRUSizeCache.

        Args:
          max_size: The max number of bytes to store before we start
            clearing out entries.
          after_cleanup_size: After cleaning up, shrink everything to this
            size.
          compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'.
        """
        self._value_size = 0
        if compute_size is None:
            self._compute_size = len  # type: ignore
        else:
            self._compute_size = compute_size
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size / 512), 1))

    def add(
        self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            assert node.size is not None
            self._value_size -= node.size
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        assert node.size is not None
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size: int, after_cleanup_size: Optional[int] = None) -> None:
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size / 512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(
        self, max_size: int, after_cleanup_size: Optional[int] = None
    ) -> None:
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
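

# Usage sketch for LRUSizeCache (illustrative only; sizes via the default len):
#
#   cache = LRUSizeCache(max_size=1024)   # after_cleanup_size == 1024 * 8 // 10 == 819
#   cache.add(b"sha1", b"x" * 600)
#   cache.add(b"sha2", b"x" * 600)        # total size 1200 > max_size, so cleanup()
#                                         # evicts b"sha1", leaving 600 bytes cached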