# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
# Copyright (C) 2022 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""A simple least-recently-used (LRU) cache."""

from collections.abc import Callable, Iterable, Iterator
from typing import Generic, TypeVar, cast

_null_key = object()

K = TypeVar("K")
V = TypeVar("V")


class _LRUNode(Generic[K, V]):
    """This maintains the linked-list which is the lru internals."""

    __slots__ = ("cleanup", "key", "next_key", "prev", "size", "value")

    prev: "_LRUNode[K, V] | None"
    next_key: K | object
    size: int | None

    def __init__(
        self, key: K, value: V, cleanup: Callable[[K, V], None] | None = None
    ) -> None:
        self.prev = None
        self.next_key = _null_key
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        #       one pointer from this object, we just need to decide if it
        #       actually costs us much of anything in normal usage
        self.size = None

    def __repr__(self) -> str:
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return f"{self.__class__.__name__}({self.key!r} n:{self.next_key!r} p:{prev_key!r})"

    def run_cleanup(self) -> None:
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        del self.value
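
# Implementation note: a node stores only the *key* of its successor
# (next_key) and resolves the actual node through the cache dict, while
# `prev` is a direct reference. Presumably this one-directional linking
# avoids node-to-node reference cycles, so evicted entries can be freed by
# refcounting alone rather than waiting for the cycle collector.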


class LRUCache(Generic[K, V]):
    """A class which manages a cache of entries, removing unused ones."""

    _least_recently_used: _LRUNode[K, V] | None
    _most_recently_used: _LRUNode[K, V] | None

    def __init__(
        self, max_cache: int = 100, after_cleanup_count: int | None = None
    ) -> None:
        """Initialize LRUCache.

        Args:
          max_cache: Maximum number of entries to cache
          after_cleanup_count: Number of entries to keep after cleanup
        """
        self._cache: dict[K, _LRUNode[K, V]] = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key: K) -> bool:
        """Check if key is in cache."""
        return key in self._cache

    def __getitem__(self, key: K) -> V:
        """Get item from cache and mark as recently used."""
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__.
        # We also know more about the structure when __getitem__ succeeds:
        # self._most_recently_used must not be None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[cast(K, next_key)]
            node_next.prev = node_prev
        assert node_prev
        assert mru
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self) -> int:
        """Return number of items in cache."""
        return len(self._cache)

    def _walk_lru(self) -> Iterator[_LRUNode[K, V]]:
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError(
                    "the _most_recently_used entry is not"
                    " supposed to have a previous entry"
                    f" {node}"
                )
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError(
                        f"only the last node should have no next value: {node}"
                    )
                node_next = None
            else:
                node_next = self._cache[cast(K, node.next_key)]
                if node_next.prev is not node:
                    raise AssertionError(
                        f"inconsistency found, node.next.prev != node: {node}"
                    )
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError(
                        "only the _most_recently_used should"
                        f" not have a previous node: {node}"
                    )
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError(
                        f"inconsistency found, node.prev.next != node: {node}"
                    )
            yield node
            node = node_next

    def add(
        self, key: K, value: V, cleanup: Callable[[K, V], None] | None = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
              'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)
        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self) -> int:
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key: K, default: V | None = None) -> V | None:
        """Get value from cache with default if not found.

        Args:
          key: Key to look up
          default: Default value if key not found

        Returns:
          Value from cache or default
        """
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self) -> Iterable[K]:
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        Returns: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self) -> dict[K, V]:
        """Get the key:value pairs as a dict."""
        return {k: n.value for k, n in self._cache.items()}

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key: K, value: V) -> None:
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node: _LRUNode[K, V]) -> None:
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[cast(K, node.next_key)]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[cast(K, node.next_key)]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self) -> None:
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        assert self._least_recently_used
        self._remove_node(self._least_recently_used)

    def clear(self) -> None:
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache: int, after_cleanup_count: int | None = None) -> None:
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache, after_cleanup_count=after_cleanup_count)

    def _update_max_cache(
        self, max_cache: int, after_cleanup_count: int | None = None
    ) -> None:
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Default to keeping 80% of max_cache after a cleanup pass
            # (integer division, matching the size-based default in
            # LRUSizeCache below).
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
        self.cleanup()
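

# A minimal usage sketch (illustrative only, not part of dulwich's API; the
# helper name below is hypothetical). It shows the core behaviour: accesses
# refresh recency, overflowing max_cache shrinks the cache down to
# after_cleanup_count, and the cleanup callback fires on eviction.
def _demo_lru_cache() -> None:
    evicted: list[str] = []
    cache: LRUCache[str, int] = LRUCache(max_cache=3, after_cleanup_count=2)
    cache.add("a", 1, cleanup=lambda k, v: evicted.append(k))
    cache["b"] = 2
    cache["c"] = 3
    _ = cache["b"]  # touching "b" makes it the most recently used entry
    cache["d"] = 4  # size 4 > max_cache: shrink to after_cleanup_count (2)
    assert len(cache) == 2
    assert "b" in cache and "d" in cache  # the recently used entries survive
    assert cache.get("a") is None  # "a" and "c" were evicted in LRU order
    assert evicted == ["a"]  # the cleanup callback fired for evicted "a"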


class LRUSizeCache(LRUCache[K, V]):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    _compute_size: Callable[[V], int]

    def __init__(
        self,
        max_size: int = 1024 * 1024,
        after_cleanup_size: int | None = None,
        compute_size: Callable[[V], int] | None = None,
    ) -> None:
        """Create a new LRUSizeCache.

        Args:
          max_size: The max number of bytes to store before we start
              clearing out entries.
          after_cleanup_size: After cleaning up, shrink everything to this
              size.
          compute_size: A function to compute the size of the values. We
              use a function here, so that you can pass 'len' if you are just
              using simple strings, or a more complex function if you are
              using something like a list of strings, or even a custom object.
              The function should take the form "compute_size(value) => integer".
              If not supplied, it defaults to 'len()'.
        """
        self._value_size = 0
        if compute_size is None:
            self._compute_size = cast(Callable[[V], int], len)
        else:
            self._compute_size = compute_size
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size / 512), 1))

    def add(
        self, key: K, value: V, cleanup: Callable[[K, V], None] | None = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
              'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            # Replacing an existing entry: release the old value and account
            # for the size difference, mirroring LRUCache.add above.
            assert node.size is not None
            self._value_size -= node.size
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)
        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        assert node.size is not None
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size: int, after_cleanup_size: int | None = None) -> None:
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size / 512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(
        self, max_size: int, after_cleanup_size: int | None = None
    ) -> None:
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
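

# A size-based sketch in the same spirit (again a hypothetical helper, not
# part of the module): values are weighed with compute_size (len by default),
# overflowing max_size evicts LRU entries until after_cleanup_size is
# reached, and a single value at or above after_cleanup_size is refused
# outright rather than cached.
def _demo_lru_size_cache() -> None:
    cache: LRUSizeCache[str, bytes] = LRUSizeCache(
        max_size=100, after_cleanup_size=60, compute_size=len
    )
    cache["a"] = b"x" * 40
    cache["b"] = b"y" * 40  # 80 bytes total, still within max_size
    assert len(cache) == 2
    cache["c"] = b"z" * 30  # 110 > max_size: evict LRU entries until <= 60
    assert "a" not in cache and "b" not in cache and "c" in cache
    cache["big"] = b"!" * 60  # >= after_cleanup_size: refused outright
    assert "big" not in cache


if __name__ == "__main__":  # illustrative smoke test for the sketches above
    _demo_lru_cache()
    _demo_lru_size_cache()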