# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
# Copyright (C) 2022 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  22. """A simple least-recently-used (LRU) cache."""
  23. __all__ = [
  24. "LRUCache",
  25. "LRUSizeCache",
  26. ]
  27. from collections.abc import Callable, Iterable, Iterator
  28. from typing import Generic, TypeVar, cast
  29. _null_key = object()
  30. K = TypeVar("K")
  31. V = TypeVar("V")
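

# Design overview (a summary of the structures below): the LRU ordering is a
# doubly linked list threaded through the cache dict. Each node keeps a direct
# reference to its predecessor ('prev') but only the *key* of its successor
# ('next_key'), which is resolved through the dict when needed; the
# module-level _null_key sentinel marks the tail of the list. Per the comment
# in __getitem__, testing against the sentinel benchmarked faster than
# comparing the node with self._least_recently_used.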
class _LRUNode(Generic[K, V]):
    """A node in the linked list that makes up the LRU internals."""

    __slots__ = ("cleanup", "key", "next_key", "prev", "size", "value")

    prev: "_LRUNode[K, V] | None"
    next_key: K | object
    size: int | None

    def __init__(
        self, key: K, value: V, cleanup: Callable[[K, V], None] | None = None
    ) -> None:
        self.prev = None
        self.next_key = _null_key
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        #       one pointer from this object; we just need to decide if it
        #       actually costs us much of anything in normal usage.
        self.size = None

    def __repr__(self) -> str:
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return f"{self.__class__.__name__}({self.key!r} n:{self.next_key!r} p:{prev_key!r})"

    def run_cleanup(self) -> None:
        """Run the cleanup callback, if any, and release the stored value."""
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc.
        del self.value
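

# Illustrative sketch; the _demo_* helpers in this file are added for
# illustration only and are not part of the module's API. This one shows the
# _LRUNode cleanup contract: run_cleanup() invokes the callback at most once,
# then deletes the value slot to break reference cycles, so callers such as
# LRUCache.add reassign node.value before reusing a node.
def _demo_node_cleanup() -> None:
    seen: list[tuple[str, int]] = []
    node: _LRUNode[str, int] = _LRUNode(
        "k", 1, cleanup=lambda k, v: seen.append((k, v))
    )
    node.run_cleanup()
    assert seen == [("k", 1)]
    assert not hasattr(node, "value")  # the value slot was deleted
    node.value = 2  # reassigning, as LRUCache.add does, makes the node usable again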


class LRUCache(Generic[K, V]):
    """A class which manages a cache of entries, removing unused ones."""

    _least_recently_used: _LRUNode[K, V] | None
    _most_recently_used: _LRUNode[K, V] | None

    def __init__(
        self, max_cache: int = 100, after_cleanup_count: int | None = None
    ) -> None:
        """Initialize LRUCache.

        Args:
          max_cache: Maximum number of entries to cache
          after_cleanup_count: Number of entries to keep after cleanup
        """
        self._cache: dict[K, _LRUNode[K, V]] = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key: K) -> bool:
        """Check if key is in cache."""
        return key in self._cache

    def __getitem__(self, key: K) -> V:
        """Get item from cache and mark as recently used."""
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__.
        # We also have more knowledge about the structure here: if __getitem__
        # is succeeding, then self._most_recently_used must not be None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # Benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used).
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[cast(K, next_key)]
            node_next.prev = node_prev
        assert node_prev
        assert mru
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self) -> int:
        """Return number of items in cache."""
        return len(self._cache)

    def _walk_lru(self) -> Iterator[_LRUNode[K, V]]:
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError(
                    "the _most_recently_used entry is not"
                    " supposed to have a previous entry"
                    f" {node}"
                )
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError(
                        f"only the last node should have no next value: {node}"
                    )
                node_next = None
            else:
                node_next = self._cache[cast(K, node.next_key)]
                if node_next.prev is not node:
                    raise AssertionError(
                        f"inconsistency found, node.next.prev != node: {node}"
                    )
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError(
                        "only the _most_recently_used should"
                        f" not have a previous node: {node}"
                    )
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError(
                        f"inconsistency found, node.prev.next != node: {node}"
                    )
            yield node
            node = node_next

    def add(
        self, key: K, value: V, cleanup: Callable[[K, V], None] | None = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)
        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self) -> int:
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key: K, default: V | None = None) -> V | None:
        """Get value from cache with default if not found.

        Args:
          key: Key to look up
          default: Default value if key not found

        Returns:
          Value from cache or default
        """
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self) -> Iterable[K]:
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        Returns: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self) -> dict[K, V]:
        """Get the key:value pairs as a dict."""
        return {k: n.value for k, n in self._cache.items()}

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key: K, value: V) -> None:
        """Add a value to the cache; there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node: _LRUNode[K, V]) -> None:
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # Remove the node from its current position, taking care of the tail
        # pointer, then insert it at the front.
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[cast(K, node.next_key)]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        """Remove node from the cache and the linked list, running its cleanup."""
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[cast(K, node.next_key)]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self) -> None:
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        assert self._least_recently_used
        self._remove_node(self._least_recently_used)

    def clear(self) -> None:
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache: int, after_cleanup_count: int | None = None) -> None:
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache, after_cleanup_count=after_cleanup_count)

    def _update_max_cache(
        self, max_cache: int, after_cleanup_count: int | None = None
    ) -> None:
        self._max_cache = max_cache
        if after_cleanup_count is None:
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
        self.cleanup()
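

# Illustrative usage sketch (illustration only, not part of the module's API):
# with max_cache=2, adding a third entry pushes the cache over its limit and
# evicts the least recently used one, firing the registered cleanup callback.
def _demo_lru_cache() -> None:
    evicted: list[str] = []
    cache: LRUCache[str, int] = LRUCache(max_cache=2, after_cleanup_count=2)
    cache.add("a", 1, cleanup=lambda k, v: evicted.append(k))
    cache.add("b", 2, cleanup=lambda k, v: evicted.append(k))
    _ = cache["a"]  # touching "a" makes "b" the least recently used entry
    cache.add("c", 3)  # exceeds max_cache, so cleanup() evicts "b"
    assert evicted == ["b"]
    assert "a" in cache and "c" in cache and "b" not in cache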


class LRUSizeCache(LRUCache[K, V]):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are;
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    _compute_size: Callable[[V], int]

    def __init__(
        self,
        max_size: int = 1024 * 1024,
        after_cleanup_size: int | None = None,
        compute_size: Callable[[V], int] | None = None,
    ) -> None:
        """Create a new LRUSizeCache.

        Args:
          max_size: The max number of bytes to store before we start
            clearing out entries.
          after_cleanup_size: After cleaning up, shrink everything to this
            size.
          compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'.
        """
        self._value_size = 0
        if compute_size is None:
            self._compute_size = cast(Callable[[V], int], len)
        else:
            self._compute_size = compute_size
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size / 512), 1))
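
    # Note on the heuristic above (added commentary): the underlying LRUCache
    # entry limit is derived from max_size on the assumption of roughly 512
    # bytes per value. Within this subclass the binding constraint is the byte
    # total (_value_size against _max_size/_after_cleanup_size), which the
    # overridden add() and cleanup() enforce directly.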

    def add(
        self, key: K, value: V, cleanup: Callable[[K, V], None] | None = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            # Replacing an existing entry: release the old value and store
            # the new one, mirroring LRUCache.add.
            assert node.size is not None
            self._value_size -= node.size
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)
        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        """Remove node, subtracting its size from the running total."""
        assert node.size is not None
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size: int, after_cleanup_size: int | None = None) -> None:
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size / 512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(
        self, max_size: int, after_cleanup_size: int | None = None
    ) -> None:
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
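

# Illustrative usage sketch (illustration only, not part of the module's API):
# LRUSizeCache limits the total size of the stored values rather than their
# count. With the default compute_size=len, byte strings are measured
# directly; a value at least as large as after_cleanup_size is refused
# outright rather than cached.
def _demo_lru_size_cache() -> None:
    cache: LRUSizeCache[str, bytes] = LRUSizeCache(
        max_size=100, after_cleanup_size=80
    )
    cache.add("small", b"x" * 30)
    cache.add("medium", b"y" * 40)
    cache.add("huge", b"z" * 90)  # 90 >= after_cleanup_size, so never stored
    assert "huge" not in cache
    cache.add("more", b"w" * 40)  # 110 bytes total > max_size, cleanup runs
    assert "small" not in cache  # the least recently used entry was evicted
    assert "medium" in cache and "more" in cache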