
Import new lru_cache and tests.

Jelmer Vernooij 16 years ago
parent
commit
19064d1e49
2 changed files with 640 additions and 79 deletions
  1. dulwich/lru_cache.py (+193 -79)
  2. dulwich/tests/test_lru_cache.py (+447 -0)
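The new implementation replaces the old deque-plus-refcount bookkeeping with a doubly linked list threaded through the cache nodes (_LRUNode), so each access is constant-time pointer surgery instead of periodic queue compaction. A minimal sketch of the resulting count-bounded behaviour, using only the API added in this commit (the keys and values are illustrative):

    from dulwich.lru_cache import LRUCache

    # Once len() exceeds max_cache, cleanup() evicts least-recently-used
    # entries until only after_cleanup_count remain.
    cache = LRUCache(max_cache=2, after_cleanup_count=2)
    cache['a'] = 1
    cache['b'] = 2
    cache['a']       # __getitem__ moves 'a' to the head of the LRU list
    cache['c'] = 3   # overflow: 'b', the least recently used entry, goes
    assert 'b' not in cache
    assert sorted(cache.keys()) == ['a', 'c']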

+ 193 - 79
dulwich/lru_cache.py

@@ -12,11 +12,42 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 """A simple least-recently-used (LRU) cache."""
 
-from collections import deque
+_null_key = object()
+
+class _LRUNode(object):
+    """This maintains the linked-list which is the lru internals."""
+
+    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
+
+    def __init__(self, key, value, cleanup=None):
+        self.prev = None
+        self.next_key = _null_key
+        self.key = key
+        self.value = value
+        self.cleanup = cleanup
+        # TODO: We could compute this 'on-the-fly' like we used to, and remove
+        #       one pointer from this object, we just need to decide if it
+        #       actually costs us much of anything in normal usage
+        self.size = None
+
+    def __repr__(self):
+        if self.prev is None:
+            prev_key = None
+        else:
+            prev_key = self.prev.key
+        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
+                                     self.next_key, prev_key)
+
+    def run_cleanup(self):
+        if self.cleanup is not None:
+            self.cleanup(self.key, self.value)
+        self.cleanup = None
+        # Just make sure to break any refcycles, etc
+        self.value = None
 
 
 class LRUCache(object):
@@ -24,48 +55,117 @@ class LRUCache(object):
 
     def __init__(self, max_cache=100, after_cleanup_count=None):
         self._cache = {}
-        self._cleanup = {}
-        self._queue = deque() # Track when things are accessed
-        self._refcount = {} # number of entries in self._queue for each key
+        # The "HEAD" of the lru linked list
+        self._most_recently_used = None
+        # The "TAIL" of the lru linked list
+        self._least_recently_used = None
         self._update_max_cache(max_cache, after_cleanup_count)
 
     def __contains__(self, key):
         return key in self._cache
 
     def __getitem__(self, key):
-        val = self._cache[key]
-        self._record_access(key)
-        return val
+        cache = self._cache
+        node = cache[key]
+        # Inlined from _record_access to decrease the overhead of __getitem__
+        # We also have more knowledge about structure if __getitem__ is
+        # succeeding, then we know that self._most_recently_used must not be
+        # None, etc.
+        mru = self._most_recently_used
+        if node is mru:
+            # Nothing to do, this node is already at the head of the queue
+            return node.value
+        # Remove this node from the old location
+        node_prev = node.prev
+        next_key = node.next_key
+        # benchmarking shows that the lookup of _null_key in globals is faster
+        # than the attribute lookup for (node is self._least_recently_used)
+        if next_key is _null_key:
+            # 'node' is the _least_recently_used, because it doesn't have a
+            # 'next' item. So move the current lru to the previous node.
+            self._least_recently_used = node_prev
+        else:
+            node_next = cache[next_key]
+            node_next.prev = node_prev
+        node_prev.next_key = next_key
+        # Insert this node at the front of the list
+        node.next_key = mru.key
+        mru.prev = node
+        self._most_recently_used = node
+        node.prev = None
+        return node.value
 
     def __len__(self):
         return len(self._cache)
 
+    def _walk_lru(self):
+        """Walk the LRU list, only meant to be used in tests."""
+        node = self._most_recently_used
+        if node is not None:
+            if node.prev is not None:
+                raise AssertionError('the _most_recently_used entry is not'
+                                     ' supposed to have a previous entry'
+                                     ' %s' % (node,))
+        while node is not None:
+            if node.next_key is _null_key:
+                if node is not self._least_recently_used:
+                    raise AssertionError('only the last node should have'
+                                         ' no next value: %s' % (node,))
+                node_next = None
+            else:
+                node_next = self._cache[node.next_key]
+                if node_next.prev is not node:
+                    raise AssertionError('inconsistency found, node.next.prev'
+                                         ' != node: %s' % (node,))
+            if node.prev is None:
+                if node is not self._most_recently_used:
+                    raise AssertionError('only the _most_recently_used should'
+                                         ' not have a previous node: %s'
+                                         % (node,))
+            else:
+                if node.prev.next_key != node.key:
+                    raise AssertionError('inconsistency found, node.prev.next'
+                                         ' != node: %s' % (node,))
+            yield node
+            node = node_next
+
     def add(self, key, value, cleanup=None):
         """Add a new value to the cache.
 
-        Also, if the entry is ever removed from the queue, call cleanup.
-        Passing it the key and value being removed.
+        Also, if the entry is ever removed from the cache, call
+        cleanup(key, value).
 
         :param key: The key to store it under
         :param value: The object to store
         :param cleanup: None or a function taking (key, value) to indicate
-                        'value' sohuld be cleaned up.
+                        'value' should be cleaned up.
         """
+        if key is _null_key:
+            raise ValueError('cannot use _null_key as a key')
         if key in self._cache:
-            self._remove(key)
-        self._cache[key] = value
-        if cleanup is not None:
-            self._cleanup[key] = cleanup
-        self._record_access(key)
+            node = self._cache[key]
+            node.run_cleanup()
+            node.value = value
+            node.cleanup = cleanup
+        else:
+            node = _LRUNode(key, value, cleanup=cleanup)
+            self._cache[key] = node
+        self._record_access(node)
 
         if len(self._cache) > self._max_cache:
             # Trigger the cleanup
             self.cleanup()
 
+    def cache_size(self):
+        """Get the number of entries we will cache."""
+        return self._max_cache
+
     def get(self, key, default=None):
-        if key in self._cache:
-            return self[key]
-        return default
+        node = self._cache.get(key, None)
+        if node is None:
+            return default
+        self._record_access(node)
+        return node.value
 
     def keys(self):
         """Get the list of keys currently cached.
@@ -78,6 +178,10 @@ class LRUCache(object):
         """
         return self._cache.keys()
 
+    def items(self):
+        """Get the key:value pairs as a dict."""
+        return dict((k, n.value) for k, n in self._cache.iteritems())
+
     def cleanup(self):
         """Clear the cache until it shrinks to the requested size.
 
@@ -87,45 +191,54 @@ class LRUCache(object):
         # Make sure the cache is shrunk to the correct size
         while len(self._cache) > self._after_cleanup_count:
             self._remove_lru()
-        # No need to compact the queue at this point, because the code that
-        # calls this would have already triggered it based on queue length
 
     def __setitem__(self, key, value):
         """Add a value to the cache, there will be no cleanup function."""
         self.add(key, value, cleanup=None)
 
-    def _record_access(self, key):
+    def _record_access(self, node):
         """Record that key was accessed."""
-        self._queue.append(key)
-        # Can't use setdefault because you can't += 1 the result
-        self._refcount[key] = self._refcount.get(key, 0) + 1
-
-        # If our access queue is too large, clean it up too
-        if len(self._queue) > self._compact_queue_length:
-            self._compact_queue()
-
-    def _compact_queue(self):
-        """Compact the queue, leaving things in sorted last appended order."""
-        new_queue = deque()
-        for item in self._queue:
-            if self._refcount[item] == 1:
-                new_queue.append(item)
-            else:
-                self._refcount[item] -= 1
-        self._queue = new_queue
-        # All entries should be of the same size. There should be one entry in
-        # queue for each entry in cache, and all refcounts should == 1
-        if not (len(self._queue) == len(self._cache) ==
-                len(self._refcount) == sum(self._refcount.itervalues())):
-            raise AssertionError()
-
-    def _remove(self, key):
-        """Remove an entry, making sure to maintain the invariants."""
-        cleanup = self._cleanup.pop(key, None)
-        val = self._cache.pop(key)
-        if cleanup is not None:
-            cleanup(key, val)
-        return val
+        # Move 'node' to the front of the queue
+        if self._most_recently_used is None:
+            self._most_recently_used = node
+            self._least_recently_used = node
+            return
+        elif node is self._most_recently_used:
+            # Nothing to do, this node is already at the head of the queue
+            return
+        # We've taken care of the tail pointer, remove the node, and insert it
+        # at the front
+        # REMOVE
+        if node is self._least_recently_used:
+            self._least_recently_used = node.prev
+        if node.prev is not None:
+            node.prev.next_key = node.next_key
+        if node.next_key is not _null_key:
+            node_next = self._cache[node.next_key]
+            node_next.prev = node.prev
+        # INSERT
+        node.next_key = self._most_recently_used.key
+        self._most_recently_used.prev = node
+        self._most_recently_used = node
+        node.prev = None
+
+    def _remove_node(self, node):
+        if node is self._least_recently_used:
+            self._least_recently_used = node.prev
+        self._cache.pop(node.key)
+        # If we have removed all entries, remove the head pointer as well
+        if self._least_recently_used is None:
+            self._most_recently_used = None
+        node.run_cleanup()
+        # Now remove this node from the linked list
+        if node.prev is not None:
+            node.prev.next_key = node.next_key
+        if node.next_key is not _null_key:
+            node_next = self._cache[node.next_key]
+            node_next.prev = node.prev
+        # And remove this node's pointers
+        node.prev = None
+        node.next_key = _null_key
 
     def _remove_lru(self):
         """Remove one entry from the lru, and handle consequences.
@@ -133,11 +246,7 @@ class LRUCache(object):
         If there are no more references to the lru, then this entry should be
         removed from the cache.
         """
-        key = self._queue.popleft()
-        self._refcount[key] -= 1
-        if not self._refcount[key]:
-            del self._refcount[key]
-            self._remove(key)
+        self._remove_node(self._least_recently_used)
 
     def clear(self):
         """Clear out all of the cache."""
@@ -155,11 +264,8 @@ class LRUCache(object):
         if after_cleanup_count is None:
             self._after_cleanup_count = self._max_cache * 8 / 10
         else:
-            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
-
-        self._compact_queue_length = 4*self._max_cache
-        if len(self._queue) > self._compact_queue_length:
-            self._compact_queue()
+            self._after_cleanup_count = min(after_cleanup_count,
+                                            self._max_cache)
         self.cleanup()
 
 
@@ -169,7 +275,8 @@ class LRUSizeCache(LRUCache):
     This differs in that it doesn't care how many actual items there are,
     it just restricts the cache to be cleaned up after so much data is stored.
 
-    The values that are added must support len(value).
+    The size of items added will be computed using compute_size(value), which
+    defaults to len() if not supplied.
     """
 
     def __init__(self, max_size=1024*1024, after_cleanup_size=None,
@@ -191,33 +298,41 @@ class LRUSizeCache(LRUCache):
         self._compute_size = compute_size
         if compute_size is None:
             self._compute_size = len
-        # This approximates that texts are > 0.5k in size. It only really
-        # effects when we clean up the queue, so we don't want it to be too
-        # large.
         self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
         LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
 
     def add(self, key, value, cleanup=None):
         """Add a new value to the cache.
 
-        Also, if the entry is ever removed from the queue, call cleanup.
-        Passing it the key and value being removed.
+        Also, if the entry is ever removed from the cache, call
+        cleanup(key, value).
 
         :param key: The key to store it under
         :param value: The object to store
         :param cleanup: None or a function taking (key, value) to indicate
-                        'value' sohuld be cleaned up.
+                        'value' should be cleaned up.
         """
-        if key in self._cache:
-            self._remove(key)
+        if key is _null_key:
+            raise ValueError('cannot use _null_key as a key')
+        node = self._cache.get(key, None)
         value_len = self._compute_size(value)
         if value_len >= self._after_cleanup_size:
+            # The new value is 'too big to fit', as it would fill up/overflow
+            # the cache all by itself
+            if node is not None:
+                # We won't be replacing the old node, so just remove it
+                self._remove_node(node)
+            if cleanup is not None:
+                cleanup(key, value)
             return
+        if node is None:
+            node = _LRUNode(key, value, cleanup=cleanup)
+            self._cache[key] = node
+        else:
+            self._value_size -= node.size
+        node.size = value_len
         self._value_size += value_len
-        self._cache[key] = value
-        if cleanup is not None:
-            self._cleanup[key] = cleanup
-        self._record_access(key)
+        self._record_access(node)
 
         if self._value_size > self._max_size:
             # Time to cleanup
@@ -233,10 +348,9 @@ class LRUSizeCache(LRUCache):
         while self._value_size > self._after_cleanup_size:
             self._remove_lru()
 
-    def _remove(self, key):
-        """Remove an entry, making sure to maintain the invariants."""
-        val = LRUCache._remove(self, key)
-        self._value_size -= self._compute_size(val)
+    def _remove_node(self, node):
+        self._value_size -= node.size
+        LRUCache._remove_node(self, node)
 
     def resize(self, max_size, after_cleanup_size=None):
         """Change the number of bytes that will be cached."""

+ 447 - 0
dulwich/tests/test_lru_cache.py

@@ -0,0 +1,447 @@
+# Copyright (C) 2006, 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for the lru_cache module."""
+
+from dulwich import (
+    lru_cache,
+    )
+import unittest
+
+
+class TestLRUCache(unittest.TestCase):
+    """Test that LRU cache properly keeps track of entries."""
+
+    def test_cache_size(self):
+        cache = lru_cache.LRUCache(max_cache=10)
+        self.assertEqual(10, cache.cache_size())
+
+        cache = lru_cache.LRUCache(max_cache=256)
+        self.assertEqual(256, cache.cache_size())
+
+        cache.resize(512)
+        self.assertEqual(512, cache.cache_size())
+
+    def test_missing(self):
+        cache = lru_cache.LRUCache(max_cache=10)
+
+        self.failIf('foo' in cache)
+        self.assertRaises(KeyError, cache.__getitem__, 'foo')
+
+        cache['foo'] = 'bar'
+        self.assertEqual('bar', cache['foo'])
+        self.failUnless('foo' in cache)
+        self.failIf('bar' in cache)
+
+    def test_map_None(self):
+        # Make sure that we can properly map None as a key.
+        cache = lru_cache.LRUCache(max_cache=10)
+        self.failIf(None in cache)
+        cache[None] = 1
+        self.assertEqual(1, cache[None])
+        cache[None] = 2
+        self.assertEqual(2, cache[None])
+        # Test the various code paths of __getitem__, to make sure that we can
+        # handle when None is the key for the LRU and the MRU
+        cache[1] = 3
+        cache[None] = 1
+        cache[None]
+        cache[1]
+        cache[None]
+        self.assertEqual([None, 1], [n.key for n in cache._walk_lru()])
+
+    def test_add__null_key(self):
+        cache = lru_cache.LRUCache(max_cache=10)
+        self.assertRaises(ValueError, cache.add, lru_cache._null_key, 1)
+
+    def test_overflow(self):
+        """Adding extra entries will pop out old ones."""
+        cache = lru_cache.LRUCache(max_cache=1, after_cleanup_count=1)
+
+        cache['foo'] = 'bar'
+        # With a max cache of 1, adding 'baz' should pop out 'foo'
+        cache['baz'] = 'biz'
+
+        self.failIf('foo' in cache)
+        self.failUnless('baz' in cache)
+
+        self.assertEqual('biz', cache['baz'])
+
+    def test_by_usage(self):
+        """Accessing entries bumps them up in priority."""
+        cache = lru_cache.LRUCache(max_cache=2)
+
+        cache['baz'] = 'biz'
+        cache['foo'] = 'bar'
+
+        self.assertEqual('biz', cache['baz'])
+
+        # This must kick out 'foo' because it was the last accessed
+        cache['nub'] = 'in'
+
+        self.failIf('foo' in cache)
+
+    def test_cleanup(self):
+        """Test that we can use a cleanup function."""
+        cleanup_called = []
+        def cleanup_func(key, val):
+            cleanup_called.append((key, val))
+
+        cache = lru_cache.LRUCache(max_cache=2)
+
+        cache.add('baz', '1', cleanup=cleanup_func)
+        cache.add('foo', '2', cleanup=cleanup_func)
+        cache.add('biz', '3', cleanup=cleanup_func)
+
+        self.assertEqual([('baz', '1')], cleanup_called)
+
+        # 'foo' is now most recent, so final cleanup will call it last
+        cache['foo']
+        cache.clear()
+        self.assertEqual([('baz', '1'), ('biz', '3'), ('foo', '2')],
+                         cleanup_called)
+
+    def test_cleanup_on_replace(self):
+        """Replacing an object should cleanup the old value."""
+        cleanup_called = []
+        def cleanup_func(key, val):
+            cleanup_called.append((key, val))
+
+        cache = lru_cache.LRUCache(max_cache=2)
+        cache.add(1, 10, cleanup=cleanup_func)
+        cache.add(2, 20, cleanup=cleanup_func)
+        cache.add(2, 25, cleanup=cleanup_func)
+
+        self.assertEqual([(2, 20)], cleanup_called)
+        self.assertEqual(25, cache[2])
+
+        # Even __setitem__ should make sure cleanup() is called
+        cache[2] = 26
+        self.assertEqual([(2, 20), (2, 25)], cleanup_called)
+
+    def test_len(self):
+        cache = lru_cache.LRUCache(max_cache=10, after_cleanup_count=10)
+
+        cache[1] = 10
+        cache[2] = 20
+        cache[3] = 30
+        cache[4] = 40
+
+        self.assertEqual(4, len(cache))
+
+        cache[5] = 50
+        cache[6] = 60
+        cache[7] = 70
+        cache[8] = 80
+
+        self.assertEqual(8, len(cache))
+
+        cache[1] = 15 # replacement
+
+        self.assertEqual(8, len(cache))
+
+        cache[9] = 90
+        cache[10] = 100
+        cache[11] = 110
+
+        # We hit the max
+        self.assertEqual(10, len(cache))
+        self.assertEqual([11, 10, 9, 1, 8, 7, 6, 5, 4, 3],
+                         [n.key for n in cache._walk_lru()])
+
+    def test_cleanup_shrinks_to_after_clean_count(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=3)
+
+        cache.add(1, 10)
+        cache.add(2, 20)
+        cache.add(3, 25)
+        cache.add(4, 30)
+        cache.add(5, 35)
+
+        self.assertEqual(5, len(cache))
+        # This will bump us over the max, which causes us to shrink down to
+        # after_cleanup_cache size
+        cache.add(6, 40)
+        self.assertEqual(3, len(cache))
+
+    def test_after_cleanup_larger_than_max(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=10)
+        self.assertEqual(5, cache._after_cleanup_count)
+
+    def test_after_cleanup_none(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=None)
+        # By default _after_cleanup_size is 80% of the normal size
+        self.assertEqual(4, cache._after_cleanup_count)
+
+    def test_cleanup(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=2)
+
+        # Add these in order
+        cache.add(1, 10)
+        cache.add(2, 20)
+        cache.add(3, 25)
+        cache.add(4, 30)
+        cache.add(5, 35)
+
+        self.assertEqual(5, len(cache))
+        # Force a compaction
+        cache.cleanup()
+        self.assertEqual(2, len(cache))
+
+    def test_preserve_last_access_order(self):
+        cache = lru_cache.LRUCache(max_cache=5)
+
+        # Add these in order
+        cache.add(1, 10)
+        cache.add(2, 20)
+        cache.add(3, 25)
+        cache.add(4, 30)
+        cache.add(5, 35)
+
+        self.assertEqual([5, 4, 3, 2, 1], [n.key for n in cache._walk_lru()])
+
+        # Now access some randomly
+        cache[2]
+        cache[5]
+        cache[3]
+        cache[2]
+        self.assertEqual([2, 3, 5, 4, 1], [n.key for n in cache._walk_lru()])
+
+    def test_get(self):
+        cache = lru_cache.LRUCache(max_cache=5)
+
+        cache.add(1, 10)
+        cache.add(2, 20)
+        self.assertEqual(20, cache.get(2))
+        self.assertEquals(None, cache.get(3))
+        obj = object()
+        self.assertTrue(obj is cache.get(3, obj))
+        self.assertEqual([2, 1], [n.key for n in cache._walk_lru()])
+        self.assertEqual(10, cache.get(1))
+        self.assertEqual([1, 2], [n.key for n in cache._walk_lru()])
+
+    def test_keys(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=5)
+
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        self.assertEqual([1, 2, 3], sorted(cache.keys()))
+        cache[4] = 5
+        cache[5] = 6
+        cache[6] = 7
+        self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
+
+    def test_resize_smaller(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=4)
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        cache[4] = 5
+        cache[5] = 6
+        self.assertEqual([1, 2, 3, 4, 5], sorted(cache.keys()))
+        cache[6] = 7
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        # Now resize to something smaller, which triggers a cleanup
+        cache.resize(max_cache=3, after_cleanup_count=2)
+        self.assertEqual([5, 6], sorted(cache.keys()))
+        # Adding something will use the new size
+        cache[7] = 8
+        self.assertEqual([5, 6, 7], sorted(cache.keys()))
+        cache[8] = 9
+        self.assertEqual([7, 8], sorted(cache.keys()))
+
+    def test_resize_larger(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=4)
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        cache[4] = 5
+        cache[5] = 6
+        self.assertEqual([1, 2, 3, 4, 5], sorted(cache.keys()))
+        cache[6] = 7
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        cache.resize(max_cache=8, after_cleanup_count=6)
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        cache[7] = 8
+        cache[8] = 9
+        cache[9] = 10
+        cache[10] = 11
+        self.assertEqual([3, 4, 5, 6, 7, 8, 9, 10], sorted(cache.keys()))
+        cache[11] = 12 # triggers cleanup back to new after_cleanup_count
+        self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
+
+
+class TestLRUSizeCache(unittest.TestCase):
+
+    def test_basic_init(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertEqual(2048, cache._max_cache)
+        self.assertEqual(int(cache._max_size*0.8), cache._after_cleanup_size)
+        self.assertEqual(0, cache._value_size)
+
+    def test_add__null_key(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertRaises(ValueError, cache.add, lru_cache._null_key, 1)
+
+    def test_add_tracks_size(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertEqual(0, cache._value_size)
+        cache.add('my key', 'my value text')
+        self.assertEqual(13, cache._value_size)
+
+    def test_remove_tracks_size(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertEqual(0, cache._value_size)
+        cache.add('my key', 'my value text')
+        self.assertEqual(13, cache._value_size)
+        node = cache._cache['my key']
+        cache._remove_node(node)
+        self.assertEqual(0, cache._value_size)
+
+    def test_no_add_over_size(self):
+        """Adding a large value may not be cached at all."""
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=5)
+        self.assertEqual(0, cache._value_size)
+        self.assertEqual({}, cache.items())
+        cache.add('test', 'key')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test': 'key'}, cache.items())
+        cache.add('test2', 'key that is too big')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test':'key'}, cache.items())
+        # If we would add a key, only to cleanup and remove all cached entries,
+        # then obviously that value should not be stored
+        cache.add('test3', 'bigkey')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test':'key'}, cache.items())
+
+        cache.add('test4', 'bikey')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test':'key'}, cache.items())
+
+    def test_no_add_over_size_cleanup(self):
+        """If a large value is not cached, we will call cleanup right away."""
+        cleanup_calls = []
+        def cleanup(key, value):
+            cleanup_calls.append((key, value))
+
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=5)
+        self.assertEqual(0, cache._value_size)
+        self.assertEqual({}, cache.items())
+        cache.add('test', 'key that is too big', cleanup=cleanup)
+        # key was not added
+        self.assertEqual(0, cache._value_size)
+        self.assertEqual({}, cache.items())
+        # and cleanup was called
+        self.assertEqual([('test', 'key that is too big')], cleanup_calls)
+
+    def test_adding_clears_cache_based_on_size(self):
+        """The cache is cleared in LRU order until small enough"""
+        cache = lru_cache.LRUSizeCache(max_size=20)
+        cache.add('key1', 'value') # 5 chars
+        cache.add('key2', 'value2') # 6 chars
+        cache.add('key3', 'value23') # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+        cache['key2'] # reference key2 so it gets a newer reference time
+        cache.add('key4', 'value234') # 8 chars, over limit
+        # We have to remove 2 keys to get back under limit
+        self.assertEqual(6+8, cache._value_size)
+        self.assertEqual({'key2':'value2', 'key4':'value234'},
+                         cache.items())
+
+    def test_adding_clears_to_after_cleanup_size(self):
+        cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10)
+        cache.add('key1', 'value') # 5 chars
+        cache.add('key2', 'value2') # 6 chars
+        cache.add('key3', 'value23') # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+        cache['key2'] # reference key2 so it gets a newer reference time
+        cache.add('key4', 'value234') # 8 chars, over limit
+        # We have to remove 3 keys to get back under limit
+        self.assertEqual(8, cache._value_size)
+        self.assertEqual({'key4':'value234'}, cache.items())
+
+    def test_custom_sizes(self):
+        def size_of_list(lst):
+            return sum(len(x) for x in lst)
+        cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10,
+                                       compute_size=size_of_list)
+
+        cache.add('key1', ['val', 'ue']) # 5 chars
+        cache.add('key2', ['val', 'ue2']) # 6 chars
+        cache.add('key3', ['val', 'ue23']) # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+        cache['key2'] # reference key2 so it gets a newer reference time
+        cache.add('key4', ['value', '234']) # 8 chars, over limit
+        # We have to remove 3 keys to get back under limit
+        self.assertEqual(8, cache._value_size)
+        self.assertEqual({'key4':['value', '234']}, cache.items())
+
+    def test_cleanup(self):
+        cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10)
+
+        # Add these in order
+        cache.add('key1', 'value') # 5 chars
+        cache.add('key2', 'value2') # 6 chars
+        cache.add('key3', 'value23') # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+
+        cache.cleanup()
+        # Only the most recent fits after cleaning up
+        self.assertEqual(7, cache._value_size)
+
+    def test_keys(self):
+        cache = lru_cache.LRUSizeCache(max_size=10)
+
+        cache[1] = 'a'
+        cache[2] = 'b'
+        cache[3] = 'cdef'
+        self.assertEqual([1, 2, 3], sorted(cache.keys()))
+
+    def test_resize_smaller(self):
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=9)
+        cache[1] = 'abc'
+        cache[2] = 'def'
+        cache[3] = 'ghi'
+        cache[4] = 'jkl'
+        # Triggers a cleanup
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        # Resize should also cleanup again
+        cache.resize(max_size=6, after_cleanup_size=4)
+        self.assertEqual([4], sorted(cache.keys()))
+        # Adding should use the new max size
+        cache[5] = 'mno'
+        self.assertEqual([4, 5], sorted(cache.keys()))
+        cache[6] = 'pqr'
+        self.assertEqual([6], sorted(cache.keys()))
+
+    def test_resize_larger(self):
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=9)
+        cache[1] = 'abc'
+        cache[2] = 'def'
+        cache[3] = 'ghi'
+        cache[4] = 'jkl'
+        # Triggers a cleanup
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        cache.resize(max_size=15, after_cleanup_size=12)
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        cache[5] = 'mno'
+        cache[6] = 'pqr'
+        self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
+        cache[7] = 'stu'
+        self.assertEqual([4, 5, 6, 7], sorted(cache.keys()))
+