Import new lru_cache and tests.

Jelmer Vernooij, 16 years ago
commit 19064d1e49
2 changed files with 640 additions and 79 deletions
  1. dulwich/lru_cache.py (+193, -79)
  2. dulwich/tests/test_lru_cache.py (+447, -0)

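For orientation before the diff, a minimal usage sketch of the interface this commit imports. It is based only on the signatures visible in the diff below; the callback name on_evict is illustrative, not part of the commit.

    from dulwich.lru_cache import LRUCache, LRUSizeCache

    def on_evict(key, value):
        # Called whenever an entry leaves the cache (eviction, replacement,
        # or clear()).
        print 'evicted %r' % (key,)

    # Keep at most 100 entries; once over the limit, shrink back to 80.
    cache = LRUCache(max_cache=100, after_cleanup_count=80)
    cache['foo'] = 'bar'                  # __setitem__, no cleanup function
    cache.add('baz', 'biz', cleanup=on_evict)
    cache.get('foo')                      # bumps 'foo' to most-recently-used

    # Size-based variant: bounded by total compute_size(value), not count.
    sized = LRUSizeCache(max_size=1024*1024, after_cleanup_size=512*1024)
    sized.add('blob-sha', 'x' * 4096)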
dulwich/lru_cache.py (+193, -79)

@@ -12,11 +12,42 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 """A simple least-recently-used (LRU) cache."""
 
-from collections import deque
+_null_key = object()
+
+class _LRUNode(object):
+    """This maintains the linked-list which is the lru internals."""
+
+    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
+
+    def __init__(self, key, value, cleanup=None):
+        self.prev = None
+        self.next_key = _null_key
+        self.key = key
+        self.value = value
+        self.cleanup = cleanup
+        # TODO: We could compute this 'on-the-fly' like we used to, and remove
+        #       one pointer from this object, we just need to decide if it
+        #       actually costs us much of anything in normal usage
+        self.size = None
+
+    def __repr__(self):
+        if self.prev is None:
+            prev_key = None
+        else:
+            prev_key = self.prev.key
+        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
+                                     self.next_key, prev_key)
+
+    def run_cleanup(self):
+        if self.cleanup is not None:
+            self.cleanup(self.key, self.value)
+        self.cleanup = None
+        # Just make sure to break any refcycles, etc
+        self.value = None
 
 
 class LRUCache(object):
@@ -24,48 +55,117 @@ class LRUCache(object):
 
     def __init__(self, max_cache=100, after_cleanup_count=None):
         self._cache = {}
-        self._cleanup = {}
-        self._queue = deque() # Track when things are accessed
-        self._refcount = {} # number of entries in self._queue for each key
+        # The "HEAD" of the lru linked list
+        self._most_recently_used = None
+        # The "TAIL" of the lru linked list
+        self._least_recently_used = None
         self._update_max_cache(max_cache, after_cleanup_count)
 
     def __contains__(self, key):
         return key in self._cache
 
     def __getitem__(self, key):
-        val = self._cache[key]
-        self._record_access(key)
-        return val
+        cache = self._cache
+        node = cache[key]
+        # Inlined from _record_access to decrease the overhead of __getitem__
+        # We also have more knowledge about structure if __getitem__ is
+        # succeeding, then we know that self._most_recently_used must not be
+        # None, etc.
+        mru = self._most_recently_used
+        if node is mru:
+            # Nothing to do, this node is already at the head of the queue
+            return node.value
+        # Remove this node from the old location
+        node_prev = node.prev
+        next_key = node.next_key
+        # benchmarking shows that the lookup of _null_key in globals is faster
+        # than the attribute lookup for (node is self._least_recently_used)
+        if next_key is _null_key:
+            # 'node' is the _least_recently_used, because it doesn't have a
+            # 'next' item. So move the current lru to the previous node.
+            self._least_recently_used = node_prev
+        else:
+            node_next = cache[next_key]
+            node_next.prev = node_prev
+        node_prev.next_key = next_key
+        # Insert this node at the front of the list
+        node.next_key = mru.key
+        mru.prev = node
+        self._most_recently_used = node
+        node.prev = None
+        return node.value
 
     def __len__(self):
         return len(self._cache)
 
+    def _walk_lru(self):
+        """Walk the LRU list, only meant to be used in tests."""
+        node = self._most_recently_used
+        if node is not None:
+            if node.prev is not None:
+                raise AssertionError('the _most_recently_used entry is not'
+                                     ' supposed to have a previous entry'
+                                     ' %s' % (node,))
+        while node is not None:
+            if node.next_key is _null_key:
+                if node is not self._least_recently_used:
+                    raise AssertionError('only the last node should have'
+                                         ' no next value: %s' % (node,))
+                node_next = None
+            else:
+                node_next = self._cache[node.next_key]
+                if node_next.prev is not node:
+                    raise AssertionError('inconsistency found, node.next.prev'
+                                         ' != node: %s' % (node,))
+            if node.prev is None:
+                if node is not self._most_recently_used:
+                    raise AssertionError('only the _most_recently_used should'
+                                         ' not have a previous node: %s'
+                                         % (node,))
+            else:
+                if node.prev.next_key != node.key:
+                    raise AssertionError('inconsistency found, node.prev.next'
+                                         ' != node: %s' % (node,))
+            yield node
+            node = node_next
+
     def add(self, key, value, cleanup=None):
         """Add a new value to the cache.
 
-        Also, if the entry is ever removed from the queue, call cleanup.
-        Passing it the key and value being removed.
+        Also, if the entry is ever removed from the cache, call
+        cleanup(key, value).
 
         :param key: The key to store it under
         :param value: The object to store
         :param cleanup: None or a function taking (key, value) to indicate
-                        'value' sohuld be cleaned up.
+                        'value' should be cleaned up.
         """
+        if key is _null_key:
+            raise ValueError('cannot use _null_key as a key')
         if key in self._cache:
-            self._remove(key)
-        self._cache[key] = value
-        if cleanup is not None:
-            self._cleanup[key] = cleanup
-        self._record_access(key)
+            node = self._cache[key]
+            node.run_cleanup()
+            node.value = value
+            node.cleanup = cleanup
+        else:
+            node = _LRUNode(key, value, cleanup=cleanup)
+            self._cache[key] = node
+        self._record_access(node)
 
         if len(self._cache) > self._max_cache:
             # Trigger the cleanup
             self.cleanup()
 
+    def cache_size(self):
+        """Get the number of entries we will cache."""
+        return self._max_cache
+
     def get(self, key, default=None):
-        if key in self._cache:
-            return self[key]
-        return default
+        node = self._cache.get(key, None)
+        if node is None:
+            return default
+        self._record_access(node)
+        return node.value
 
     def keys(self):
         """Get the list of keys currently cached.
@@ -78,6 +178,10 @@ class LRUCache(object):
         """
         return self._cache.keys()
 
+    def items(self):
+        """Get the key:value pairs as a dict."""
+        return dict((k, n.value) for k, n in self._cache.iteritems())
+
     def cleanup(self):
         """Clear the cache until it shrinks to the requested size.
 
@@ -87,45 +191,54 @@ class LRUCache(object):
         # Make sure the cache is shrunk to the correct size
         while len(self._cache) > self._after_cleanup_count:
             self._remove_lru()
-        # No need to compact the queue at this point, because the code that
-        # calls this would have already triggered it based on queue length
 
     def __setitem__(self, key, value):
         """Add a value to the cache, there will be no cleanup function."""
         self.add(key, value, cleanup=None)
 
-    def _record_access(self, key):
+    def _record_access(self, node):
         """Record that key was accessed."""
-        self._queue.append(key)
-        # Can't use setdefault because you can't += 1 the result
-        self._refcount[key] = self._refcount.get(key, 0) + 1
-
-        # If our access queue is too large, clean it up too
-        if len(self._queue) > self._compact_queue_length:
-            self._compact_queue()
-
-    def _compact_queue(self):
-        """Compact the queue, leaving things in sorted last appended order."""
-        new_queue = deque()
-        for item in self._queue:
-            if self._refcount[item] == 1:
-                new_queue.append(item)
-            else:
-                self._refcount[item] -= 1
-        self._queue = new_queue
-        # All entries should be of the same size. There should be one entry in
-        # queue for each entry in cache, and all refcounts should == 1
-        if not (len(self._queue) == len(self._cache) ==
-                len(self._refcount) == sum(self._refcount.itervalues())):
-            raise AssertionError()
-
-    def _remove(self, key):
-        """Remove an entry, making sure to maintain the invariants."""
-        cleanup = self._cleanup.pop(key, None)
-        val = self._cache.pop(key)
-        if cleanup is not None:
-            cleanup(key, val)
-        return val
+        # Move 'node' to the front of the queue
+        if self._most_recently_used is None:
+            self._most_recently_used = node
+            self._least_recently_used = node
+            return
+        elif node is self._most_recently_used:
+            # Nothing to do, this node is already at the head of the queue
+            return
+        # We've taken care of the tail pointer, remove the node, and insert it
+        # at the front
+        # REMOVE
+        if node is self._least_recently_used:
+            self._least_recently_used = node.prev
+        if node.prev is not None:
+            node.prev.next_key = node.next_key
+        if node.next_key is not _null_key:
+            node_next = self._cache[node.next_key]
+            node_next.prev = node.prev
+        # INSERT
+        node.next_key = self._most_recently_used.key
+        self._most_recently_used.prev = node
+        self._most_recently_used = node
+        node.prev = None
+
+    def _remove_node(self, node):
+        if node is self._least_recently_used:
+            self._least_recently_used = node.prev
+        self._cache.pop(node.key)
+        # If we have removed all entries, remove the head pointer as well
+        if self._least_recently_used is None:
+            self._most_recently_used = None
+        node.run_cleanup()
+        # Now remove this node from the linked list
+        if node.prev is not None:
+            node.prev.next_key = node.next_key
+        if node.next_key is not _null_key:
+            node_next = self._cache[node.next_key]
+            node_next.prev = node.prev
+        # And remove this node's pointers
+        node.prev = None
+        node.next_key = _null_key
 
     def _remove_lru(self):
         """Remove one entry from the lru, and handle consequences.
@@ -133,11 +246,7 @@ class LRUCache(object):
         If there are no more references to the lru, then this entry should be
         removed from the cache.
         """
-        key = self._queue.popleft()
-        self._refcount[key] -= 1
-        if not self._refcount[key]:
-            del self._refcount[key]
-            self._remove(key)
+        self._remove_node(self._least_recently_used)
 
     def clear(self):
         """Clear out all of the cache."""
@@ -155,11 +264,8 @@ class LRUCache(object):
         if after_cleanup_count is None:
             self._after_cleanup_count = self._max_cache * 8 / 10
         else:
-            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
-
-        self._compact_queue_length = 4*self._max_cache
-        if len(self._queue) > self._compact_queue_length:
-            self._compact_queue()
+            self._after_cleanup_count = min(after_cleanup_count,
+                                            self._max_cache)
         self.cleanup()
 
 
@@ -169,7 +275,8 @@ class LRUSizeCache(LRUCache):
     This differs in that it doesn't care how many actual items there are,
     it just restricts the cache to be cleaned up after so much data is stored.
 
-    The values that are added must support len(value).
+    The size of items added will be computed using compute_size(value), which
+    defaults to len() if not supplied.
     """
 
     def __init__(self, max_size=1024*1024, after_cleanup_size=None,
@@ -191,33 +298,41 @@ class LRUSizeCache(LRUCache):
         self._compute_size = compute_size
         if compute_size is None:
             self._compute_size = len
-        # This approximates that texts are > 0.5k in size. It only really
-        # effects when we clean up the queue, so we don't want it to be too
-        # large.
         self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
         LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
 
     def add(self, key, value, cleanup=None):
         """Add a new value to the cache.
 
-        Also, if the entry is ever removed from the queue, call cleanup.
-        Passing it the key and value being removed.
+        Also, if the entry is ever removed from the cache, call
+        cleanup(key, value).
 
         :param key: The key to store it under
         :param value: The object to store
         :param cleanup: None or a function taking (key, value) to indicate
-                        'value' sohuld be cleaned up.
+                        'value' should be cleaned up.
         """
-        if key in self._cache:
-            self._remove(key)
+        if key is _null_key:
+            raise ValueError('cannot use _null_key as a key')
+        node = self._cache.get(key, None)
         value_len = self._compute_size(value)
         if value_len >= self._after_cleanup_size:
+            # The new value is 'too big to fit', as it would fill up/overflow
+            # the cache all by itself
+            if node is not None:
+                # We won't be replacing the old node, so just remove it
+                self._remove_node(node)
+            if cleanup is not None:
+                cleanup(key, value)
             return
+        if node is None:
+            node = _LRUNode(key, value, cleanup=cleanup)
+            self._cache[key] = node
+        else:
+            self._value_size -= node.size
+        node.size = value_len
         self._value_size += value_len
         self._value_size += value_len
-        self._cache[key] = value
-        if cleanup is not None:
-            self._cleanup[key] = cleanup
-        self._record_access(key)
+        self._record_access(node)
 
         if self._value_size > self._max_size:
             # Time to cleanup
@@ -233,10 +348,9 @@ class LRUSizeCache(LRUCache):
         while self._value_size > self._after_cleanup_size:
             self._remove_lru()
 
-    def _remove(self, key):
-        """Remove an entry, making sure to maintain the invariants."""
-        val = LRUCache._remove(self, key)
-        self._value_size -= self._compute_size(val)
+    def _remove_node(self, node):
+        self._value_size -= node.size
+        LRUCache._remove_node(self, node)
 
     def resize(self, max_size, after_cleanup_size=None):
         """Change the number of bytes that will be cached."""

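The heart of the rewrite above: the deque-plus-refcount bookkeeping is replaced by an intrusive doubly-linked list threaded through the cache dict. Backward links are direct node references (node.prev), forward links are keys (node.next_key, resolved by another _cache lookup), and the _null_key sentinel marks the tail. A sketch of walking that chain, in the spirit of _walk_lru; it reaches into private attributes purely for illustration:

    from dulwich import lru_cache

    def mru_to_lru(cache):
        """Yield (key, value) from most- to least-recently-used."""
        node = cache._most_recently_used       # head: node.prev is None
        while node is not None:
            yield node.key, node.value
            if node.next_key is lru_cache._null_key:
                node = None                    # tail: _least_recently_used
            else:
                node = cache._cache[node.next_key]

    cache = lru_cache.LRUCache(max_cache=3)
    cache[1] = 'a'
    cache[2] = 'b'
    cache[1]                                   # touch 1: now most recent
    assert [k for k, _ in mru_to_lru(cache)] == [1, 2]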
dulwich/tests/test_lru_cache.py (+447, -0)

@@ -0,0 +1,447 @@
+# Copyright (C) 2006, 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for the lru_cache module."""
+
+from dulwich import (
+    lru_cache,
+    )
+import unittest
+
+
+class TestLRUCache(unittest.TestCase):
+    """Test that LRU cache properly keeps track of entries."""
+
+    def test_cache_size(self):
+        cache = lru_cache.LRUCache(max_cache=10)
+        self.assertEqual(10, cache.cache_size())
+
+        cache = lru_cache.LRUCache(max_cache=256)
+        self.assertEqual(256, cache.cache_size())
+
+        cache.resize(512)
+        self.assertEqual(512, cache.cache_size())
+
+    def test_missing(self):
+        cache = lru_cache.LRUCache(max_cache=10)
+
+        self.failIf('foo' in cache)
+        self.assertRaises(KeyError, cache.__getitem__, 'foo')
+
+        cache['foo'] = 'bar'
+        self.assertEqual('bar', cache['foo'])
+        self.failUnless('foo' in cache)
+        self.failIf('bar' in cache)
+
+    def test_map_None(self):
+        # Make sure that we can properly map None as a key.
+        cache = lru_cache.LRUCache(max_cache=10)
+        self.failIf(None in cache)
+        cache[None] = 1
+        self.assertEqual(1, cache[None])
+        cache[None] = 2
+        self.assertEqual(2, cache[None])
+        # Test the various code paths of __getitem__, to make sure that we can
+        # handle when None is the key for the LRU and the MRU
+        cache[1] = 3
+        cache[None] = 1
+        cache[None]
+        cache[1]
+        cache[None]
+        self.assertEqual([None, 1], [n.key for n in cache._walk_lru()])
+
+    def test_add__null_key(self):
+        cache = lru_cache.LRUCache(max_cache=10)
+        self.assertRaises(ValueError, cache.add, lru_cache._null_key, 1)
+
+    def test_overflow(self):
+        """Adding extra entries will pop out old ones."""
+        cache = lru_cache.LRUCache(max_cache=1, after_cleanup_count=1)
+
+        cache['foo'] = 'bar'
+        # With a max cache of 1, adding 'baz' should pop out 'foo'
+        cache['baz'] = 'biz'
+
+        self.failIf('foo' in cache)
+        self.failUnless('baz' in cache)
+
+        self.assertEqual('biz', cache['baz'])
+
+    def test_by_usage(self):
+        """Accessing entries bumps them up in priority."""
+        cache = lru_cache.LRUCache(max_cache=2)
+
+        cache['baz'] = 'biz'
+        cache['foo'] = 'bar'
+
+        self.assertEqual('biz', cache['baz'])
+
+        # This must kick out 'foo' because it was the last accessed
+        cache['nub'] = 'in'
+
+        self.failIf('foo' in cache)
+
+    def test_cleanup(self):
+        """Test that we can use a cleanup function."""
+        cleanup_called = []
+        def cleanup_func(key, val):
+            cleanup_called.append((key, val))
+
+        cache = lru_cache.LRUCache(max_cache=2)
+
+        cache.add('baz', '1', cleanup=cleanup_func)
+        cache.add('foo', '2', cleanup=cleanup_func)
+        cache.add('biz', '3', cleanup=cleanup_func)
+
+        self.assertEqual([('baz', '1')], cleanup_called)
+
+        # 'foo' is now most recent, so final cleanup will call it last
+        cache['foo']
+        cache.clear()
+        self.assertEqual([('baz', '1'), ('biz', '3'), ('foo', '2')],
+                         cleanup_called)
+
+    def test_cleanup_on_replace(self):
+        """Replacing an object should cleanup the old value."""
+        cleanup_called = []
+        def cleanup_func(key, val):
+            cleanup_called.append((key, val))
+
+        cache = lru_cache.LRUCache(max_cache=2)
+        cache.add(1, 10, cleanup=cleanup_func)
+        cache.add(2, 20, cleanup=cleanup_func)
+        cache.add(2, 25, cleanup=cleanup_func)
+
+        self.assertEqual([(2, 20)], cleanup_called)
+        self.assertEqual(25, cache[2])
+
+        # Even __setitem__ should make sure cleanup() is called
+        cache[2] = 26
+        self.assertEqual([(2, 20), (2, 25)], cleanup_called)
+
+    def test_len(self):
+        cache = lru_cache.LRUCache(max_cache=10, after_cleanup_count=10)
+
+        cache[1] = 10
+        cache[2] = 20
+        cache[3] = 30
+        cache[4] = 40
+
+        self.assertEqual(4, len(cache))
+
+        cache[5] = 50
+        cache[6] = 60
+        cache[7] = 70
+        cache[8] = 80
+
+        self.assertEqual(8, len(cache))
+
+        cache[1] = 15 # replacement
+
+        self.assertEqual(8, len(cache))
+
+        cache[9] = 90
+        cache[10] = 100
+        cache[11] = 110
+
+        # We hit the max
+        self.assertEqual(10, len(cache))
+        self.assertEqual([11, 10, 9, 1, 8, 7, 6, 5, 4, 3],
+                         [n.key for n in cache._walk_lru()])
+
+    def test_cleanup_shrinks_to_after_clean_count(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=3)
+
+        cache.add(1, 10)
+        cache.add(2, 20)
+        cache.add(3, 25)
+        cache.add(4, 30)
+        cache.add(5, 35)
+
+        self.assertEqual(5, len(cache))
+        # This will bump us over the max, which causes us to shrink down to
+        # after_cleanup_count size
+        cache.add(6, 40)
+        self.assertEqual(3, len(cache))
+
+    def test_after_cleanup_larger_than_max(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=10)
+        self.assertEqual(5, cache._after_cleanup_count)
+
+    def test_after_cleanup_none(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=None)
+        # By default _after_cleanup_size is 80% of the normal size
+        self.assertEqual(4, cache._after_cleanup_count)
+
+    def test_cleanup_explicit(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=2)
+
+        # Add these in order
+        cache.add(1, 10)
+        cache.add(2, 20)
+        cache.add(3, 25)
+        cache.add(4, 30)
+        cache.add(5, 35)
+
+        self.assertEqual(5, len(cache))
+        # Force a compaction
+        cache.cleanup()
+        self.assertEqual(2, len(cache))
+
+    def test_preserve_last_access_order(self):
+        cache = lru_cache.LRUCache(max_cache=5)
+
+        # Add these in order
+        cache.add(1, 10)
+        cache.add(2, 20)
+        cache.add(3, 25)
+        cache.add(4, 30)
+        cache.add(5, 35)
+
+        self.assertEqual([5, 4, 3, 2, 1], [n.key for n in cache._walk_lru()])
+
+        # Now access some randomly
+        cache[2]
+        cache[5]
+        cache[3]
+        cache[2]
+        self.assertEqual([2, 3, 5, 4, 1], [n.key for n in cache._walk_lru()])
+
+    def test_get(self):
+        cache = lru_cache.LRUCache(max_cache=5)
+
+        cache.add(1, 10)
+        cache.add(2, 20)
+        self.assertEqual(20, cache.get(2))
+        self.assertEqual(None, cache.get(3))
+        obj = object()
+        self.assertTrue(obj is cache.get(3, obj))
+        self.assertEqual([2, 1], [n.key for n in cache._walk_lru()])
+        self.assertEqual(10, cache.get(1))
+        self.assertEqual([1, 2], [n.key for n in cache._walk_lru()])
+
+    def test_keys(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=5)
+
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        self.assertEqual([1, 2, 3], sorted(cache.keys()))
+        cache[4] = 5
+        cache[5] = 6
+        cache[6] = 7
+        self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
+
+    def test_resize_smaller(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=4)
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        cache[4] = 5
+        cache[5] = 6
+        self.assertEqual([1, 2, 3, 4, 5], sorted(cache.keys()))
+        cache[6] = 7
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        # Now resize to something smaller, which triggers a cleanup
+        cache.resize(max_cache=3, after_cleanup_count=2)
+        self.assertEqual([5, 6], sorted(cache.keys()))
+        # Adding something will use the new size
+        cache[7] = 8
+        self.assertEqual([5, 6, 7], sorted(cache.keys()))
+        cache[8] = 9
+        self.assertEqual([7, 8], sorted(cache.keys()))
+
+    def test_resize_larger(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=4)
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        cache[4] = 5
+        cache[5] = 6
+        self.assertEqual([1, 2, 3, 4, 5], sorted(cache.keys()))
+        cache[6] = 7
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        cache.resize(max_cache=8, after_cleanup_count=6)
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        cache[7] = 8
+        cache[8] = 9
+        cache[9] = 10
+        cache[10] = 11
+        self.assertEqual([3, 4, 5, 6, 7, 8, 9, 10], sorted(cache.keys()))
+        cache[11] = 12 # triggers cleanup back to new after_cleanup_count
+        self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
+
+
+class TestLRUSizeCache(unittest.TestCase):
+
+    def test_basic_init(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertEqual(2048, cache._max_cache)
+        self.assertEqual(int(cache._max_size*0.8), cache._after_cleanup_size)
+        self.assertEqual(0, cache._value_size)
+
+    def test_add__null_key(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertRaises(ValueError, cache.add, lru_cache._null_key, 1)
+
+    def test_add_tracks_size(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertEqual(0, cache._value_size)
+        cache.add('my key', 'my value text')
+        self.assertEqual(13, cache._value_size)
+
+    def test_remove_tracks_size(self):
+        cache = lru_cache.LRUSizeCache()
+        self.assertEqual(0, cache._value_size)
+        cache.add('my key', 'my value text')
+        self.assertEqual(13, cache._value_size)
+        node = cache._cache['my key']
+        cache._remove_node(node)
+        self.assertEqual(0, cache._value_size)
+
+    def test_no_add_over_size(self):
+        """Adding a large value may not be cached at all."""
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=5)
+        self.assertEqual(0, cache._value_size)
+        self.assertEqual({}, cache.items())
+        cache.add('test', 'key')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test': 'key'}, cache.items())
+        cache.add('test2', 'key that is too big')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test':'key'}, cache.items())
+        # If we would add a key, only to cleanup and remove all cached entries,
+        # then obviously that value should not be stored
+        cache.add('test3', 'bigkey')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test':'key'}, cache.items())
+
+        cache.add('test4', 'bikey')
+        self.assertEqual(3, cache._value_size)
+        self.assertEqual({'test':'key'}, cache.items())
+
+    def test_no_add_over_size_cleanup(self):
+        """If a large value is not cached, we will call cleanup right away."""
+        cleanup_calls = []
+        def cleanup(key, value):
+            cleanup_calls.append((key, value))
+
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=5)
+        self.assertEqual(0, cache._value_size)
+        self.assertEqual({}, cache.items())
+        cache.add('test', 'key that is too big', cleanup=cleanup)
+        # key was not added
+        self.assertEqual(0, cache._value_size)
+        self.assertEqual({}, cache.items())
+        # and cleanup was called
+        self.assertEqual([('test', 'key that is too big')], cleanup_calls)
+
+    def test_adding_clears_cache_based_on_size(self):
+        """The cache is cleared in LRU order until small enough"""
+        cache = lru_cache.LRUSizeCache(max_size=20)
+        cache.add('key1', 'value') # 5 chars
+        cache.add('key2', 'value2') # 6 chars
+        cache.add('key3', 'value23') # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+        cache['key2'] # reference key2 so it gets a newer reference time
+        cache.add('key4', 'value234') # 8 chars, over limit
+        # We have to remove 2 keys to get back under limit
+        self.assertEqual(6+8, cache._value_size)
+        self.assertEqual({'key2':'value2', 'key4':'value234'},
+                         cache.items())
+
+    def test_adding_clears_to_after_cleanup_size(self):
+        cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10)
+        cache.add('key1', 'value') # 5 chars
+        cache.add('key2', 'value2') # 6 chars
+        cache.add('key3', 'value23') # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+        cache['key2'] # reference key2 so it gets a newer reference time
+        cache.add('key4', 'value234') # 8 chars, over limit
+        # We have to remove 3 keys to get back under limit
+        self.assertEqual(8, cache._value_size)
+        self.assertEqual({'key4':'value234'}, cache.items())
+
+    def test_custom_sizes(self):
+        def size_of_list(lst):
+            return sum(len(x) for x in lst)
+        cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10,
+                                       compute_size=size_of_list)
+
+        cache.add('key1', ['val', 'ue']) # 5 chars
+        cache.add('key2', ['val', 'ue2']) # 6 chars
+        cache.add('key3', ['val', 'ue23']) # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+        cache['key2'] # reference key2 so it gets a newer reference time
+        cache.add('key4', ['value', '234']) # 8 chars, over limit
+        # We have to remove 3 keys to get back under limit
+        self.assertEqual(8, cache._value_size)
+        self.assertEqual({'key4':['value', '234']}, cache.items())
+
+    def test_cleanup(self):
+        cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10)
+
+        # Add these in order
+        cache.add('key1', 'value') # 5 chars
+        cache.add('key2', 'value2') # 6 chars
+        cache.add('key3', 'value23') # 7 chars
+        self.assertEqual(5+6+7, cache._value_size)
+
+        cache.cleanup()
+        # Only the most recent fits after cleaning up
+        self.assertEqual(7, cache._value_size)
+
+    def test_keys(self):
+        cache = lru_cache.LRUSizeCache(max_size=10)
+
+        cache[1] = 'a'
+        cache[2] = 'b'
+        cache[3] = 'cdef'
+        self.assertEqual([1, 2, 3], sorted(cache.keys()))
+
+    def test_resize_smaller(self):
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=9)
+        cache[1] = 'abc'
+        cache[2] = 'def'
+        cache[3] = 'ghi'
+        cache[4] = 'jkl'
+        # Triggers a cleanup
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        # Resize should also cleanup again
+        cache.resize(max_size=6, after_cleanup_size=4)
+        self.assertEqual([4], sorted(cache.keys()))
+        # Adding should use the new max size
+        cache[5] = 'mno'
+        self.assertEqual([4, 5], sorted(cache.keys()))
+        cache[6] = 'pqr'
+        self.assertEqual([6], sorted(cache.keys()))
+
+    def test_resize_larger(self):
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=9)
+        cache[1] = 'abc'
+        cache[2] = 'def'
+        cache[3] = 'ghi'
+        cache[4] = 'jkl'
+        # Triggers a cleanup
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        cache.resize(max_size=15, after_cleanup_size=12)
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        cache[5] = 'mno'
+        cache[6] = 'pqr'
+        self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
+        cache[7] = 'stu'
+        self.assertEqual([4, 5, 6, 7], sorted(cache.keys()))
+