
Fixed #20536 -- rewrite of the file-based cache backend

 * Safer for use in multiprocess environments
 * Better random culling
 * Cache files use less disk space
 * Safer delete behavior

Also fixed #15806, fixed #15825.
Jaap Roes · 11 years ago
commit 7be638390e
3 changed files with 142 additions and 144 deletions
  1. django/core/cache/backends/filebased.py (+113, -113)
  2. docs/topics/cache.txt (+5, -9)
  3. tests/cache/tests.py (+24, -22)
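
The "safer for use in multiprocess environments" bullet comes from the new write path: set() writes each value to a temporary file and then moves it into place, so a concurrent reader never sees a half-written cache file. A minimal standalone sketch of that pattern (illustrative only; the actual code below uses django.core.files.move.file_move_safe, which also handles overwrites and cross-device moves):

    import os
    import tempfile

    def atomic_write(path, data):
        # Create the temp file in the destination directory so the final
        # rename stays on one filesystem; on POSIX, rename() is atomic.
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path))
        renamed = False
        try:
            with os.fdopen(fd, 'wb') as f:
                f.write(data)
            os.rename(tmp_path, path)
            renamed = True
        finally:
            if not renamed:
                os.remove(tmp_path)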

+ 113 - 113
django/core/cache/backends/filebased.py

@@ -1,156 +1,156 @@
 "File-based cache backend"
-
+import errno
+import glob
 import hashlib
+import io
 import os
-import shutil
+import random
+import tempfile
 import time
+import zlib
+from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
+from django.core.files.move import file_move_safe
+from django.utils.encoding import force_bytes
 try:
     from django.utils.six.moves import cPickle as pickle
 except ImportError:
     import pickle
 
-from django.core.cache.backends.base import BaseCache, DEFAULT_TIMEOUT
-from django.utils.encoding import force_bytes
-
 
 class FileBasedCache(BaseCache):
+    cache_suffix = '.djcache'
+
     def __init__(self, dir, params):
-        BaseCache.__init__(self, params)
-        self._dir = dir
-        if not os.path.exists(self._dir):
-            self._createdir()
+        super(FileBasedCache, self).__init__(params)
+        self._dir = os.path.abspath(dir)
+        self._createdir()
 
     def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
-        if self.has_key(key, version=version):
+        if self.has_key(key, version):
             return False
-
-        self.set(key, value, timeout, version=version)
+        self.set(key, value, timeout, version)
         return True
 
     def get(self, key, default=None, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-
-        fname = self._key_to_file(key)
-        try:
-            with open(fname, 'rb') as f:
-                exp = pickle.load(f)
-                now = time.time()
-                if exp is not None and exp < now:
-                    self._delete(fname)
-                else:
-                    return pickle.load(f)
-        except (IOError, OSError, EOFError, pickle.PickleError):
-            pass
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            try:
+                with io.open(fname, 'rb') as f:
+                    if not self._is_expired(f):
+                        return pickle.loads(zlib.decompress(f.read()))
+            except IOError as e:
+                if e.errno == errno.ENOENT:
+                    pass  # Cache file was removed after the exists check
         return default
 
     def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-
-        fname = self._key_to_file(key)
-        dirname = os.path.dirname(fname)
-
-        self._cull()
-
+        self._createdir()  # Cache dir can be deleted at any time.
+        fname = self._key_to_file(key, version)
+        self._cull()  # make some room if necessary
+        fd, tmp_path = tempfile.mkstemp(dir=self._dir)
+        renamed = False
         try:
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
-
-            with open(fname, 'wb') as f:
+            with io.open(fd, 'wb') as f:
                 expiry = self.get_backend_timeout(timeout)
-                pickle.dump(expiry, f, pickle.HIGHEST_PROTOCOL)
-                pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
-        except (IOError, OSError):
-            pass
+                f.write(pickle.dumps(expiry, -1))
+                f.write(zlib.compress(pickle.dumps(value), -1))
+            file_move_safe(tmp_path, fname, allow_overwrite=True)
+            renamed = True
+        finally:
+            if not renamed:
+                os.remove(tmp_path)
 
     def delete(self, key, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-        try:
-            self._delete(self._key_to_file(key))
-        except (IOError, OSError):
-            pass
+        self._delete(self._key_to_file(key, version))
 
     def _delete(self, fname):
-        os.remove(fname)
+        if not fname.startswith(self._dir) or not os.path.exists(fname):
+            return
         try:
-            # Remove the 2 subdirs if they're empty
-            dirname = os.path.dirname(fname)
-            os.rmdir(dirname)
-            os.rmdir(os.path.dirname(dirname))
-        except (IOError, OSError):
-            pass
+            os.remove(fname)
+        except OSError as e:
+            # ENOENT can happen if the cache file is removed (by another
+            # process) after the os.path.exists check.
+            if e.errno != errno.ENOENT:
+                raise
 
     def has_key(self, key, version=None):
-        key = self.make_key(key, version=version)
-        self.validate_key(key)
-        fname = self._key_to_file(key)
-        try:
-            with open(fname, 'rb') as f:
-                exp = pickle.load(f)
-            now = time.time()
-            if exp < now:
-                self._delete(fname)
-                return False
-            else:
-                return True
-        except (IOError, OSError, EOFError, pickle.PickleError):
-            return False
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            with io.open(fname, 'rb') as f:
+                return not self._is_expired(f)
+        return False
 
     def _cull(self):
-        if int(self._num_entries) < self._max_entries:
-            return
-
-        try:
-            filelist = sorted(os.listdir(self._dir))
-        except (IOError, OSError):
-            return
-
+        """
+        Removes a random selection of cache entries, culling
+        num_entries / cull_frequency of them once max_entries is reached.
+        A value of 0 for CULL_FREQUENCY means the entire cache is purged.
+        """
+        filelist = self._list_cache_files()
+        num_entries = len(filelist)
+        if num_entries < self._max_entries:
+            return  # return early if no culling is required
         if self._cull_frequency == 0:
-            doomed = filelist
-        else:
-            doomed = [os.path.join(self._dir, k) for (i, k) in enumerate(filelist) if i % self._cull_frequency == 0]
-
-        for topdir in doomed:
-            try:
-                for root, _, files in os.walk(topdir):
-                    for f in files:
-                        self._delete(os.path.join(root, f))
-            except (IOError, OSError):
-                pass
+            return self.clear()  # Clear the cache when CULL_FREQUENCY = 0
+        # Delete a random selection of entries
+        filelist = random.sample(filelist,
+                                 int(num_entries / self._cull_frequency))
+        for fname in filelist:
+            self._delete(fname)
 
     def _createdir(self):
-        try:
-            os.makedirs(self._dir)
-        except OSError:
-            raise EnvironmentError("Cache directory '%s' does not exist and could not be created'" % self._dir)
-
-    def _key_to_file(self, key):
+        if not os.path.exists(self._dir):
+            try:
+                os.makedirs(self._dir, 0o700)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise EnvironmentError(
+                        "Cache directory '%s' does not exist "
+                        "and could not be created'" % self._dir)
+
+    def _key_to_file(self, key, version=None):
+        """
+        Convert a key into a cache file path. Basically this is the
+        root cache path joined with the md5sum of the key and a suffix.
         """
-        Convert the filename into an md5 string. We'll turn the first couple
-        bits of the path into directory prefixes to be nice to filesystems
-        that have problems with large numbers of files in a directory.
+        key = self.make_key(key, version=version)
+        self.validate_key(key)
+        return os.path.join(self._dir, ''.join(
+            [hashlib.md5(force_bytes(key)).hexdigest(), self.cache_suffix]))
 
-        Thus, a cache key of "foo" gets turned into a file named
-        ``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
+    def clear(self):
+        """
+        Remove all the cache files.
         """
-        path = hashlib.md5(force_bytes(key)).hexdigest()
-        path = os.path.join(path[:2], path[2:4], path[4:])
-        return os.path.join(self._dir, path)
+        if not os.path.exists(self._dir):
+            return
+        for fname in self._list_cache_files():
+            self._delete(fname)
 
-    def _get_num_entries(self):
-        count = 0
-        for _, _, files in os.walk(self._dir):
-            count += len(files)
-        return count
-    _num_entries = property(_get_num_entries)
+    def _is_expired(self, f):
+        """
+        Takes an open cache file and determines if it has expired;
+        deletes the file if it has passed its expiry time.
+        """
+        exp = pickle.load(f)
+        if exp is not None and exp < time.time():
+            f.close()  # On Windows a file has to be closed before deleting
+            self._delete(f.name)
+            return True
+        return False
+
+    def _list_cache_files(self):
+        """
+        Get a list of paths to all the cache files. These are all the files
+        in the root cache dir that end with the cache_suffix.
+        """
+        if not os.path.exists(self._dir):
+            return []
+        filelist = [os.path.join(self._dir, fname) for fname
+                    in glob.glob1(self._dir, '*%s' % self.cache_suffix)]
+        return filelist
 
-    def clear(self):
-        try:
-            shutil.rmtree(self._dir)
-        except (IOError, OSError):
-            pass
 
 
 # For backwards compatibility
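
As the new set() and get() show, each cache file now holds two parts: a pickled expiry timestamp (or None) followed by the zlib-compressed pickle of the value, which is what makes the files smaller on disk. A small sketch for inspecting a .djcache file by hand (inspect_cache_file is illustrative only, not part of the backend):

    import pickle
    import time
    import zlib

    def inspect_cache_file(fname):
        with open(fname, 'rb') as f:
            # pickle.load() consumes exactly the first pickle (the expiry),
            # leaving the compressed payload as the rest of the file.
            expiry = pickle.load(f)
            value = pickle.loads(zlib.decompress(f.read()))
        expired = expiry is not None and expiry < time.time()
        return expiry, value, expired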

+ 5 - 9
docs/topics/cache.txt

@@ -253,10 +253,11 @@ model.
 Filesystem caching
 ------------------
 
-To store cached items on a filesystem, use
-``"django.core.cache.backends.filebased.FileBasedCache"`` for
-:setting:`BACKEND <CACHES-BACKEND>`. For example, to store cached data in
-``/var/tmp/django_cache``, use this setting::
+The file-based backend serializes and stores each cache value as a separate
+file. To use this backend set :setting:`BACKEND <CACHES-BACKEND>` to
+``"django.core.cache.backends.filebased.FileBasedCache"`` and
+:setting:`LOCATION <CACHES-LOCATION>` to a suitable directory. For example,
+to store cached data in ``/var/tmp/django_cache``, use this setting::
 
     CACHES = {
         'default': {
@@ -265,7 +266,6 @@ To store cached items on a filesystem, use
         }
     }
 
-
 If you're on Windows, put the drive letter at the beginning of the path,
 like this::
 
@@ -286,10 +286,6 @@ above example, if your server runs as the user ``apache``, make sure the
 directory ``/var/tmp/django_cache`` exists and is readable and writable by the
 user ``apache``.
 
-Each cache value will be stored as a separate file whose contents are the
-cache data saved in a serialized ("pickled") format, using Python's ``pickle``
-module. Each file's name is the cache key, escaped for safe filesystem use.
-
 Local-memory caching
 --------------------
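
Once the file-based backend is configured as in the sample setting above, it is used through Django's standard low-level cache API; for example, with the ``default`` alias:

    from django.core.cache import cache

    cache.set('greeting', 'hello', timeout=60)  # stored as a .djcache file
    cache.get('greeting')                       # 'hello'
    cache.get('missing', 'n/a')                 # default for absent keys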
 

+ 24 - 22
tests/cache/tests.py

@@ -1076,32 +1076,34 @@ class FileBasedCacheTests(unittest.TestCase, BaseCacheTests):
 
     def tearDown(self):
         self.cache.clear()
+        os.rmdir(self.dirname)
 
-    def test_hashing(self):
-        """Test that keys are hashed into subdirectories correctly"""
-        self.cache.set("foo", "bar")
-        key = self.cache.make_key("foo")
-        keyhash = hashlib.md5(key.encode()).hexdigest()
-        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
-        self.assertTrue(os.path.exists(keypath))
+    def test_cull(self):
+        self.perform_cull_test(50, 29)
 
-    def test_subdirectory_removal(self):
-        """
-        Make sure that the created subdirectories are correctly removed when empty.
-        """
-        self.cache.set("foo", "bar")
-        key = self.cache.make_key("foo")
-        keyhash = hashlib.md5(key.encode()).hexdigest()
-        keypath = os.path.join(self.dirname, keyhash[:2], keyhash[2:4], keyhash[4:])
-        self.assertTrue(os.path.exists(keypath))
+    def test_ignores_non_cache_files(self):
+        fname = os.path.join(self.dirname, 'not-a-cache-file')
+        with open(fname, 'w'):
+            os.utime(fname, None)
+        self.cache.clear()
+        self.assertTrue(os.path.exists(fname),
+                        'Expected cache.clear to ignore non cache files')
+        os.remove(fname)
 
-        self.cache.delete("foo")
-        self.assertTrue(not os.path.exists(keypath))
-        self.assertTrue(not os.path.exists(os.path.dirname(keypath)))
-        self.assertTrue(not os.path.exists(os.path.dirname(os.path.dirname(keypath))))
+    def test_clear_does_not_remove_cache_dir(self):
+        self.cache.clear()
+        self.assertTrue(os.path.exists(self.dirname),
+                        'Expected cache.clear to keep the cache dir')
 
-    def test_cull(self):
-        self.perform_cull_test(50, 29)
+    def test_creates_cache_dir_if_nonexistent(self):
+        os.rmdir(self.dirname)
+        self.cache.set('foo', 'bar')
+        self.assertTrue(os.path.exists(self.dirname))
+
+    def test_zero_cull(self):
+        # Regression test for #15806
+        self.cache = get_cache(self.backend_name, LOCATION=self.dirname,
+                               OPTIONS={'MAX_ENTRIES': 30, 'CULL_FREQUENCY': 0})
+        self.perform_cull_test(50, 19)
 
 
 class CustomCacheKeyValidationTests(unittest.TestCase):
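
The expected counts in these cull tests follow directly from the new _cull() arithmetic. A rough model, assuming the shared perform_cull_test helper issues initial_count - 1 set() calls against a cache with MAX_ENTRIES=30 and the default CULL_FREQUENCY of 3 (both assumptions about the test harness, which is not shown in this diff):

    def simulate_cull(sets, max_entries=30, cull_frequency=3):
        # Before each write, once the entry count reaches max_entries,
        # _cull deletes num_entries / cull_frequency random entries,
        # or everything when cull_frequency == 0.
        entries = 0
        for _ in range(sets):
            if entries >= max_entries:
                if cull_frequency == 0:
                    entries = 0
                else:
                    entries -= int(entries / cull_frequency)
            entries += 1
        return entries

    simulate_cull(49)                    # 29, matching test_cull
    simulate_cull(49, cull_frequency=0)  # 19, matching test_zero_cull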