Browse Source

Make ShaFiles created from files trust their filenames.

This adds a dummy FixedSha class that implements the read-only part
of hashlib's hash interface but does not actually compute a SHA-1
digest. This allows us to assign ids to file objects based on their
filename rather than requiring a read of the whole file; SHA-1s will
soon be checked during check().

Added a filename_to_hex helper function to objects.py; refactored the
opposite hex_to_filename functionality into that file as well for
parallelism and testing. As a side effect, reorganized some files in
tests/data to have the normal 2/38 filename structure.

Change-Id: Ic459628aec32a92e29ea49cfd6cbe685053971ef
Dave Borowitz 15 years ago
parent
commit
5e7ba36ab6

+ 2 - 3
dulwich/object_store.py

@@ -39,6 +39,7 @@ from dulwich.objects import (
     Tree,
     hex_to_sha,
     sha_to_hex,
+    hex_to_filename,
     S_ISGITLINK,
     )
 from dulwich.pack import (
@@ -362,10 +363,8 @@ class DiskObjectStore(PackBasedObjectStore):
             raise
 
     def _get_shafile_path(self, sha):
-        dir = sha[:2]
-        file = sha[2:]
         # Check from object dir
-        return os.path.join(self.path, dir, file)
+        return hex_to_filename(self.path, sha)
 
     def _iter_loose_objects(self):
         for base in os.listdir(self.path):

+ 36 - 0
dulwich/objects.py

@@ -84,6 +84,27 @@ def hex_to_sha(hex):
     return binascii.unhexlify(hex)
 
 
+def hex_to_filename(path, hex):
+    """Return the object filename for a hex sha, located under path.
+
+    The first two characters of hex name the subdirectory and the
+    remaining characters name the file inside it.
+    """
+    return os.path.join(path, hex[:2], hex[2:])
+
+
+def filename_to_hex(filename):
+    """Takes an object filename and returns its corresponding hex sha.
+
+    Only the last two path components are examined: a 2-character
+    directory name followed by a 38-character file name, which together
+    form the 40-character hex sha.
+
+    :param filename: Path to an object file; split on os.path.sep.
+    :return: The hex sha formed by joining the last two path components.
+    :raise AssertionError: If the path does not end in a 2/38 pair of
+        components.
+    """
+    errmsg = "Invalid object filename: %s" % filename
+    # grab the last (up to) two path components
+    names = filename.rsplit(os.path.sep, 2)[-2:]
+    # Raise explicitly instead of using assert statements: asserts are
+    # removed when Python runs with -O, which would let malformed names
+    # through unchecked.
+    if len(names) != 2:
+        raise AssertionError(errmsg)
+    base, rest = names
+    if len(base) != 2 or len(rest) != 38:
+        raise AssertionError(errmsg)
+    hex = base + rest
+    # The result is discarded; the call is made only so that a string
+    # that is not valid hex raises here.
+    hex_to_sha(hex)
+    return hex
+
+
 def serializable_property(name, docstring=None):
     def set(obj, value):
         obj._ensure_parsed()
@@ -122,6 +143,20 @@ def check_identity(identity, error_msg):
         raise ObjectFormatException(error_msg)
 
 
+class FixedSha(object):
+    """Stand-in for a hashlib hash object that reports a preset value.
+
+    Only the read-only accessors digest() and hexdigest() are provided;
+    nothing is hashed.
+    """
+
+    def __init__(self, hexsha):
+        """Store hexsha along with its binary form from hex_to_sha."""
+        self._sha = hex_to_sha(hexsha)
+        self._hexsha = hexsha
+
+    def digest(self):
+        """Return the binary sha computed from the hex sha at init."""
+        return self._sha
+
+    def hexdigest(self):
+        """Return the hex sha exactly as it was passed to the constructor."""
+        return self._hexsha
+
+
 class ShaFile(object):
     """A git SHA file."""
 
@@ -282,6 +317,7 @@ class ShaFile(object):
         try:
             try:
                 obj = cls._parse_file_header(f)
+                obj._sha = FixedSha(filename_to_hex(filename))
                 obj._needs_parsing = True
                 obj._needs_serialization = True
                 return obj

+ 0 - 0
dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8 → dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8


+ 0 - 0
dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349 → dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349


+ 0 - 0
dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 → dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391


+ 0 - 0
dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310 → dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310


+ 0 - 0
dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc → dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc


+ 0 - 0
dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e → dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e


+ 0 - 0
dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023 → dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023


+ 0 - 0
dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6 → dulwich/tests/data/trees/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6


+ 8 - 7
dulwich/tests/test_objects.py

@@ -38,6 +38,7 @@ from dulwich.objects import (
     Tag,
     format_timezone,
     hex_to_sha,
+    hex_to_filename,
     check_hexsha,
     check_identity,
     parse_timezone,
@@ -89,11 +90,11 @@ except ImportError:
 
 class BlobReadTests(unittest.TestCase):
     """Test decompression of blobs"""
-  
-    def get_sha_file(self, obj, base, sha):
-        return obj.from_file(os.path.join(os.path.dirname(__file__),
-                                          'data', base, sha))
-  
+
+    def get_sha_file(self, cls, base, sha):
+        dir = os.path.join(os.path.dirname(__file__), 'data', base)
+        return cls.from_file(hex_to_filename(dir, sha))
+
     def get_blob(self, sha):
         """Return the blob named sha from the test data dir"""
         return self.get_sha_file(Blob, 'blobs', sha)
@@ -406,8 +407,8 @@ class TreeTests(ShaFileCheckTests):
         self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])
 
     def _do_test_parse_tree(self, parse_tree):
-        o = Tree.from_file(os.path.join(os.path.dirname(__file__), 'data',
-                                        'trees', tree_sha))
+        dir = os.path.join(os.path.dirname(__file__), 'data', 'trees')
+        o = Tree.from_file(hex_to_filename(dir, tree_sha))
         o._parse_file()
         self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
                           list(parse_tree(o.as_raw_string())))