Sfoglia il codice sorgente

Hash packed objects without creating ShaFiles.

Recent changes to ShaFile result in parsing files when created from
strings. Since the packed object code was just using ShaFile for
computing a SHA, this change eliminates that and hashes file contents
directly.

Change-Id: I70a14104fa896c248189f0839c24d02c48a43c4d
Dave Borowitz 15 anni fa
parent
commit
7f4238d888
2 ha cambiato i file con 18 aggiunte e 8 eliminazioni
  1. 6 1
      dulwich/objects.py
  2. 12 7
      dulwich/pack.py

+ 6 - 1
dulwich/objects.py

@@ -106,6 +106,11 @@ def filename_to_hex(filename):
     return hex
 
 
+def object_header(num_type, length):
+    """Return an object header for the given numeric type and text length."""
+    return "%s %d\0" % (object_class(num_type).type_name, length)
+
+
 def serializable_property(name, docstring=None):
     def set(obj, value):
         obj._ensure_parsed()
@@ -390,7 +395,7 @@ class ShaFile(object):
             raise ChecksumMismatch(new_sha, old_sha)
 
     def _header(self):
-        return "%s %lu\0" % (self.type_name, self.raw_length())
+        return object_header(self.type, self.raw_length())
 
     def raw_length(self):
         """Returns the length of the raw string of this object."""

+ 12 - 7
dulwich/pack.py

@@ -63,6 +63,7 @@ from dulwich.objects import (
     ShaFile,
     hex_to_sha,
     sha_to_hex,
+    object_header,
     )
 from dulwich.misc import (
     make_sha,
@@ -490,6 +491,15 @@ def _compute_object_size((num, obj)):
     return chunks_length(obj)
 
 
+def obj_sha(type, chunks):
+    """Compute the SHA for a numeric type and object chunks."""
+    sha = make_sha()
+    sha.update(object_header(type, chunks_length(chunks)))
+    for chunk in chunks:
+        sha.update(chunk)
+    return sha.digest()
+
+
 class PackData(object):
     """The data contained in a packfile.
 
@@ -668,10 +678,7 @@ class PackData(object):
             assert isinstance(type, int)
             assert isinstance(obj, list) or isinstance(obj, tuple)
             type, obj = self.resolve_object(offset, type, obj)
-            # TODO: hash the data directly to avoid object parsing overhead.
-            shafile = ShaFile.from_raw_chunks(type, obj)
-            sha = shafile.sha().digest()
-            yield sha, offset, crc32
+            yield obj_sha(type, obj), offset, crc32
 
     def sorted_entries(self, progress=None):
         """Return entries in this pack, sorted by SHA.
@@ -804,9 +811,7 @@ class ThinPackData(PackData):
                 # Save memory by not storing the inflated obj in postponed
                 postponed[e.sha].append((offset, type, None, crc32))
             else:
-                # TODO: hash the data directly to avoid object parsing overhead.
-                shafile = ShaFile.from_raw_chunks(type, obj)
-                sha = shafile.sha().digest()
+                sha = obj_sha(type, obj)
                 found[sha] = offset
                 yield sha, offset, crc32
                 extra.extend(postponed.pop(sha, []))