소스 검색

Hash packed objects without creating ShaFiles.

Recent changes to ShaFile result in parsing files when created from
strings. Since the packed object code was just using ShaFile for
computing a SHA, this change eliminates that and hashes file contents
directly.

Change-Id: I70a14104fa896c248189f0839c24d02c48a43c4d
Dave Borowitz 15 년 전
부모
커밋
7f4238d888
2개의 변경된 파일, 18개의 추가 및 8개의 삭제
  1. 6 1
      dulwich/objects.py
  2. 12 7
      dulwich/pack.py

+ 6 - 1
dulwich/objects.py

@@ -106,6 +106,11 @@ def filename_to_hex(filename):
     return hex
 
 
+def object_header(num_type, length):
+    """Return an object header for the given numeric type and text length."""
+    return "%s %d\0" % (object_class(num_type).type_name, length)
+
+
 def serializable_property(name, docstring=None):
     def set(obj, value):
         obj._ensure_parsed()
@@ -390,7 +395,7 @@ class ShaFile(object):
             raise ChecksumMismatch(new_sha, old_sha)
 
     def _header(self):
-        return "%s %lu\0" % (self.type_name, self.raw_length())
+        return object_header(self.type, self.raw_length())
 
     def raw_length(self):
         """Returns the length of the raw string of this object."""

+ 12 - 7
dulwich/pack.py

@@ -63,6 +63,7 @@ from dulwich.objects import (
     ShaFile,
     hex_to_sha,
     sha_to_hex,
+    object_header,
     )
 from dulwich.misc import (
     make_sha,
@@ -490,6 +491,15 @@ def _compute_object_size((num, obj)):
     return chunks_length(obj)
 
 
+def obj_sha(type, chunks):
+    """Compute the SHA for a numeric type and object chunks."""
+    sha = make_sha()
+    sha.update(object_header(type, chunks_length(chunks)))
+    for chunk in chunks:
+        sha.update(chunk)
+    return sha.digest()
+
+
 class PackData(object):
     """The data contained in a packfile.
 
@@ -668,10 +678,7 @@ class PackData(object):
             assert isinstance(type, int)
             assert isinstance(obj, list) or isinstance(obj, tuple)
             type, obj = self.resolve_object(offset, type, obj)
-            # TODO: hash the data directly to avoid object parsing overhead.
-            shafile = ShaFile.from_raw_chunks(type, obj)
-            sha = shafile.sha().digest()
-            yield sha, offset, crc32
+            yield obj_sha(type, obj), offset, crc32
 
     def sorted_entries(self, progress=None):
         """Return entries in this pack, sorted by SHA.
@@ -804,9 +811,7 @@ class ThinPackData(PackData):
                 # Save memory by not storing the inflated obj in postponed
                 postponed[e.sha].append((offset, type, None, crc32))
             else:
-                # TODO: hash the data directly to avoid object parsing overhead.
-                shafile = ShaFile.from_raw_chunks(type, obj)
-                sha = shafile.sha().digest()
+                sha = obj_sha(type, obj)
                 found[sha] = offset
                 yield sha, offset, crc32
                 extra.extend(postponed.pop(sha, []))