瀏覽代碼

Separate out delta generation from pack writing.

Jelmer Vernooij 13 年之前
父節點
當前提交
0d8a2187d5
共有 5 個文件被更改,包括 74 次插入42 次删除
  1. 2 2
      dulwich/client.py
  2. 2 2
      dulwich/object_store.py
  3. 65 33
      dulwich/pack.py
  4. 2 2
      dulwich/server.py
  5. 3 3
      dulwich/tests/test_object_store.py

+ 2 - 2
dulwich/client.py

@@ -38,7 +38,7 @@ from dulwich.protocol import (
     extract_capabilities,
     )
 from dulwich.pack import (
-    write_pack_data,
+    write_pack_objects,
     )
 
 
@@ -184,7 +184,7 @@ class GitClient(object):
         if not want:
             return new_refs
         objects = generate_pack_contents(have, want)
-        entries, sha = write_pack_data(proto.write_file(), objects)
+        entries, sha = write_pack_objects(proto.write_file(), objects)
 
         if 'report-status' in self._send_capabilities:
             self._parse_status_report(proto)

+ 2 - 2
dulwich/object_store.py

@@ -54,8 +54,8 @@ from dulwich.pack import (
     iter_sha1,
     load_pack_index,
     write_pack,
-    write_pack_data,
     write_pack_index_v2,
+    write_pack_objects,
     )
 
 INFODIR = 'info'
@@ -321,7 +321,7 @@ class PackBasedObjectStore(BaseObjectStore):
             # Don't bother writing an empty pack file
             return
         f, commit = self.add_pack()
-        write_pack_data(f, objects)
+        write_pack_objects(f, objects)
         return commit()
 
 

+ 65 - 33
dulwich/pack.py

@@ -1065,16 +1065,19 @@ class SHA1Writer(object):
 
     def __init__(self, f):
         self.f = f
+        self.length = 0
         self.sha1 = make_sha("")
 
     def write(self, data):
         self.sha1.update(data)
         self.f.write(data)
+        self.length += len(data)
 
     def write_sha(self):
         sha = self.sha1.digest()
         assert len(sha) == 20
         self.f.write(sha)
+        self.length += len(sha)
         return sha
 
     def close(self):
@@ -1082,6 +1085,9 @@ class SHA1Writer(object):
         self.f.close()
         return sha
 
+    def offset(self):
+        return self.length
+
     def tell(self):
         return self.f.tell()
 
@@ -1136,7 +1142,8 @@ def write_pack(filename, objects, num_objects=None):
                       DeprecationWarning)
     f = GitFile(filename + ".pack", 'wb')
     try:
-        entries, data_sum = write_pack_data(f, objects, num_objects=num_objects)
+        entries, data_sum = write_pack_objects(f, objects,
+            num_objects=num_objects)
     finally:
         f.close()
     entries = [(k, v[0], v[1]) for (k, v) in entries.iteritems()]
@@ -1155,24 +1162,12 @@ def write_pack_header(f, num_objects):
     f.write(struct.pack('>L', num_objects))  # Number of objects in pack
 
 
-def write_pack_data(f, objects, num_objects=None, window=10):
-    """Write a new pack data file.
+def deltify_pack_objects(objects, window=10):
+    """Generate deltas for pack objects.
 
-    :param f: File to write to
-    :param objects: Iterable of (object, path) tuples to write.
-        Should provide __len__
-    :param window: Sliding window size for searching for deltas; currently
-                   unimplemented
-    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
+    :param objects: Objects to deltify
+    :param window: Window size
     """
-    if num_objects is not None:
-        warnings.warn("num_objects argument to write_pack_data is deprecated",
-                      DeprecationWarning)
-        # Previously it was possible to pass in an iterable
-        objects = list(objects)
-    else:
-        num_objects = len(objects)
-
     # Build a list of objects ordered by the magic Linus heuristic
     # This helps us find good objects to diff against us
     magic = []
@@ -1182,31 +1177,68 @@ def write_pack_data(f, objects, num_objects=None, window=10):
 
     possible_bases = deque()
 
-    # Write the pack
-    entries = {}
-    f = SHA1Writer(f)
-    write_pack_header(f, num_objects)
     for type_num, path, neg_length, o in magic:
         raw = o.as_raw_string()
-        winner = (type_num, raw)
+        winner = raw
+        winner_base = None
         for base in possible_bases:
             if base.type_num != type_num:
                 continue
             delta = create_delta(base.as_raw_string(), raw)
             if len(delta) < len(winner):
-                base_id = base.sha().digest()
-                try:
-                    base_offset, base_crc32 = entries[base_id]
-                except KeyError:
-                    winner = (OFS_DELTA, (base_offset, delta))
-                else:
-                    winner = (REF_DELTA, (base_id, delta))
-        offset = f.tell()
+                winner_base = base.sha().digest()
+                winner = delta
+        yield type_num, o.sha().digest(), winner_base, winner
         possible_bases.appendleft(o)
         while len(possible_bases) > window:
             possible_bases.pop()
-        crc32 = write_pack_object(f, winner[0], winner[1])
-        entries[o.sha().digest()] = (offset, crc32)
+
+
+def write_pack_objects(f, objects, window=10, num_objects=None):
+    """Write a new pack data file.
+
+    :param f: File to write to
+    :param objects: Iterable of (object, path) tuples to write.
+        Should provide __len__
+    :param window: Sliding window size for searching for deltas; currently
+                   unimplemented
+    :param num_objects: Number of objects (do not use, deprecated)
+    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
+    """
+    if num_objects is None:
+        num_objects = len(objects)
+    # FIXME: pack_contents = deltify_pack_objects(objects, window)
+    pack_contents = (
+        (o.type_num, o.sha().digest(), None, o.as_raw_string())
+        for (o, path) in objects)
+    return write_pack_data(f, num_objects, pack_contents)
+
+
+def write_pack_data(f, num_records, records):
+    """Write a new pack data file.
+
+    :param f: File to write to
+    :param num_records: Number of records
+    :param records: Iterator over type_num, object_id, delta_base, raw
+    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
+    """
+    # Write the pack
+    entries = {}
+    f = SHA1Writer(f)
+    write_pack_header(f, num_records)
+    for type_num, object_id, delta_base, raw in records:
+        if delta_base is not None:
+            try:
+                base_offset, base_crc32 = entries[delta_base]
+            except KeyError:
+                type_num = REF_DELTA
+                raw = (delta_base, raw)
+            else:
+                type_num = OFS_DELTA
+                raw = (base_offset, raw)
+        offset = f.offset()
+        crc32 = write_pack_object(f, type_num, raw)
+        entries[object_id] = (offset, crc32)
     return entries, f.write_sha()
 
 
@@ -1510,7 +1542,7 @@ class Pack(object):
               *self.data.resolve_object(offset, type, obj))
 
     def pack_tuples(self):
-        """Provide an iterable for use with write_pack_data.
+        """Provide an iterable for use with write_pack_objects.
 
         :return: Object that can iterate over (object, path) tuples
             and provides __len__

+ 2 - 2
dulwich/server.py

@@ -45,7 +45,7 @@ from dulwich.objects import (
     )
 from dulwich.pack import (
     PackStreamReader,
-    write_pack_data,
+    write_pack_objects,
     )
 from dulwich.protocol import (
     BufferedPktLineWriter,
@@ -280,7 +280,7 @@ class UploadPackHandler(Handler):
 
         self.progress("dul-daemon says what\n")
         self.progress("counting objects: %d, done.\n" % len(objects_iter))
-        write_pack_data(ProtocolFile(None, write), objects_iter)
+        write_pack_objects(ProtocolFile(None, write), objects_iter)
         self.progress("how was that, then?\n")
         # we are done
         self.proto.write("0000")

+ 3 - 3
dulwich/tests/test_object_store.py

@@ -42,7 +42,7 @@ from dulwich.object_store import (
     tree_lookup_path,
     )
 from dulwich.pack import (
-    write_pack_data,
+    write_pack_objects,
     )
 from dulwich.tests import (
     TestCase,
@@ -226,14 +226,14 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         o = DiskObjectStore(self.store_dir)
         f, commit = o.add_pack()
         b = make_object(Blob, data="more yummy data")
-        write_pack_data(f, [(b, None)])
+        write_pack_objects(f, [(b, None)])
         commit()
 
     def test_add_thin_pack(self):
         o = DiskObjectStore(self.store_dir)
         f, commit = o.add_thin_pack()
         b = make_object(Blob, data="more yummy data")
-        write_pack_data(f, [(b, None)])
+        write_pack_objects(f, [(b, None)])
         commit()