
Merge pull request #1051 from jelmer/avoid-bytesio

Factor out chunking of pack files, avoiding use of BytesIO
Jelmer Vernooij 2 years ago
parent
commit
d312ac9dc8
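
In short: the pack-writing entry points (write_pack_data, write_pack_objects, write_pack_header, write_pack_object) now take a write callable instead of a file-like object, and byte production is factored into the generators pack_object_chunks and pack_header_chunks, so callers can stream chunks without staging them in a BytesIO. A minimal sketch of the new calling convention, using an empty object list as in the updated tests:

    from io import BytesIO
    from dulwich.pack import write_pack_objects

    f = BytesIO()
    # Old, now-deprecated style: write_pack_objects(f, [])
    # New style: pass any callable that accepts bytes.
    entries, data_sum = write_pack_objects(f.write, [])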

+ 1 - 1
dulwich/bundle.py

@@ -119,4 +119,4 @@ def write_bundle(f, bundle):
     for ref, obj_id in bundle.references.items():
         f.write(b"%s %s\n" % (obj_id, ref))
     f.write(b"\n")
-    write_pack_data(f, records=bundle.pack_data)
+    write_pack_data(f.write, records=bundle.pack_data)
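
Since the new contract is just "a callable that accepts one bytes argument", f.write is only one option; chunks can equally be collected in a list. An illustrative sketch with an empty record iterator:

    from dulwich.pack import write_pack_data

    chunks = []
    # list.append satisfies the write contract, so the pack can be
    # assembled in memory without a file object at all.
    entries, data_sum = write_pack_data(chunks.append, records=[])
    pack_bytes = b"".join(chunks)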

+ 1 - 3
dulwich/client.py

@@ -111,7 +111,6 @@ from dulwich.protocol import (
     SIDE_BAND_CHANNEL_FATAL,
     PktLineParser,
     Protocol,
-    ProtocolFile,
     TCP_GIT_PORT,
     ZERO_SHA,
     extract_capabilities,
@@ -1543,8 +1542,7 @@ class LocalGitClient(GitClient):
             # Note that the client still expects a 0-object pack in most cases.
             if objects_iter is None:
                 return FetchPackResult(None, symrefs, agent)
-            protocol = ProtocolFile(None, pack_data)
-            write_pack_objects(protocol, objects_iter)
+            write_pack_objects(pack_data, objects_iter)
             return FetchPackResult(r.get_refs(), symrefs, agent)
 
     def get_refs(self, path):
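
The ProtocolFile(None, pack_data) wrapper existed only to give the pack_data callback a .write attribute (its tell() returned nothing useful); now that PackChunkGenerator tracks offsets itself, the callback is passed straight through. A hedged sketch of such a callback, assuming the usual fetch-side contract of one bytes chunk per call:

    received = []

    def pack_data(data):
        # Invoked once per chunk of pack data; any bytes-accepting
        # callable now satisfies write_pack_objects directly.
        received.append(data)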

+ 5 - 5
dulwich/object_store.py

@@ -179,7 +179,7 @@ class BaseObjectStore(object):
         f, commit, abort = self.add_pack()
         try:
             write_pack_data(
-                f,
+                f.write,
                 count,
                 pack_data,
                 progress,
@@ -780,7 +780,7 @@ class DiskObjectStore(PackBasedObjectStore):
 
         # Update the header with the new number of objects.
         f.seek(0)
-        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+        write_pack_header(f.write, len(entries) + len(indexer.ext_refs()))
 
         # Must flush before reading (http://bugs.python.org/issue3207)
         f.flush()
@@ -797,7 +797,7 @@ class DiskObjectStore(PackBasedObjectStore):
             type_num, data = self.get_raw(ext_sha)
             offset = f.tell()
             crc32 = write_pack_object(
-                f,
+                f.write,
                 type_num,
                 data,
                 sha=new_sha,
@@ -1047,7 +1047,7 @@ class MemoryObjectStore(BaseObjectStore):
 
         # Update the header with the new number of objects.
         f.seek(0)
-        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+        write_pack_header(f.write, len(entries) + len(indexer.ext_refs()))
 
         # Rescan the rest of the pack, computing the SHA with the new header.
         new_sha = compute_file_sha(f, end_ofs=-20)
@@ -1056,7 +1056,7 @@ class MemoryObjectStore(BaseObjectStore):
         for ext_sha in indexer.ext_refs():
             assert len(ext_sha) == 20
             type_num, data = self.get_raw(ext_sha)
-            write_pack_object(f, type_num, data, sha=new_sha)
+            write_pack_object(f.write, type_num, data, sha=new_sha)
         pack_sha = new_sha.digest()
         f.write(pack_sha)
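
Both header rewrites above rely on the pack header having a fixed size: 4 bytes of magic, 4 bytes of version and 4 bytes of object count, 12 bytes in all, so seeking back to offset 0 and rewriting it never disturbs the object data that follows. A sketch of the patch-up step, where new_count stands in for the recomputed total:

    f.seek(0)
    # Exactly 12 bytes are rewritten in place; the objects after the
    # header keep their offsets.
    write_pack_header(f.write, new_count)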
 

+ 74 - 29
dulwich/pack.py

@@ -47,6 +47,7 @@ from itertools import chain
 import os
 import sys
 from typing import Optional, Callable, Tuple, List
+import warnings
 
 from hashlib import sha1
 from os import (
@@ -1520,15 +1521,14 @@ def pack_object_header(type_num, delta_base, size):
     return bytearray(header)
 
 
-def write_pack_object(f, type, object, sha=None, compression_level=-1):
-    """Write pack object to a file.
+def pack_object_chunks(type, object, compression_level=-1):
+    """Generate chunks for a pack object.
 
     Args:
-      f: File to write to
       type: Numeric type of the object
       object: Object to write
       compression_level: the zlib compression level
-    Returns: Tuple with offset at which the object was written, and crc32
+    Returns: Chunks
     """
     if type in DELTA_TYPES:
         delta_base, object = object
@@ -1536,12 +1536,32 @@ def write_pack_object(f, type, object, sha=None, compression_level=-1):
         delta_base = None
     header = bytes(pack_object_header(type, delta_base, len(object)))
     comp_data = zlib.compress(object, compression_level)
-    crc32 = 0
     for data in (header, comp_data):
-        f.write(data)
+        yield data
+
+
+def write_pack_object(write, type, object, sha=None, compression_level=-1):
+    """Write pack object to a file.
+
+    Args:
+      write: Write function to use
+      type: Numeric type of the object
+      object: Object to write
+      compression_level: the zlib compression level
+    Returns: Tuple with offset at which the object was written, and crc32
+    """
+    if hasattr(write, 'write'):
+        warnings.warn(
+            'write_pack_object() now takes a write rather than file argument',
+            DeprecationWarning, stacklevel=2)
+        write = write.write
+    crc32 = 0
+    for chunk in pack_object_chunks(
+            type, object, compression_level=compression_level):
+        write(chunk)
         if sha is not None:
-            sha.update(data)
-        crc32 = binascii.crc32(data, crc32)
+            sha.update(chunk)
+        crc32 = binascii.crc32(chunk, crc32)
     return crc32 & 0xFFFFFFFF
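
With the chunking factored out, a caller that streams pack data (to a socket, a hash, a sideband writer) can consume pack_object_chunks directly instead of round-tripping through BytesIO. A small self-contained sketch:

    from dulwich.objects import Blob
    from dulwich.pack import pack_object_chunks

    # Chunks can be hashed or transmitted as they are produced;
    # nothing is buffered in between.
    for chunk in pack_object_chunks(Blob.type_num, b"blob contents"):
        print(len(chunk))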
 
 
@@ -1564,7 +1584,7 @@ def write_pack(
     """
     with GitFile(filename + ".pack", "wb") as f:
         entries, data_sum = write_pack_objects(
-            f,
+            f.write,
             objects,
             delta_window_size=delta_window_size,
             deltify=deltify,
@@ -1575,11 +1595,22 @@ def write_pack(
         return data_sum, write_pack_index_v2(f, entries, data_sum)
 
 
-def write_pack_header(f, num_objects):
+def pack_header_chunks(num_objects):
+    """Yield chunks for a pack header."""
+    yield b"PACK"  # Pack header
+    yield struct.pack(b">L", 2)  # Pack version
+    yield struct.pack(b">L", num_objects)  # Number of objects in pack
+
+
+def write_pack_header(write, num_objects):
     """Write a pack header for the given number of objects."""
-    f.write(b"PACK")  # Pack header
-    f.write(struct.pack(b">L", 2))  # Pack version
-    f.write(struct.pack(b">L", num_objects))  # Number of objects in pack
+    if hasattr(write, 'write'):
+        write = write.write
+        warnings.warn(
+            'write_pack_header() now takes a write rather than file argument',
+            DeprecationWarning, stacklevel=2)
+    for chunk in pack_header_chunks(num_objects):
+        write(chunk)
 
 
 def deltify_pack_objects(objects, window_size=None):
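
pack_header_chunks makes the fixed 12-byte header directly inspectable; joining its chunks for 42 objects reproduces the byte string asserted in test_write_pack_header further down in this change:

    from dulwich.pack import pack_header_chunks

    header = b"".join(pack_header_chunks(42))
    # b"PACK", version 2, object count 42 (0x2a == b"*")
    assert header == b"PACK\x00\x00\x00\x02\x00\x00\x00*"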
@@ -1638,12 +1669,12 @@ def pack_objects_to_data(objects):
 
 
 def write_pack_objects(
-    f, objects, delta_window_size=None, deltify=None, compression_level=-1
+    write, objects, delta_window_size=None, deltify=None, compression_level=-1
 ):
     """Write a new pack data file.
 
     Args:
-      f: File to write to
+      write: write function to use
       objects: Iterable of (object, path) tuples to write. Should provide
          __len__
       delta_window_size: Sliding window size for searching for deltas;
@@ -1652,6 +1683,12 @@ def write_pack_objects(
       compression_level: the zlib compression level to use
     Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
+    if hasattr(write, 'write'):
+        warnings.warn(
+            'write_pack_objects() now takes a write rather than file argument',
+            DeprecationWarning, stacklevel=2)
+        write = write.write
+
     if deltify is None:
         # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too
         # slow at the moment.
@@ -1663,7 +1700,7 @@ def write_pack_objects(
         pack_contents_count, pack_contents = pack_objects_to_data(objects)
 
     return write_pack_data(
-        f,
+        write,
         pack_contents_count,
         pack_contents,
         compression_level=compression_level,
@@ -1697,11 +1734,11 @@ class PackChunkGenerator(object):
         # Write the pack
         if num_records is None:
             num_records = len(records)
-        f = BytesIO()
-        write_pack_header(f, num_records)
-        self.cs.update(f.getvalue())
-        yield f.getvalue()
-        offset = f.tell()
+        offset = 0
+        for chunk in pack_header_chunks(num_records):
+            yield chunk
+            self.cs.update(chunk)
+            offset += len(chunk)
         actual_num_records = 0
         for i, (type_num, object_id, delta_base, raw) in enumerate(records):
             if progress is not None:
@@ -1715,13 +1752,16 @@ class PackChunkGenerator(object):
                 else:
                     type_num = OFS_DELTA
                     raw = (offset - base_offset, raw)
-            f = BytesIO()
-            crc32 = write_pack_object(f, type_num, raw, compression_level=compression_level)
-            self.cs.update(f.getvalue())
-            yield f.getvalue()
+            crc32 = 0
+            object_size = 0
+            for chunk in pack_object_chunks(type_num, raw, compression_level=compression_level):
+                yield chunk
+                crc32 = binascii.crc32(chunk, crc32)
+                self.cs.update(chunk)
+                object_size += len(chunk)
             actual_num_records += 1
             self.entries[object_id] = (offset, crc32)
-            offset += f.tell()
+            offset += object_size
         if actual_num_records != num_records:
             raise AssertionError(
                 'actual records written differs: %d != %d' % (
@@ -1730,22 +1770,27 @@ class PackChunkGenerator(object):
         yield self.cs.digest()
 
 
-def write_pack_data(f, num_records=None, records=None, progress=None, compression_level=-1):
+def write_pack_data(write, num_records=None, records=None, progress=None, compression_level=-1):
     """Write a new pack data file.
 
     Args:
-      f: File to write to
+      write: Write function to use
       num_records: Number of records (defaults to len(records) if None)
       records: Iterator over type_num, object_id, delta_base, raw
       progress: Function to report progress to
       compression_level: the zlib compression level
     Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
+    if hasattr(write, 'write'):
+        warnings.warn(
+            'write_pack_data() now takes a write rather than file argument',
+            DeprecationWarning, stacklevel=2)
+        write = write.write
     chunk_generator = PackChunkGenerator(
         num_records=num_records, records=records, progress=progress,
         compression_level=compression_level)
     for chunk in chunk_generator:
-        f.write(chunk)
+        write(chunk)
     return chunk_generator.entries, chunk_generator.sha1digest()
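
All four rewritten entry points keep backwards compatibility through the hasattr(write, 'write') shim: passing a file-like object still works but emits a DeprecationWarning. A quick sketch of both call styles:

    import warnings
    from io import BytesIO
    from dulwich.pack import write_pack_header

    f = BytesIO()
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        write_pack_header(f, 1)  # old style: warns, but still writes
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    write_pack_header(f.write, 1)  # new style: no warning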
 
 

+ 1 - 1
dulwich/porcelain.py

@@ -1742,7 +1742,7 @@ def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None):
     """
     with open_repo_closing(repo) as r:
         entries, data_sum = write_pack_objects(
-            packf,
+            packf.write,
             r.object_store.iter_shas((oid, None) for oid in object_ids),
             delta_window_size=delta_window_size,
         )

+ 0 - 34
dulwich/protocol.py

@@ -147,20 +147,6 @@ COMMAND_WANT = b"want"
 COMMAND_HAVE = b"have"
 
 
-class ProtocolFile(object):
-    """A dummy file for network ops that expect file-like objects."""
-
-    def __init__(self, read, write):
-        self.read = read
-        self.write = write
-
-    def tell(self):
-        pass
-
-    def close(self):
-        pass
-
-
 def format_cmd_pkt(cmd, *args):
     return cmd + b" " + b"".join([(a + b"\0") for a in args])
 
@@ -308,26 +294,6 @@ class Protocol(object):
         except socket.error as e:
             raise GitProtocolError(e)
 
-    def write_file(self):
-        """Return a writable file-like object for this protocol."""
-
-        class ProtocolFile(object):
-            def __init__(self, proto):
-                self._proto = proto
-                self._offset = 0
-
-            def write(self, data):
-                self._proto.write(data)
-                self._offset += len(data)
-
-            def tell(self):
-                return self._offset
-
-            def close(self):
-                pass
-
-        return ProtocolFile(self)
-
     def write_sideband(self, channel, blob):
         """Write multiplexed data to the sideband.
 

+ 1 - 2
dulwich/server.py

@@ -96,7 +96,6 @@ from dulwich.protocol import (
     MULTI_ACK,
     MULTI_ACK_DETAILED,
     Protocol,
-    ProtocolFile,
     ReceivableProtocol,
     SIDE_BAND_CHANNEL_DATA,
     SIDE_BAND_CHANNEL_PROGRESS,
@@ -409,7 +408,7 @@ class UploadPackHandler(PackHandler):
         self.progress(
             ("counting objects: %d, done.\n" % len(objects_iter)).encode("ascii")
         )
-        write_pack_objects(ProtocolFile(None, write), objects_iter)
+        write_pack_objects(write, objects_iter)
         # we are done
         self.proto.write_pkt_line(None)
 

+ 2 - 2
dulwich/tests/test_client.py

@@ -339,7 +339,7 @@ class GitClientTests(TestCase):
             return 0, []
 
         f = BytesIO()
-        write_pack_objects(f, {})
+        write_pack_objects(f.write, {})
         self.client.send_pack("/", update_refs, generate_pack_data)
         self.assertEqual(
             self.rout.getvalue(),
@@ -384,7 +384,7 @@ class GitClientTests(TestCase):
             )
 
         f = BytesIO()
-        write_pack_data(f, *generate_pack_data(None, None))
+        write_pack_data(f.write, *generate_pack_data(None, None))
         self.client.send_pack(b"/", update_refs, generate_pack_data)
         self.assertEqual(
             self.rout.getvalue(),

+ 2 - 2
dulwich/tests/test_object_store.py

@@ -292,7 +292,7 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
         f, commit, abort = o.add_pack()
         try:
             b = make_object(Blob, data=b"more yummy data")
-            write_pack_objects(f, [(b, None)])
+            write_pack_objects(f.write, [(b, None)])
         except BaseException:
             abort()
             raise
@@ -525,7 +525,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         f, commit, abort = o.add_pack()
         try:
             b = make_object(Blob, data=b"more yummy data")
-            write_pack_objects(f, [(b, None)])
+            write_pack_objects(f.write, [(b, None)])
         except BaseException:
             abort()
             raise

+ 5 - 5
dulwich/tests/test_pack.py

@@ -498,7 +498,7 @@ class TestPack(PackTests):
 
             data._file.seek(12)
             bad_file = BytesIO()
-            write_pack_header(bad_file, 9999)
+            write_pack_header(bad_file.write, 9999)
             bad_file.write(data._file.read())
             bad_file = BytesIO(bad_file.getvalue())
             bad_data = PackData("", file=bad_file)
@@ -618,14 +618,14 @@ class TestThinPack(PackTests):
 class WritePackTests(TestCase):
     def test_write_pack_header(self):
         f = BytesIO()
-        write_pack_header(f, 42)
+        write_pack_header(f.write, 42)
         self.assertEqual(b"PACK\x00\x00\x00\x02\x00\x00\x00*", f.getvalue())
 
     def test_write_pack_object(self):
         f = BytesIO()
         f.write(b"header")
         offset = f.tell()
-        crc32 = write_pack_object(f, Blob.type_num, b"blob")
+        crc32 = write_pack_object(f.write, Blob.type_num, b"blob")
         self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xFFFFFFFF)
 
         f.write(b"x")  # unpack_object needs extra trailing data.
@@ -643,7 +643,7 @@ class WritePackTests(TestCase):
         offset = f.tell()
         sha_a = sha1(b"foo")
         sha_b = sha_a.copy()
-        write_pack_object(f, Blob.type_num, b"blob", sha=sha_a)
+        write_pack_object(f.write, Blob.type_num, b"blob", sha=sha_a)
         self.assertNotEqual(sha_a.digest(), sha_b.digest())
         sha_b.update(f.getvalue()[offset:])
         self.assertEqual(sha_a.digest(), sha_b.digest())
@@ -654,7 +654,7 @@ class WritePackTests(TestCase):
         offset = f.tell()
         sha_a = sha1(b"foo")
         sha_b = sha_a.copy()
-        write_pack_object(f, Blob.type_num, b"blob", sha=sha_a, compression_level=6)
+        write_pack_object(f.write, Blob.type_num, b"blob", sha=sha_a, compression_level=6)
         self.assertNotEqual(sha_a.digest(), sha_b.digest())
         sha_b.update(f.getvalue()[offset:])
         self.assertEqual(sha_a.digest(), sha_b.digest())

+ 2 - 2
dulwich/tests/utils.py

@@ -230,7 +230,7 @@ def build_pack(f, objects_spec, store=None):
     """
     sf = SHA1Writer(f)
     num_objects = len(objects_spec)
-    write_pack_header(sf, num_objects)
+    write_pack_header(sf.write, num_objects)
 
     full_objects = {}
     offsets = {}
@@ -270,7 +270,7 @@ def build_pack(f, objects_spec, store=None):
                 base = obj_sha(base_type_num, base_data)
             obj = (base, create_delta(base_data, data))
 
-        crc32 = write_pack_object(sf, type_num, obj)
+        crc32 = write_pack_object(sf.write, type_num, obj)
         offsets[i] = offset
         crc32s[i] = crc32