Browse Source

Change send_pack argument from generate_pack_contents to generate_pack_data.

Jelmer Vernooij 7 years ago
parent
commit
bbb18c93b9

+ 6 - 0
NEWS

@@ -13,6 +13,12 @@
 
   * Add a fastimport ``extra``. (Jelmer Vernooij)
 
+ API CHANGES
+
+  * ``GitClient.send_pack`` now accepts a ``generate_pack_data``
+    rather than a ``generate_pack_contents`` function for
+    performance reasons. (Jelmer Vernooij)
+
 0.18.6	2017-11-11
 
  BUG FIXES

+ 21 - 20
dulwich/client.py

@@ -100,7 +100,6 @@ from dulwich.protocol import (
     parse_capability,
     )
 from dulwich.pack import (
-    pack_objects_to_data,
     write_pack_data,
     write_pack_objects,
     )
@@ -310,7 +309,7 @@ class GitClient(object):
         """
         raise NotImplementedError(cls.from_parsedurl)
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_data,
                   progress=None):
         """Upload a pack to a remote repository.
 
@@ -318,8 +317,8 @@ class GitClient(object):
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple
+            with number of objects and list of pack data to include
         :param progress: Optional progress function
 
         :raises SendPackError: if server rejects the pack data
@@ -635,7 +634,7 @@ class TraditionalGitClient(GitClient):
         """
         raise NotImplementedError()
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_data,
                   progress=None):
         """Upload a pack to a remote repository.
 
@@ -643,8 +642,8 @@ class TraditionalGitClient(GitClient):
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple with
+            number of objects and pack data to upload.
         :param progress: Optional callback called with progress updates
 
         :raises SendPackError: if server rejects the pack data
@@ -696,9 +695,9 @@ class TraditionalGitClient(GitClient):
             if (not want and
                     set(new_refs.items()).issubset(set(old_refs.items()))):
                 return new_refs
-            objects = generate_pack_contents(have, want)
-
-            pack_data_count, pack_data = pack_objects_to_data(objects)
+            pack_data_count, pack_data = generate_pack_data(
+                have, want,
+                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))
 
             dowrite = bool(pack_data_count)
             dowrite = dowrite or any(old_refs.get(ref) != sha
@@ -948,7 +947,7 @@ class LocalGitClient(GitClient):
             path = path.decode(sys.getfilesystemencoding())
         return closing(Repo(path))
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_data,
                   progress=None):
         """Upload a pack to a remote repository.
 
@@ -956,8 +955,8 @@ class LocalGitClient(GitClient):
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple
+            with number of items and pack data to upload.
         :param progress: Optional progress function
 
         :raises SendPackError: if server rejects the pack data
@@ -986,7 +985,8 @@ class LocalGitClient(GitClient):
                     set(new_refs.items()).issubset(set(old_refs.items()))):
                 return new_refs
 
-            target.object_store.add_objects(generate_pack_contents(have, want))
+            target.object_store.add_pack_data(
+                *generate_pack_data(have, want, ofs_delta=True))
 
             for refname, new_sha1 in new_refs.items():
                 old_sha1 = old_refs.get(refname, ZERO_SHA)
@@ -1332,7 +1332,7 @@ class HttpGitClient(GitClient):
                                    % content_type)
         return resp, read
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_data,
                   progress=None):
         """Upload a pack to a remote repository.
 
@@ -1340,8 +1340,8 @@ class HttpGitClient(GitClient):
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple
+            with number of elements and pack data to upload.
         :param progress: Optional progress function
 
         :raises SendPackError: if server rejects the pack data
@@ -1372,9 +1372,10 @@ class HttpGitClient(GitClient):
             req_proto, negotiated_capabilities, old_refs, new_refs)
         if not want and set(new_refs.items()).issubset(set(old_refs.items())):
             return new_refs
-        objects = generate_pack_contents(have, want)
-        pack_data_count, pack_data = pack_objects_to_data(objects)
-        if bool(pack_data_count):
+        pack_data_count, pack_data = generate_pack_data(
+                have, want,
+                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))
+        if pack_data_count:
             write_pack_data(req_proto.write_file(), pack_data_count, pack_data)
         resp, read = self._smart_request("git-receive-pack", url,
                                          data=req_data.getvalue())

+ 7 - 7
dulwich/contrib/test_swift_smoke.py

@@ -142,7 +142,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         remote_sha = swift_repo.refs.read_loose_ref('refs/heads/master')
         self.assertEqual(sha, remote_sha)
@@ -162,7 +162,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack("/fakerepo",
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo(self.fakerepo, self.conf)
         remote_sha = swift_repo.refs.read_loose_ref('refs/heads/mybranch')
         self.assertEqual(sha, remote_sha)
@@ -189,7 +189,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         for branch in ('master', 'mybranch', 'pullr-108'):
             remote_shas[branch] = swift_repo.refs.read_loose_ref(
@@ -214,7 +214,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         commit_sha = swift_repo.refs.read_loose_ref('refs/heads/master')
         otype, data = swift_repo.object_store.get_raw(commit_sha)
@@ -261,7 +261,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                              ref='refs/heads/master')
         tcp_client.send_pack("/fakerepo",
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
 
     def test_push_remove_branch(self):
         def determine_wants(*args):
@@ -277,7 +277,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         self.assertNotIn('refs/heads/pullr-108', swift_repo.refs.allkeys())
 
@@ -304,7 +304,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo(self.fakerepo, self.conf)
         tag_sha = swift_repo.refs.read_loose_ref('refs/tags/v1.0')
         otype, data = swift_repo.object_store.get_raw(tag_sha)

+ 33 - 12
dulwich/object_store.py

@@ -56,10 +56,11 @@ from dulwich.pack import (
     PackData,
     PackInflater,
     iter_sha1,
+    pack_objects_to_data,
     write_pack_header,
     write_pack_index_v2,
+    write_pack_data,
     write_pack_object,
-    write_pack_objects,
     compute_file_sha,
     PackIndexer,
     PackStreamCopier,
@@ -135,6 +136,24 @@ class BaseObjectStore(object):
         """
         raise NotImplementedError(self.add_objects)
 
+    def add_pack_data(self, count, pack_data):
+        """Add pack data to this object store.
+
+        :param num_items: Number of items to add
+        :param pack_data: Iterator over pack data tuples
+        """
+        if count == 0:
+            # Don't bother writing an empty pack file
+            return
+        f, commit, abort = self.add_pack()
+        try:
+            write_pack_data(f, count, pack_data)
+        except BaseException:
+            abort()
+            raise
+        else:
+            return commit()
+
     def tree_changes(self, source, target, want_unchanged=False):
         """Find the differences between the contents of two trees
 
@@ -207,6 +226,18 @@ class BaseObjectStore(object):
         """
         return self.iter_shas(self.find_missing_objects(have, want, progress))
 
+    def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
+        """Generate pack data objects for a set of wants/haves.
+
+        :param have: List of SHA1s of objects that should not be sent
+        :param want: List of SHA1s of objects that should be sent
+        :param ofs_delta: Whether OFS deltas can be included
+        :param progress: Optional progress reporting method
+        """
+        # TODO(jelmer): More efficient implementation
+        return pack_objects_to_data(
+            self.generate_pack_contents(have, want, progress))
+
     def peel_sha(self, sha):
         """Peel all tags from a SHA.
 
@@ -429,17 +460,7 @@ class PackBasedObjectStore(BaseObjectStore):
             __len__.
         :return: Pack object of the objects written.
         """
-        if len(objects) == 0:
-            # Don't bother writing an empty pack file
-            return
-        f, commit, abort = self.add_pack()
-        try:
-            write_pack_objects(f, objects)
-        except BaseException:
-            abort()
-            raise
-        else:
-            return commit()
+        return self.add_pack_data(*pack_objects_to_data(objects))
 
 
 class DiskObjectStore(PackBasedObjectStore):

+ 11 - 6
dulwich/pack.py

@@ -1245,7 +1245,8 @@ class PackData(object):
         assert offset >= self._header_size
         self._file.seek(offset)
         unpacked, _ = unpack_object(self._file.read, include_comp=True)
-        return (unpacked.pack_type_num, unpacked.delta_base, unpacked.comp_chunks)
+        return (unpacked.pack_type_num, unpacked.delta_base,
+                unpacked.comp_chunks)
 
     def get_object_at(self, offset):
         """Given an offset in to the packfile return the object that is there.
@@ -1551,6 +1552,7 @@ def deltify_pack_objects(objects, window_size=None):
     :return: Iterator over type_num, object id, delta_base, content
         delta_base is None for full text entries
     """
+    # TODO(jelmer): Use threads
     if window_size is None:
         window_size = DEFAULT_PACK_DELTA_WINDOW_SIZE
     # Build a list of objects ordered by the magic Linus heuristic
@@ -1578,6 +1580,7 @@ def deltify_pack_objects(objects, window_size=None):
         while len(possible_bases) > window_size:
             possible_bases.pop()
 
+
 def pack_objects_to_data(objects):
     """Create pack data from objects
 
@@ -1587,7 +1590,7 @@ def pack_objects_to_data(objects):
     count = len(objects)
     return (count,
             ((o.type_num, o.sha().digest(), None, o.as_raw_string())
-              for (o, path) in objects))
+             for (o, path) in objects))
 
 
 def write_pack_objects(f, objects, delta_window_size=None, deltify=None):
@@ -1602,8 +1605,8 @@ def write_pack_objects(f, objects, delta_window_size=None, deltify=None):
     :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
     if deltify is None:
-        # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too slow
-        # at the moment.
+        # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too
+        # slow at the moment.
         deltify = False
     if deltify:
         pack_contents = deltify_pack_objects(objects, delta_window_size)
@@ -1968,9 +1971,11 @@ class Pack(object):
             list of data chunks
         """
         offset = self.index.object_index(sha1)
-        (obj_type, delta_base, chunks) = self.data.get_compressed_data_at(offset)
+        (obj_type, delta_base, chunks) = self.data.get_compressed_data_at(
+                offset)
         if obj_type == OFS_DELTA:
-            delta_base = sha_to_hex(self.index.object_sha1(offset - delta_base))
+            delta_base = sha_to_hex(
+                    self.index.object_sha1(offset - delta_base))
             obj_type = REF_DELTA
         return (obj_type, delta_base, chunks)
 

+ 2 - 1
dulwich/porcelain.py

@@ -777,7 +777,8 @@ def push(repo, remote_location, refspecs,
         remote_location_bytes = client.get_url(path).encode(err_encoding)
         try:
             client.send_pack(
-                path, update_refs, r.object_store.generate_pack_contents,
+                path, update_refs,
+                generate_pack_data=r.object_store.generate_pack_data,
                 progress=errstream.write)
             errstream.write(
                 b"Push to " + remote_location_bytes + b" successful.\n")

+ 5 - 5
dulwich/tests/compat/test_client.py

@@ -105,7 +105,7 @@ class DulwichClientTestBase(object):
             sendrefs = dict(src.get_refs())
             del sendrefs[b'HEAD']
             c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
-                        src.object_store.generate_pack_contents)
+                        src.object_store.generate_pack_data)
 
     def test_send_pack(self):
         self._do_send_pack()
@@ -125,7 +125,7 @@ class DulwichClientTestBase(object):
             sendrefs = dict(src.get_refs())
             del sendrefs[b'HEAD']
             c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
-                        src.object_store.generate_pack_contents)
+                        src.object_store.generate_pack_data)
             self.assertDestEqualsSrc()
 
     def make_dummy_commit(self, dest):
@@ -152,7 +152,7 @@ class DulwichClientTestBase(object):
     def compute_send(self, src):
         sendrefs = dict(src.get_refs())
         del sendrefs[b'HEAD']
-        return sendrefs, src.object_store.generate_pack_contents
+        return sendrefs, src.object_store.generate_pack_data
 
     def test_send_pack_one_error(self):
         dest, dummy_commit = self.disable_ff_and_make_dummy_commit()
@@ -251,8 +251,8 @@ class DulwichClientTestBase(object):
             sendrefs[b'refs/heads/abranch'] = b"00" * 20
             del sendrefs[b'HEAD']
 
-            def gen_pack(have, want):
-                return []
+            def gen_pack(have, want, ofs_delta=False):
+                return 0, []
             c = self._client()
             self.assertEqual(dest.refs[b"refs/heads/abranch"], dummy_commit)
             c.send_pack(

+ 25 - 23
dulwich/tests/test_client.py

@@ -68,6 +68,8 @@ from dulwich.protocol import (
     Protocol,
     )
 from dulwich.pack import (
+    pack_objects_to_data,
+    write_pack_data,
     write_pack_objects,
     )
 from dulwich.objects import (
@@ -198,12 +200,12 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/foo/bar': commit.id, }
 
-        def generate_pack_contents(have, want):
-            return [(commit, None), (tree, ''), ]
+        def generate_pack_data(have, want, ofs_delta=False):
+            return pack_objects_to_data([(commit, None), (tree, ''), ])
 
         self.assertRaises(UpdateRefsError,
                           self.client.send_pack, "blah",
-                          determine_wants, generate_pack_contents)
+                          determine_wants, generate_pack_data)
 
     def test_send_pack_none(self):
         self.rin.write(
@@ -219,10 +221,10 @@ class GitClientTests(TestCase):
                     b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertEqual(self.rout.getvalue(), b'0000')
 
     def test_send_pack_keep_and_delete(self):
@@ -238,10 +240,10 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/heads/master': b'0' * 40}
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f310ca9477129b8586fa2afc779c1f57cf64bba6c '
@@ -263,10 +265,10 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/heads/master': b'0' * 40}
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f310ca9477129b8586fa2afc779c1f57cf64bba6c '
@@ -293,12 +295,12 @@ class GitClientTests(TestCase):
                     b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
         f = BytesIO()
         write_pack_objects(f, {})
-        self.client.send_pack('/', determine_wants, generate_pack_contents)
+        self.client.send_pack('/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f0000000000000000000000000000000000000000 '
@@ -336,12 +338,12 @@ class GitClientTests(TestCase):
                     b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
-        def generate_pack_contents(have, want):
-            return [(commit, None), (tree, b''), ]
+        def generate_pack_data(have, want, ofs_delta=False):
+            return pack_objects_to_data([(commit, None), (tree, b''), ])
 
         f = BytesIO()
-        write_pack_objects(f, generate_pack_contents(None, None))
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        write_pack_data(f, *generate_pack_data(None, None))
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f0000000000000000000000000000000000000000 ' + commit.id +
@@ -366,12 +368,12 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/heads/master': b'0' * 40}
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
         self.assertRaises(UpdateRefsError,
                           self.client.send_pack, b"/",
-                          determine_wants, generate_pack_contents)
+                          determine_wants, generate_pack_data)
         self.assertEqual(self.rout.getvalue(), b'0000')
 
 
@@ -826,7 +828,7 @@ class LocalGitClientTests(TestCase):
         ref_name = b"refs/heads/" + branch
         new_refs = client.send_pack(target.path,
                                     lambda _: {ref_name: local.refs[ref_name]},
-                                    local.object_store.generate_pack_contents)
+                                    local.object_store.generate_pack_data)
 
         self.assertEqual(local.refs[ref_name], new_refs[ref_name])