Browse Source

Include type_num in hint.

Jelmer Vernooij 2 years ago
parent
commit
83a679251a
6 changed files with 94 additions and 39 deletions
  1. 5 2
      dulwich/client.py
  2. 14 7
      dulwich/object_store.py
  3. 44 14
      dulwich/pack.py
  4. 24 11
      dulwich/repo.py
  5. 6 4
      dulwich/server.py
  6. 1 1
      dulwich/tests/test_repository.py

+ 5 - 2
dulwich/client.py

@@ -857,6 +857,7 @@ class GitClient:
         determine_wants,
         graph_walker,
         pack_data,
+        *,
         progress=None,
         depth=None,
     ):
@@ -1536,9 +1537,11 @@ class LocalGitClient(GitClient):
 
         """
         with self._open_repo(path) as r:
-            object_ids = r.fetch_objects(
+            missing_objects = r.find_missing_objects(
                 determine_wants, graph_walker, progress=progress, depth=depth
             )
+            other_haves = missing_objects.get_remote_has()
+            object_ids = list(missing_objects)
             symrefs = r.refs.get_symrefs()
             agent = agent_string()
 
@@ -1546,7 +1549,7 @@ class LocalGitClient(GitClient):
             # Note that the client still expects a 0-object pack in most cases.
             if object_ids is None:
                 return FetchPackResult(None, symrefs, agent)
-            write_pack_from_container(pack_data, r.object_store, object_ids)
+            write_pack_from_container(pack_data, r.object_store, object_ids, other_haves=other_haves)
             return FetchPackResult(r.get_refs(), symrefs, agent)
 
     def get_refs(self, path):

+ 14 - 7
dulwich/object_store.py

@@ -54,6 +54,7 @@ from dulwich.pack import (
     ObjectContainer,
     Pack,
     PackData,
+    PackHint,
     PackInflater,
     PackFileDisappeared,
     UnpackedObject,
@@ -253,7 +254,8 @@ class BaseObjectStore:
         """
         # Note that the pack-specific implementation below is more efficient,
         # as it reuses deltas
-        object_ids = list(MissingObjectFinder(self, have, want, shallow, progress))
+        missing_objects = MissingObjectFinder(self, have, want, shallow, progress)
+        object_ids = list(missing_objects)
         return pack_objects_to_data(
             [(self[oid], path) for oid, path in object_ids], ofs_delta=ofs_delta,
             progress=progress)
@@ -393,12 +395,15 @@ class PackBasedObjectStore(BaseObjectStore):
           ofs_delta: Whether OFS deltas can be included
           progress: Optional progress reporting method
         """
-        object_ids = list(MissingObjectFinder(self, have, want, shallow, progress))
+        missing_objects = MissingObjectFinder(self, have, want, shallow, progress)
+        remote_has = missing_objects.get_remote_has()
+        object_ids = list(missing_objects)
         return len(object_ids), generate_unpacked_objects(
             cast(PackedObjectContainer, self),
             object_ids,
             progress=progress,
-            ofs_delta=ofs_delta)
+            ofs_delta=ofs_delta,
+            other_haves=remote_has)
 
     def _clear_cached_packs(self):
         pack_cache = self._pack_cache
@@ -1285,10 +1290,13 @@ class MissingObjectFinder:
             self.progress = progress
         self._tagged = get_tagged and get_tagged() or {}
 
-    def add_todo(self, entries):
+    def get_remote_has(self):
+        return self.remote_has
+
+    def add_todo(self, entries: Iterable[Tuple[ObjectID, Optional[bytes], Optional[int], bool]]):
         self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
 
-    def __next__(self) -> Tuple[bytes, Optional[bytes]]:
+    def __next__(self) -> Tuple[bytes, PackHint]:
         while True:
             if not self.objects_to_send:
                 self.progress(("counting objects: %d, done.\n" % len(self.sha_done)).encode("ascii"))
@@ -1316,8 +1324,7 @@ class MissingObjectFinder:
         self.sha_done.add(sha)
         if len(self.sha_done) % 1000 == 0:
             self.progress(("counting objects: %d\r" % len(self.sha_done)).encode("ascii"))
-        # TODO (type_num, name, -length, sha)
-        return (sha, name)
+        return (sha, (type_num, name))
 
     def __iter__(self):
         return self

+ 44 - 14
dulwich/pack.py

@@ -82,6 +82,7 @@ from dulwich.lru_cache import (
 )
 from dulwich.objects import (
     ShaFile,
+    ObjectID,
     hex_to_sha,
     sha_to_hex,
     object_header,
@@ -100,6 +101,7 @@ DEFAULT_PACK_DELTA_WINDOW_SIZE = 10
 OldUnpackedObject = Union[Tuple[Union[bytes, int], List[bytes]], List[bytes]]
 ResolveExtRefFn = Callable[[bytes], Tuple[int, OldUnpackedObject]]
 ProgressFn = Callable[[int, str], None]
+PackHint = Tuple[int, Optional[bytes]]
 
 
 class ObjectContainer(Protocol):
@@ -138,6 +140,15 @@ class PackedObjectContainer(ObjectContainer):
         raise NotImplementedError(self.iter_unpacked_subset)
 
 
+class UnpackedObjectStream:
+
+    def __iter__(self) -> Iterator["UnpackedObject"]:
+        raise NotImplementedError(self.__iter__)
+
+    def __len__(self) -> int:
+        raise NotImplementedError(self.__len__)
+
+
 def take_msb_bytes(read: Callable[[int], bytes], crc32: Optional[int] = None) -> Tuple[List[int], Optional[int]]:
     """Read bytes marked with most significant bit.
 
@@ -1674,7 +1685,7 @@ def write_pack_object(write, type, object, sha=None, compression_level=-1):
 
 def write_pack(
         filename,
-        objects: Sequence[Tuple[ShaFile, Optional[bytes]]],
+        objects: Union[Sequence[ShaFile], Sequence[Tuple[ShaFile, Optional[bytes]]]],
         *,
         deltify: Optional[bool] = None,
         delta_window_size: Optional[int] = None,
@@ -1741,7 +1752,7 @@ def find_reusable_deltas(
 
 
 def deltify_pack_objects(
-        objects,
+        objects: Union[Iterator[bytes], Iterator[Tuple[ShaFile, Optional[bytes]]]],
         *, window_size: Optional[int] = None,
         progress=None) -> Iterator[UnpackedObject]:
     """Generate deltas for pack objects.
@@ -1752,20 +1763,31 @@ def deltify_pack_objects(
     Returns: Iterator over type_num, object id, delta_base, content
         delta_base is None for full text entries
     """
+    def objects_with_hints():
+        for e in objects:
+            if isinstance(e, ShaFile):
+                yield (e, (e.type_num, None))
+            else:
+                yield (e[0], (e[0].type_num, e[1]))
     yield from deltas_from_sorted_objects(
-        sort_objects_for_delta(objects),
+        sort_objects_for_delta(objects_with_hints()),
         window_size=window_size,
         progress=progress)
 
 
-def sort_objects_for_delta(objects: Union[Iterator[ShaFile], Iterator[Tuple[ShaFile, Optional[bytes]]]]) -> Iterator[ShaFile]:
+def sort_objects_for_delta(objects: Union[Iterator[ShaFile], Iterator[Tuple[ShaFile, Optional[PackHint]]]]) -> Iterator[ShaFile]:
     magic = []
     for entry in objects:
         if isinstance(entry, tuple):
-            obj, path = entry
+            obj, hint = entry
+            if hint is None:
+                type_num = None
+                path = None
+            else:
+                (type_num, path) = hint
         else:
             obj = entry
-        magic.append((obj.type_num, path, -obj.raw_length(), obj))
+        magic.append((type_num, path, -obj.raw_length(), obj))
     # Build a list of objects ordered by the magic Linus heuristic
     # This helps us find good objects to diff against us
     magic.sort()
@@ -1806,7 +1828,7 @@ def deltas_from_sorted_objects(objects, window_size: Optional[int] = None, progr
 
 
 def pack_objects_to_data(
-        objects: Sequence[Tuple[ShaFile, Optional[bytes]]],
+        objects: Union[Sequence[ShaFile], Sequence[Tuple[ShaFile, Optional[bytes]]]],
         *,
         deltify: Optional[bool] = None,
         delta_window_size: Optional[int] = None,
@@ -1827,17 +1849,23 @@ def pack_objects_to_data(
     if deltify:
         return (
             count,
-            deltify_pack_objects(objects, window_size=delta_window_size, progress=progress))
+            deltify_pack_objects(iter(objects), window_size=delta_window_size, progress=progress))  # type: ignore
     else:
+        def iter_without_path():
+            for o in objects:
+                if isinstance(o, tuple):
+                    yield full_unpacked_object(o[0])
+                else:
+                    yield full_unpacked_object(o)
         return (
             count,
-            (full_unpacked_object(o) for o, path in objects)
+            iter_without_path()
         )
 
 
 def generate_unpacked_objects(
         container: PackedObjectContainer,
-        object_ids: Sequence[Tuple[bytes, Optional[bytes]]],
+        object_ids: Sequence[Tuple[ObjectID, Optional[PackHint]]],
         delta_window_size: Optional[int] = None,
         deltify: Optional[bool] = None,
         reuse_deltas: bool = True,
@@ -1883,11 +1911,12 @@ def full_unpacked_object(o: ShaFile) -> UnpackedObject:
 def write_pack_from_container(
         write,
         container: PackedObjectContainer,
-        object_ids: Sequence[Tuple[bytes, Optional[bytes]]],
+        object_ids: Sequence[Tuple[ObjectID, Optional[PackHint]]],
         delta_window_size: Optional[int] = None,
         deltify: Optional[bool] = None,
         reuse_deltas: bool = True,
-        compression_level: int = -1
+        compression_level: int = -1,
+        other_haves: Optional[Set[bytes]] = None
 ):
     """Write a new pack data file.
 
@@ -1905,7 +1934,8 @@ def write_pack_from_container(
     pack_contents = generate_unpacked_objects(
         container, object_ids, delta_window_size=delta_window_size,
         deltify=deltify,
-        reuse_deltas=reuse_deltas)
+        reuse_deltas=reuse_deltas,
+        other_haves=other_haves)
 
     return write_pack_data(
         write,
@@ -1918,7 +1948,7 @@ def write_pack_from_container(
 
 def write_pack_objects(
         write,
-        objects: Sequence[Tuple[ShaFile, Optional[bytes]]],
+        objects: Union[Sequence[ShaFile], Sequence[Tuple[ShaFile, Optional[bytes]]]],
         delta_window_size: Optional[int] = None,
         deltify: Optional[bool] = None,
         compression_level: int = -1

+ 24 - 11
dulwich/repo.py

@@ -39,6 +39,7 @@ from typing import (
     Callable,
     Tuple,
     TYPE_CHECKING,
+    FrozenSet,
     List,
     Dict,
     Union,
@@ -485,22 +486,23 @@ class BaseRepo:
           depth: Shallow fetch depth
         Returns: count and iterator over pack data
         """
-        # TODO(jelmer): Fetch pack data directly, don't create objects first.
-        object_ids = self.fetch_objects(
+        missing_objects = self.find_missing_objects(
             determine_wants, graph_walker, progress, get_tagged, depth=depth
         )
-        # TODO(jelmer): Set other_has=missing_objects.remote_has
+        remote_has = missing_objects.get_remote_has()
+        object_ids = list(missing_objects)
         return len(object_ids), generate_unpacked_objects(
-            self.object_store, object_ids, progress=progress)
+            self.object_store, object_ids, progress=progress,
+            other_haves=remote_has)
 
-    def fetch_objects(
+    def find_missing_objects(
         self,
         determine_wants,
         graph_walker,
         progress,
         get_tagged=None,
         depth=None,
-    ):
+    ) -> Optional[MissingObjectFinder]:
         """Fetch the missing objects required for a set of revisions.
 
         Args:
@@ -539,8 +541,8 @@ class BaseRepo:
         if not isinstance(wants, list):
             raise TypeError("determine_wants() did not return a list")
 
-        shallows = getattr(graph_walker, "shallow", frozenset())
-        unshallows = getattr(graph_walker, "unshallow", frozenset())
+        shallows: FrozenSet[ObjectID] = getattr(graph_walker, "shallow", frozenset())
+        unshallows: FrozenSet[ObjectID] = getattr(graph_walker, "unshallow", frozenset())
 
         if wants == []:
             # TODO(dborowitz): find a way to short-circuit that doesn't change
@@ -550,7 +552,18 @@ class BaseRepo:
                 # Do not send a pack in shallow short-circuit path
                 return None
 
-            return []
+            class DummyMissingObjectFinder:
+
+                def get_remote_has(self):
+                    return None
+
+                def __len__(self):
+                    return 0
+
+                def __iter__(self):
+                    yield from []
+
+            return DummyMissingObjectFinder()  # type: ignore
 
         # If the graph walker is set up with an implementation that can
         # ACK/NAK to the wire, it will write data to the client through
@@ -569,14 +582,14 @@ class BaseRepo:
         def get_parents(commit):
             return parents_provider.get_parents(commit.id, commit)
 
-        return list(MissingObjectFinder(
+        return MissingObjectFinder(
             self.object_store,
             haves,
             wants,
             self.get_shallow(),
             progress,
             get_tagged,
-            get_parents=get_parents))
+            get_parents=get_parents)
 
     def generate_pack_data(self, have: List[ObjectID], want: List[ObjectID],
                            progress: Optional[Callable[[str], None]] = None,

+ 6 - 4
dulwich/server.py

@@ -175,7 +175,7 @@ class BackendRepo:
         """
         return None
 
-    def fetch_objects(self, determine_wants, graph_walker, progress, get_tagged=None):
+    def find_missing_objects(self, determine_wants, graph_walker, progress, get_tagged=None):
         """
         Yield the objects required for a list of commits.
 
@@ -355,7 +355,7 @@ class UploadPackHandler(PackHandler):
                 # TODO: fix behavior when missing
                 return {}
         # TODO(jelmer): Integrate this with the refs logic in
-        # Repo.fetch_objects
+        # Repo.find_missing_objects
         tagged = {}
         for name, sha in refs.items():
             peeled_sha = repo.get_peeled(name)
@@ -379,13 +379,15 @@ class UploadPackHandler(PackHandler):
             wants.extend(graph_walker.determine_wants(refs, **kwargs))
             return wants
 
-        object_ids = self.repo.fetch_objects(
+        missing_objects = self.repo.find_missing_objects(
             wants_wrapper,
             graph_walker,
             self.progress,
             get_tagged=self.get_tagged,
         )
 
+        object_ids = list(missing_objects)
+
         # Note the fact that client is only processing responses related
         # to the have lines it sent, and any other data (including side-
        # band) will be considered a fatal error.
@@ -604,7 +606,7 @@ class _ProtocolGraphWalker:
                     peeled_sha = self.get_peeled(ref)
                 except KeyError:
                     # Skip refs that are inaccessible
-                    # TODO(jelmer): Integrate with Repo.fetch_objects refs
+                    # TODO(jelmer): Integrate with Repo.find_missing_objects refs
                     # logic.
                     continue
                 if i == 0:

+ 1 - 1
dulwich/tests/test_repository.py

@@ -539,7 +539,7 @@ class RepositoryRootTests(TestCase):
         This test demonstrates that ``find_common_revisions()`` actually
         returns common heads, not revisions; dulwich already uses
         ``find_common_revisions()`` in such a manner (see
-        ``Repo.fetch_objects()``).
+        ``Repo.find_missing_objects()``).
         """
 
         expected_shas = {b"60dacdc733de308bb77bb76ce0fb0f9b44c9769e"}