Browse Source

Fix ``ObjectStore.iterobjects_subset()`` when hex shas are passed for objects that live in packs.

Fixes #1166
Jelmer Vernooij 1 year ago
parent
commit
217768b343
4 changed files with 27 additions and 12 deletions
  1. +4 -0
      NEWS
  2. +11 -9
      dulwich/objects.py
  3. +3 -3
      dulwich/pack.py
  4. +9 -0
      dulwich/tests/test_pack.py

+ 4 - 0
NEWS

@@ -7,6 +7,10 @@
  * Support ``init.defaultBranch`` config.
    (Jelmer Vernooij)
 
+ * Fix ``ObjectStore.iterobjects_subset()`` when
+   hex shas are passed for objects that live in packs.
+   (Jelmer Vernooij, #1166)
+
 0.21.3	2023-02-17
 
  * Add support for ``worktreeconfig`` extension.

+ 11 - 9
dulwich/objects.py

@@ -34,6 +34,7 @@ from typing import (
     Tuple,
     Type,
     Union,
+    BinaryIO,
 )
 import zlib
 from collections import namedtuple
@@ -252,11 +253,11 @@ class FixedSha:
         self._hexsha = hexsha
         self._sha = hex_to_sha(hexsha)
 
-    def digest(self):
+    def digest(self) -> bytes:
         """Return the raw SHA digest."""
         return self._sha
 
-    def hexdigest(self):
+    def hexdigest(self) -> str:
         """Return the hex SHA digest."""
         return self._hexsha.decode("ascii")
 
@@ -273,7 +274,7 @@ class ShaFile:
     _sha: Union[FixedSha, None, HASH]
 
     @staticmethod
-    def _parse_legacy_object_header(magic, f) -> "ShaFile":
+    def _parse_legacy_object_header(magic, f: BinaryIO) -> "ShaFile":
         """Parse a legacy object, creating it but not reading the file."""
         bufsize = 1024
         decomp = zlib.decompressobj()
@@ -823,6 +824,7 @@ class Tag(ShaFile):
             if field == _OBJECT_HEADER:
                 self._object_sha = value
             elif field == _TYPE_HEADER:
+                assert isinstance(value, bytes)
                 obj_class = object_class(value)
                 if not obj_class:
                     raise ObjectFormatException("Not a known type: %s" % value)
@@ -895,7 +897,7 @@ class Tag(ShaFile):
                     self.as_raw_string(), mode=gpg.constants.sig.mode.DETACH
                 )
 
-    def verify(self, keyids: Optional[Iterable[str]] = None):
+    def verify(self, keyids: Optional[Iterable[str]] = None) -> None:
         """Verify GPG signature for this tag (if it is signed).
 
         Args:
@@ -937,7 +939,7 @@ class Tag(ShaFile):
 class TreeEntry(namedtuple("TreeEntry", ["path", "mode", "sha"])):
     """Named tuple encapsulating a single tree entry."""
 
-    def in_path(self, path):
+    def in_path(self, path: bytes):
         """Return a copy of this entry with the given path prepended."""
         if not isinstance(self.path, bytes):
             raise TypeError("Expected bytes for path, got %r" % path)
@@ -1011,11 +1013,11 @@ def sorted_tree_items(entries, name_order: bool):
         yield TreeEntry(name, mode, hexsha)
 
 
-def key_entry(entry):
+def key_entry(entry) -> bytes:
     """Sort key for tree entry.
 
     Args:
-      entry: (name, value) tuplee
+      entry: (name, value) tuple
     """
     (name, value) = entry
     if stat.S_ISDIR(value[0]):
@@ -1028,7 +1030,7 @@ def key_entry_name_order(entry):
     return entry[0]
 
 
-def pretty_format_tree_entry(name, mode, hexsha, encoding="utf-8"):
+def pretty_format_tree_entry(name, mode, hexsha, encoding="utf-8") -> str:
     """Pretty format tree entry.
 
     Args:
@@ -1185,7 +1187,7 @@ class Tree(ShaFile):
         return list(serialize_tree(self.iteritems()))
 
     def as_pretty_string(self):
-        text = []
+        text: List[str] = []
         for name, mode, hexsha in self.iteritems():
             text.append(pretty_format_tree_entry(name, mode, hexsha))
         return "".join(text)

+ 3 - 3
dulwich/pack.py

@@ -2389,16 +2389,16 @@ class Pack:
             PackInflater.for_pack_data(self.data, resolve_ext_ref=self.resolve_ext_ref)
         )
 
-    def iterobjects_subset(self, shas, *, allow_missing: bool = False) -> Iterator[ShaFile]:
+    def iterobjects_subset(self, shas: Iterable[ObjectID], *, allow_missing: bool = False) -> Iterator[ShaFile]:
         return (
             uo
             for uo in
             PackInflater.for_pack_subset(
                 self, shas, allow_missing=allow_missing,
                 resolve_ext_ref=self.resolve_ext_ref)
-            if uo.sha() in shas)
+            if uo.id in shas)
 
-    def iter_unpacked_subset(self, shas, *, include_comp: bool = False, allow_missing: bool = False, convert_ofs_delta: bool = False) -> Iterator[UnpackedObject]:
+    def iter_unpacked_subset(self, shas: Iterable[ObjectID], *, include_comp: bool = False, allow_missing: bool = False, convert_ofs_delta: bool = False) -> Iterator[UnpackedObject]:
         ofs_pending: Dict[int, List[UnpackedObject]] = defaultdict(list)
         ofs: Dict[bytes, int] = {}
         todo = set(shas)

+ 9 - 0
dulwich/tests/test_pack.py

@@ -494,6 +494,12 @@ class TestPack(PackTests):
             self.assertIsInstance(objs[tree_sha], Tree)
             self.assertIsInstance(objs[commit_sha], Commit)
 
+    def test_iterobjects_subset(self):
+        with self.get_pack(pack1_sha) as p:
+            objs = {o.id: o for o in p.iterobjects_subset([commit_sha])}
+            self.assertEqual(1, len(objs))
+            self.assertIsInstance(objs[commit_sha], Commit)
+
 
 class TestThinPack(PackTests):
     def setUp(self):
@@ -1003,6 +1009,9 @@ class DeltaChainIteratorTests(TestCase):
         self.assertEntriesMatch([], entries, self.make_pack_iter_subset(f, []))
         f.seek(0)
         self.assertEntriesMatch([1, 0], entries, self.make_pack_iter_subset(f, [entries[0][3], entries[1][3]]))
+        f.seek(0)
+        self.assertEntriesMatch(
+            [1, 0], entries, self.make_pack_iter_subset(f, [sha_to_hex(entries[0][3]), sha_to_hex(entries[1][3])]))
 
     def test_ofs_deltas(self):
         f = BytesIO()