Просмотр исходного кода

Rename pack-related object-format handling back to hash-format

Jelmer Vernooij 1 месяц назад
Родитель
Commit
69b3060b2e

+ 13 - 20
dulwich/commit_graph.py

@@ -180,7 +180,7 @@ class CommitGraph:
         )
         self.chunks: dict[bytes, CommitGraphChunk] = {}
         self.entries: list[CommitGraphEntry] = []
-        self._oid_to_index: dict[ObjectID, int] = {}
+        self._oid_to_index: dict[RawObjectID, int] = {}
 
     @classmethod
     def from_file(cls, f: BinaryIO) -> "CommitGraph":
@@ -256,7 +256,7 @@ class CommitGraph:
         for i in range(num_commits):
             start = i * self.object_format.oid_length
             end = start + self.object_format.oid_length
-            oid = oid_lookup_data[start:end]
+            oid = RawObjectID(oid_lookup_data[start:end])
             oids.append(oid)
             self._oid_to_index[oid] = i
 
@@ -307,14 +307,16 @@ class CommitGraph:
 
             entry = CommitGraphEntry(
                 commit_id=sha_to_hex(oids[i]),
-                tree_id=sha_to_hex(tree_id),
+                tree_id=sha_to_hex(RawObjectID(tree_id)),
                 parents=[sha_to_hex(p) for p in parents],
                 generation=generation,
                 commit_time=commit_time,
             )
             self.entries.append(entry)
 
-    def _parse_extra_edges(self, offset: int, oids: Sequence[bytes]) -> list[bytes]:
+    def _parse_extra_edges(
+        self, offset: int, oids: Sequence[RawObjectID]
+    ) -> list[RawObjectID]:
         """Parse extra parent edges for commits with 3+ parents."""
         if CHUNK_EXTRA_EDGE_LIST not in self.chunks:
             return []
@@ -342,10 +344,10 @@ class CommitGraph:
         # Convert hex ObjectID to binary if needed for lookup
         if isinstance(oid, bytes) and len(oid) == self.object_format.hex_length:
             # Input is hex ObjectID, convert to binary for internal lookup
-            lookup_oid = hex_to_sha(oid)
+            lookup_oid: RawObjectID = hex_to_sha(oid)
         else:
             # Input is already binary
-            lookup_oid = oid
+            lookup_oid = RawObjectID(oid)
         index = self._oid_to_index.get(lookup_oid)
         if index is not None:
             return self.entries[index]
@@ -574,19 +576,9 @@ def generate_commit_graph(
         # commit_id is already hex ObjectID from normalized_commit_ids
         commit_hex: ObjectID = commit_id
 
-        # Handle tree ID - might already be hex ObjectID
-        if isinstance(commit_obj.tree, bytes) and len(commit_obj.tree) == hex_length:
-            tree_hex = commit_obj.tree  # Already hex ObjectID
-        else:
-            tree_hex = sha_to_hex(commit_obj.tree)  # Binary, convert to hex
-
-        # Handle parent IDs - might already be hex ObjectIDs
-        parents_hex: list[ObjectID] = []
-        for parent_id in commit_obj.parents:
-            if isinstance(parent_id, bytes) and len(parent_id) == hex_length:
-                parents_hex.append(parent_id)  # Already hex ObjectID
-            else:
-                parents_hex.append(sha_to_hex(parent_id))  # Binary, convert to hex
+        # commit_obj.tree and commit_obj.parents are already ObjectIDs
+        tree_hex = commit_obj.tree
+        parents_hex: list[ObjectID] = commit_obj.parents
 
         entry = CommitGraphEntry(
             commit_id=commit_hex,
@@ -600,7 +592,8 @@ def generate_commit_graph(
     # Build the OID to index mapping for lookups
     graph._oid_to_index = {}
     for i, entry in enumerate(graph.entries):
-        graph._oid_to_index[entry.commit_id] = i
+        # Convert hex ObjectID to binary RawObjectID for consistent lookup
+        graph._oid_to_index[hex_to_sha(entry.commit_id)] = i
 
     return graph
 

+ 1 - 1
dulwich/dumb.py

@@ -447,7 +447,7 @@ class DumbRemoteHTTPRepo:
         """Get the peeled value of a ref."""
         # For dumb HTTP, we don't have peeled refs readily available
         # We would need to fetch and parse tag objects
-        sha = self.get_refs().get(ref, None)
+        sha: ObjectID | None = self.get_refs().get(ref, None)
         return sha if sha is not None else ZERO_SHA
 
     def fetch_pack_data(

+ 1 - 0
dulwich/fastexport.py

@@ -369,6 +369,7 @@ class GitImportProcessor(processor.ImportProcessor):  # type: ignore[misc,unused
 
     def reset_handler(self, cmd: commands.ResetCommand) -> None:
         """Process a ResetCommand."""
+        from_: ObjectID
         if cmd.from_ is None:
             from_ = ZERO_SHA
         else:

+ 6 - 2
dulwich/object_format.py

@@ -32,6 +32,8 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from _hashlib import HASH
 
+    from .objects import ObjectID, RawObjectID
+
 
 class ObjectFormat:
     """Object format (hash algorithm) used in Git."""
@@ -55,8 +57,10 @@ class ObjectFormat:
         self.oid_length = oid_length
         self.hex_length = hex_length
         self.hash_func = hash_func
-        self.zero_oid = b"0" * hex_length
-        self.zero_oid_bin = b"\x00" * oid_length
+        # Type annotations for proper ObjectID/RawObjectID types
+        # These are bytes at runtime but typed as NewType wrappers for type safety
+        self.zero_oid: ObjectID = b"0" * hex_length  # type: ignore[assignment]
+        self.zero_oid_bin: RawObjectID = b"\x00" * oid_length  # type: ignore[assignment]
 
     def __str__(self) -> str:
         """Return string representation."""

+ 12 - 9
dulwich/object_store.py

@@ -1189,11 +1189,12 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         """
         if name == ZERO_SHA:
             raise KeyError(name)
+        sha: RawObjectID
         if len(name) == self.object_format.hex_length:
-            sha = hex_to_sha(name)
+            sha = hex_to_sha(ObjectID(name))
             hexsha = name
         elif len(name) == self.object_format.oid_length:
-            sha = name
+            sha = RawObjectID(name)
             hexsha = None
         else:
             raise AssertionError(f"Invalid object name {name!r}")
@@ -1424,7 +1425,6 @@ class DiskObjectStore(PackBasedObjectStore):
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
           object_format: Hash algorithm to use (SHA1 or SHA256)
-          object_format: Hash algorithm to use (SHA1 or SHA256)
         """
         # Import here to avoid circular dependency
         from .object_format import DEFAULT_OBJECT_FORMAT
@@ -1745,7 +1745,12 @@ class DiskObjectStore(PackBasedObjectStore):
         path = self._get_shafile_path(sha)
         try:
             # Load the object from path with SHA and hash algorithm from object store
-            return ShaFile.from_path(path, sha, object_format=self.object_format)
+            # Convert to hex ObjectID if needed
+            if len(sha) == self.object_format.oid_length:
+                hex_sha: ObjectID = sha_to_hex(RawObjectID(sha))
+            else:
+                hex_sha = ObjectID(sha)
+            return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
         except FileNotFoundError:
             return None
 
@@ -2014,7 +2019,7 @@ class DiskObjectStore(PackBasedObjectStore):
           obj: Object to add
         """
         # Use the correct hash algorithm for the object ID
-        obj_id = obj.get_id(self.object_format)
+        obj_id = ObjectID(obj.get_id(self.object_format))
         path = self._get_shafile_path(obj_id)
         dir = os.path.dirname(path)
         try:
@@ -2334,7 +2339,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     # Binary SHA, convert to hex ObjectID
                     from .objects import sha_to_hex
 
-                    commit_ids.append(sha_to_hex(ref))
+                    commit_ids.append(sha_to_hex(RawObjectID(ref)))
                 else:
                     # Assume it's already correct format
                     commit_ids.append(ref)
@@ -2446,10 +2451,8 @@ class MemoryObjectStore(PackCapableObjectStore):
         Args:
             object_format: Hash algorithm to use (defaults to SHA1)
         """
-        super().__init__()
-        self._data: dict[ObjectID, ShaFile] = {}
         super().__init__(object_format=object_format)
-        self._data: dict[bytes, ShaFile] = {}
+        self._data: dict[ObjectID, ShaFile] = {}
         self.pack_compression_level = -1
 
     def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:

+ 3 - 4
dulwich/objects.py

@@ -113,8 +113,7 @@ if TYPE_CHECKING:
 
     from .file import _GitFile
 
-# Zero SHA constants for backward compatibility
-ZERO_SHA = b"0" * 40  # SHA1 - kept for backward compatibility
+# Zero SHA constants for backward compatibility - now defined below as ObjectID
 
 
 # Header fields for commits
@@ -1610,7 +1609,7 @@ class Tree(ShaFile):
         # TODO: list comprehension is for efficiency in the common (small)
         # case; if memory efficiency in the large case is a concern, use a
         # genexp.
-        self._entries = {n: (m, s) for n, m, s in parsed_entries}
+        self._entries = {n: (m, ObjectID(s)) for n, m, s in parsed_entries}
 
     def check(self) -> None:
         """Check this object for internal consistency.
@@ -1644,7 +1643,7 @@ class Tree(ShaFile):
             if mode not in allowed_modes:
                 raise ObjectFormatException(f"invalid mode {mode:06o}")
 
-            entry = (name, (mode, sha))
+            entry = (name, (mode, ObjectID(sha)))
             if last:
                 if key_entry(last) > key_entry(entry):
                     raise ObjectFormatException("entries not sorted")

+ 19 - 13
dulwich/pack.py

@@ -646,7 +646,7 @@ class PackIndex:
     """
 
     # Default to SHA-1 for backward compatibility
-    object_format = 1
+    hash_format = 1
     hash_size = 20
 
     def __eq__(self, other: object) -> bool:
@@ -806,9 +806,12 @@ class MemoryPackIndex(PackIndex):
           sha: SHA to look up (binary or hex)
         Returns: Offset in the pack file
         """
+        lookup_sha: RawObjectID
         if len(sha) == self.hash_size * 2:  # hex string
-            sha = hex_to_sha(sha)
-        return self._by_sha[sha]
+            lookup_sha = hex_to_sha(ObjectID(sha))
+        else:
+            lookup_sha = RawObjectID(sha)
+        return self._by_sha[lookup_sha]
 
     def object_sha1(self, offset: int) -> bytes:
         """Return the SHA1 for the object at the given offset."""
@@ -991,10 +994,13 @@ class FilePackIndex(PackIndex):
         lives at within the corresponding pack file. If the pack file doesn't
         have the object then None will be returned.
         """
+        lookup_sha: RawObjectID
         if len(sha) == self.hash_size * 2:  # hex string
-            sha = hex_to_sha(sha)
+            lookup_sha = hex_to_sha(ObjectID(sha))
+        else:
+            lookup_sha = RawObjectID(sha)
         try:
-            return self._object_offset(sha)
+            return self._object_offset(lookup_sha)
         except ValueError as exc:
             closed = getattr(self._contents, "closed", None)
             if closed in (None, True):
@@ -1078,7 +1084,7 @@ class PackIndex1(FilePackIndex):
         base_offset = (0x100 * 4) + (i * self._entry_size)
         offset = unpack_from(">L", self._contents, base_offset)[0]
         name = self._contents[base_offset + 4 : base_offset + 4 + self.hash_size]
-        return (name, offset, None)
+        return (RawObjectID(name), offset, None)
 
     def _unpack_name(self, i: int) -> bytes:
         offset = (0x100 * 4) + (i * self._entry_size) + 4
@@ -1200,13 +1206,13 @@ class PackIndex3(FilePackIndex):
             raise AssertionError(f"Version was {self.version}")
 
         # Read hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
-        (self.object_format,) = unpack_from(b">L", self._contents, 8)
-        if self.object_format == 1:
+        (self.hash_format,) = unpack_from(b">L", self._contents, 8)
+        if self.hash_format == 1:
             self.hash_size = 20  # SHA-1
-        elif self.object_format == 2:
+        elif self.hash_format == 2:
             self.hash_size = 32  # SHA-256
         else:
-            raise AssertionError(f"Unknown hash algorithm {self.object_format}")
+            raise AssertionError(f"Unknown hash algorithm {self.hash_format}")
 
         # Read length of shortened object names
         (self.shortened_oid_len,) = unpack_from(b">L", self._contents, 12)
@@ -3076,7 +3082,7 @@ def write_pack_from_container(
     deltify: bool | None = None,
     reuse_deltas: bool = True,
     compression_level: int = -1,
-    other_haves: set[bytes] | None = None,
+    other_haves: set[ObjectID] | None = None,
     object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
@@ -4093,7 +4099,7 @@ class Pack:
                     isinstance(basename, bytes)
                     and len(basename) == self.object_format.oid_length
                 )
-                base_offset_temp, base_type, base_obj = get_ref(basename)
+                base_offset_temp, base_type, base_obj = get_ref(RawObjectID(basename))
                 assert isinstance(base_type, int)
                 # base_offset_temp can be None for thin packs (external references)
                 base_offset = base_offset_temp
@@ -4186,7 +4192,7 @@ def extend_pack(
     compression_level: int = -1,
     progress: Callable[[bytes], None] | None = None,
     object_format: Optional["ObjectFormat"] = None,
-) -> tuple[bytes, list[tuple[bytes, int, int]]]:
+) -> tuple[bytes, list[tuple[RawObjectID, int, int]]]:
     """Extend a pack file with more objects.
 
     The caller should make sure that object_ids does not contain any objects

+ 4 - 4
dulwich/repo.py

@@ -1176,10 +1176,10 @@ class BaseRepo:
         elif len(name) == 40 and valid_hexsha(name):
             return ObjectID(name) in self.object_store or Ref(name) in self.refs
         # Check if it's a binary or hex SHA
-        if len(name) == self.object_format.oid_length or (
-            len(name) == self.object_format.hex_length and valid_hexsha(name)
-        ):
-            return name in self.object_store or name in self.refs
+        if len(name) == self.object_format.oid_length:
+            return RawObjectID(name) in self.object_store or Ref(name) in self.refs
+        elif len(name) == self.object_format.hex_length and valid_hexsha(name):
+            return ObjectID(name) in self.object_store or Ref(name) in self.refs
         else:
             return Ref(name) in self.refs
 

+ 3 - 1
dulwich/tests/test_object_store.py

@@ -81,10 +81,12 @@ class ObjectStoreTests:
 
     def test_determine_wants_all_zero(self) -> None:
         """Test determine_wants_all with zero ref."""
+        from dulwich.refs import Ref
+
         self.assertEqual(
             [],
             self.store.determine_wants_all(
-                {b"refs/heads/foo": self.store.object_format.zero_oid}
+                {Ref(b"refs/heads/foo"): self.store.object_format.zero_oid}
             ),
         )
 

+ 1 - 1
tests/compat/test_pack.py

@@ -253,7 +253,7 @@ class TestPackIndexCompat(PackTests):
         idx = load_pack_index(v3_path)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
-        self.assertEqual(idx.object_format, 1)  # SHA-1
+        self.assertEqual(idx.hash_format, 1)  # SHA-1
         self.assertEqual(idx.hash_size, 20)
 
         # Verify SHA-256 would raise NotImplementedError

+ 10 - 10
tests/compat/test_sha256.py

@@ -28,7 +28,7 @@ from dulwich.object_format import SHA256
 from dulwich.objects import Blob, Commit, Tree
 from dulwich.repo import Repo
 
-from .utils import CompatTestCase, run_git_or_fail
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
 
 
 class GitSHA256CompatibilityTests(CompatTestCase):
@@ -44,7 +44,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that dulwich-created SHA256 repos are readable by git."""
         # Create SHA256 repo with dulwich
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
         # Add a blob and tree using dulwich
@@ -75,7 +75,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that git-created SHA256 repos are readable by dulwich."""
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
 
         # Create a file and commit with git
@@ -111,7 +111,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that object hashing is consistent between dulwich and git."""
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
 
         # Create a test file with known content
@@ -135,7 +135,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that tree hashing is consistent between dulwich and git."""
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
 
         # Create a test file and add to index
@@ -164,7 +164,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test commit creation interoperability between dulwich and git."""
         # Create SHA256 repo with dulwich
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
         # Create objects with dulwich
@@ -207,7 +207,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that ref updates work between dulwich and git."""
         # Create repo with git
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
 
         # Create initial commit with git
@@ -260,7 +260,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test cloning a git SHA256 repository with dulwich."""
         # Create source repo with git
         source_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(source_path))
+        self.addCleanup(rmtree_ro, source_path)
         self._run_git(["init", "--object-format=sha256", source_path])
 
         # Add content
@@ -273,7 +273,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
 
         # Clone with dulwich
         target_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(target_path))
+        self.addCleanup(rmtree_ro, target_path)
 
         target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
 
@@ -311,7 +311,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that git fsck works on dulwich-created SHA256 repos."""
         # Create repo with dulwich
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
         # Create a more complex object graph

+ 6 - 6
tests/compat/test_sha256_packs.py

@@ -29,7 +29,7 @@ from dulwich.objects import Blob, Commit, Tree
 from dulwich.pack import load_pack_index_file
 from dulwich.repo import Repo
 
-from .utils import CompatTestCase, run_git_or_fail
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
 
 
 class GitSHA256PackCompatibilityTests(CompatTestCase):
@@ -45,7 +45,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test that git-created SHA256 pack files are readable by dulwich."""
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
 
         # Create multiple files to ensure pack creation
@@ -116,7 +116,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test that dulwich-created SHA256 objects are readable by git."""
         # Create SHA256 repo with dulwich
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
         # Create objects
@@ -170,7 +170,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test pack index v1 interoperability with SHA256."""
         # Create repo with git using pack index v1
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
 
@@ -213,7 +213,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test large pack file interoperability."""
         # Create repo with dulwich
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
         # Create a large file that will use delta compression
@@ -273,7 +273,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test repositories with both loose and packed objects."""
         # Create repo with git
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
 
         # Create initial objects that will be packed

+ 41 - 0
tests/test_commit_graph.py

@@ -435,6 +435,47 @@ class CommitGraphGenerationTests(unittest.TestCase):
         self.assertEqual(entry.generation, 1)
         self.assertEqual(entry.commit_time, 1234567890)
 
+    def test_generate_commit_graph_oid_index_uses_binary_keys(self) -> None:
+        """Test that generated commit graph _oid_to_index uses binary RawObjectID keys."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Commit, Tree, hex_to_sha
+
+        object_store = MemoryObjectStore()
+
+        # Create a tree and commit
+        tree = Tree()
+        object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = b"Test Author <test@example.com>"
+        commit.committer = b"Test Author <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit"
+        object_store.add_object(commit)
+
+        # Generate graph
+        graph = generate_commit_graph(object_store, [commit.id])
+
+        # Verify _oid_to_index uses binary keys (RawObjectID)
+        # commit.id is hex (ObjectID), so we need to convert to binary for lookup
+        binary_commit_id = hex_to_sha(commit.id)
+        self.assertIn(binary_commit_id, graph._oid_to_index)
+        self.assertEqual(graph._oid_to_index[binary_commit_id], 0)
+
+        # Verify lookup with hex ObjectID works via get_entry_by_oid
+        entry = graph.get_entry_by_oid(commit.id)
+        self.assertIsNotNone(entry)
+        assert entry is not None
+        self.assertEqual(entry.commit_id, commit.id)
+
+        # Verify lookup with binary RawObjectID also works
+        entry_binary = graph.get_entry_by_oid(binary_commit_id)
+        self.assertIsNotNone(entry_binary)
+        assert entry_binary is not None
+        self.assertEqual(entry_binary.commit_id, commit.id)
+
     def test_get_reachable_commits(self) -> None:
         """Test getting reachable commits."""
         from dulwich.object_store import MemoryObjectStore

+ 3 - 3
tests/test_filters.py

@@ -22,6 +22,8 @@
 """Tests for filters."""
 
 import os
+import shutil
+import sys
 import tempfile
 import threading
 from collections.abc import Iterator
@@ -711,11 +713,9 @@ class FilterContextTests(TestCase):
 
     def test_filter_context_with_real_process_filter(self):
         """Test FilterContext with real ProcessFilterDriver instances."""
-        import sys
-
         # Use existing test filter from ProcessFilterDriverTests
         test_dir = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(test_dir))
+        self.addCleanup(shutil.rmtree, test_dir)
 
         # Create a simple test filter that just passes data through
         filter_script = _PASSTHROUGH_FILTER_SCRIPT

+ 2 - 2
tests/test_pack.py

@@ -1130,7 +1130,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         idx = load_pack_index(filename)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
-        self.assertEqual(idx.object_format, 1)  # SHA-1
+        self.assertEqual(idx.hash_format, 1)  # SHA-1
         self.assertEqual(idx.hash_size, 20)
         self.assertEqual(idx.shortened_oid_len, 20)
 
@@ -1142,7 +1142,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         with GitFile(filename, "wb") as f:
             write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
         idx = load_pack_index(filename)
-        self.assertEqual(idx.object_format, 1)
+        self.assertEqual(idx.hash_format, 1)
         self.assertEqual(idx.hash_size, 20)
 
     def test_v3_sha256_length(self) -> None: