Просмотр исходного кода

Rename pack-related object-format handling back to hash-format

Jelmer Vernooij 1 месяц назад
Родитель
Commit
69b3060b2e

+ 13 - 20
dulwich/commit_graph.py

@@ -180,7 +180,7 @@ class CommitGraph:
         )
         )
         self.chunks: dict[bytes, CommitGraphChunk] = {}
         self.chunks: dict[bytes, CommitGraphChunk] = {}
         self.entries: list[CommitGraphEntry] = []
         self.entries: list[CommitGraphEntry] = []
-        self._oid_to_index: dict[ObjectID, int] = {}
+        self._oid_to_index: dict[RawObjectID, int] = {}
 
 
     @classmethod
     @classmethod
     def from_file(cls, f: BinaryIO) -> "CommitGraph":
     def from_file(cls, f: BinaryIO) -> "CommitGraph":
@@ -256,7 +256,7 @@ class CommitGraph:
         for i in range(num_commits):
         for i in range(num_commits):
             start = i * self.object_format.oid_length
             start = i * self.object_format.oid_length
             end = start + self.object_format.oid_length
             end = start + self.object_format.oid_length
-            oid = oid_lookup_data[start:end]
+            oid = RawObjectID(oid_lookup_data[start:end])
             oids.append(oid)
             oids.append(oid)
             self._oid_to_index[oid] = i
             self._oid_to_index[oid] = i
 
 
@@ -307,14 +307,16 @@ class CommitGraph:
 
 
             entry = CommitGraphEntry(
             entry = CommitGraphEntry(
                 commit_id=sha_to_hex(oids[i]),
                 commit_id=sha_to_hex(oids[i]),
-                tree_id=sha_to_hex(tree_id),
+                tree_id=sha_to_hex(RawObjectID(tree_id)),
                 parents=[sha_to_hex(p) for p in parents],
                 parents=[sha_to_hex(p) for p in parents],
                 generation=generation,
                 generation=generation,
                 commit_time=commit_time,
                 commit_time=commit_time,
             )
             )
             self.entries.append(entry)
             self.entries.append(entry)
 
 
-    def _parse_extra_edges(self, offset: int, oids: Sequence[bytes]) -> list[bytes]:
+    def _parse_extra_edges(
+        self, offset: int, oids: Sequence[RawObjectID]
+    ) -> list[RawObjectID]:
         """Parse extra parent edges for commits with 3+ parents."""
         """Parse extra parent edges for commits with 3+ parents."""
         if CHUNK_EXTRA_EDGE_LIST not in self.chunks:
         if CHUNK_EXTRA_EDGE_LIST not in self.chunks:
             return []
             return []
@@ -342,10 +344,10 @@ class CommitGraph:
         # Convert hex ObjectID to binary if needed for lookup
         # Convert hex ObjectID to binary if needed for lookup
         if isinstance(oid, bytes) and len(oid) == self.object_format.hex_length:
         if isinstance(oid, bytes) and len(oid) == self.object_format.hex_length:
             # Input is hex ObjectID, convert to binary for internal lookup
             # Input is hex ObjectID, convert to binary for internal lookup
-            lookup_oid = hex_to_sha(oid)
+            lookup_oid: RawObjectID = hex_to_sha(oid)
         else:
         else:
             # Input is already binary
             # Input is already binary
-            lookup_oid = oid
+            lookup_oid = RawObjectID(oid)
         index = self._oid_to_index.get(lookup_oid)
         index = self._oid_to_index.get(lookup_oid)
         if index is not None:
         if index is not None:
             return self.entries[index]
             return self.entries[index]
@@ -574,19 +576,9 @@ def generate_commit_graph(
         # commit_id is already hex ObjectID from normalized_commit_ids
         # commit_id is already hex ObjectID from normalized_commit_ids
         commit_hex: ObjectID = commit_id
         commit_hex: ObjectID = commit_id
 
 
-        # Handle tree ID - might already be hex ObjectID
-        if isinstance(commit_obj.tree, bytes) and len(commit_obj.tree) == hex_length:
-            tree_hex = commit_obj.tree  # Already hex ObjectID
-        else:
-            tree_hex = sha_to_hex(commit_obj.tree)  # Binary, convert to hex
-
-        # Handle parent IDs - might already be hex ObjectIDs
-        parents_hex: list[ObjectID] = []
-        for parent_id in commit_obj.parents:
-            if isinstance(parent_id, bytes) and len(parent_id) == hex_length:
-                parents_hex.append(parent_id)  # Already hex ObjectID
-            else:
-                parents_hex.append(sha_to_hex(parent_id))  # Binary, convert to hex
+        # commit_obj.tree and commit_obj.parents are already ObjectIDs
+        tree_hex = commit_obj.tree
+        parents_hex: list[ObjectID] = commit_obj.parents
 
 
         entry = CommitGraphEntry(
         entry = CommitGraphEntry(
             commit_id=commit_hex,
             commit_id=commit_hex,
@@ -600,7 +592,8 @@ def generate_commit_graph(
     # Build the OID to index mapping for lookups
     # Build the OID to index mapping for lookups
     graph._oid_to_index = {}
     graph._oid_to_index = {}
     for i, entry in enumerate(graph.entries):
     for i, entry in enumerate(graph.entries):
-        graph._oid_to_index[entry.commit_id] = i
+        # Convert hex ObjectID to binary RawObjectID for consistent lookup
+        graph._oid_to_index[hex_to_sha(entry.commit_id)] = i
 
 
     return graph
     return graph
 
 

+ 1 - 1
dulwich/dumb.py

@@ -447,7 +447,7 @@ class DumbRemoteHTTPRepo:
         """Get the peeled value of a ref."""
         """Get the peeled value of a ref."""
         # For dumb HTTP, we don't have peeled refs readily available
         # For dumb HTTP, we don't have peeled refs readily available
         # We would need to fetch and parse tag objects
         # We would need to fetch and parse tag objects
-        sha = self.get_refs().get(ref, None)
+        sha: ObjectID | None = self.get_refs().get(ref, None)
         return sha if sha is not None else ZERO_SHA
         return sha if sha is not None else ZERO_SHA
 
 
     def fetch_pack_data(
     def fetch_pack_data(

+ 1 - 0
dulwich/fastexport.py

@@ -369,6 +369,7 @@ class GitImportProcessor(processor.ImportProcessor):  # type: ignore[misc,unused
 
 
     def reset_handler(self, cmd: commands.ResetCommand) -> None:
     def reset_handler(self, cmd: commands.ResetCommand) -> None:
         """Process a ResetCommand."""
         """Process a ResetCommand."""
+        from_: ObjectID
         if cmd.from_ is None:
         if cmd.from_ is None:
             from_ = ZERO_SHA
             from_ = ZERO_SHA
         else:
         else:

+ 6 - 2
dulwich/object_format.py

@@ -32,6 +32,8 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
 if TYPE_CHECKING:
     from _hashlib import HASH
     from _hashlib import HASH
 
 
+    from .objects import ObjectID, RawObjectID
+
 
 
 class ObjectFormat:
 class ObjectFormat:
     """Object format (hash algorithm) used in Git."""
     """Object format (hash algorithm) used in Git."""
@@ -55,8 +57,10 @@ class ObjectFormat:
         self.oid_length = oid_length
         self.oid_length = oid_length
         self.hex_length = hex_length
         self.hex_length = hex_length
         self.hash_func = hash_func
         self.hash_func = hash_func
-        self.zero_oid = b"0" * hex_length
-        self.zero_oid_bin = b"\x00" * oid_length
+        # Type annotations for proper ObjectID/RawObjectID types
+        # These are bytes at runtime but typed as NewType wrappers for type safety
+        self.zero_oid: ObjectID = b"0" * hex_length  # type: ignore[assignment]
+        self.zero_oid_bin: RawObjectID = b"\x00" * oid_length  # type: ignore[assignment]
 
 
     def __str__(self) -> str:
     def __str__(self) -> str:
         """Return string representation."""
         """Return string representation."""

+ 12 - 9
dulwich/object_store.py

@@ -1189,11 +1189,12 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         """
         """
         if name == ZERO_SHA:
         if name == ZERO_SHA:
             raise KeyError(name)
             raise KeyError(name)
+        sha: RawObjectID
         if len(name) == self.object_format.hex_length:
         if len(name) == self.object_format.hex_length:
-            sha = hex_to_sha(name)
+            sha = hex_to_sha(ObjectID(name))
             hexsha = name
             hexsha = name
         elif len(name) == self.object_format.oid_length:
         elif len(name) == self.object_format.oid_length:
-            sha = name
+            sha = RawObjectID(name)
             hexsha = None
             hexsha = None
         else:
         else:
             raise AssertionError(f"Invalid object name {name!r}")
             raise AssertionError(f"Invalid object name {name!r}")
@@ -1424,7 +1425,6 @@ class DiskObjectStore(PackBasedObjectStore):
           file_mode: File permission mask for shared repository
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
           object_format: Hash algorithm to use (SHA1 or SHA256)
           object_format: Hash algorithm to use (SHA1 or SHA256)
-          object_format: Hash algorithm to use (SHA1 or SHA256)
         """
         """
         # Import here to avoid circular dependency
         # Import here to avoid circular dependency
         from .object_format import DEFAULT_OBJECT_FORMAT
         from .object_format import DEFAULT_OBJECT_FORMAT
@@ -1745,7 +1745,12 @@ class DiskObjectStore(PackBasedObjectStore):
         path = self._get_shafile_path(sha)
         path = self._get_shafile_path(sha)
         try:
         try:
             # Load the object from path with SHA and hash algorithm from object store
             # Load the object from path with SHA and hash algorithm from object store
-            return ShaFile.from_path(path, sha, object_format=self.object_format)
+            # Convert to hex ObjectID if needed
+            if len(sha) == self.object_format.oid_length:
+                hex_sha: ObjectID = sha_to_hex(RawObjectID(sha))
+            else:
+                hex_sha = ObjectID(sha)
+            return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
         except FileNotFoundError:
         except FileNotFoundError:
             return None
             return None
 
 
@@ -2014,7 +2019,7 @@ class DiskObjectStore(PackBasedObjectStore):
           obj: Object to add
           obj: Object to add
         """
         """
         # Use the correct hash algorithm for the object ID
         # Use the correct hash algorithm for the object ID
-        obj_id = obj.get_id(self.object_format)
+        obj_id = ObjectID(obj.get_id(self.object_format))
         path = self._get_shafile_path(obj_id)
         path = self._get_shafile_path(obj_id)
         dir = os.path.dirname(path)
         dir = os.path.dirname(path)
         try:
         try:
@@ -2334,7 +2339,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     # Binary SHA, convert to hex ObjectID
                     # Binary SHA, convert to hex ObjectID
                     from .objects import sha_to_hex
                     from .objects import sha_to_hex
 
 
-                    commit_ids.append(sha_to_hex(ref))
+                    commit_ids.append(sha_to_hex(RawObjectID(ref)))
                 else:
                 else:
                     # Assume it's already correct format
                     # Assume it's already correct format
                     commit_ids.append(ref)
                     commit_ids.append(ref)
@@ -2446,10 +2451,8 @@ class MemoryObjectStore(PackCapableObjectStore):
         Args:
         Args:
             object_format: Hash algorithm to use (defaults to SHA1)
             object_format: Hash algorithm to use (defaults to SHA1)
         """
         """
-        super().__init__()
-        self._data: dict[ObjectID, ShaFile] = {}
         super().__init__(object_format=object_format)
         super().__init__(object_format=object_format)
-        self._data: dict[bytes, ShaFile] = {}
+        self._data: dict[ObjectID, ShaFile] = {}
         self.pack_compression_level = -1
         self.pack_compression_level = -1
 
 
     def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
     def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:

+ 3 - 4
dulwich/objects.py

@@ -113,8 +113,7 @@ if TYPE_CHECKING:
 
 
     from .file import _GitFile
     from .file import _GitFile
 
 
-# Zero SHA constants for backward compatibility
-ZERO_SHA = b"0" * 40  # SHA1 - kept for backward compatibility
+# Zero SHA constants for backward compatibility - now defined below as ObjectID
 
 
 
 
 # Header fields for commits
 # Header fields for commits
@@ -1610,7 +1609,7 @@ class Tree(ShaFile):
         # TODO: list comprehension is for efficiency in the common (small)
         # TODO: list comprehension is for efficiency in the common (small)
         # case; if memory efficiency in the large case is a concern, use a
         # case; if memory efficiency in the large case is a concern, use a
         # genexp.
         # genexp.
-        self._entries = {n: (m, s) for n, m, s in parsed_entries}
+        self._entries = {n: (m, ObjectID(s)) for n, m, s in parsed_entries}
 
 
     def check(self) -> None:
     def check(self) -> None:
         """Check this object for internal consistency.
         """Check this object for internal consistency.
@@ -1644,7 +1643,7 @@ class Tree(ShaFile):
             if mode not in allowed_modes:
             if mode not in allowed_modes:
                 raise ObjectFormatException(f"invalid mode {mode:06o}")
                 raise ObjectFormatException(f"invalid mode {mode:06o}")
 
 
-            entry = (name, (mode, sha))
+            entry = (name, (mode, ObjectID(sha)))
             if last:
             if last:
                 if key_entry(last) > key_entry(entry):
                 if key_entry(last) > key_entry(entry):
                     raise ObjectFormatException("entries not sorted")
                     raise ObjectFormatException("entries not sorted")

+ 19 - 13
dulwich/pack.py

@@ -646,7 +646,7 @@ class PackIndex:
     """
     """
 
 
     # Default to SHA-1 for backward compatibility
     # Default to SHA-1 for backward compatibility
-    object_format = 1
+    hash_format = 1
     hash_size = 20
     hash_size = 20
 
 
     def __eq__(self, other: object) -> bool:
     def __eq__(self, other: object) -> bool:
@@ -806,9 +806,12 @@ class MemoryPackIndex(PackIndex):
           sha: SHA to look up (binary or hex)
           sha: SHA to look up (binary or hex)
         Returns: Offset in the pack file
         Returns: Offset in the pack file
         """
         """
+        lookup_sha: RawObjectID
         if len(sha) == self.hash_size * 2:  # hex string
         if len(sha) == self.hash_size * 2:  # hex string
-            sha = hex_to_sha(sha)
-        return self._by_sha[sha]
+            lookup_sha = hex_to_sha(ObjectID(sha))
+        else:
+            lookup_sha = RawObjectID(sha)
+        return self._by_sha[lookup_sha]
 
 
     def object_sha1(self, offset: int) -> bytes:
     def object_sha1(self, offset: int) -> bytes:
         """Return the SHA1 for the object at the given offset."""
         """Return the SHA1 for the object at the given offset."""
@@ -991,10 +994,13 @@ class FilePackIndex(PackIndex):
         lives at within the corresponding pack file. If the pack file doesn't
         lives at within the corresponding pack file. If the pack file doesn't
         have the object then None will be returned.
         have the object then None will be returned.
         """
         """
+        lookup_sha: RawObjectID
         if len(sha) == self.hash_size * 2:  # hex string
         if len(sha) == self.hash_size * 2:  # hex string
-            sha = hex_to_sha(sha)
+            lookup_sha = hex_to_sha(ObjectID(sha))
+        else:
+            lookup_sha = RawObjectID(sha)
         try:
         try:
-            return self._object_offset(sha)
+            return self._object_offset(lookup_sha)
         except ValueError as exc:
         except ValueError as exc:
             closed = getattr(self._contents, "closed", None)
             closed = getattr(self._contents, "closed", None)
             if closed in (None, True):
             if closed in (None, True):
@@ -1078,7 +1084,7 @@ class PackIndex1(FilePackIndex):
         base_offset = (0x100 * 4) + (i * self._entry_size)
         base_offset = (0x100 * 4) + (i * self._entry_size)
         offset = unpack_from(">L", self._contents, base_offset)[0]
         offset = unpack_from(">L", self._contents, base_offset)[0]
         name = self._contents[base_offset + 4 : base_offset + 4 + self.hash_size]
         name = self._contents[base_offset + 4 : base_offset + 4 + self.hash_size]
-        return (name, offset, None)
+        return (RawObjectID(name), offset, None)
 
 
     def _unpack_name(self, i: int) -> bytes:
     def _unpack_name(self, i: int) -> bytes:
         offset = (0x100 * 4) + (i * self._entry_size) + 4
         offset = (0x100 * 4) + (i * self._entry_size) + 4
@@ -1200,13 +1206,13 @@ class PackIndex3(FilePackIndex):
             raise AssertionError(f"Version was {self.version}")
             raise AssertionError(f"Version was {self.version}")
 
 
         # Read hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
         # Read hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
-        (self.object_format,) = unpack_from(b">L", self._contents, 8)
-        if self.object_format == 1:
+        (self.hash_format,) = unpack_from(b">L", self._contents, 8)
+        if self.hash_format == 1:
             self.hash_size = 20  # SHA-1
             self.hash_size = 20  # SHA-1
-        elif self.object_format == 2:
+        elif self.hash_format == 2:
             self.hash_size = 32  # SHA-256
             self.hash_size = 32  # SHA-256
         else:
         else:
-            raise AssertionError(f"Unknown hash algorithm {self.object_format}")
+            raise AssertionError(f"Unknown hash algorithm {self.hash_format}")
 
 
         # Read length of shortened object names
         # Read length of shortened object names
         (self.shortened_oid_len,) = unpack_from(b">L", self._contents, 12)
         (self.shortened_oid_len,) = unpack_from(b">L", self._contents, 12)
@@ -3076,7 +3082,7 @@ def write_pack_from_container(
     deltify: bool | None = None,
     deltify: bool | None = None,
     reuse_deltas: bool = True,
     reuse_deltas: bool = True,
     compression_level: int = -1,
     compression_level: int = -1,
-    other_haves: set[bytes] | None = None,
+    other_haves: set[ObjectID] | None = None,
     object_format: Optional["ObjectFormat"] = None,
     object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
     """Write a new pack data file.
@@ -4093,7 +4099,7 @@ class Pack:
                     isinstance(basename, bytes)
                     isinstance(basename, bytes)
                     and len(basename) == self.object_format.oid_length
                     and len(basename) == self.object_format.oid_length
                 )
                 )
-                base_offset_temp, base_type, base_obj = get_ref(basename)
+                base_offset_temp, base_type, base_obj = get_ref(RawObjectID(basename))
                 assert isinstance(base_type, int)
                 assert isinstance(base_type, int)
                 # base_offset_temp can be None for thin packs (external references)
                 # base_offset_temp can be None for thin packs (external references)
                 base_offset = base_offset_temp
                 base_offset = base_offset_temp
@@ -4186,7 +4192,7 @@ def extend_pack(
     compression_level: int = -1,
     compression_level: int = -1,
     progress: Callable[[bytes], None] | None = None,
     progress: Callable[[bytes], None] | None = None,
     object_format: Optional["ObjectFormat"] = None,
     object_format: Optional["ObjectFormat"] = None,
-) -> tuple[bytes, list[tuple[bytes, int, int]]]:
+) -> tuple[bytes, list[tuple[RawObjectID, int, int]]]:
     """Extend a pack file with more objects.
     """Extend a pack file with more objects.
 
 
     The caller should make sure that object_ids does not contain any objects
     The caller should make sure that object_ids does not contain any objects

+ 4 - 4
dulwich/repo.py

@@ -1176,10 +1176,10 @@ class BaseRepo:
         elif len(name) == 40 and valid_hexsha(name):
         elif len(name) == 40 and valid_hexsha(name):
             return ObjectID(name) in self.object_store or Ref(name) in self.refs
             return ObjectID(name) in self.object_store or Ref(name) in self.refs
         # Check if it's a binary or hex SHA
         # Check if it's a binary or hex SHA
-        if len(name) == self.object_format.oid_length or (
-            len(name) == self.object_format.hex_length and valid_hexsha(name)
-        ):
-            return name in self.object_store or name in self.refs
+        if len(name) == self.object_format.oid_length:
+            return RawObjectID(name) in self.object_store or Ref(name) in self.refs
+        elif len(name) == self.object_format.hex_length and valid_hexsha(name):
+            return ObjectID(name) in self.object_store or Ref(name) in self.refs
         else:
         else:
             return Ref(name) in self.refs
             return Ref(name) in self.refs
 
 

+ 3 - 1
dulwich/tests/test_object_store.py

@@ -81,10 +81,12 @@ class ObjectStoreTests:
 
 
     def test_determine_wants_all_zero(self) -> None:
     def test_determine_wants_all_zero(self) -> None:
         """Test determine_wants_all with zero ref."""
         """Test determine_wants_all with zero ref."""
+        from dulwich.refs import Ref
+
         self.assertEqual(
         self.assertEqual(
             [],
             [],
             self.store.determine_wants_all(
             self.store.determine_wants_all(
-                {b"refs/heads/foo": self.store.object_format.zero_oid}
+                {Ref(b"refs/heads/foo"): self.store.object_format.zero_oid}
             ),
             ),
         )
         )
 
 

+ 1 - 1
tests/compat/test_pack.py

@@ -253,7 +253,7 @@ class TestPackIndexCompat(PackTests):
         idx = load_pack_index(v3_path)
         idx = load_pack_index(v3_path)
         self.assertIsInstance(idx, PackIndex3)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
         self.assertEqual(idx.version, 3)
-        self.assertEqual(idx.object_format, 1)  # SHA-1
+        self.assertEqual(idx.hash_format, 1)  # SHA-1
         self.assertEqual(idx.hash_size, 20)
         self.assertEqual(idx.hash_size, 20)
 
 
         # Verify SHA-256 would raise NotImplementedError
         # Verify SHA-256 would raise NotImplementedError

+ 10 - 10
tests/compat/test_sha256.py

@@ -28,7 +28,7 @@ from dulwich.object_format import SHA256
 from dulwich.objects import Blob, Commit, Tree
 from dulwich.objects import Blob, Commit, Tree
 from dulwich.repo import Repo
 from dulwich.repo import Repo
 
 
-from .utils import CompatTestCase, run_git_or_fail
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
 
 
 
 
 class GitSHA256CompatibilityTests(CompatTestCase):
 class GitSHA256CompatibilityTests(CompatTestCase):
@@ -44,7 +44,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that dulwich-created SHA256 repos are readable by git."""
         """Test that dulwich-created SHA256 repos are readable by git."""
         # Create SHA256 repo with dulwich
         # Create SHA256 repo with dulwich
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
 
         # Add a blob and tree using dulwich
         # Add a blob and tree using dulwich
@@ -75,7 +75,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that git-created SHA256 repos are readable by dulwich."""
         """Test that git-created SHA256 repos are readable by dulwich."""
         # Create SHA256 repo with git
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
 
 
         # Create a file and commit with git
         # Create a file and commit with git
@@ -111,7 +111,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that object hashing is consistent between dulwich and git."""
         """Test that object hashing is consistent between dulwich and git."""
         # Create SHA256 repo with git
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
 
 
         # Create a test file with known content
         # Create a test file with known content
@@ -135,7 +135,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that tree hashing is consistent between dulwich and git."""
         """Test that tree hashing is consistent between dulwich and git."""
         # Create SHA256 repo with git
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
 
 
         # Create a test file and add to index
         # Create a test file and add to index
@@ -164,7 +164,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test commit creation interoperability between dulwich and git."""
         """Test commit creation interoperability between dulwich and git."""
         # Create SHA256 repo with dulwich
         # Create SHA256 repo with dulwich
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
 
         # Create objects with dulwich
         # Create objects with dulwich
@@ -207,7 +207,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that ref updates work between dulwich and git."""
         """Test that ref updates work between dulwich and git."""
         # Create repo with git
         # Create repo with git
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
 
 
         # Create initial commit with git
         # Create initial commit with git
@@ -260,7 +260,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test cloning a git SHA256 repository with dulwich."""
         """Test cloning a git SHA256 repository with dulwich."""
         # Create source repo with git
         # Create source repo with git
         source_path = tempfile.mkdtemp()
         source_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(source_path))
+        self.addCleanup(rmtree_ro, source_path)
         self._run_git(["init", "--object-format=sha256", source_path])
         self._run_git(["init", "--object-format=sha256", source_path])
 
 
         # Add content
         # Add content
@@ -273,7 +273,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
 
 
         # Clone with dulwich
         # Clone with dulwich
         target_path = tempfile.mkdtemp()
         target_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(target_path))
+        self.addCleanup(rmtree_ro, target_path)
 
 
         target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
         target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
 
 
@@ -311,7 +311,7 @@ class GitSHA256CompatibilityTests(CompatTestCase):
         """Test that git fsck works on dulwich-created SHA256 repos."""
         """Test that git fsck works on dulwich-created SHA256 repos."""
         # Create repo with dulwich
         # Create repo with dulwich
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
 
         # Create a more complex object graph
         # Create a more complex object graph

+ 6 - 6
tests/compat/test_sha256_packs.py

@@ -29,7 +29,7 @@ from dulwich.objects import Blob, Commit, Tree
 from dulwich.pack import load_pack_index_file
 from dulwich.pack import load_pack_index_file
 from dulwich.repo import Repo
 from dulwich.repo import Repo
 
 
-from .utils import CompatTestCase, run_git_or_fail
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
 
 
 
 
 class GitSHA256PackCompatibilityTests(CompatTestCase):
 class GitSHA256PackCompatibilityTests(CompatTestCase):
@@ -45,7 +45,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test that git-created SHA256 pack files are readable by dulwich."""
         """Test that git-created SHA256 pack files are readable by dulwich."""
         # Create SHA256 repo with git
         # Create SHA256 repo with git
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
 
 
         # Create multiple files to ensure pack creation
         # Create multiple files to ensure pack creation
@@ -116,7 +116,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test that dulwich-created SHA256 objects are readable by git."""
         """Test that dulwich-created SHA256 objects are readable by git."""
         # Create SHA256 repo with dulwich
         # Create SHA256 repo with dulwich
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
 
         # Create objects
         # Create objects
@@ -170,7 +170,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test pack index v1 interoperability with SHA256."""
         """Test pack index v1 interoperability with SHA256."""
         # Create repo with git using pack index v1
         # Create repo with git using pack index v1
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
         self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
 
 
@@ -213,7 +213,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test large pack file interoperability."""
         """Test large pack file interoperability."""
         # Create repo with dulwich
         # Create repo with dulwich
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
         repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
 
 
         # Create a large file that will use delta compression
         # Create a large file that will use delta compression
@@ -273,7 +273,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         """Test repositories with both loose and packed objects."""
         """Test repositories with both loose and packed objects."""
         # Create repo with git
         # Create repo with git
         repo_path = tempfile.mkdtemp()
         repo_path = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self.addCleanup(rmtree_ro, repo_path)
         self._run_git(["init", "--object-format=sha256", repo_path])
         self._run_git(["init", "--object-format=sha256", repo_path])
 
 
         # Create initial objects that will be packed
         # Create initial objects that will be packed

+ 41 - 0
tests/test_commit_graph.py

@@ -435,6 +435,47 @@ class CommitGraphGenerationTests(unittest.TestCase):
         self.assertEqual(entry.generation, 1)
         self.assertEqual(entry.generation, 1)
         self.assertEqual(entry.commit_time, 1234567890)
         self.assertEqual(entry.commit_time, 1234567890)
 
 
+    def test_generate_commit_graph_oid_index_uses_binary_keys(self) -> None:
+        """Test that generated commit graph _oid_to_index uses binary RawObjectID keys."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Commit, Tree, hex_to_sha
+
+        object_store = MemoryObjectStore()
+
+        # Create a tree and commit
+        tree = Tree()
+        object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = b"Test Author <test@example.com>"
+        commit.committer = b"Test Author <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit"
+        object_store.add_object(commit)
+
+        # Generate graph
+        graph = generate_commit_graph(object_store, [commit.id])
+
+        # Verify _oid_to_index uses binary keys (RawObjectID)
+        # commit.id is hex (ObjectID), so we need to convert to binary for lookup
+        binary_commit_id = hex_to_sha(commit.id)
+        self.assertIn(binary_commit_id, graph._oid_to_index)
+        self.assertEqual(graph._oid_to_index[binary_commit_id], 0)
+
+        # Verify lookup with hex ObjectID works via get_entry_by_oid
+        entry = graph.get_entry_by_oid(commit.id)
+        self.assertIsNotNone(entry)
+        assert entry is not None
+        self.assertEqual(entry.commit_id, commit.id)
+
+        # Verify lookup with binary RawObjectID also works
+        entry_binary = graph.get_entry_by_oid(binary_commit_id)
+        self.assertIsNotNone(entry_binary)
+        assert entry_binary is not None
+        self.assertEqual(entry_binary.commit_id, commit.id)
+
     def test_get_reachable_commits(self) -> None:
     def test_get_reachable_commits(self) -> None:
         """Test getting reachable commits."""
         """Test getting reachable commits."""
         from dulwich.object_store import MemoryObjectStore
         from dulwich.object_store import MemoryObjectStore

+ 3 - 3
tests/test_filters.py

@@ -22,6 +22,8 @@
 """Tests for filters."""
 """Tests for filters."""
 
 
 import os
 import os
+import shutil
+import sys
 import tempfile
 import tempfile
 import threading
 import threading
 from collections.abc import Iterator
 from collections.abc import Iterator
@@ -711,11 +713,9 @@ class FilterContextTests(TestCase):
 
 
     def test_filter_context_with_real_process_filter(self):
     def test_filter_context_with_real_process_filter(self):
         """Test FilterContext with real ProcessFilterDriver instances."""
         """Test FilterContext with real ProcessFilterDriver instances."""
-        import sys
-
         # Use existing test filter from ProcessFilterDriverTests
         # Use existing test filter from ProcessFilterDriverTests
         test_dir = tempfile.mkdtemp()
         test_dir = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(test_dir))
+        self.addCleanup(shutil.rmtree, test_dir)
 
 
         # Create a simple test filter that just passes data through
         # Create a simple test filter that just passes data through
         filter_script = _PASSTHROUGH_FILTER_SCRIPT
         filter_script = _PASSTHROUGH_FILTER_SCRIPT

+ 2 - 2
tests/test_pack.py

@@ -1130,7 +1130,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         idx = load_pack_index(filename)
         idx = load_pack_index(filename)
         self.assertIsInstance(idx, PackIndex3)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
         self.assertEqual(idx.version, 3)
-        self.assertEqual(idx.object_format, 1)  # SHA-1
+        self.assertEqual(idx.hash_format, 1)  # SHA-1
         self.assertEqual(idx.hash_size, 20)
         self.assertEqual(idx.hash_size, 20)
         self.assertEqual(idx.shortened_oid_len, 20)
         self.assertEqual(idx.shortened_oid_len, 20)
 
 
@@ -1142,7 +1142,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         with GitFile(filename, "wb") as f:
         with GitFile(filename, "wb") as f:
             write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
             write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
         idx = load_pack_index(filename)
         idx = load_pack_index(filename)
-        self.assertEqual(idx.object_format, 1)
+        self.assertEqual(idx.hash_format, 1)
         self.assertEqual(idx.hash_size, 20)
         self.assertEqual(idx.hash_size, 20)
 
 
     def test_v3_sha256_length(self) -> None:
     def test_v3_sha256_length(self) -> None: