
Make object_format mandatory for pack index loading

Jelmer Vernooij 1 month ago
parent
commit
143ca5814e
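
In short: pack-loading and pack-writing APIs no longer fall back to SHA-1 behind a DeprecationWarning; callers now pass an ObjectFormat explicitly. A minimal sketch of the new calling convention (the pack basename is hypothetical):

    from dulwich.object_format import SHA1
    from dulwich.pack import Pack, load_pack_index

    # object_format used to be optional and defaulted to SHA-1;
    # after this commit it is a required argument.
    idx = load_pack_index("pack-abc123.idx", SHA1)
    with Pack("pack-abc123", object_format=SHA1) as pack:
        pack.check()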

+ 1 - 0
dulwich/bundle.py

@@ -168,6 +168,7 @@ def _read_bundle(f: BinaryIO, version: int) -> Bundle:
     from .object_format import DEFAULT_OBJECT_FORMAT
 
     pack_file = BytesIO(pack_bytes)
+    # TODO: Support specifying object format based on bundle metadata
     pack_data = PackData.from_file(pack_file, object_format=DEFAULT_OBJECT_FORMAT)
     ret = Bundle()
     ret.references = references
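
For context on the TODO above: bundle format v3 headers can carry an object-format capability (e.g. sha256), while v2 bundles are always SHA-1. A hypothetical resolution, assuming a parsed capabilities mapping were available at this point (it is not part of this commit):

    # Hypothetical sketch only:
    name = capabilities.get("object-format", "sha1")
    pack_data = PackData.from_file(pack_file, OBJECT_FORMATS[name])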

+ 10 - 1
dulwich/cli.py

@@ -1598,10 +1598,19 @@ class cmd_dump_pack(Command):
         """
         parser = argparse.ArgumentParser()
         parser.add_argument("filename", help="Pack file to dump")
+        parser.add_argument(
+            "--object-format",
+            choices=["sha1", "sha256"],
+            default="sha1",
+            help="Object format (hash algorithm) used in the pack file",
+        )
         parsed_args = parser.parse_args(args)
 
+        from .object_format import OBJECT_FORMAT_NAMES
+
+        object_format = OBJECT_FORMAT_NAMES[parsed_args.object_format]
         basename, _ = os.path.splitext(parsed_args.filename)
-        x = Pack(basename)
+        x = Pack(basename, object_format=object_format)
         logger.info("Object names checksum: %s", x.name().decode("ascii", "replace"))
         logger.info("Checksum: %r", sha_to_hex(RawObjectID(x.get_stored_checksum())))
         x.check()
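
For reference, the new flag boils down to a name-to-format lookup before opening the pack. OBJECT_FORMAT_NAMES itself is not shown in this diff, but the OBJECT_FORMATS map in dulwich/object_format.py (below) provides the same kind of lookup:

    from dulwich.object_format import OBJECT_FORMATS

    # e.g. --object-format=sha256 on the command line:
    fmt = OBJECT_FORMATS["sha256"]
    assert fmt.oid_length == 32 and fmt.hex_length == 64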

+ 7 - 2
dulwich/client.py

@@ -1631,7 +1631,9 @@ class TraditionalGitClient(GitClient):
 
             if self._should_send_pack(new_refs):
                 for chunk in PackChunkGenerator(
-                    pack_data_count, pack_data, object_format=DEFAULT_OBJECT_FORMAT
+                    num_records=pack_data_count,
+                    records=pack_data,
+                    object_format=DEFAULT_OBJECT_FORMAT,
                 ):
                     proto.write(chunk)
 
@@ -3993,7 +3995,10 @@ class AbstractHttpGitClient(GitClient):
             )
             if self._should_send_pack(new_refs):
                 yield from PackChunkGenerator(
-                    pack_data_count, pack_data, object_format=DEFAULT_OBJECT_FORMAT
+                    # TODO: Don't hardcode object format
+                    num_records=pack_data_count,
+                    records=pack_data,
+                    object_format=DEFAULT_OBJECT_FORMAT,
                 )
 
         resp, read = self._smart_request("git-receive-pack", url, data=body_generator())

+ 4 - 3
dulwich/cloud/gcs.py

@@ -87,18 +87,19 @@ class GcsObjectStore(BucketBasedObjectStore):
         with tempfile.SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE) as f:
             b.download_to_file(f)
             f.seek(0)
-            return PackData(name + ".pack", cast(_GitFile, f))
+            return PackData(name + ".pack", self.object_format, cast(_GitFile, f))
 
     def _load_pack_index(self, name: str) -> PackIndex:
         b = self.bucket.blob(posixpath.join(self.subpath, name + ".idx"))
         with tempfile.SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE) as f:
             b.download_to_file(f)
             f.seek(0)
-            return load_pack_index_file(name + ".idx", f)
+            return load_pack_index_file(name + ".idx", f, self.object_format)
 
     def _get_pack(self, name: str) -> Pack:
         return Pack.from_lazy_objects(
-            lambda: self._load_pack_data(name), lambda: self._load_pack_index(name)
+            lambda: self._load_pack_data(name),
+            lambda: self._load_pack_index(name),
         )
 
     def _upload_pack(

+ 24 - 11
dulwich/contrib/swift.py

@@ -197,12 +197,15 @@ def load_conf(path: str | None = None, file: str | None = None) -> ConfigParser:
     return conf
 
 
-def swift_load_pack_index(scon: "SwiftConnector", filename: str) -> "PackIndex":
+def swift_load_pack_index(
+    scon: "SwiftConnector", filename: str, object_format: ObjectFormat
+) -> "PackIndex":
     """Read a pack index file from Swift.
 
     Args:
       scon: a `SwiftConnector` instance
      filename: Path to the index file
+      object_format: Object format for this pack
    Returns: a `PackIndex` instance
     """
     f = scon.get_object(filename)
@@ -210,7 +213,7 @@ def swift_load_pack_index(scon: "SwiftConnector", filename: str) -> "PackIndex":
         raise Exception(f"Could not retrieve index file {filename}")
     if isinstance(f, bytes):
         f = BytesIO(f)
-    return load_pack_index_file(filename, f)
+    return load_pack_index_file(filename, f, object_format)
 
 
 def pack_info_create(pack_data: "PackData", pack_index: "PackIndex") -> bytes:
@@ -223,7 +226,9 @@ def pack_info_create(pack_data: "PackData", pack_index: "PackIndex") -> bytes:
     Returns:
       Compressed JSON bytes containing pack information
     """
-    pack = Pack.from_objects(pack_data, pack_index)
+    pack = Pack.from_objects(
+        pack_data, pack_index
+    )
     info: dict[bytes, Any] = {}
     for obj in pack.iterobjects():
         # Commit
@@ -754,7 +759,9 @@ class SwiftPack(Pack):
         self._pack_info_load: Callable[[], dict[str, Any] | None] = (
             lambda: load_pack_info(self._pack_info_path, self.scon)
         )
-        self._idx_load = lambda: swift_load_pack_index(self.scon, self._idx_path)
+        self._idx_load = lambda: swift_load_pack_index(
+            self.scon, self._idx_path, self.object_format
+        )
         self._data_load = lambda: SwiftPackData(self.scon, self._data_path)
 
     @property
@@ -795,7 +802,7 @@ class SwiftObjectStore(PackBasedObjectStore):
         ]
         ret = []
         for basename in pack_files:
-            pack = SwiftPack(basename, scon=self.scon)
+            pack = SwiftPack(basename, object_format=self.object_format, scon=self.scon)
             self._pack_cache[basename] = pack
             ret.append(pack)
         return ret
@@ -873,7 +880,9 @@ class SwiftObjectStore(PackBasedObjectStore):
 
             from ..file import _GitFile
 
-            pack = PackData(file=cast(_GitFile, f), filename="")
+            pack = PackData(
+                file=cast(_GitFile, f), filename="", object_format=self.object_format
+            )
             entries = pack.sorted_entries()
             if entries:
                 basename = posixpath.join(
@@ -886,7 +895,7 @@ class SwiftObjectStore(PackBasedObjectStore):
                 f.close()
                 self.scon.put_object(basename + ".idx", index)
                 index.close()
-                final_pack = SwiftPack(basename, scon=self.scon)
+                final_pack = SwiftPack(basename, object_format=self.object_format, scon=self.scon)
                 final_pack.check_length_and_checksum()
                 self._add_cached_pack(basename, final_pack)
                 return final_pack
@@ -962,7 +971,9 @@ class SwiftObjectStore(PackBasedObjectStore):
 
         # Rescan the rest of the pack, computing the SHA with the new header.
         new_sha = compute_file_sha(
-            f, self.object_format, end_ofs=-self.object_format.oid_length
+            f,
+            hash_func=self.object_format.hash_func,
+            end_ofs=-self.object_format.oid_length,
         )
 
         # Must reposition before writing (http://bugs.python.org/issue3207)
@@ -995,9 +1006,11 @@ class SwiftObjectStore(PackBasedObjectStore):
 
         # Write pack info.
         f.seek(0)
-        pack_data = PackData(filename="", file=cast(_GitFile, f))
+        pack_data = PackData(
+            filename="", file=cast(_GitFile, f), object_format=self.object_format
+        )
         index_file.seek(0)
-        pack_index = load_pack_index_file("", index_file)
+        pack_index = load_pack_index_file("", index_file, self.object_format)
         serialized_pack_info = pack_info_create(pack_data, pack_index)
         f.close()
         index_file.close()
@@ -1007,7 +1020,7 @@ class SwiftObjectStore(PackBasedObjectStore):
         pack_info_file.close()
 
         # Add the pack to the store and return it.
-        final_pack = SwiftPack(pack_base_name, scon=self.scon)
+        final_pack = SwiftPack(pack_base_name, object_format=self.object_format, scon=self.scon)
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
         return final_pack

+ 4 - 2
dulwich/dumb.py

@@ -208,7 +208,7 @@ class DumbHTTPObjectStore(BaseObjectStore):
                     # Fetch and cache the index
                     idx_data = self._fetch_url(f"objects/pack/{pack_name}.idx")
 
-                    idx = load_pack_index_file("<http>", BytesIO(idx_data))
+                    idx = load_pack_index_file("<http>", BytesIO(idx_data), self.object_format)
                     if self._packs is not None:
                         self._packs[i] = (name, idx)
                 return idx
@@ -259,7 +259,9 @@ class DumbHTTPObjectStore(BaseObjectStore):
 
             # Open the pack and get the object
             pack_data = PackData(pack_path, object_format=self.object_format)
-            pack = Pack.from_objects(pack_data, pack_idx)
+            pack = Pack.from_objects(
+                pack_data, pack_idx
+            )
             try:
                 return pack.get_raw(binsha)
             finally:

+ 13 - 8
dulwich/object_format.py

@@ -32,8 +32,6 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from _hashlib import HASH
 
-    from .objects import ObjectID, RawObjectID
-
 
 class ObjectFormat:
     """Object format (hash algorithm) used in Git."""
@@ -41,6 +39,7 @@ class ObjectFormat:
     def __init__(
         self,
         name: str,
+        type_num: int,
         oid_length: int,
         hex_length: int,
         hash_func: Callable[[], "HASH"],
@@ -49,18 +48,16 @@ class ObjectFormat:
 
         Args:
             name: Name of the format (e.g., "sha1", "sha256")
+            type_num: Format type number used in Git
             oid_length: Length of the binary object ID in bytes
             hex_length: Length of the hexadecimal object ID in characters
             hash_func: Hash function from hashlib
         """
         self.name = name
+        self.type_num = type_num
         self.oid_length = oid_length
         self.hex_length = hex_length
         self.hash_func = hash_func
-        # Type annotations for proper ObjectID/RawObjectID types
-        # These are bytes at runtime but typed as NewType wrappers for type safety
-        self.zero_oid: ObjectID = b"0" * hex_length  # type: ignore[assignment]
-        self.zero_oid_bin: RawObjectID = b"\x00" * oid_length  # type: ignore[assignment]
 
     def __str__(self) -> str:
         """Return string representation."""
@@ -102,8 +99,10 @@ class ObjectFormat:
 
 
 # Define the supported object formats
-SHA1 = ObjectFormat("sha1", 20, 40, sha1)
-SHA256 = ObjectFormat("sha256", 32, 64, sha256)
+SHA1 = ObjectFormat("sha1", type_num=1, oid_length=20, hex_length=40, hash_func=sha1)
+SHA256 = ObjectFormat(
+    "sha256", type_num=20, oid_length=32, hex_length=64, hash_func=sha256
+)
 
 # Map of format names to ObjectFormat instances
 OBJECT_FORMATS = {
@@ -111,6 +110,12 @@ OBJECT_FORMATS = {
     "sha256": SHA256,
 }
 
+# Map of format numbers to ObjectFormat instances
+OBJECT_FORMAT_TYPE_NUMS = {
+    1: SHA1,
+    2: SHA256,
+}
+
 # Default format for backward compatibility
 DEFAULT_OBJECT_FORMAT = SHA1
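
A quick sketch of how the two lookup tables relate (names match the hunk above); the type numbers mirror the hash algorithm identifiers stored in pack index v3 headers:

    from dulwich.object_format import OBJECT_FORMATS, OBJECT_FORMAT_TYPE_NUMS, SHA256

    assert OBJECT_FORMAT_TYPE_NUMS[2] is SHA256      # 2 = SHA-256 in v3 indexes
    assert OBJECT_FORMATS["sha256"].type_num == 2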
 

+ 8 - 19
dulwich/object_store.py

@@ -78,7 +78,6 @@ from .file import GitFile, _GitFile
 from .midx import MultiPackIndex, load_midx
 from .objects import (
     S_ISGITLINK,
-    ZERO_SHA,
     Blob,
     Commit,
     ObjectID,
@@ -349,7 +348,6 @@ class BaseObjectStore:
             for (ref, sha) in refs.items()
             if (sha not in self or _want_deepen(sha))
             and not ref.endswith(PEELED_TAG_SUFFIX)
-            and not sha == ZERO_SHA
         ]
 
     def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
@@ -1187,8 +1185,6 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
           name: sha for the object.
         Returns: tuple with numeric type and object contents.
         """
-        if name == ZERO_SHA:
-            raise KeyError(name)
         sha: RawObjectID
         if len(name) == self.object_format.hex_length:
             sha = hex_to_sha(ObjectID(name))
@@ -1326,8 +1322,6 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
           sha1: sha for the object.
           include_comp: Whether to include compression metadata.
         """
-        if sha1 == self.object_format.zero_oid:
-            raise KeyError(sha1)
         if len(sha1) == self.object_format.hex_length:
             sha = hex_to_sha(cast(ObjectID, sha1))
             hexsha = cast(ObjectID, sha1)
@@ -1687,13 +1681,13 @@ class DiskObjectStore(PackBasedObjectStore):
             if f not in self._pack_cache:
                 pack = Pack(
                     os.path.join(self.pack_dir, f),
+                    object_format=self.object_format,
                     delta_window_size=self.pack_delta_window_size,
                     window_memory=self.pack_window_memory,
                     delta_cache_size=self.pack_delta_cache_size,
                     depth=self.pack_depth,
                     threads=self.pack_threads,
                     big_file_threshold=self.pack_big_file_threshold,
-                    object_format=self.object_format,
                 )
                 new_packs.append(pack)
                 self._pack_cache[f] = pack
@@ -1908,7 +1902,7 @@ class DiskObjectStore(PackBasedObjectStore):
                 pack_index = load_pack_index_file(
                     os.path.basename(target_index_path),
                     idx_file,
-                    object_format=self.object_format,
+                    self.object_format,
                 )
 
             # Generate the bitmap
@@ -1934,13 +1928,13 @@ class DiskObjectStore(PackBasedObjectStore):
         # Add the pack to the store and return it.
         final_pack = Pack(
             pack_base_name,
+            object_format=self.object_format,
             delta_window_size=self.pack_delta_window_size,
             window_memory=self.pack_window_memory,
             delta_cache_size=self.pack_delta_cache_size,
             depth=self.pack_depth,
             threads=self.pack_threads,
             big_file_threshold=self.pack_big_file_threshold,
-            object_format=self.object_format,
         )
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
@@ -2007,7 +2001,7 @@ class DiskObjectStore(PackBasedObjectStore):
             if f.tell() > 0:
                 f.seek(0)
 
-                with PackData(path, f, object_format=self.object_format) as pd:
+                with PackData(path, file=f, object_format=self.object_format) as pd:
                     indexer = PackIndexer.for_pack_data(
                         pd,
                         resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
@@ -2198,13 +2192,13 @@ class DiskObjectStore(PackBasedObjectStore):
 
         pack = Pack(
             pack_path,
+            object_format=self.object_format,
             delta_window_size=self.pack_delta_window_size,
             window_memory=self.pack_window_memory,
             delta_cache_size=self.pack_delta_cache_size,
             depth=self.pack_depth,
             threads=self.pack_threads,
             big_file_threshold=self.pack_big_file_threshold,
-            object_format=self.object_format,
         )
         self._pack_cache[base_name] = pack
         return pack
@@ -2243,9 +2237,6 @@ class DiskObjectStore(PackBasedObjectStore):
         Raises:
             KeyError: If object not found
         """
-        if name == ZERO_SHA:
-            raise KeyError(name)
-
         sha: RawObjectID
         if len(name) in (40, 64):
             # name is ObjectID (hex), convert to RawObjectID
@@ -2557,7 +2548,7 @@ class MemoryObjectStore(PackCapableObjectStore):
             if size > 0:
                 f.seek(0)
 
-                p = PackData.from_file(f, size, object_format=self.object_format)
+                p = PackData.from_file(f, self.object_format, size)
                 for obj in PackInflater.for_pack_data(p, self.get_raw):  # type: ignore[arg-type]
                     self.add_object(obj)
                 p.close()
@@ -3362,7 +3353,7 @@ class BucketBasedObjectStore(PackBasedObjectStore):
 
             pf.seek(0)
 
-            p = PackData(pf.name, pf)
+            p = PackData(pf.name, file=pf, object_format=self.object_format)
             entries = p.sorted_entries()
             basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
             idxf = tempfile.SpooledTemporaryFile(
@@ -3371,9 +3362,7 @@ class BucketBasedObjectStore(PackBasedObjectStore):
             checksum = p.get_stored_checksum()
             write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
             idxf.seek(0)
-            idx = load_pack_index_file(
-                basename + ".idx", idxf, object_format=self.object_format
-            )
+            idx = load_pack_index_file(basename + ".idx", idxf, self.object_format)
             for pack in self.packs:
                 if pack.get_stored_checksum() == p.get_stored_checksum():
                     p.close()

+ 99 - 165
dulwich/pack.py

@@ -124,7 +124,6 @@ from typing import (
     Any,
     BinaryIO,
     Generic,
-    Optional,
     Protocol,
     TypeVar,
 )
@@ -141,7 +140,6 @@ if TYPE_CHECKING:
 
     from .bitmap import PackBitmap
     from .commit_graph import CommitGraph
-    from .object_format import ObjectFormat
     from .object_store import BaseObjectStore
     from .ref import Ref
 
@@ -153,6 +151,7 @@ from . import replace_me
 from .errors import ApplyDeltaError, ChecksumMismatch
 from .file import GitFile, _GitFile
 from .lru_cache import LRUSizeCache
+from .object_format import OBJECT_FORMAT_TYPE_NUMS, SHA1, ObjectFormat
 from .objects import (
     ObjectID,
     RawObjectID,
@@ -537,7 +536,7 @@ def iter_sha1(iter: Iterable[bytes]) -> bytes:
 
 
 def load_pack_index(
-    path: str | os.PathLike[str], object_format: Optional["ObjectFormat"] = None
+    path: str | os.PathLike[str], object_format: ObjectFormat
 ) -> "PackIndex":
     """Load an index file by path.
 
@@ -547,7 +546,7 @@ def load_pack_index(
     Returns: A PackIndex loaded from the given path
     """
     with GitFile(path, "rb") as f:
-        return load_pack_index_file(path, f, object_format=object_format)
+        return load_pack_index_file(path, f, object_format)
 
 
 def _load_file_contents(
@@ -584,7 +583,7 @@ def _load_file_contents(
 def load_pack_index_file(
     path: str | os.PathLike[str],
     f: IO[bytes] | _GitFile,
-    object_format: Optional["ObjectFormat"] = None,
+    object_format: ObjectFormat,
 ) -> "PackIndex":
     """Load an index file from a file-like object.
 
@@ -600,19 +599,19 @@ def load_pack_index_file(
         if version == 2:
             return PackIndex2(
                 path,
+                object_format,
                 file=f,
                 contents=contents,
                 size=size,
-                object_format=object_format,
             )
         elif version == 3:
-            return PackIndex3(path, file=f, contents=contents, size=size)
+            return PackIndex3(
+                path, object_format, file=f, contents=contents, size=size
+            )
         else:
             raise KeyError(f"Unknown pack index format {version}")
     else:
-        return PackIndex1(
-            path, file=f, contents=contents, size=size, object_format=object_format
-        )
+        return PackIndex1(path, object_format, file=f, contents=contents, size=size)
 
 
 def bisect_find_sha(
@@ -650,9 +649,7 @@ class PackIndex:
     packfile of that object if it has it.
     """
 
-    # Default to SHA-1 for backward compatibility
-    hash_format = 1
-    hash_size = 20
+    object_format: "ObjectFormat"
 
     def __eq__(self, other: object) -> bool:
         """Check equality with another PackIndex."""
@@ -766,16 +763,15 @@ class MemoryPackIndex(PackIndex):
     def __init__(
         self,
         entries: list[PackIndexEntry],
+        object_format: ObjectFormat,
         pack_checksum: bytes | None = None,
-        *,
-        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Create a new MemoryPackIndex.
 
         Args:
           entries: Sequence of name, idx, crc32 (sorted)
+          object_format: Object format used by this index
           pack_checksum: Optional pack checksum
-          object_format: Object format (hash algorithm) to use
         """
         self._by_sha = {}
         self._by_offset = {}
@@ -784,17 +780,7 @@ class MemoryPackIndex(PackIndex):
             self._by_offset[offset] = name
         self._entries = entries
         self._pack_checksum = pack_checksum
-
-        # Set hash size from object format
-        if object_format:
-            self.hash_size = object_format.oid_length
-        else:
-            warnings.warn(
-                "MemoryPackIndex() should be called with object_format parameter",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            self.hash_size = 20  # Default to SHA1
+        self.object_format = object_format
 
     def get_pack_checksum(self) -> bytes | None:
         """Return the SHA checksum stored for the corresponding packfile."""
@@ -812,7 +798,7 @@ class MemoryPackIndex(PackIndex):
         Returns: Offset in the pack file
         """
         lookup_sha: RawObjectID
-        if len(sha) == self.hash_size * 2:  # hex string
+        if len(sha) == self.object_format.hex_length:
             lookup_sha = hex_to_sha(ObjectID(sha))
         else:
             lookup_sha = RawObjectID(sha)
@@ -842,7 +828,11 @@ class MemoryPackIndex(PackIndex):
     @classmethod
     def clone(cls, other_index: "PackIndex") -> "MemoryPackIndex":
         """Create a copy of another PackIndex in memory."""
-        return cls(list(other_index.iterentries()), other_index.get_pack_checksum())
+        return cls(
+            list(other_index.iterentries()),
+            other_index.object_format,
+            other_index.get_pack_checksum(),
+        )
 
 
 class FilePackIndex(PackIndex):
@@ -1000,7 +990,7 @@ class FilePackIndex(PackIndex):
         have the object then None will be returned.
         """
         lookup_sha: RawObjectID
-        if len(sha) == self.hash_size * 2:  # hex string
+        if len(sha) == self.object_format.hex_length:  # hex string
             lookup_sha = hex_to_sha(ObjectID(sha))
         else:
             lookup_sha = RawObjectID(sha)
@@ -1057,32 +1047,37 @@ class FilePackIndex(PackIndex):
 class PackIndex1(FilePackIndex):
     """Version 1 Pack Index file."""
 
+    object_format = SHA1
+
     def __init__(
         self,
         filename: str | os.PathLike[str],
+        object_format: ObjectFormat,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
-        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize a version 1 pack index.
 
         Args:
             filename: Path to the index file
+            object_format: Object format used by the repository
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
-            object_format: Object format used by the repository
         """
         super().__init__(filename, file, contents, size)
+
+        # PackIndex1 only supports SHA1
+        if object_format != SHA1:
+            raise AssertionError(
+                f"PackIndex1 only supports SHA1, not {object_format.name}"
+            )
+
+        self.object_format = object_format
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
-        # Use provided hash algorithm if available, otherwise default to SHA1
-        if object_format:
-            self.hash_size = object_format.oid_length
-        else:
-            self.hash_size = 20  # Default to SHA1
-
+        self.hash_size = self.object_format.oid_length
         self._entry_size = 4 + self.hash_size
 
     def _unpack_entry(self, i: int) -> tuple[RawObjectID, int, None]:
@@ -1107,37 +1102,34 @@ class PackIndex1(FilePackIndex):
 class PackIndex2(FilePackIndex):
     """Version 2 Pack Index file."""
 
+    object_format = SHA1
+
     def __init__(
         self,
         filename: str | os.PathLike[str],
+        object_format: ObjectFormat,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
-        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize a version 2 pack index.
 
         Args:
             filename: Path to the index file
+            object_format: Object format used by the repository
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
-            object_format: Object format used by the repository
         """
         super().__init__(filename, file, contents, size)
+        self.object_format = object_format
         if self._contents[:4] != b"\377tOc":
             raise AssertionError("Not a v2 pack index file")
         (self.version,) = unpack_from(b">L", self._contents, 4)
         if self.version != 2:
             raise AssertionError(f"Version was {self.version}")
         self._fan_out_table = self._read_fan_out_table(8)
-
-        # Use provided hash algorithm if available, otherwise default to SHA1
-        if object_format:
-            self.hash_size = object_format.oid_length
-        else:
-            self.hash_size = 20  # Default to SHA1
-
+        self.hash_size = self.object_format.oid_length
         self._name_table_offset = 8 + 0x100 * 4
         self._crc32_table_offset = self._name_table_offset + self.hash_size * len(self)
         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
@@ -1214,6 +1206,7 @@ class PackIndex3(FilePackIndex):
     def __init__(
         self,
         filename: str | os.PathLike[str],
+        object_format: ObjectFormat,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
@@ -1222,6 +1215,7 @@ class PackIndex3(FilePackIndex):
 
         Args:
             filename: Path to the index file
+            object_format: Object format used by the repository
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
@@ -1235,12 +1229,17 @@ class PackIndex3(FilePackIndex):
 
         # Read hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
         (self.hash_format,) = unpack_from(b">L", self._contents, 8)
-        if self.hash_format == 1:
-            self.hash_size = 20  # SHA-1
-        elif self.hash_format == 2:
-            self.hash_size = 32  # SHA-256
-        else:
-            raise AssertionError(f"Unknown hash algorithm {self.hash_format}")
+        file_object_format = OBJECT_FORMAT_TYPE_NUMS[self.hash_format]
+
+        # Verify provided object_format matches what's in the file
+        if object_format != file_object_format:
+            raise AssertionError(
+                f"Object format mismatch: provided {object_format.name}, "
+                f"but file contains {file_object_format.name}"
+            )
+
+        self.object_format = object_format
+        self.hash_size = self.object_format.oid_length
 
         # Read length of shortened object names
         (self.shortened_oid_len,) = unpack_from(b">L", self._contents, 12)
@@ -1653,7 +1652,7 @@ def obj_sha(
 
 def compute_file_sha(
     f: IO[bytes],
-    object_format: "ObjectFormat",
+    hash_func: Callable[[], "HashObject"],
     start_ofs: int = 0,
     end_ofs: int = 0,
     buffer_size: int = 1 << 16,
@@ -1662,14 +1661,14 @@ def compute_file_sha(
 
     Args:
       f: A file-like object to read from that supports seek().
-      object_format: Hash algorithm to use.
+      hash_func: A callable that returns a new HashObject.
       start_ofs: The offset in the file to start reading at.
       end_ofs: The offset in the file to end reading at, relative to the
         end of the file.
       buffer_size: A buffer size for reading.
     Returns: A new SHA object updated with data read from the file.
     """
-    sha = object_format.new_hash()
+    sha = hash_func()
     f.seek(0, SEEK_END)
     length = f.tell()
     if start_ofs < 0:
@@ -1717,6 +1716,7 @@ class PackData:
     def __init__(
         self,
         filename: str | os.PathLike[str],
+        object_format: ObjectFormat,
         file: IO[bytes] | None = None,
         size: int | None = None,
         *,
@@ -1726,7 +1726,6 @@ class PackData:
         depth: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
-        object_format: "ObjectFormat | None" = None,
     ) -> None:
         """Create a PackData object representing the pack in the given filename.
 
@@ -1736,17 +1735,8 @@ class PackData:
         Currently there is a restriction on the size of the pack as the python
         mmap implementation is flawed.
         """
-        from .object_format import DEFAULT_OBJECT_FORMAT
-
-        if object_format is None:
-            warnings.warn(
-                "PackData() should be called with object_format parameter",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            object_format = DEFAULT_OBJECT_FORMAT
-        self.object_format = object_format
         self._filename = filename
+        self.object_format = object_format
         self._size = size
         self._header_size = 12
         self.delta_window_size = delta_window_size
@@ -1791,32 +1781,32 @@ class PackData:
     def from_file(
         cls,
         file: IO[bytes],
+        object_format: ObjectFormat,
         size: int | None = None,
-        object_format: Optional["ObjectFormat"] = None,
     ) -> "PackData":
         """Create a PackData object from an open file.
 
         Args:
           file: Open file object
+          object_format: Object format
           size: Optional file size
-          object_format: Object format used by the repository
 
         Returns:
           PackData instance
         """
-        return cls(str(file), file=file, size=size, object_format=object_format)
+        return cls(str(file), object_format, file=file, size=size)
 
     @classmethod
     def from_path(
         cls,
         path: str | os.PathLike[str],
-        object_format: Optional["ObjectFormat"] = None,
+        object_format: ObjectFormat,
     ) -> "PackData":
         """Create a PackData object from a file path.
 
         Args:
           path: Path to the pack file
-          object_format: Object format used by the repository
+          object_format: Object format
 
         Returns:
           PackData instance
@@ -1865,7 +1855,9 @@ class PackData:
         Returns: Binary digest (size depends on hash algorithm)
         """
         return compute_file_sha(
-            self._file, self.object_format, end_ofs=-self.object_format.oid_length
+            self._file,
+            hash_func=self.object_format.hash_func,
+            end_ofs=-self.object_format.oid_length,
         ).digest()
 
     def iter_unpacked(self, *, include_comp: bool = False) -> Iterator[UnpackedObject]:
@@ -2858,7 +2850,7 @@ def pack_object_header(
     type_num: int,
     delta_base: bytes | int | None,
     size: int,
-    object_format: Optional["ObjectFormat"] = None,
+    object_format: "ObjectFormat",
 ) -> bytearray:
     """Create a pack object header for the given object info.
 
@@ -2869,16 +2861,6 @@ def pack_object_header(
       object_format: Object format (hash algorithm) to use.
     Returns: A header for a packed object.
     """
-    from .object_format import DEFAULT_OBJECT_FORMAT
-
-    if object_format is None:
-        warnings.warn(
-            "pack_object_header() should be called with object_format parameter",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        object_format = DEFAULT_OBJECT_FORMAT
-
     header = []
     c = (type_num << 4) | (size & 15)
     size >>= 4
@@ -2906,27 +2888,19 @@ def pack_object_header(
 def pack_object_chunks(
     type: int,
     object: list[bytes] | tuple[bytes | int, list[bytes]],
+    object_format: "ObjectFormat",
+    *,
     compression_level: int = -1,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> Iterator[bytes]:
     """Generate chunks for a pack object.
 
     Args:
       type: Numeric type of the object
       object: Object to write
-      compression_level: the zlib compression level
       object_format: Object format (hash algorithm) to use
+      compression_level: the zlib compression level
     Returns: Chunks
     """
-    from .object_format import DEFAULT_OBJECT_FORMAT
-
-    if object_format is None:
-        warnings.warn(
-            "pack_object_chunks() should be called with object_format parameter",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        object_format = DEFAULT_OBJECT_FORMAT
     if type in DELTA_TYPES:
         if isinstance(object, tuple):
             delta_base, object = object
@@ -2961,9 +2935,10 @@ def write_pack_object(
     write: Callable[[bytes], int],
     type: int,
     object: list[bytes] | tuple[bytes | int, list[bytes]],
+    object_format: "ObjectFormat",
+    *,
     sha: "HashObject | None" = None,
     compression_level: int = -1,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> int:
     """Write pack object to a file.
 
@@ -2971,20 +2946,11 @@ def write_pack_object(
       write: Write function to use
       type: Numeric type of the object
       object: Object to write
+      object_format: Object format (hash algorithm) to use
       sha: Optional SHA-1 hasher to update
       compression_level: the zlib compression level
-      object_format: Object format (hash algorithm) to use
     Returns: CRC32 checksum of the written object
     """
-    from .object_format import DEFAULT_OBJECT_FORMAT
-
-    if object_format is None:
-        warnings.warn(
-            "write_pack_object() should be called with object_format parameter",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        object_format = DEFAULT_OBJECT_FORMAT
     crc32 = 0
     for chunk in pack_object_chunks(
         type, object, compression_level=compression_level, object_format=object_format
@@ -2999,32 +2965,23 @@ def write_pack_object(
 def write_pack(
     filename: str,
     objects: Sequence[ShaFile] | Sequence[tuple[ShaFile, bytes | None]],
+    object_format: "ObjectFormat",
     *,
     deltify: bool | None = None,
     delta_window_size: int | None = None,
     compression_level: int = -1,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[bytes, bytes]:
     """Write a new pack data file.
 
     Args:
       filename: Path to the new pack file (without .pack extension)
       objects: Objects to write to the pack
+      object_format: Object format
       delta_window_size: Delta window size
       deltify: Whether to deltify pack objects
       compression_level: the zlib compression level
-      object_format: Object format
     Returns: Tuple with checksum of pack file and index file
     """
-    from .object_format import DEFAULT_OBJECT_FORMAT
-
-    if object_format is None:
-        warnings.warn(
-            "write_pack() should be called with object_format parameter",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        object_format = DEFAULT_OBJECT_FORMAT
     with GitFile(filename + ".pack", "wb") as f:
         entries, data_sum = write_pack_objects(
             f,
@@ -3328,12 +3285,13 @@ def write_pack_from_container(
     | IO[bytes],
     container: PackedObjectContainer,
     object_ids: Sequence[tuple[ObjectID, PackHint | None]],
+    object_format: "ObjectFormat",
+    *,
     delta_window_size: int | None = None,
     deltify: bool | None = None,
     reuse_deltas: bool = True,
     compression_level: int = -1,
     other_haves: set[ObjectID] | None = None,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
@@ -3341,13 +3299,13 @@ def write_pack_from_container(
       write: write function to use
       container: PackedObjectContainer
       object_ids: Sequence of (object_id, hint) tuples to write
+      object_format: Object format (hash algorithm) to use
       delta_window_size: Sliding window size for searching for deltas;
                          Set to None for default window size.
       deltify: Whether to deltify objects
       reuse_deltas: Whether to reuse existing deltas
       compression_level: the zlib compression level to use
       other_haves: Set of additional object IDs the receiver has
-      object_format: Object format (hash algorithm) to use
     Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
     pack_contents_count = len(object_ids)
@@ -3372,22 +3330,22 @@ def write_pack_from_container(
 def write_pack_objects(
     write: Callable[[bytes], None] | IO[bytes],
     objects: Sequence[ShaFile] | Sequence[tuple[ShaFile, bytes | None]],
+    object_format: "ObjectFormat",
     *,
     delta_window_size: int | None = None,
     deltify: bool | None = None,
     compression_level: int = -1,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
     Args:
       write: write function to use
       objects: Sequence of (object, path) tuples to write
+      object_format: Object format (hash algorithm) to use
       delta_window_size: Sliding window size for searching for deltas;
                          Set to None for default window size.
       deltify: Whether to deltify objects
       compression_level: the zlib compression level to use
-      object_format: Object format (hash algorithm) to use
     Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
     pack_contents_count, pack_contents = pack_objects_to_data(objects, deltify=deltify)
@@ -3406,12 +3364,12 @@ class PackChunkGenerator:
 
     def __init__(
         self,
+        object_format: "ObjectFormat",
         num_records: int | None = None,
         records: Iterator[UnpackedObject] | None = None,
         progress: Callable[..., None] | None = None,
         compression_level: int = -1,
         reuse_compressed: bool = True,
-        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize PackChunkGenerator.
 
@@ -3423,15 +3381,6 @@ class PackChunkGenerator:
             reuse_compressed: Whether to reuse compressed chunks
             object_format: Object format (hash algorithm) to use
         """
-        from .object_format import DEFAULT_OBJECT_FORMAT
-
-        if object_format is None:
-            warnings.warn(
-                "PackChunkGenerator() should be called with object_format parameter",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            object_format = DEFAULT_OBJECT_FORMAT
         self.object_format = object_format
         self.cs = object_format.new_hash()
         self.entries: dict[bytes, tuple[int, int]] = {}
@@ -3534,11 +3483,11 @@ def write_pack_data(
     | Callable[[bytes | bytearray | memoryview], int]
     | IO[bytes],
     records: Iterator[UnpackedObject],
+    object_format: "ObjectFormat",
     *,
     num_records: int | None = None,
     progress: Callable[..., None] | None = None,
     compression_level: int = -1,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
@@ -3546,9 +3495,9 @@ def write_pack_data(
       write: Write function to use
       num_records: Number of records (defaults to len(records) if None)
       records: Iterator over type_num, object_id, delta_base, raw
+      object_format: Object format (hash algorithm) to use
       progress: Function to report progress to
       compression_level: the zlib compression level
-      object_format: Object format (hash algorithm) to use
     Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
     chunk_generator = PackChunkGenerator(
@@ -3947,20 +3896,21 @@ class Pack:
     def __init__(
         self,
         basename: str,
-        resolve_ext_ref: ResolveExtRefFn | None = None,
         *,
+        object_format: ObjectFormat,
+        resolve_ext_ref: ResolveExtRefFn | None = None,
         delta_window_size: int | None = None,
         window_memory: int | None = None,
         delta_cache_size: int | None = None,
         depth: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
-        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize a Pack object.
 
         Args:
           basename: Base path for pack files (without .pack/.idx extension)
+          object_format: Hash algorithm used by the repository
           resolve_ext_ref: Optional function to resolve external references
           delta_window_size: Size of the delta compression window
           window_memory: Memory limit for delta compression window
@@ -3968,9 +3918,9 @@ class Pack:
           depth: Maximum depth for delta chains
           threads: Number of threads to use for operations
           big_file_threshold: Size threshold for big file handling
-          object_format: Hash algorithm to use (defaults to SHA1)
         """
         self._basename = basename
+        self.object_format = object_format
         self._data = None
         self._idx = None
         self._bitmap = None
@@ -3983,10 +3933,7 @@ class Pack:
         self.depth = depth
         self.threads = threads
         self.big_file_threshold = big_file_threshold
-        # Always set object_format, defaulting to default
-        from .object_format import DEFAULT_OBJECT_FORMAT
-
-        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT
+        self._idx_load = lambda: load_pack_index(self._idx_path, object_format)
         self._data_load = lambda: PackData(
             self._data_path,
             delta_window_size=delta_window_size,
@@ -3995,10 +3942,7 @@ class Pack:
             depth=depth,
             threads=threads,
             big_file_threshold=big_file_threshold,
-            object_format=self.object_format,
-        )
-        self._idx_load = lambda: load_pack_index(
-            self._idx_path, object_format=object_format
+            object_format=object_format,
         )
         self.resolve_ext_ref = resolve_ext_ref
 
@@ -4007,23 +3951,20 @@ class Pack:
         cls,
         data_fn: Callable[[], PackData],
         idx_fn: Callable[[], PackIndex],
-        object_format: Optional["ObjectFormat"] = None,
     ) -> "Pack":
         """Create a new pack object from callables to load pack data and index objects."""
-        ret = cls("", object_format=object_format)
+        # Load index to get object format
+        idx = idx_fn()
+        ret = cls("", object_format=idx.object_format)
         ret._data_load = data_fn
-        ret._idx_load = idx_fn
+        ret._idx = idx
+        ret._idx_load = None
         return ret
 
     @classmethod
-    def from_objects(
-        cls,
-        data: PackData,
-        idx: PackIndex,
-        object_format: Optional["ObjectFormat"] = None,
-    ) -> "Pack":
+    def from_objects(cls, data: PackData, idx: PackIndex) -> "Pack":
         """Create a new pack object from pack data and index objects."""
-        ret = cls("", object_format=object_format)
+        ret = cls("", object_format=idx.object_format)
         ret._data = data
         ret._data_load = None
         ret._idx = idx
@@ -4451,25 +4392,16 @@ def extend_pack(
     f: BinaryIO,
     object_ids: Set["RawObjectID"],
     get_raw: Callable[["RawObjectID | ObjectID"], tuple[int, bytes]],
+    object_format: "ObjectFormat",
     *,
     compression_level: int = -1,
     progress: Callable[[bytes], None] | None = None,
-    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[bytes, list[tuple[RawObjectID, int, int]]]:
     """Extend a pack file with more objects.
 
     The caller should make sure that object_ids does not contain any objects
     that are already in the pack
     """
-    from .object_format import DEFAULT_OBJECT_FORMAT
-
-    if object_format is None:
-        warnings.warn(
-            "extend_pack() should be called with object_format parameter",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        object_format = DEFAULT_OBJECT_FORMAT
     # Update the header with the new number of objects.
     f.seek(0)
     _version, num_objects = read_pack_header(f.read)
@@ -4482,7 +4414,9 @@ def extend_pack(
         f.flush()
 
     # Rescan the rest of the pack, computing the SHA with the new header.
-    new_sha = compute_file_sha(f, object_format, end_ofs=-object_format.oid_length)
+    new_sha = compute_file_sha(
+        f, hash_func=object_format.hash_func, end_ofs=-object_format.oid_length
+    )
 
     # Must reposition before writing (http://bugs.python.org/issue3207)
     f.seek(0, os.SEEK_CUR)
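
compute_file_sha() now takes the bare hash constructor rather than a whole ObjectFormat, which keeps it usable for any hashlib-style factory. A minimal sketch:

    from io import BytesIO
    from dulwich.object_format import SHA256
    from dulwich.pack import compute_file_sha

    f = BytesIO(b"example payload")
    # Skipping a trailing 32-byte checksum would use end_ofs=-SHA256.oid_length;
    # here we hash the whole buffer.
    digest = compute_file_sha(f, hash_func=SHA256.hash_func).hexdigest()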

+ 1 - 1
dulwich/porcelain/__init__.py

@@ -6139,7 +6139,7 @@ def unpack_objects(
 
     with open_repo_closing(target) as r:
         pack_basename = os.path.splitext(pack_path)[0]
-        with Pack(pack_basename) as pack:
+        with Pack(pack_basename, object_format=r.object_store.object_format) as pack:
             count = 0
             for unpacked in pack.iter_unpacked():
                 obj = unpacked.sha_file()

+ 0 - 11
dulwich/tests/test_object_store.py

@@ -79,17 +79,6 @@ class ObjectStoreTests:
             ),
         )
 
-    def test_determine_wants_all_zero(self) -> None:
-        """Test determine_wants_all with zero ref."""
-        from dulwich.refs import Ref
-
-        self.assertEqual(
-            [],
-            self.store.determine_wants_all(
-                {Ref(b"refs/heads/foo"): self.store.object_format.zero_oid}
-            ),
-        )
-
     def test_determine_wants_all_depth(self) -> None:
         """Test determine_wants_all with depth parameter."""
         self.store.add_object(testobject)

+ 4 - 3
tests/compat/test_bitmap.py

@@ -33,6 +33,7 @@ from dulwich.bitmap import (
     PackBitmap,
     write_bitmap,
 )
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.pack import Pack
 from dulwich.repo import Repo
 
@@ -99,7 +100,7 @@ class BitmapCompatTests(TestCase):
         )
 
         # Try to load the bitmap using Dulwich
-        with Pack(pack_path) as pack:
+        with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
             bitmap = pack.bitmap
 
             # Basic checks
@@ -196,7 +197,7 @@ class BitmapCompatTests(TestCase):
         pack_path = os.path.join(pack_dir, pack_basename)
 
         # Load the pack and create bitmap data, then close before writing
-        with Pack(pack_path) as pack:
+        with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
             # Create a simple bitmap for testing
             # Git requires BITMAP_OPT_FULL_DAG flag
             bitmap = PackBitmap(
@@ -261,7 +262,7 @@ class BitmapCompatTests(TestCase):
         bitmap_name = bitmap_files[0]
         pack_basename = bitmap_name.replace(".bitmap", "")
         pack_path = os.path.join(pack_dir, pack_basename)
-        with Pack(pack_path) as pack:
+        with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
             bitmap = pack.bitmap
 
             self.assertIsNotNone(bitmap)

+ 2 - 2
tests/compat/test_pack.py

@@ -231,7 +231,7 @@ class TestPackIndexCompat(PackTests):
         run_git_or_fail(["index-pack", pack_path + ".pack"])
 
         # Load with dulwich
-        idx = load_pack_index(pack_path + ".idx")
+        idx = load_pack_index(pack_path + ".idx", DEFAULT_OBJECT_FORMAT)
 
         # Verify it works
         self.assertIn(blob.id, idx)
@@ -250,7 +250,7 @@ class TestPackIndexCompat(PackTests):
             write_pack_index_v3(f, entries, b"x" * 20, hash_format=1)
 
         # Load and verify structure
-        idx = load_pack_index(v3_path)
+        idx = load_pack_index(v3_path, DEFAULT_OBJECT_FORMAT)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
         self.assertEqual(idx.hash_format, 1)  # SHA-1

+ 1 - 46
tests/compat/test_sha256_packs.py

@@ -76,9 +76,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
 
             # Load pack index with SHA256 algorithm
             with open(idx_path, "rb") as f:
-                pack_idx = load_pack_index_file(
-                    idx_path, f, object_format=repo.object_format
-                )
+                pack_idx = load_pack_index_file(idx_path, f, SHA256)
 
             # Verify it's detected as SHA256
             self.assertEqual(pack_idx.hash_size, 32)
@@ -166,49 +164,6 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
                 content = f.read()
                 self.assertEqual(content, f"Dulwich blob content {i}".encode())
 
-    def test_pack_index_v1_interop(self):
-        """Test pack index v1 interoperability with SHA256."""
-        # Create repo with git using pack index v1
-        repo_path = tempfile.mkdtemp()
-        self.addCleanup(rmtree_ro, repo_path)
-        self._run_git(["init", "--object-format=sha256", repo_path])
-        self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
-
-        # Create files
-        for i in range(10):
-            test_file = os.path.join(repo_path, f"v1test{i}.txt")
-            with open(test_file, "w") as f:
-                f.write(f"Pack v1 test {i}\n")
-
-        self._run_git(["add", "."], cwd=repo_path)
-        self._run_git(["commit", "-m", "Test pack v1"], cwd=repo_path)
-        self._run_git(["gc"], cwd=repo_path)
-
-        # Read with dulwich
-        repo = Repo(repo_path)
-
-        # Find pack index
-        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
-        idx_files = [f for f in os.listdir(pack_dir) if f.endswith(".idx")]
-
-        for idx_file in idx_files:
-            idx_path = os.path.join(pack_dir, idx_file)
-            with open(idx_path, "rb") as f:
-                pack_idx = load_pack_index_file(
-                    idx_path, f, object_format=repo.object_format
-                )
-
-            # Verify it's v1 with SHA256
-            self.assertEqual(pack_idx.version, 1)
-            self.assertEqual(pack_idx.hash_size, 32)
-
-            # Verify we can iterate
-            for sha, offset, crc32 in pack_idx.iterentries():
-                self.assertEqual(len(sha), 32)
-                self.assertIsNone(crc32)  # v1 doesn't store CRC32
-
-        repo.close()
-
     def test_large_pack_interop(self):
         """Test large pack file interoperability."""
         # Create repo with dulwich

+ 2 - 2
tests/porcelain/__init__.py

@@ -6938,9 +6938,9 @@ class ReceivePackTests(PorcelainTestCase):
         outlines = outf.getvalue().splitlines()
         self.assertEqual(
             [
-                b"0091319b56ce3aee2d489f759736a79cc552c9bb86d9 HEAD\x00 report-status "
+                b"00a4319b56ce3aee2d489f759736a79cc552c9bb86d9 HEAD\x00 report-status "
                 b"delete-refs quiet ofs-delta side-band-64k "
-                b"no-done symref=HEAD:refs/heads/master",
+                b"no-done object-format=sha1 symref=HEAD:refs/heads/master",
                 b"003f319b56ce3aee2d489f759736a79cc552c9bb86d9 refs/heads/master",
                 b"0000",
             ],
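
The longer length prefix follows directly from the newly advertised capability: pkt-line lengths are four hex digits covering the prefix itself, and " object-format=sha1" adds 19 bytes to the first line:

    assert 0x91 + len(" object-format=sha1") == 0xA4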

+ 1 - 1
tests/test_cloud_gcs.py

@@ -140,7 +140,7 @@ class GcsObjectStoreTests(unittest.TestCase):
             args = mock_pack_cls.from_lazy_objects.call_args[0]
             self.assertEqual(2, len(args))
 
-            # They should be callables
+            # Both should be callables (lazy loaders for data and index)
             self.assertTrue(callable(args[0]))
             self.assertTrue(callable(args[1]))
 

+ 2 - 2
tests/test_object_store.py

@@ -387,7 +387,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
 
         # Load and verify it's version 1
         idx_path = os.path.join(pack_dir, idx_files[0])
-        idx = load_pack_index(idx_path)
+        idx = load_pack_index(idx_path, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(1, idx.version)
 
         # Test version 3
@@ -416,7 +416,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         self.assertEqual(1, len(idx_files2))
 
         idx_path2 = os.path.join(pack_dir2, idx_files2[0])
-        idx2 = load_pack_index(idx_path2)
+        idx2 = load_pack_index(idx_path2, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(3, idx2.version)
 
     def test_prune_orphaned_tempfiles(self) -> None:

+ 31 - 30
tests/test_pack.py

@@ -101,7 +101,8 @@ class PackTests(TestCase):
     def get_pack_index(self, sha):
         """Returns a PackIndex from the datadir with the given sha."""
         return load_pack_index(
-            os.path.join(self.datadir, "pack-{}.idx".format(sha.decode("ascii")))
+            os.path.join(self.datadir, "pack-{}.idx".format(sha.decode("ascii"))),
+            DEFAULT_OBJECT_FORMAT,
         )
 
     def get_pack_data(self, sha):
@@ -112,7 +113,10 @@ class PackTests(TestCase):
         )
 
     def get_pack(self, sha):
-        return Pack(os.path.join(self.datadir, "pack-{}".format(sha.decode("ascii"))))
+        return Pack(
+            os.path.join(self.datadir, "pack-{}".format(sha.decode("ascii"))),
+            object_format=DEFAULT_OBJECT_FORMAT,
+        )
 
     def assertSucceeds(self, func, *args, **kwargs) -> None:
         try:
@@ -474,9 +478,7 @@ class TestPackData(PackTests):
             self.datadir, "pack-{}.pack".format(pack1_sha.decode("ascii"))
         )
         with open(path, "rb") as f:
-            PackData.from_file(
-                f, os.path.getsize(path), object_format=DEFAULT_OBJECT_FORMAT
-            )
+            PackData.from_file(f, DEFAULT_OBJECT_FORMAT, os.path.getsize(path))
 
     def test_pack_len(self) -> None:
         with self.get_pack_data(pack1_sha) as p:
@@ -569,7 +571,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "v1test.idx")
             p.create_index_v1(filename)
-            idx1 = load_pack_index(filename)
+            idx1 = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             idx2 = self.get_pack_index(pack1_sha)
             self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
             self.assertEqual(idx1, idx2)
@@ -578,7 +580,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "v2test.idx")
             p.create_index_v2(filename)
-            idx1 = load_pack_index(filename)
+            idx1 = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             idx2 = self.get_pack_index(pack1_sha)
             self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
             self.assertEqual(idx1, idx2)
@@ -587,7 +589,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "v3test.idx")
             p.create_index_v3(filename)
-            idx1 = load_pack_index(filename)
+            idx1 = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             idx2 = self.get_pack_index(pack1_sha)
             self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
             self.assertEqual(idx1, idx2)
@@ -598,7 +600,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "version3test.idx")
             p.create_index(filename, version=3)
-            idx = load_pack_index(filename)
+            idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             self.assertIsInstance(idx, PackIndex3)
             self.assertEqual(idx.version, 3)
 
@@ -607,24 +609,24 @@ class TestPackData(PackTests):
         try:
             self.assertEqual(
                 sha1(b"abcd1234wxyz").hexdigest(),
-                compute_file_sha(f, DEFAULT_OBJECT_FORMAT).hexdigest(),
+                compute_file_sha(f, DEFAULT_OBJECT_FORMAT.hash_func).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"abcd1234wxyz").hexdigest(),
-                compute_file_sha(f, DEFAULT_OBJECT_FORMAT, buffer_size=5).hexdigest(),
+                compute_file_sha(f, DEFAULT_OBJECT_FORMAT.hash_func, buffer_size=5).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"abcd1234").hexdigest(),
-                compute_file_sha(f, DEFAULT_OBJECT_FORMAT, end_ofs=-4).hexdigest(),
+                compute_file_sha(f, DEFAULT_OBJECT_FORMAT.hash_func, end_ofs=-4).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"1234wxyz").hexdigest(),
-                compute_file_sha(f, DEFAULT_OBJECT_FORMAT, start_ofs=4).hexdigest(),
+                compute_file_sha(f, DEFAULT_OBJECT_FORMAT.hash_func, start_ofs=4).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"1234").hexdigest(),
                 compute_file_sha(
-                    f, DEFAULT_OBJECT_FORMAT, start_ofs=4, end_ofs=-4
+                    f, DEFAULT_OBJECT_FORMAT.hash_func, start_ofs=4, end_ofs=-4
                 ).hexdigest(),
             )
         finally:
@@ -634,13 +636,13 @@ class TestPackData(PackTests):
         f = BytesIO(b"abcd1234wxyz")
         try:
             self.assertRaises(
-                AssertionError, compute_file_sha, f, DEFAULT_OBJECT_FORMAT, -20
+                AssertionError, compute_file_sha, f, DEFAULT_OBJECT_FORMAT.hash_func, -20
             )
             self.assertRaises(
-                AssertionError, compute_file_sha, f, DEFAULT_OBJECT_FORMAT, 0, 20
+                AssertionError, compute_file_sha, f, DEFAULT_OBJECT_FORMAT.hash_func, 0, 20
             )
             self.assertRaises(
-                AssertionError, compute_file_sha, f, DEFAULT_OBJECT_FORMAT, 10, -12
+                AssertionError, compute_file_sha, f, DEFAULT_OBJECT_FORMAT.hash_func, 10, -12
             )
         finally:
             f.close()
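
compute_file_sha changes shape rather than just gaining an argument: it now wants the hash constructor (DEFAULT_OBJECT_FORMAT.hash_func) instead of the format object itself. A minimal sketch of the call the tests above exercise:

    from io import BytesIO

    from dulwich.object_format import DEFAULT_OBJECT_FORMAT
    from dulwich.pack import compute_file_sha

    f = BytesIO(b"abcd1234wxyz")
    # start_ofs/end_ofs trim the input to b"1234"; note the hash constructor.
    digest = compute_file_sha(
        f, DEFAULT_OBJECT_FORMAT.hash_func, start_ofs=4, end_ofs=-4
    )
    print(digest.hexdigest())
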
@@ -699,7 +701,7 @@ class TestPack(PackTests):
                 basename, origpack.pack_tuples(), object_format=DEFAULT_OBJECT_FORMAT
             )
 
-            with Pack(basename) as newpack:
+            with Pack(basename, object_format=DEFAULT_OBJECT_FORMAT) as newpack:
                 self.assertEqual(origpack, newpack)
                 self.assertSucceeds(newpack.index.check)
                 self.assertEqual(origpack.name(), newpack.name())
@@ -727,7 +729,7 @@ class TestPack(PackTests):
         write_pack(
             basename, origpack.pack_tuples(), object_format=DEFAULT_OBJECT_FORMAT
         )
-        return Pack(basename)
+        return Pack(basename, object_format=DEFAULT_OBJECT_FORMAT)
 
     def test_keep_no_message(self) -> None:
         with self.get_pack(pack1_sha) as p:
@@ -871,6 +873,7 @@ class TestThinPack(PackTests):
     def make_pack(self, resolve_ext_ref):
         return Pack(
             self.pack_prefix,
+            object_format=DEFAULT_OBJECT_FORMAT,
             resolve_ext_ref=self.store.get_raw if resolve_ext_ref else None,
         )
 
@@ -1062,7 +1065,7 @@ class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
     def index(self, filename, entries, pack_checksum):
         path = os.path.join(self.tempdir, filename)
         self.writeIndex(path, entries, pack_checksum)
-        idx = load_pack_index(path)
+        idx = load_pack_index(path, DEFAULT_OBJECT_FORMAT)
         self.assertSucceeds(idx.check)
         self.assertEqual(idx.version, self._expected_version)
         return idx
@@ -1082,9 +1085,7 @@ class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
     def index(self, filename, entries, pack_checksum):
         from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 
-        return MemoryPackIndex(
-            entries, pack_checksum, object_format=DEFAULT_OBJECT_FORMAT
-        )
+        return MemoryPackIndex(entries, DEFAULT_OBJECT_FORMAT, pack_checksum)
 
     def tearDown(self) -> None:
         TestCase.tearDown(self)
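
MemoryPackIndex likewise moves the object format into the second positional slot, ahead of the pack checksum. A sketch reusing the synthetic entry from the v3 test below; both the 20-byte object id and the checksum are made-up values:

    import zlib

    from dulwich.object_format import DEFAULT_OBJECT_FORMAT
    from dulwich.pack import MemoryPackIndex

    entries = [(b"abcd" * 5, 0, zlib.crc32(b""))]  # (binary sha, offset, crc32)
    idx = MemoryPackIndex(entries, DEFAULT_OBJECT_FORMAT, b"1234567890" * 2)
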
@@ -1136,7 +1137,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         entries = [(b"abcd" * 5, 0, zlib.crc32(b""))]
         filename = os.path.join(self.tempdir, "test.idx")
         self.writeIndex(filename, entries, b"1234567890" * 2)
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
         self.assertEqual(idx.hash_format, 1)  # SHA-1
@@ -1150,7 +1151,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         # Write v3 index with SHA-1 (algorithm=1)
         with GitFile(filename, "wb") as f:
             write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(idx.hash_format, 1)
         self.assertEqual(idx.hash_size, 20)
 
@@ -1213,7 +1214,7 @@ class WritePackIndexTests(TestCase):
         with GitFile(filename, "wb") as f:
             write_pack_index(f, entries, b"P" * 20)
 
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(DEFAULT_PACK_INDEX_VERSION, idx.version)
 
     def test_write_pack_index_version_1(self) -> None:
@@ -1230,7 +1231,7 @@ class WritePackIndexTests(TestCase):
         with GitFile(filename, "wb") as f:
             write_pack_index(f, entries, b"P" * 20, version=1)
 
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(1, idx.version)
 
     def test_write_pack_index_version_3(self) -> None:
@@ -1247,7 +1248,7 @@ class WritePackIndexTests(TestCase):
         with GitFile(filename, "wb") as f:
             write_pack_index(f, entries, b"P" * 20, version=3)
 
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(3, idx.version)
 
     def test_write_pack_index_invalid_version(self) -> None:
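
The write path is untouched by this commit — write_pack_index still selects the on-disk version itself — only loading needs the explicit format. A round-trip sketch with the same synthetic entries, assuming a writable working directory:

    import zlib

    from dulwich.file import GitFile
    from dulwich.object_format import DEFAULT_OBJECT_FORMAT
    from dulwich.pack import load_pack_index, write_pack_index

    entries = [(b"abcd" * 5, 0, zlib.crc32(b""))]
    with GitFile("test.idx", "wb") as f:
        write_pack_index(f, entries, b"P" * 20, version=3)
    idx = load_pack_index("test.idx", DEFAULT_OBJECT_FORMAT)
    assert idx.version == 3
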
@@ -1800,7 +1801,7 @@ class DeltaChainIteratorTests(TestCase):
         )
         fsize = f.tell()
         f.seek(0)
-        packdata = PackData.from_file(f, fsize, object_format=DEFAULT_OBJECT_FORMAT)
+        packdata = PackData.from_file(f, DEFAULT_OBJECT_FORMAT, fsize)
         td = tempfile.mkdtemp()
         idx_path = os.path.join(td, "test.idx")
         self.addCleanup(shutil.rmtree, td)
@@ -1809,7 +1810,7 @@ class DeltaChainIteratorTests(TestCase):
             version=2,
             resolve_ext_ref=self.get_raw_no_repeat,
         )
-        packindex = load_pack_index(idx_path)
+        packindex = load_pack_index(idx_path, DEFAULT_OBJECT_FORMAT)
         pack = Pack.from_objects(packdata, packindex)
         try:
             # Attempting to open this REF_DELTA object would loop forever
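
Taken together, the setup these iterator tests use is the full round-trip: parse the pack data with an explicit format, index it, reload the index with the same format, and zip the two into a Pack. A condensed sketch (file names hypothetical; the pack file is kept open while the PackData is in use):

    import os

    from dulwich.object_format import DEFAULT_OBJECT_FORMAT
    from dulwich.pack import Pack, PackData, load_pack_index

    with open("test.pack", "rb") as f:
        fsize = os.fstat(f.fileno()).st_size
        packdata = PackData.from_file(f, DEFAULT_OBJECT_FORMAT, fsize)
        packdata.create_index("test.idx", version=2)
        packindex = load_pack_index("test.idx", DEFAULT_OBJECT_FORMAT)
        pack = Pack.from_objects(packdata, packindex)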

+ 0 - 4
tests/test_sha256.py

@@ -40,8 +40,6 @@ class HashAlgorithmTests(unittest.TestCase):
         self.assertEqual(alg.name, "sha1")
         self.assertEqual(alg.oid_length, 20)
         self.assertEqual(alg.hex_length, 40)
-        self.assertEqual(len(alg.zero_oid), 40)
-        self.assertEqual(len(alg.zero_oid_bin), 20)
 
     def test_sha256_properties(self):
         """Test SHA256 algorithm properties."""
@@ -49,8 +47,6 @@ class HashAlgorithmTests(unittest.TestCase):
         self.assertEqual(alg.name, "sha256")
         self.assertEqual(alg.oid_length, 32)
         self.assertEqual(alg.hex_length, 64)
-        self.assertEqual(len(alg.zero_oid), 64)
-        self.assertEqual(len(alg.zero_oid_bin), 32)
 
     def test_get_hash_algorithm(self):
         """Test getting hash algorithms by name."""

+ 2 - 2
tests/test_sha256_pack.py

@@ -67,7 +67,7 @@ class SHA256PackTests(unittest.TestCase):
 
         # Load and verify the index
         index_buf.seek(0)
-        pack_idx = load_pack_index_file("<memory>", index_buf)
+        pack_idx = load_pack_index_file("<memory>", index_buf, SHA256)
 
         # Check that the index loaded correctly
         self.assertEqual(len(pack_idx), 5)
@@ -108,7 +108,7 @@ class SHA256PackTests(unittest.TestCase):
 
         # Load and verify the index
         index_buf.seek(0)
-        pack_idx = load_pack_index_file("<memory>", index_buf)
+        pack_idx = load_pack_index_file("<memory>", index_buf, SHA256)
 
         # Check that the index loaded correctly
         self.assertEqual(len(pack_idx), 5)
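
For SHA-256 the pattern is identical; only the format object differs. A hedged sketch: the SHA256 import path is assumed to match these tests, the helper name is hypothetical, and index_bytes stands in for the v3 index the test writes into index_buf.

    from io import BytesIO

    from dulwich.object_format import SHA256  # import path assumed from the tests
    from dulwich.pack import load_pack_index_file

    def load_sha256_index(index_bytes: bytes):
        """Hypothetical helper: parse an in-memory SHA-256 v3 pack index."""
        buf = BytesIO(index_bytes)
        return load_pack_index_file("<memory>", buf, SHA256)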