Browse source

Update more hardcoded sha length

Jelmer Vernooij 2 months ago
parent
commit
43b49600a6

+ 6 - 9
NEWS

@@ -68,8 +68,12 @@ compatible.
    Dulwich version, and installed dependencies with their versions.
    (Jelmer Vernooij, #1835)
 
- * Add basic ``dulwich restore`` and ``dulwich switch``
-   commands. (Jelmer Vernooij, #1777)
+ * Add initial support for SHA256 repositories. Dulwich can now read and write Git
+   repositories using SHA256 object format. This includes support for loose
+   objects, pack files (v1 and v2 indexes), and tree parsing with SHA256 hashes.
+   The Rust extensions have been updated to support variable hash lengths.
+   SHA256 repositories require format version 1 and the objectFormat extension.
+   (Jelmer Vernooij, #1115)
 
 0.24.10	2025-11-10
 
@@ -531,13 +535,6 @@ compatible.
 
  * Add ``gc`` command to ``dulwich.porcelain.`` (Jelmer Vernooij, #92)
 
- * Add initial support for SHA256 repositories. Dulwich can now read and write Git
-   repositories using SHA256 object format. This includes support for loose
-   objects, pack files (v1 and v2 indexes), and tree parsing with SHA256 hashes.
-   The Rust extensions have been updated to support variable hash lengths.
-   SHA256 repositories require format version 1 and the objectFormat extension.
-   (Jelmer Vernooij, #1115)
-
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
    into loose objects in the repository. This command extracts all objects
    from a pack file and writes them to the object store as individual files.
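
For context on the SHA256 NEWS entry above: a SHA-256 repository is marked by repositoryformatversion 1 plus the objectFormat extension. A minimal sketch of verifying that with dulwich's existing config API (the repository path is hypothetical):

    from dulwich.repo import Repo

    repo = Repo("/path/to/sha256-repo")  # hypothetical path
    config = repo.get_config()
    # Per the NEWS entry: format version 1 and the objectFormat extension.
    assert config.get((b"core",), b"repositoryformatversion") == b"1"
    assert config.get((b"extensions",), b"objectformat") == b"sha256"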

+ 11 - 36
crates/objects/src/lib.rs

@@ -49,12 +49,16 @@ fn sha_to_pyhex(py: Python, sha: &[u8]) -> PyResult<Py<PyAny>> {
     Ok(PyBytes::new(py, hexsha.as_slice()).into())
 }
 
-fn parse_tree_with_length(
+#[pyfunction]
+#[pyo3(signature = (text, sha_len, strict=None))]
+fn parse_tree(
     py: Python,
     mut text: &[u8],
-    strict: bool,
-    hash_len: usize,
+    sha_len: usize,
+    strict: Option<bool>,
 ) -> PyResult<Vec<(Py<PyAny>, u32, Py<PyAny>)>> {
+    let strict = strict.unwrap_or(false);
+
     let mut entries = Vec::new();
     while !text.is_empty() {
         let mode_end = memchr(b' ', text)
@@ -76,49 +80,21 @@ fn parse_tree_with_length(
         text = &text[namelen + 1..];
 
         // Check if we have enough bytes for the hash
-        if text.len() < hash_len {
+        if text.len() < sha_len {
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
         }
 
-        let sha = &text[..hash_len];
+        let sha = &text[..sha_len];
         entries.push((
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             mode,
             sha_to_pyhex(py, sha)?,
         ));
-        text = &text[hash_len..];
+        text = &text[sha_len..];
     }
     Ok(entries)
 }
 
-#[pyfunction]
-#[pyo3(signature = (text, strict=None, object_format=None))]
-fn parse_tree(
-    py: Python,
-    text: &[u8],
-    strict: Option<bool>,
-    object_format: Option<Py<PyAny>>,
-) -> PyResult<Vec<(Py<PyAny>, u32, Py<PyAny>)>> {
-    let strict = strict.unwrap_or(false);
-
-    // Determine hash length from object_format if provided
-    if let Some(algo) = object_format {
-        // Get oid_length attribute from object format object
-        let oid_length: usize = algo.getattr(py, "oid_length")?.extract(py)?;
-        parse_tree_with_length(py, text, strict, oid_length)
-    } else {
-        // Try to auto-detect by attempting to parse with both lengths
-        // We'll attempt to parse with SHA1 first (20 bytes), then SHA256 (32 bytes)
-        match parse_tree_with_length(py, text, strict, 20) {
-            Ok(entries) => Ok(entries),
-            Err(_) => {
-                // SHA1 failed, try SHA256
-                parse_tree_with_length(py, text, strict, 32)
-            }
-        }
-    }
-}
-
 fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
     let len = std::cmp::min(a.1.len(), b.1.len());
     let cmp = a.1[..len].cmp(&b.1[..len]);
@@ -182,8 +158,7 @@ fn sorted_tree_items(
                         .unbind()
                         .into_any(),
                 ))?
-                .unbind()
-                .into())
+                .unbind())
         })
         .collect::<PyResult<Vec<Py<PyAny>>>>()
 }
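
The Rust parse_tree above now requires an explicit sha_len rather than auto-detecting 20 vs 32 bytes. A sketch of the wire format it walks, driven through the Python-level parse_tree (the raw SHA bytes are arbitrary test data):

    from dulwich.objects import parse_tree

    # Tree entry wire format: <octal mode> SP <name> NUL <raw sha, sha_len bytes>
    raw_tree = b"100644 foo\x00" + bytes(range(20))  # one SHA-1-sized entry
    entries = list(parse_tree(raw_tree, 20))  # sha_len: 20 for SHA-1, 32 for SHA-256
    # -> [(b"foo", 0o100644, b"000102030405060708090a0b0c0d0e0f10111213")]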

+ 2 - 1
dulwich/cli.py

@@ -2513,7 +2513,7 @@ class cmd_show_ref(Command):
             "-s",
             "--hash",
             nargs="?",
-            const=40,
+            const=40,  # TODO: Support SHA256
             type=int,
             metavar="n",
             help="Only show the OID, not the reference name",
@@ -2590,6 +2590,7 @@ class cmd_show_ref(Command):
 
         # Output results
         if not parsed_args.quiet:
+            # TODO: Add support for SHA256
             abbrev_len = parsed_args.abbrev if parsed_args.abbrev else 40
             hash_only = parsed_args.hash is not None
             if hash_only and parsed_args.hash:
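
The two TODOs above keep show-ref pinned to SHA-1 widths for now. A sketch of the fallback they annotate (names here are local to the sketch):

    # With no --abbrev given, print the full OID: 40 hex chars for SHA-1;
    # the TODOs note that SHA-256 would need 64 here.
    def abbreviate(hexsha: bytes, abbrev: int | None) -> bytes:
        abbrev_len = abbrev if abbrev else 40
        return hexsha[:abbrev_len]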

+ 10 - 10
dulwich/object_store.py

@@ -1186,11 +1186,11 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         """
         if name == ZERO_SHA:
             raise KeyError(name)
-        if len(name) in (40, 64):  # Support both SHA1 (40) and SHA256 (64) hex
-            sha = hex_to_sha(cast(ObjectID, name))
-            hexsha = cast(ObjectID, name)
-        elif len(name) in (20, 32):  # Support both SHA1 (20) and SHA256 (32) binary
-            sha = cast(RawObjectID, name)
+        if len(name) == self.object_format.hex_length:
+            sha = hex_to_sha(name)
+            hexsha = name
+        elif len(name) == self.object_format.oid_length:
+            sha = name
             hexsha = None
         else:
             raise AssertionError(f"Invalid object name {name!r}")
@@ -1730,6 +1730,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Returns:
             Number of loose objects
         """
+        # Calculate expected filename length for loose
+        # objects (excluding directory)
+        fn_length = self.object_format.hex_length - 2
         count = 0
         if not os.path.exists(self.path):
             return 0
@@ -1738,11 +1741,7 @@ class DiskObjectStore(PackBasedObjectStore):
             subdir = os.path.join(self.path, f"{i:02x}")
             try:
                 count += len(
-                    [
-                        name
-                        for name in os.listdir(subdir)
-                        if len(name) == 38  # 40 - 2 for the prefix
-                    ]
+                    [name for name in os.listdir(subdir) if len(name) == fn_length]
                 )
             except FileNotFoundError:
                 # Directory may have been removed or is inaccessible
@@ -2958,6 +2957,7 @@ class ObjectStoreGraphWalker:
     def ack(self, sha: ObjectID) -> None:
         """Ack that a revision and its ancestors are present in the source."""
         if len(sha) != 40:
+            # TODO: support SHA256
             raise ValueError(f"unexpected sha {sha!r} received")
         ancestors = {sha}
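
The lookup above now derives both accepted name lengths from the store's object format instead of the hardcoded 40/64 and 20/32 pairs. A sketch of the relationships, assuming SHA-1 values:

    # hex_length is 2 * oid_length: 40/20 for SHA-1, 64/32 for SHA-256.
    hexsha = b"a" * 40                       # hex form, hex_length chars
    rawsha = bytes.fromhex(hexsha.decode())  # binary form, oid_length bytes
    assert len(rawsha) == len(hexsha) // 2

    # Loose objects live at objects/<2-char fan-out>/<rest>, so loose
    # filenames are hex_length - 2 characters, as count_loose_objects expects.
    loose_path = f"objects/{hexsha[:2].decode()}/{hexsha[2:].decode()}"
    assert len(loose_path.rsplit("/", 1)[1]) == 38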
 

+ 9 - 38
dulwich/objects.py

@@ -106,6 +106,7 @@ from .errors import (
     ObjectFormatException,
 )
 from .file import GitFile
+from .object_format import DEFAULT_OBJECT_FORMAT, ObjectFormat
 
 if TYPE_CHECKING:
     from _hashlib import HASH
@@ -116,21 +117,6 @@ if TYPE_CHECKING:
 ZERO_SHA = b"0" * 40  # SHA1 - kept for backward compatibility
 
 
-def zero_sha_for(object_format=None) -> bytes:
-    """Get the zero SHA for a given hash algorithm.
-
-    Args:
-        object_format: HashAlgorithm instance. If None, returns SHA1 zero.
-
-    Returns:
-        Zero SHA as hex bytes (40 chars for SHA1, 64 for SHA256)
-    """
-    if object_format is None:
-        return ZERO_SHA
-    return object_format.zero_oid
-
-
-
 # Header fields for commits
 _TREE_HEADER = b"tree"
 _PARENT_HEADER = b"parent"
@@ -472,14 +459,15 @@ class ShaFile:
     type_name: bytes
     type_num: int
     _chunked_text: list[bytes] | None
     _sha: "FixedSha | None | HASH"
+    object_format: ObjectFormat
 
     def __init__(self) -> None:
         """Initialize a ShaFile."""
         self._sha = None
         self._chunked_text = None
         self._needs_serialization = True
-        self.object_format = None
+        self.object_format = DEFAULT_OBJECT_FORMAT
 
     @staticmethod
     def _parse_legacy_object_header(
@@ -1346,36 +1337,19 @@ class TreeEntry(NamedTuple):
 
 
 def parse_tree(
-    text: bytes, strict: bool = False, object_format=None
-) -> Iterator[tuple[bytes, int, ObjectID]]:
+    text: bytes, sha_len: int | None = None, *, strict: bool = False
+) -> Iterator[tuple[bytes, int, bytes]]:
     """Parse a tree text.
 
     Args:
       text: Serialized text to parse
+      sha_len: Length of the object IDs in bytes
       strict: Whether to be strict about format
-      object_format: Hash algorithm object (SHA1 or SHA256) - if None, auto-detect
     Returns: iterator of tuples of (name, mode, sha)
 
     Raises:
       ObjectFormatException: if the object was malformed in some way
     """
-    if object_format is not None:
-        sha_len = object_format.oid_length
-        return _parse_tree_with_sha_len(text, strict, sha_len)
-
-    # Try both hash lengths and use the one that works
-    try:
-        # Try SHA1 first (more common)
-        return _parse_tree_with_sha_len(text, strict, 20)
-    except ObjectFormatException:
-        # If SHA1 fails, try SHA256
-        return _parse_tree_with_sha_len(text, strict, 32)
-
-
-def _parse_tree_with_sha_len(
-    text: bytes, strict: bool, sha_len: int
-) -> Iterator[tuple[bytes, int, bytes]]:
-    """Helper function to parse tree with a specific hash length."""
     count = 0
     length = len(text)
 
@@ -1598,7 +1572,8 @@ class Tree(ShaFile):
         """Grab the entries in the tree."""
         try:
             parsed_entries = parse_tree(
-                b"".join(chunks), object_format=self.object_format
+                b"".join(chunks),
+                sha_len=self.object_format.oid_length,
             )
         except ValueError as exc:
             raise ObjectFormatException(exc) from exc
@@ -1628,7 +1603,7 @@ class Tree(ShaFile):
         for name, mode, sha in parse_tree(
             b"".join(self._chunked_text),
             strict=True,
-            object_format=self.object_format,
+            sha_len=self.object_format.oid_length,
         ):
             check_hexsha(sha, f"invalid sha {sha!r}")
             if b"/" in name or name in (b"", b".", b"..", b".git"):

+ 74 - 41
dulwich/pack.py

@@ -124,6 +124,7 @@ from typing import (
     Any,
     BinaryIO,
     Generic,
+    Optional,
     Protocol,
     TypeVar,
     cast,
@@ -141,8 +142,7 @@ if TYPE_CHECKING:
 
     from .bitmap import PackBitmap
     from .commit_graph import CommitGraph
-    from .object_store import BaseObjectStore
-    from .refs import Ref
+    from .object_format import ObjectFormat
 
 # For some reason the above try, except fails to set has_mmap = False for plan9
 if sys.platform == "Plan9":
@@ -530,7 +530,9 @@ def iter_sha1(iter: Iterable[bytes]) -> bytes:
     return sha.hexdigest().encode("ascii")
 
 
-def load_pack_index(path: str | os.PathLike[str], object_format: ObjectFormat| None = None) -> "PackIndex":
+def load_pack_index(
+    path: str | os.PathLike[str], object_format: Optional["ObjectFormat"] = None
+) -> "PackIndex":
     """Load an index file by path.
 
     Args:
@@ -574,8 +576,9 @@ def _load_file_contents(
 
 
 def load_pack_index_file(
-    path: str | os.PathLike[str], f: IO[bytes] | _GitFile
-    object_format: ObjectFormat | = None,
+    path: str | os.PathLike[str],
+    f: IO[bytes] | _GitFile,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> "PackIndex":
     """Load an index file from a file-like object.
 
@@ -759,7 +762,7 @@ class MemoryPackIndex(PackIndex):
         entries: list[PackIndexEntry],
         pack_checksum: bytes | None = None,
         *,
-        object_format: ObjectFormat | None = None,
+        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Create a new MemoryPackIndex.
 
@@ -822,7 +825,9 @@ class MemoryPackIndex(PackIndex):
     def for_pack(cls, pack_data: "PackData") -> "MemoryPackIndex":
         """Create a MemoryPackIndex from a PackData object."""
         return MemoryPackIndex(
-            list(pack_data.sorted_entries()), pack_checksum=pack_data.get_stored_checksum(), object_format=pack.object_format,
+            list(pack_data.sorted_entries()),
+            pack_checksum=pack_data.get_stored_checksum(),
+            object_format=pack_data.object_format,
         )
 
     @classmethod
@@ -1046,7 +1051,7 @@ class PackIndex1(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
-        object_format: ObjectFormat | None = None,
+        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize a version 1 pack index.
 
@@ -1055,7 +1060,7 @@ class PackIndex1(FilePackIndex):
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
-            hash_algorithm: Hash algorithm used by the repository
+            object_format: Object format used by the repository
         """
         super().__init__(filename, file, contents, size)
         self.version = 1
@@ -1096,7 +1101,7 @@ class PackIndex2(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
-        object_format: ObjectFormat | None = None,
+        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize a version 2 pack index.
 
@@ -1105,7 +1110,7 @@ class PackIndex2(FilePackIndex):
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
-            hash_algorithm: Hash algorithm used by the repository
+            object_format: Object format used by the repository
         """
         super().__init__(filename, file, contents, size)
         if self._contents[:4] != b"\377tOc":
@@ -1153,10 +1158,12 @@ class PackIndex2(FilePackIndex):
     def get_pack_checksum(self) -> bytes:
         """Return the checksum stored for the corresponding packfile.
 
-        Returns: binary digest (always 20 bytes - SHA1)
+        Returns: binary digest (size depends on hash algorithm)
         """
-        # Pack checksums are always SHA1, even in SHA256 repositories
-        return bytes(self._contents[-40:-20])
+        # Index ends with: pack_checksum + index_checksum
+        # Each checksum is hash_size bytes
+        checksum_size = self.hash_size
+        return bytes(self._contents[-2 * checksum_size : -checksum_size])
 
 
 class PackIndex3(FilePackIndex):
@@ -1707,7 +1714,12 @@ class PackData:
         return self._filename
 
     @classmethod
-    def from_file(cls, file: IO[bytes], size: int | None = None, object_format: ObjectFormat | None = None) -> "PackData":
+    def from_file(
+        cls,
+        file: IO[bytes],
+        size: int | None = None,
+        object_format: Optional["ObjectFormat"] = None,
+    ) -> "PackData":
         """Create a PackData object from an open file.
 
         Args:
@@ -1721,7 +1733,11 @@ class PackData:
         return cls(str(file), file=file, size=size, object_format=object_format)
 
     @classmethod
-    def from_path(cls, path: str | os.PathLike[str], object_format: ObjectFormat | None = None) -> "PackData":
+    def from_path(
+        cls,
+        path: str | os.PathLike[str],
+        object_format: Optional["ObjectFormat"] = None,
+    ) -> "PackData":
         """Create a PackData object from a file path.
 
         Args:
@@ -1815,7 +1831,7 @@ class PackData:
 
     def sorted_entries(
         self,
-        progress: ProgressFn | None = None,
+        progress: Callable[[int, int], None] | None = None,
         resolve_ext_ref: ResolveExtRefFn | None = None,
     ) -> list[tuple[RawObjectID, int, int]]:
         """Return entries in this pack, sorted by SHA.
@@ -1896,8 +1912,10 @@ class PackData:
             progress=progress, resolve_ext_ref=resolve_ext_ref
         )
         with GitFile(filename, "wb") as f:
+            if hash_format is None:
+                hash_format = 1  # Default to SHA-1
             return write_pack_index_v3(
-                f, entries, self.calculate_checksum(), hash_format
+                f, entries, self.calculate_checksum(), hash_format=hash_format
             )
 
     def create_index(
@@ -1938,8 +1956,9 @@ class PackData:
 
     def get_stored_checksum(self) -> bytes:
         """Return the expected checksum stored in this pack."""
-        self._file.seek(-20, SEEK_END)
-        return self._file.read(20)
+        checksum_size = self.object_format.oid_length
+        self._file.seek(-checksum_size, SEEK_END)
+        return self._file.read(checksum_size)
 
     def check(self) -> None:
         """Check the consistency of this pack."""
@@ -2575,8 +2594,10 @@ class SHA1Writer(BinaryIO):
 
 
 def pack_object_header(
-    type_num: int, delta_base: bytes | int | None, size: int
-    object_format: "ObjectFormat" | None = None
+    type_num: int,
+    delta_base: bytes | int | None,
+    size: int,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> bytearray:
     """Create a pack object header for the given object info.
 
@@ -2615,6 +2636,7 @@ def pack_object_header(
             delta_base >>= 7
         header.extend(ret)
     elif type_num == REF_DELTA:
+        assert isinstance(delta_base, bytes)
         assert len(delta_base) == object_format.oid_length
         header += delta_base
     return bytearray(header)
@@ -2624,7 +2646,7 @@ def pack_object_chunks(
     type: int,
     object: list[bytes] | tuple[bytes | int, list[bytes]],
     compression_level: int = -1,
-    object_format: "ObjectFormat" | None = None,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> Iterator[bytes]:
     """Generate chunks for a pack object.
 
@@ -2663,7 +2685,11 @@ def pack_object_chunks(
         # Shouldn't reach here with proper typing
         raise TypeError(f"Unexpected object type: {object.__class__.__name__}")
 
-    yield bytes(pack_object_header(type, delta_base, sum(map(len, chunks)), object_format=object_format))
+    yield bytes(
+        pack_object_header(
+            type, delta_base, sum(map(len, chunks)), object_format=object_format
+        )
+    )
     compressor = zlib.compressobj(level=compression_level)
     for data in chunks:
         yield compressor.compress(data)
@@ -2676,7 +2702,7 @@ def write_pack_object(
     object: list[bytes] | tuple[bytes | int, list[bytes]],
     sha: "HashObject | None" = None,
     compression_level: int = -1,
-    object_format: "ObjectFormat" | None = None,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> int:
     """Write pack object to a file.
 
@@ -2716,7 +2742,7 @@ def write_pack(
     deltify: bool | None = None,
     delta_window_size: int | None = None,
     compression_level: int = -1,
-    object_format: "ObjectFormat" | None = None
+    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[bytes, bytes]:
     """Write a new pack data file.
 
@@ -3046,7 +3072,7 @@ def write_pack_from_container(
     reuse_deltas: bool = True,
     compression_level: int = -1,
     other_haves: set[bytes] | None = None,
-    object_format: "ObjectFormat" | None = None,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
@@ -3089,7 +3115,7 @@ def write_pack_objects(
     delta_window_size: int | None = None,
     deltify: bool | None = None,
     compression_level: int = -1,
-    object_format: "ObjectFormat" | None = None
+    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
@@ -3124,7 +3150,7 @@ class PackChunkGenerator:
         progress: Callable[..., None] | None = None,
         compression_level: int = -1,
         reuse_compressed: bool = True,
-        object_format: "ObjectFormat" | None = None,
+        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize PackChunkGenerator.
 
@@ -3251,7 +3277,7 @@ def write_pack_data(
     num_records: int | None = None,
     progress: Callable[..., None] | None = None,
     compression_level: int = -1,
-    object_format: "ObjectFormat" | None = None,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
@@ -3655,7 +3681,7 @@ class Pack:
         depth: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
-        object_format: ObjectFormat | None = None,
+        object_format: Optional["ObjectFormat"] = None,
     ) -> None:
         """Initialize a Pack object.
 
@@ -3683,6 +3709,10 @@ class Pack:
         self.depth = depth
         self.threads = threads
         self.big_file_threshold = big_file_threshold
+        # Always set object_format, defaulting to default
+        from .object_format import DEFAULT_OBJECT_FORMAT
+
+        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT
         self._data_load = lambda: PackData(
             self._data_path,
             delta_window_size=delta_window_size,
@@ -3697,15 +3727,13 @@ class Pack:
             self._idx_path, object_format=object_format
         )
         self.resolve_ext_ref = resolve_ext_ref
-        # Always set object_format, defaulting to default
-        from .object_format import DEFAULT_OBJECT_FORMAT
-
-        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT
 
     @classmethod
     def from_lazy_objects(
-        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex],
-        object_format: ObjectFormat | None = None
+        cls,
+        data_fn: Callable[[], PackData],
+        idx_fn: Callable[[], PackIndex],
+        object_format: Optional["ObjectFormat"] = None,
     ) -> "Pack":
         """Create a new pack object from callables to load pack data and index objects."""
         ret = cls("", object_format=object_format)
@@ -3714,7 +3742,12 @@ class Pack:
         return ret
 
     @classmethod
-    def from_objects(cls, data: PackData, idx: PackIndex, object_format | ObjectFormat | None = None) -> "Pack":
+    def from_objects(
+        cls,
+        data: PackData,
+        idx: PackIndex,
+        object_format: Optional["ObjectFormat"] = None,
+    ) -> "Pack":
         """Create a new pack object from pack data and index objects."""
         ret = cls("", object_format=object_format)
         ret._data = data
@@ -4086,7 +4119,7 @@ class Pack:
         return base_type, chunks
 
     def entries(
-        self, progress: ProgressFn | None = None
+        self, progress: Callable[[int, int], None] | None = None
     ) -> Iterator[PackIndexEntry]:
         """Yield entries summarizing the contents of this pack.
 
@@ -4100,7 +4133,7 @@ class Pack:
         )
 
     def sorted_entries(
-        self, progress: ProgressFn | None = None
+        self, progress: Callable[[int, int], None] | None = None
     ) -> Iterator[PackIndexEntry]:
         """Return entries in this pack, sorted by SHA.
 
@@ -4145,7 +4178,7 @@ def extend_pack(
     *,
     compression_level: int = -1,
     progress: Callable[[bytes], None] | None = None,
-    object_format: ObjectFormat | None = None,
+    object_format: Optional["ObjectFormat"] = None,
 ) -> tuple[bytes, list[tuple[bytes, int, int]]]:
     """Extend a pack file with more objects.
 

+ 1 - 1
dulwich/tests/test_object_store.py

@@ -84,7 +84,7 @@ class ObjectStoreTests:
         self.assertEqual(
             [],
             self.store.determine_wants_all(
-                {Ref(b"refs/heads/foo"): ObjectID(b"0" * 40)}
+                {b"refs/heads/foo": self.store.object_format.zero_oid}
             ),
         )
 

+ 2 - 2
dulwich/tests/utils.py

@@ -37,7 +37,7 @@ from unittest import SkipTest
 from dulwich.index import commit_tree
 from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.object_store import BaseObjectStore
-from dulwich.objects import Commit, FixedSha, ShaFile, Tag, object_class
+from dulwich.objects import ZERO_SHA, Commit, FixedSha, ShaFile, Tag, object_class
 from dulwich.pack import (
     DELTA_TYPES,
     OFS_DELTA,
@@ -138,7 +138,7 @@ def make_commit(**attrs: Any) -> Commit:
         "commit_timezone": 0,
         "message": b"Test message.",
         "parents": [],
-        "tree": b"0" * 40,
+        "tree": ZERO_SHA,
     }
     all_attrs.update(attrs)
     return make_object(Commit, **all_attrs)
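
The test diffs below swap the literal b"0" * 40 for ZERO_SHA. For orientation, a sketch of how the constants relate, assuming DEFAULT_OBJECT_FORMAT is SHA-1 (per the removed zero_sha_for docstring, zero_oid is the hex zero OID):

    from dulwich.object_format import DEFAULT_OBJECT_FORMAT
    from dulwich.objects import ZERO_SHA

    # ZERO_SHA stays SHA-1-sized for backward compatibility; per-format code
    # uses object_format.zero_oid (b"0" * 40 for SHA-1, b"0" * 64 for SHA-256).
    assert ZERO_SHA == b"0" * 40
    assert DEFAULT_OBJECT_FORMAT.zero_oid == ZERO_SHA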

+ 2 - 2
tests/compat/test_pack.py

@@ -247,7 +247,7 @@ class TestPackIndexCompat(PackTests):
 
         v3_path = os.path.join(self._tempdir, "v3_test.idx")
         with GitFile(v3_path, "wb") as f:
-            write_pack_index_v3(f, entries, b"x" * 20, object_format=1)
+            write_pack_index_v3(f, entries, b"x" * 20, hash_format=1)
 
         # Load and verify structure
         idx = load_pack_index(v3_path)
@@ -259,4 +259,4 @@ class TestPackIndexCompat(PackTests):
         # Verify SHA-256 would raise NotImplementedError
         with self.assertRaises(NotImplementedError):
             with GitFile(v3_path + ".sha256", "wb") as f:
-                write_pack_index_v3(f, entries, b"x" * 32, object_format=2)
+                write_pack_index_v3(f, entries, b"x" * 32, hash_format=2)

+ 1 - 1
tests/compat/test_sha256_packs.py

@@ -262,7 +262,7 @@ class GitSHA256PackCompatibilityTests(CompatTestCase):
         # Read all blobs
         for i in range(10):
             name = f"large{i}.txt".encode()
-            mode, sha = tree[name]
+            _mode, sha = tree[name]
             blob = repo[sha]
             expected = large_content + f" variation {i}".encode()
             self.assertEqual(blob.data, expected)

+ 2 - 1
tests/porcelain/test_merge.py

@@ -27,6 +27,7 @@ import tempfile
 import unittest
 
 from dulwich import porcelain
+from dulwich.objects import ZERO_SHA
 from dulwich.repo import Repo
 
 from .. import DependencyMissing, TestCase
@@ -682,7 +683,7 @@ class PorcelainMergeTreeTests(TestCase):
                 tmpdir,
                 None,
                 commit_id,
-                "0" * 40,  # Invalid SHA
+                ZERO_SHA,
             )
 
 

+ 4 - 4
tests/test_client.py

@@ -63,7 +63,7 @@ from dulwich.client import (
 )
 from dulwich.config import ConfigDict
 from dulwich.object_format import DEFAULT_OBJECT_FORMAT
-from dulwich.objects import Blob, Commit, Tree
+from dulwich.objects import ZERO_SHA, Blob, Commit, Tree
 from dulwich.pack import pack_objects_to_data, write_pack_data, write_pack_objects
 from dulwich.protocol import DEFAULT_GIT_PROTOCOL_VERSION_FETCH, TCP_GIT_PORT, Protocol
 from dulwich.repo import MemoryRepo, Repo
@@ -405,7 +405,7 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def update_refs(refs):
-            return {b"refs/heads/master": b"0" * 40}
+            return {b"refs/heads/master": ZERO_SHA}
 
         def generate_pack_data(have, want, *, ofs_delta=False, progress=None):
             return 0, []
@@ -429,7 +429,7 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def update_refs(refs):
-            return {b"refs/heads/master": b"0" * 40}
+            return {b"refs/heads/master": ZERO_SHA}
 
         def generate_pack_data(have, want, *, ofs_delta=False, progress=None):
             return 0, []
@@ -537,7 +537,7 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def update_refs(refs):
-            return {b"refs/heads/master": b"0" * 40}
+            return {b"refs/heads/master": ZERO_SHA}
 
         def generate_pack_data(have, want, *, ofs_delta=False, progress=None):
             return 0, []

+ 2 - 2
tests/test_dumb.py

@@ -28,7 +28,7 @@ from unittest.mock import Mock
 
 from dulwich.dumb import DumbHTTPObjectStore, DumbRemoteHTTPRepo
 from dulwich.errors import NotGitRepository
-from dulwich.objects import Blob, Commit, ShaFile, Tag, Tree, sha_to_hex
+from dulwich.objects import ZERO_SHA, Blob, Commit, ShaFile, Tag, Tree, sha_to_hex
 
 
 class MockResponse:
@@ -171,7 +171,7 @@ P pack-abcdef1234567890abcdef1234567890abcdef12.pack
         self._add_response(path, self._make_object(blob))
 
         self.assertTrue(self.store.contains_loose(hex_sha))
-        self.assertFalse(self.store.contains_loose(b"0" * 40))
+        self.assertFalse(self.store.contains_loose(ZERO_SHA))
 
     def test_add_object_not_implemented(self) -> None:
         blob = Blob()

+ 3 - 3
tests/test_filter_branch.py

@@ -25,7 +25,7 @@ import unittest
 
 from dulwich.filter_branch import CommitFilter, filter_refs
 from dulwich.object_store import MemoryObjectStore
-from dulwich.objects import Commit, Tree
+from dulwich.objects import ZERO_SHA, Commit, Tree
 from dulwich.refs import DictRefsContainer
 
 
@@ -179,7 +179,7 @@ class FilterRefsTests(unittest.TestCase):
     def test_filter_refs_already_filtered(self):
         """Test error when refs already filtered."""
         # Set up an "already filtered" state
-        self.refs[b"refs/original/refs/heads/master"] = b"0" * 40
+        self.refs[b"refs/original/refs/heads/master"] = ZERO_SHA
 
         filter = CommitFilter(self.store)
         with self.assertRaises(ValueError) as cm:
@@ -194,7 +194,7 @@ class FilterRefsTests(unittest.TestCase):
     def test_filter_refs_force(self):
         """Test force filtering."""
         # Set up an "already filtered" state
-        self.refs[b"refs/original/refs/heads/master"] = b"0" * 40
+        self.refs[b"refs/original/refs/heads/master"] = ZERO_SHA
 
         filter = CommitFilter(self.store)
         # Should not raise with force=True

+ 2 - 2
tests/test_gc.py

@@ -17,7 +17,7 @@ from dulwich.gc import (
     prune_unreachable_objects,
     should_run_gc,
 )
-from dulwich.objects import Blob, Commit, Tag, Tree
+from dulwich.objects import ZERO_SHA, Blob, Commit, Tag, Tree
 from dulwich.repo import MemoryRepo, Repo
 
 
@@ -357,7 +357,7 @@ class GCTestCase(TestCase):
         self.repo.refs[b"HEAD"] = commit.id
 
         # Create a broken ref pointing to non-existent object
-        broken_sha = b"0" * 40
+        broken_sha = ZERO_SHA
         self.repo.refs[b"refs/heads/broken"] = broken_sha
 
         # Track progress to see warning

+ 5 - 5
tests/test_index.py

@@ -69,7 +69,7 @@ from dulwich.index import (
     write_index_dict,
 )
 from dulwich.object_store import MemoryObjectStore
-from dulwich.objects import S_IFGITLINK, Blob, Tree, TreeEntry
+from dulwich.objects import S_IFGITLINK, ZERO_SHA, Blob, Tree, TreeEntry
 from dulwich.repo import Repo
 from dulwich.tests.utils import make_commit
 
@@ -1578,7 +1578,7 @@ class TestManyFilesFeature(TestCase):
             uid=1000,
             gid=1000,
             size=5,
-            sha=b"0" * 40,
+            sha=ZERO_SHA,
         )
         index[b"test.txt"] = entry
 
@@ -1607,7 +1607,7 @@ class TestManyFilesFeature(TestCase):
             uid=1000,
             gid=1000,
             size=5,
-            sha=b"0" * 40,
+            sha=ZERO_SHA,
         )
         index[b"test.txt"] = entry
 
@@ -1638,7 +1638,7 @@ class TestManyFilesFeature(TestCase):
                 uid=1000,
                 gid=1000,
                 size=5,
-                sha=b"0" * 40,
+                sha=ZERO_SHA,
                 flags=0,
                 extended_flags=0,
             ),
@@ -1866,7 +1866,7 @@ class TestPathPrefixCompression(TestCase):
                 uid=1000,
                 gid=1000,
                 size=10,
-                sha=b"0" * 40,
+                sha=ZERO_SHA,
             )
             index_v2[path] = entry
         index_v2.write()

+ 7 - 6
tests/test_objects.py

@@ -33,6 +33,7 @@ from itertools import permutations
 from dulwich.errors import ObjectFormatException
 from dulwich.objects import (
     MAX_TIME,
+    ZERO_SHA,
     Blob,
     Commit,
     ShaFile,
@@ -994,7 +995,7 @@ class TreeTests(ShaFileCheckTests):
         o = Tree.from_path(hex_to_filename(dir, tree_sha))
         self.assertEqual(
             [(b"a", 0o100644, a_sha), (b"b", 0o100644, b_sha)],
-            list(parse_tree(o.as_raw_string())),
+            list(parse_tree(o.as_raw_string(), 20)),
         )
         # test a broken tree that has a leading 0 on the file mode
         broken_tree = b"0100644 foo\0" + hex_to_sha(a_sha)
@@ -1002,9 +1003,9 @@ class TreeTests(ShaFileCheckTests):
         def eval_parse_tree(*args, **kwargs):
             return list(parse_tree(*args, **kwargs))
 
-        self.assertEqual([(b"foo", 0o100644, a_sha)], eval_parse_tree(broken_tree))
+        self.assertEqual([(b"foo", 0o100644, a_sha)], eval_parse_tree(broken_tree, 20))
         self.assertRaises(
-            ObjectFormatException, eval_parse_tree, broken_tree, strict=True
+            ObjectFormatException, eval_parse_tree, broken_tree, 20, strict=True
         )
 
     test_parse_tree = functest_builder(_do_test_parse_tree, _parse_tree_py)
@@ -1665,7 +1666,7 @@ class ShaFileCopyTests(TestCase):
             tagger=b"Tagger <test@example.com>",
             tag_time=12345,
             tag_timezone=0,
-            object=(Commit, b"0" * 40),
+            object=(Commit, ZERO_SHA),
         )
         self.assert_copy(tag)
 
@@ -1734,7 +1735,7 @@ class ShaFileSerializeTests(TestCase):
             tagger=b"Tagger <test@example.com>",
             tag_time=12345,
             tag_timezone=0,
-            object=(Commit, b"0" * 40),
+            object=(Commit, ZERO_SHA),
         )
 
         with self.assert_serialization_on_change(tag):
@@ -1747,7 +1748,7 @@ class ShaFileSerializeTests(TestCase):
                 name=b"tag",
                 message=b"some message",
                 tagger=b"Tagger <test@example.com> 1174773719+0000",
-                object=(Commit, b"0" * 40),
+                object=(Commit, ZERO_SHA),
             )
             tag._deserialize(tag._serialize())
 

+ 5 - 5
tests/test_pack.py

@@ -1140,7 +1140,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         filename = os.path.join(self.tempdir, "test_hash.idx")
         # Write v3 index with SHA-1 (algorithm=1)
         with GitFile(filename, "wb") as f:
-            write_pack_index_v3(f, entries, b"1" * 20, object_format=1)
+            write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
         idx = load_pack_index(filename)
         self.assertEqual(idx.object_format, 1)
         self.assertEqual(idx.hash_size, 20)
@@ -1153,7 +1153,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         # SHA-256 should raise NotImplementedError
         with self.assertRaises(NotImplementedError) as cm:
             with GitFile(filename, "wb") as f:
-                write_pack_index_v3(f, entries, b"1" * 32, object_format=2)
+                write_pack_index_v3(f, entries, b"1" * 32, hash_format=2)
         self.assertIn("SHA-256", str(cm.exception))
 
     def test_v3_invalid_hash_algorithm(self) -> None:
@@ -1163,7 +1163,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         # Invalid hash algorithm should raise ValueError
         with self.assertRaises(ValueError) as cm:
             with GitFile(filename, "wb") as f:
-                write_pack_index_v3(f, entries, b"1" * 20, object_format=99)
+                write_pack_index_v3(f, entries, b"1" * 20, hash_format=99)
         self.assertIn("Unknown hash algorithm", str(cm.exception))
 
     def test_v3_wrong_hash_length(self) -> None:
@@ -1173,7 +1173,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         filename = os.path.join(self.tempdir, "test_wrong_len.idx")
         with self.assertRaises(ValueError) as cm:
             with GitFile(filename, "wb") as f:
-                write_pack_index_v3(f, entries, b"1" * 20, object_format=1)
+                write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
         self.assertIn("wrong length", str(cm.exception))
 
 
@@ -1804,7 +1804,7 @@ class DeltaChainIteratorTests(TestCase):
             # Attempting to open this REF_DELTA object would loop forever
             pack[b1.id]
         except UnresolvedDeltas as e:
-            self.assertEqual(b1.id, e.shas)
+            self.assertEqual([b1.id], [sha_to_hex(sha) for sha in e.shas])
 
 
 class DeltaEncodeSizeTests(TestCase):

+ 2 - 2
tests/test_patch.py

@@ -25,7 +25,7 @@ from io import BytesIO, StringIO
 from typing import NoReturn
 
 from dulwich.object_store import MemoryObjectStore
-from dulwich.objects import S_IFGITLINK, Blob, Commit, Tree
+from dulwich.objects import S_IFGITLINK, ZERO_SHA, Blob, Commit, Tree
 from dulwich.patch import (
     DiffAlgorithmNotAvailable,
     commit_patch_id,
@@ -875,7 +875,7 @@ index 3b0f961..a116b51 644
         # Create a commit
         commit = Commit()
         commit.tree = tree2.id
-        commit.parents = [b"0" * 40]  # Fake parent
+        commit.parents = [ZERO_SHA]  # Fake parent
         commit.author = commit.committer = b"Test <test@example.com>"
         commit.author_time = commit.commit_time = 1234567890
         commit.author_timezone = commit.commit_timezone = 0

+ 1 - 13
tests/test_sha256.py

@@ -27,7 +27,7 @@ import tempfile
 import unittest
 
 from dulwich.object_format import SHA1, SHA256, get_object_format
-from dulwich.objects import Blob, Tree, valid_hexsha, zero_sha_for
+from dulwich.objects import Blob, Tree, valid_hexsha
 from dulwich.repo import MemoryRepo, Repo
 
 
@@ -130,18 +130,6 @@ class ObjectHashingTests(unittest.TestCase):
         # Invalid characters
         self.assertFalse(valid_hexsha(b"123456789gabcdef1234567890abcdef12345678"))
 
-    def test_zero_sha_for(self):
-        """Test getting zero SHA for different algorithms."""
-        # Default (SHA1)
-        self.assertEqual(zero_sha_for(), b"0" * 40)
-        self.assertEqual(zero_sha_for(None), b"0" * 40)
-
-        # SHA1 explicit
-        self.assertEqual(zero_sha_for(SHA1), b"0" * 40)
-
-        # SHA256
-        self.assertEqual(zero_sha_for(SHA256), b"0" * 64)
-
 
 class RepositorySHA256Tests(unittest.TestCase):
     """Tests for SHA256 repository support."""

+ 1 - 1
tests/test_submodule.py

@@ -85,7 +85,7 @@ class SubmoduleTests(TestCase):
         binary_file_sha = bytes.fromhex(file_sha.decode("ascii"))
 
         # Generate a valid SHA for the submodule
-        submodule_sha = b"1" * 40
+        submodule_sha = b"1" * repo.object_format.hex_length
         binary_submodule_sha = bytes.fromhex(submodule_sha.decode("ascii"))
 
         # Create raw tree data