Explorar el Código

Add comprehensive SHA256 support for Git repositories (#1604)

- Add hash algorithm abstraction layer (dulwich/object_format.py)
- Support SHA256 object IDs in refs, object store, and repository
- Fix tree parsing to handle both SHA1 (20-byte) and SHA256 (32-byte)
  hashes
- Update DiskObjectStore to use correct hash algorithm for object
  storage
- Add comprehensive compatibility tests for SHA256 repositories
- Enable full interoperability between Dulwich and Git for SHA256 repos

Fixes #1115
Jelmer Vernooij hace 1 mes
padre
commit
3b3a5b2390
Se han modificado 53 ficheros con 3218 adiciones y 514 borrados
  1. 7 4
      NEWS
  2. 14 8
      crates/objects/src/lib.rs
  3. 8 5
      crates/pack/src/lib.rs
  4. 18 3
      dulwich/bundle.py
  5. 23 3
      dulwich/cli.py
  6. 213 18
      dulwich/client.py
  7. 4 3
      dulwich/cloud/gcs.py
  8. 79 52
      dulwich/commit_graph.py
  9. 13 0
      dulwich/config.py
  10. 63 17
      dulwich/contrib/swift.py
  11. 12 4
      dulwich/dumb.py
  12. 2 2
      dulwich/errors.py
  13. 1 0
      dulwich/fastexport.py
  14. 3 2
      dulwich/notes.py
  15. 163 0
      dulwich/object_format.py
  16. 146 43
      dulwich/object_store.py
  17. 188 43
      dulwich/objects.py
  18. 429 97
      dulwich/pack.py
  19. 6 3
      dulwich/porcelain/__init__.py
  20. 15 0
      dulwich/protocol.py
  21. 5 3
      dulwich/refs.py
  22. 152 13
      dulwich/repo.py
  23. 24 16
      dulwich/server.py
  24. 0 9
      dulwich/tests/test_object_store.py
  25. 7 4
      dulwich/tests/utils.py
  26. 2 0
      tests/compat/__init__.py
  27. 2 7
      tests/compat/server_utils.py
  28. 4 3
      tests/compat/test_bitmap.py
  29. 27 12
      tests/compat/test_pack.py
  30. 122 6
      tests/compat/test_server.py
  31. 409 0
      tests/compat/test_sha256.py
  32. 285 0
      tests/compat/test_sha256_packs.py
  33. 34 10
      tests/compat/test_web.py
  34. 2 2
      tests/porcelain/__init__.py
  35. 2 1
      tests/porcelain/test_merge.py
  36. 31 28
      tests/test_bundle.py
  37. 44 0
      tests/test_cli.py
  38. 137 6
      tests/test_client.py
  39. 1 1
      tests/test_cloud_gcs.py
  40. 50 8
      tests/test_commit_graph.py
  41. 6 0
      tests/test_config.py
  42. 2 2
      tests/test_dumb.py
  43. 3 3
      tests/test_filter_branch.py
  44. 3 3
      tests/test_filters.py
  45. 2 2
      tests/test_gc.py
  46. 5 5
      tests/test_index.py
  47. 12 5
      tests/test_object_store.py
  48. 7 6
      tests/test_objects.py
  49. 106 49
      tests/test_pack.py
  50. 2 2
      tests/test_patch.py
  51. 194 0
      tests/test_sha256.py
  52. 128 0
      tests/test_sha256_pack.py
  53. 1 1
      tests/test_submodule.py

+ 7 - 4
NEWS

@@ -68,8 +68,12 @@ compatible.
    Dulwich version, and installed dependencies with their versions.
    (Jelmer Vernooij, #1835)
 
- * Add basic ``dulwich restore`` and ``dulwich switch``
-   commands. (Jelmer Vernooij, #1777)
+ * Add initial support for SHA256 repositories. Dulwich can now read and write Git
+   repositories using SHA256 object format. This includes support for loose
+   objects, pack files (v1 and v2 indexes), and tree parsing with SHA256 hashes.
+   The Rust extensions have been updated to support variable hash lengths.
+   SHA256 repositories require format version 1 and the objectFormat extension.
+   (Jelmer Vernooij, #1115)
 
 0.24.10	2025-11-10
 
@@ -550,8 +554,7 @@ compatible.
  * Add support for pack index format version 3. This format supports variable
    hash sizes to enable future SHA-256 support. The implementation includes
    reading and writing v3 indexes with proper hash algorithm identification
-   (1 for SHA-1, 2 for SHA-256). Note that SHA-256 support itself is not yet
-   implemented and will raise NotImplementedError. (Jelmer Vernooij)
+   (1 for SHA-1, 2 for SHA-256). (Jelmer Vernooij)
 
  * Fix ``LocalGitClient`` assertion error when fetching externally cloned repositories
    into ``MemoryRepo``. Previously, the client would fail with an AssertionError

+ 14 - 8
crates/objects/src/lib.rs

@@ -50,14 +50,16 @@ fn sha_to_pyhex(py: Python, sha: &[u8]) -> PyResult<Py<PyAny>> {
 }
 
 #[pyfunction]
-#[pyo3(signature = (text, strict=None))]
+#[pyo3(signature = (text, sha_len, strict=None))]
 fn parse_tree(
     py: Python,
     mut text: &[u8],
+    sha_len: usize,
     strict: Option<bool>,
 ) -> PyResult<Vec<(Py<PyAny>, u32, Py<PyAny>)>> {
-    let mut entries = Vec::new();
     let strict = strict.unwrap_or(false);
+
+    let mut entries = Vec::new();
     while !text.is_empty() {
         let mode_end = memchr(b' ', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
@@ -73,17 +75,22 @@ fn parse_tree(
         let namelen = memchr(b'\0', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
         let name = &text[..namelen];
-        if namelen + 20 >= text.len() {
+
+        // Skip name and null terminator
+        text = &text[namelen + 1..];
+
+        // Check if we have enough bytes for the hash
+        if text.len() < sha_len {
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
         }
-        text = &text[namelen + 1..];
-        let sha = &text[..20];
+
+        let sha = &text[..sha_len];
         entries.push((
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             mode,
             sha_to_pyhex(py, sha)?,
         ));
-        text = &text[20..];
+        text = &text[sha_len..];
     }
     Ok(entries)
 }
@@ -151,8 +158,7 @@ fn sorted_tree_items(
                         .unbind()
                         .into_any(),
                 ))?
-                .unbind()
-                .into())
+                .unbind())
         })
         .collect::<PyResult<Vec<Py<PyAny>>>>()
 }

+ 8 - 5
crates/pack/src/lib.rs

@@ -30,8 +30,9 @@ pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
 fn py_is_sha(sha: &Py<PyAny>, py: Python) -> PyResult<bool> {
     // Check if the object is a bytes object
     if sha.bind(py).is_instance_of::<PyBytes>() {
-        // Check if the bytes object has a size of 20
-        if sha.extract::<&[u8]>(py)?.len() == 20 {
+        // Check if the bytes object has a size of 20 (SHA1) or 32 (SHA256)
+        let len = sha.extract::<&[u8]>(py)?.len();
+        if len == 20 || len == 32 {
             Ok(true)
         } else {
             Ok(false)
@@ -53,9 +54,11 @@ fn bisect_find_sha(
     let sha = sha.as_bytes(py);
     let sha_len = sha.len();
 
-    // Check if sha is 20 bytes long
-    if sha_len != 20 {
-        return Err(PyValueError::new_err("Sha is not 20 bytes long"));
+    // Check if sha is 20 bytes (SHA1) or 32 bytes (SHA256)
+    if sha_len != 20 && sha_len != 32 {
+        return Err(PyValueError::new_err(
+            "Sha must be 20 (SHA1) or 32 (SHA256) bytes long",
+        ));
     }
 
     // Check if start > end

+ 18 - 3
dulwich/bundle.py

@@ -38,6 +38,9 @@ from typing import (
     runtime_checkable,
 )
 
+if TYPE_CHECKING:
+    from .object_format import ObjectFormat
+
 from .objects import ObjectID
 from .pack import PackData, UnpackedObject, write_pack_data
 from .refs import Ref
@@ -47,6 +50,8 @@ from .refs import Ref
 class PackDataLike(Protocol):
     """Protocol for objects that behave like PackData."""
 
+    object_format: "ObjectFormat"
+
     def __len__(self) -> int:
         """Return the number of objects in the pack."""
         ...
@@ -160,8 +165,11 @@ def _read_bundle(f: BinaryIO, version: int) -> Bundle:
 
     from io import BytesIO
 
+    from .object_format import DEFAULT_OBJECT_FORMAT
+
     pack_file = BytesIO(pack_bytes)
-    pack_data = PackData.from_file(pack_file)
+    # TODO: Support specifying object format based on bundle metadata
+    pack_data = PackData.from_file(pack_file, object_format=DEFAULT_OBJECT_FORMAT)
     ret = Bundle()
     ret.references = references
     ret.capabilities = capabilities
@@ -225,6 +233,7 @@ def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
         cast(Callable[[bytes], None], f.write),
         num_records=len(bundle.pack_data),
         records=bundle.pack_data.iter_unpacked(),
+        object_format=bundle.pack_data.object_format,
     )
 
 
@@ -307,9 +316,15 @@ def create_bundle_from_repo(
     # Store the pack objects directly, we'll write them when saving the bundle
     # For now, create a simple wrapper to hold the data
     class _BundlePackData:
-        def __init__(self, count: int, objects: Iterator[UnpackedObject]) -> None:
+        def __init__(
+            self,
+            count: int,
+            objects: Iterator[UnpackedObject],
+            object_format: "ObjectFormat",
+        ) -> None:
             self._count = count
             self._objects = list(objects)  # Materialize the iterator
+            self.object_format = object_format
 
         def __len__(self) -> int:
             return self._count
@@ -317,7 +332,7 @@ def create_bundle_from_repo(
         def iter_unpacked(self) -> Iterator[UnpackedObject]:
             return iter(self._objects)
 
-    pack_data = _BundlePackData(pack_count, pack_objects)
+    pack_data = _BundlePackData(pack_count, pack_objects, repo.object_format)
 
     # Create bundle object
     bundle = Bundle()

+ 23 - 3
dulwich/cli.py

@@ -1598,10 +1598,19 @@ class cmd_dump_pack(Command):
         """
         parser = argparse.ArgumentParser()
         parser.add_argument("filename", help="Pack file to dump")
+        parser.add_argument(
+            "--object-format",
+            choices=["sha1", "sha256"],
+            default="sha1",
+            help="Object format (hash algorithm) used in the pack file",
+        )
         parsed_args = parser.parse_args(args)
 
+        from .object_format import OBJECT_FORMATS
+
+        object_format = OBJECT_FORMATS[parsed_args.object_format]
         basename, _ = os.path.splitext(parsed_args.filename)
-        x = Pack(basename)
+        x = Pack(basename, object_format=object_format)
         logger.info("Object names checksum: %s", x.name().decode("ascii", "replace"))
         logger.info("Checksum: %r", sha_to_hex(RawObjectID(x.get_stored_checksum())))
         x.check()
@@ -1881,12 +1890,22 @@ class cmd_init(Command):
         parser.add_argument(
             "--bare", action="store_true", help="Create a bare repository"
         )
+        parser.add_argument(
+            "--objectformat",
+            type=str,
+            choices=["sha1", "sha256"],
+            help="Object format to use (sha1 or sha256)",
+        )
         parser.add_argument(
             "path", nargs="?", default=os.getcwd(), help="Repository path"
         )
         parsed_args = parser.parse_args(args)
 
-        porcelain.init(parsed_args.path, bare=parsed_args.bare)
+        porcelain.init(
+            parsed_args.path,
+            bare=parsed_args.bare,
+            object_format=parsed_args.objectformat,
+        )
 
 
 class cmd_clone(Command):
@@ -2513,7 +2532,7 @@ class cmd_show_ref(Command):
             "-s",
             "--hash",
             nargs="?",
-            const=40,
+            const=40,  # TODO: Support SHA256
             type=int,
             metavar="n",
             help="Only show the OID, not the reference name",
@@ -2590,6 +2609,7 @@ class cmd_show_ref(Command):
 
         # Output results
         if not parsed_args.quiet:
+            # TODO: Add support for SHA256
             abbrev_len = parsed_args.abbrev if parsed_args.abbrev else 40
             hash_only = parsed_args.hash is not None
             if hash_only and parsed_args.hash:

+ 213 - 18
dulwich/client.py

@@ -154,6 +154,7 @@ from .bundle import Bundle
 from .config import Config, apply_instead_of, get_xdg_config_home_path
 from .credentials import match_partial_url, match_urls
 from .errors import GitProtocolError, HangupException, NotGitRepository, SendPackError
+from .object_format import DEFAULT_OBJECT_FORMAT
 from .object_store import GraphWalker
 from .objects import ObjectID
 from .pack import (
@@ -393,6 +394,24 @@ def read_server_capabilities(pkt_seq: Iterable[bytes]) -> set[bytes]:
     return set(server_capabilities)
 
 
+def extract_object_format_from_capabilities(
+    capabilities: set[bytes],
+) -> str | None:
+    """Extract object format from server capabilities.
+
+    Args:
+        capabilities: Server capabilities
+
+    Returns:
+        Object format name as string (e.g., "sha1", "sha256"), or None if not specified
+    """
+    for capability in capabilities:
+        k, v = parse_capability(capability)
+        if k == b"object-format" and v is not None:
+            return v.decode("ascii").strip()
+    return None
+
+
 def read_pkt_refs_v2(
     pkt_seq: Iterable[bytes],
 ) -> tuple[dict[Ref, ObjectID | None], dict[Ref, Ref], dict[Ref, ObjectID]]:
@@ -509,11 +528,13 @@ class FetchPackResult(_DeprecatedDictProxy):
       refs: Dictionary with all remote refs
       symrefs: Dictionary with remote symrefs
       agent: User agent string
+      object_format: Object format name (e.g., "sha1", "sha256") used by the remote, or None if not specified
     """
 
     refs: dict[Ref, ObjectID | None]
     symrefs: dict[Ref, Ref]
     agent: bytes | None
+    object_format: str | None
 
     def __init__(
         self,
@@ -522,6 +543,7 @@ class FetchPackResult(_DeprecatedDictProxy):
         agent: bytes | None,
         new_shallow: set[ObjectID] | None = None,
         new_unshallow: set[ObjectID] | None = None,
+        object_format: str | None = None,
     ) -> None:
         """Initialize FetchPackResult.
 
@@ -531,12 +553,14 @@ class FetchPackResult(_DeprecatedDictProxy):
             agent: User agent string
             new_shallow: New shallow commits
             new_unshallow: New unshallow commits
+            object_format: Object format name (e.g., "sha1", "sha256") used by the remote
         """
         self.refs = refs
         self.symrefs = symrefs
         self.agent = agent
         self.new_shallow = new_shallow
         self.new_unshallow = new_unshallow
+        self.object_format = object_format
 
     def __eq__(self, other: object) -> bool:
         """Check equality with another object."""
@@ -562,21 +586,28 @@ class LsRemoteResult(_DeprecatedDictProxy):
     Attributes:
       refs: Dictionary with all remote refs
       symrefs: Dictionary with remote symrefs
+      object_format: Object format name (e.g., "sha1", "sha256") used by the remote, or None if not specified
     """
 
     symrefs: dict[Ref, Ref]
+    object_format: str | None
 
     def __init__(
-        self, refs: dict[Ref, ObjectID | None], symrefs: dict[Ref, Ref]
+        self,
+        refs: dict[Ref, ObjectID | None],
+        symrefs: dict[Ref, Ref],
+        object_format: str | None = None,
     ) -> None:
         """Initialize LsRemoteResult.
 
         Args:
             refs: Dictionary with all remote refs
             symrefs: Dictionary with remote symrefs
+            object_format: Object format name (e.g., "sha1", "sha256") used by the remote
         """
         self.refs = refs
         self.symrefs = symrefs
+        self.object_format = object_format
 
     def _warn_deprecated(self) -> None:
         import warnings
@@ -1074,6 +1105,10 @@ class GitClient:
             os.mkdir(target_path)
 
         try:
+            # For network clones, create repository with default SHA-1 format initially.
+            # If remote uses a different format, fetch() will auto-change the repo's format
+            # (since repo is empty at this point).
+            # Subclasses (e.g., LocalGitClient) override to detect format first for efficiency.
             target = None
             if not bare:
                 target = Repo.init(target_path)
@@ -1113,6 +1148,7 @@ class GitClient:
                 filter_spec=filter_spec,
                 protocol_version=protocol_version,
             )
+
             if origin is not None:
                 _import_remote_refs(
                     target.refs, origin, result.refs, message=ref_message
@@ -1229,6 +1265,14 @@ class GitClient:
                 shallow_since=shallow_since,
                 shallow_exclude=shallow_exclude,
             )
+
+            # Fix object format if needed
+            if (
+                result.object_format
+                and result.object_format != target.object_format.name
+            ):
+                # Change the target repo's format if it's empty
+                target._change_object_format(result.object_format)
         except BaseException:
             abort()
             raise
@@ -1586,7 +1630,11 @@ class TraditionalGitClient(GitClient):
             )
 
             if self._should_send_pack(new_refs):
-                for chunk in PackChunkGenerator(pack_data_count, pack_data):
+                for chunk in PackChunkGenerator(
+                    num_records=pack_data_count,
+                    records=pack_data,
+                    object_format=DEFAULT_OBJECT_FORMAT,
+                ):
                     proto.write(chunk)
 
             ref_status = self._handle_receive_pack_tail(
@@ -1657,6 +1705,7 @@ class TraditionalGitClient(GitClient):
             refs: dict[Ref, ObjectID | None]
             symrefs: dict[Ref, Ref]
             agent: bytes | None
+            object_format: str | None
             if self.protocol_version == 2:
                 try:
                     server_capabilities = read_server_capabilities(proto.read_pkt_seq())
@@ -1667,6 +1716,9 @@ class TraditionalGitClient(GitClient):
                     symrefs,
                     agent,
                 ) = self._negotiate_upload_pack_capabilities(server_capabilities)
+                object_format = extract_object_format_from_capabilities(
+                    server_capabilities
+                )
 
                 proto.write_pkt_line(b"command=ls-refs\n")
                 proto.write(b"0001")  # delim-pkt
@@ -1692,13 +1744,18 @@ class TraditionalGitClient(GitClient):
                     symrefs,
                     agent,
                 ) = self._negotiate_upload_pack_capabilities(server_capabilities)
+                object_format = extract_object_format_from_capabilities(
+                    server_capabilities
+                )
 
                 if ref_prefix is not None:
                     refs = filter_ref_prefix(refs, ref_prefix)
 
             if refs is None:
                 proto.write_pkt_line(None)
-                return FetchPackResult(refs, symrefs, agent)
+                return FetchPackResult(
+                    refs, symrefs, agent, object_format=object_format
+                )
 
             try:
                 # Filter out None values (shouldn't be any in v1 protocol)
@@ -1716,7 +1773,9 @@ class TraditionalGitClient(GitClient):
                 wants = [cid for cid in wants if cid != ZERO_SHA]
             if not wants:
                 proto.write_pkt_line(None)
-                return FetchPackResult(refs, symrefs, agent)
+                return FetchPackResult(
+                    refs, symrefs, agent, object_format=object_format
+                )
             if self.protocol_version == 2:
                 proto.write_pkt_line(b"command=fetch\n")
                 proto.write(b"0001")  # delim-pkt
@@ -1754,7 +1813,9 @@ class TraditionalGitClient(GitClient):
                 progress,
                 protocol_version=self.protocol_version,
             )
-            return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
+            return FetchPackResult(
+                refs, symrefs, agent, new_shallow, new_unshallow, object_format
+            )
 
     def get_refs(
         self,
@@ -1782,6 +1843,7 @@ class TraditionalGitClient(GitClient):
         self.protocol_version = server_protocol_version
         if self.protocol_version == 2:
             server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+            object_format = extract_object_format_from_capabilities(server_capabilities)
             proto.write_pkt_line(b"command=ls-refs\n")
             proto.write(b"0001")  # delim-pkt
             proto.write_pkt_line(b"symrefs")
@@ -1799,7 +1861,7 @@ class TraditionalGitClient(GitClient):
                 proto.write_pkt_line(None)
                 for refname, refvalue in peeled.items():
                     refs[Ref(refname + PEELED_TAG_SUFFIX)] = refvalue
-                return LsRemoteResult(refs, symrefs)
+                return LsRemoteResult(refs, symrefs, object_format=object_format)
         else:
             with proto:
                 try:
@@ -1811,10 +1873,13 @@ class TraditionalGitClient(GitClient):
                 except HangupException as exc:
                     raise _remote_error_from_stderr(stderr) from exc
                 proto.write_pkt_line(None)
+                object_format = extract_object_format_from_capabilities(
+                    server_capabilities
+                )
                 (symrefs, _agent) = _extract_symrefs_and_agent(server_capabilities)
                 if ref_prefix is not None:
                     refs = filter_ref_prefix(refs, ref_prefix)
-                return LsRemoteResult(refs, symrefs)
+                return LsRemoteResult(refs, symrefs, object_format=object_format)
 
     def archive(
         self,
@@ -2393,7 +2458,10 @@ class LocalGitClient(GitClient):
                 depth=depth,
             )
             return FetchPackResult(
-                _to_optional_dict(refs), r.refs.get_symrefs(), agent_string()
+                _to_optional_dict(refs),
+                r.refs.get_symrefs(),
+                agent_string(),
+                object_format=r.object_format.name,
             )
 
     def fetch_pack(
@@ -2451,15 +2519,23 @@ class LocalGitClient(GitClient):
             # Did the process short-circuit (e.g. in a stateless RPC call)?
             # Note that the client still expects a 0-object pack in most cases.
             if object_ids is None:
-                return FetchPackResult(None, symrefs, agent)
+                return FetchPackResult(
+                    None, symrefs, agent, object_format=r.object_format.name
+                )
             write_pack_from_container(
                 pack_data,  # type: ignore[arg-type]
                 r.object_store,
                 object_ids,
                 other_haves=other_haves,
+                object_format=r.object_format,
             )
             # Convert refs to Optional type for FetchPackResult
-            return FetchPackResult(_to_optional_dict(r.get_refs()), symrefs, agent)
+            return FetchPackResult(
+                _to_optional_dict(r.get_refs()),
+                symrefs,
+                agent,
+                object_format=r.object_format.name,
+            )
 
     def get_refs(
         self,
@@ -2485,7 +2561,116 @@ class LocalGitClient(GitClient):
                 except (KeyError, ValueError):
                     # Not a symbolic ref or error reading it
                     pass
-            return LsRemoteResult(refs, symrefs)
+            return LsRemoteResult(
+                refs, symrefs, object_format=target.object_format.name
+            )
+
+    def clone(
+        self,
+        path: str,
+        target_path: str,
+        mkdir: bool = True,
+        bare: bool = False,
+        origin: str | None = "origin",
+        checkout: bool | None = None,
+        branch: str | None = None,
+        progress: Callable[[bytes], None] | None = None,
+        depth: int | None = None,
+        ref_prefix: Sequence[bytes] | None = None,
+        filter_spec: bytes | None = None,
+        protocol_version: int | None = None,
+    ) -> Repo:
+        """Clone a local repository.
+
+        For local clones, we can detect the object format before creating
+        the target repository.
+        """
+        # Detect the object format from the source repository
+        with self._open_repo(path) as source_repo:
+            object_format_name = source_repo.object_format.name
+
+        if mkdir:
+            os.mkdir(target_path)
+
+        try:
+            # Create repository with the correct object format from the start
+            target = None
+            if not bare:
+                target = Repo.init(target_path, object_format=object_format_name)
+                if checkout is None:
+                    checkout = True
+            else:
+                if checkout:
+                    raise ValueError("checkout and bare are incompatible")
+                target = Repo.init_bare(target_path, object_format=object_format_name)
+
+            encoded_path = path.encode("utf-8")
+
+            assert target is not None
+            if origin is not None:
+                target_config = target.get_config()
+                target_config.set(
+                    (b"remote", origin.encode("utf-8")), b"url", encoded_path
+                )
+                target_config.set(
+                    (b"remote", origin.encode("utf-8")),
+                    b"fetch",
+                    b"+refs/heads/*:refs/remotes/" + origin.encode("utf-8") + b"/*",
+                )
+                target_config.write_to_path()
+
+            ref_message = b"clone: from " + encoded_path
+            result = self.fetch(
+                path.encode("utf-8"),
+                target,
+                progress=progress,
+                depth=depth,
+                ref_prefix=ref_prefix,
+                filter_spec=filter_spec,
+                protocol_version=protocol_version,
+            )
+
+            if origin is not None:
+                _import_remote_refs(
+                    target.refs, origin, result.refs, message=ref_message
+                )
+
+            origin_head = result.symrefs.get(HEADREF)
+            origin_sha = result.refs.get(HEADREF)
+            if origin is None or (origin_sha and not origin_head):
+                # set detached HEAD
+                if origin_sha is not None:
+                    target.refs[HEADREF] = origin_sha
+                    head = origin_sha
+                else:
+                    head = None
+            else:
+                _set_origin_head(target.refs, origin.encode("utf-8"), origin_head)
+                head_ref = _set_default_branch(
+                    target.refs,
+                    origin.encode("utf-8"),
+                    origin_head,
+                    branch.encode("utf-8") if branch is not None else None,
+                    ref_message,
+                )
+
+                # Update target head
+                if head_ref:
+                    head = _set_head(target.refs, head_ref, ref_message)
+                else:
+                    head = None
+
+            if checkout and head is not None:
+                target.get_worktree().reset_index()
+        except BaseException:
+            if target is not None:
+                target.close()
+            if mkdir:
+                import shutil
+
+                shutil.rmtree(target_path)
+            raise
+        return target
 
 
 class BundleClient(GitClient):
@@ -2716,7 +2901,7 @@ class BundleClient(GitClient):
         from io import BytesIO
 
         pack_io = BytesIO(pack_bytes)
-        pack_data = PackData.from_file(pack_io)
+        pack_data = PackData.from_file(pack_io, object_format=DEFAULT_OBJECT_FORMAT)
         target.object_store.add_pack_data(len(pack_data), pack_data.iter_unpacked())
 
         # Apply ref filtering if specified
@@ -3809,7 +3994,12 @@ class AbstractHttpGitClient(GitClient):
                 progress=progress,
             )
             if self._should_send_pack(new_refs):
-                yield from PackChunkGenerator(pack_data_count, pack_data)
+                yield from PackChunkGenerator(
+                    # TODO: Don't hardcode object format
+                    num_records=pack_data_count,
+                    records=pack_data,
+                    object_format=DEFAULT_OBJECT_FORMAT,
+                )
 
         resp, read = self._smart_request("git-receive-pack", url, data=body_generator())
         try:
@@ -3874,6 +4064,7 @@ class AbstractHttpGitClient(GitClient):
             capa_symrefs,
             agent,
         ) = self._negotiate_upload_pack_capabilities(server_capabilities)
+        object_format = extract_object_format_from_capabilities(server_capabilities)
         if not symrefs and capa_symrefs:
             symrefs = capa_symrefs
         # Filter out None values from refs for determine_wants
@@ -3885,7 +4076,7 @@ class AbstractHttpGitClient(GitClient):
         if wants is not None:
             wants = [cid for cid in wants if cid != ZERO_SHA]
         if not wants and not self.dumb:
-            return FetchPackResult(refs, symrefs, agent)
+            return FetchPackResult(refs, symrefs, agent, object_format=object_format)
         elif self.dumb:
             # Use dumb HTTP protocol
             from .dumb import DumbRemoteHTTPRepo
@@ -3921,9 +4112,10 @@ class AbstractHttpGitClient(GitClient):
                     iter(pack_data_list),
                     num_records=len(pack_data_list),
                     progress=progress,
+                    object_format=DEFAULT_OBJECT_FORMAT,
                 )
 
-            return FetchPackResult(refs, symrefs, agent)
+            return FetchPackResult(refs, symrefs, agent, object_format=object_format)
         req_data = BytesIO()
         req_proto = Protocol(None, req_data.write)  # type: ignore
         (new_shallow, new_unshallow) = _handle_upload_pack_head(
@@ -3970,7 +4162,9 @@ class AbstractHttpGitClient(GitClient):
                 progress,
                 protocol_version=self.protocol_version,
             )
-            return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
+            return FetchPackResult(
+                refs, symrefs, agent, new_shallow, new_unshallow, object_format
+            )
         finally:
             resp.close()
 
@@ -3982,15 +4176,16 @@ class AbstractHttpGitClient(GitClient):
     ) -> LsRemoteResult:
         """Retrieve the current refs from a git smart server."""
         url = self._get_url(path)
-        refs, _, _, symrefs, peeled = self._discover_references(
+        refs, server_capabilities, _, symrefs, peeled = self._discover_references(
             b"git-upload-pack",
             url,
             protocol_version=protocol_version,
             ref_prefix=ref_prefix,
         )
+        object_format = extract_object_format_from_capabilities(server_capabilities)
         for refname, refvalue in peeled.items():
             refs[Ref(refname + PEELED_TAG_SUFFIX)] = refvalue
-        return LsRemoteResult(refs, symrefs)
+        return LsRemoteResult(refs, symrefs, object_format=object_format)
 
     def get_url(self, path: str) -> str:
         """Get the HTTP URL for a path."""

+ 4 - 3
dulwich/cloud/gcs.py

@@ -87,18 +87,19 @@ class GcsObjectStore(BucketBasedObjectStore):
         with tempfile.SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE) as f:
             b.download_to_file(f)
             f.seek(0)
-            return PackData(name + ".pack", cast(_GitFile, f))
+            return PackData(name + ".pack", self.object_format, cast(_GitFile, f))
 
     def _load_pack_index(self, name: str) -> PackIndex:
         b = self.bucket.blob(posixpath.join(self.subpath, name + ".idx"))
         with tempfile.SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE) as f:
             b.download_to_file(f)
             f.seek(0)
-            return load_pack_index_file(name + ".idx", f)
+            return load_pack_index_file(name + ".idx", f, self.object_format)
 
     def _get_pack(self, name: str) -> Pack:
         return Pack.from_lazy_objects(
-            lambda: self._load_pack_data(name), lambda: self._load_pack_index(name)
+            lambda: self._load_pack_data(name),
+            lambda: self._load_pack_index(name),
         )
 
     def _upload_pack(

+ 79 - 52
dulwich/commit_graph.py

@@ -65,6 +65,7 @@ from collections.abc import Iterator, Sequence
 from typing import TYPE_CHECKING, BinaryIO
 
 from .file import _GitFile
+from .object_format import ObjectFormat
 
 if TYPE_CHECKING:
     from .object_store import BaseObjectStore
@@ -156,26 +157,38 @@ class CommitGraphChunk:
 class CommitGraph:
     """Git commit graph file reader/writer."""
 
-    def __init__(self, hash_version: int = HASH_VERSION_SHA1) -> None:
+    def __init__(self, *, object_format: ObjectFormat | None = None) -> None:
         """Initialize CommitGraph.
 
         Args:
-          hash_version: Hash version to use (SHA1 or SHA256)
+          object_format: Object format to use (defaults to SHA1)
         """
-        self.hash_version = hash_version
+        import warnings
+
+        from .object_format import DEFAULT_OBJECT_FORMAT, SHA256
+
+        if object_format is None:
+            warnings.warn(
+                "CommitGraph() should be called with object_format parameter",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            object_format = DEFAULT_OBJECT_FORMAT
+        self.object_format = object_format
+        self.hash_version = (
+            HASH_VERSION_SHA256 if object_format == SHA256 else HASH_VERSION_SHA1
+        )
         self.chunks: dict[bytes, CommitGraphChunk] = {}
         self.entries: list[CommitGraphEntry] = []
-        self._oid_to_index: dict[ObjectID, int] = {}
-        self._hash_size = 20 if hash_version == HASH_VERSION_SHA1 else 32
+        self._oid_to_index: dict[RawObjectID, int] = {}
 
     @classmethod
     def from_file(cls, f: BinaryIO) -> "CommitGraph":
         """Read commit graph from file."""
-        graph = cls()
-        graph._read_from_file(f)
-        return graph
+        return cls._read_from_file(f)
 
-    def _read_from_file(self, f: BinaryIO) -> None:
+    @classmethod
+    def _read_from_file(cls, f: BinaryIO) -> "CommitGraph":
         """Read commit graph data from file."""
         # Read header
         signature = f.read(4)
@@ -186,11 +199,21 @@ class CommitGraph:
         if version != COMMIT_GRAPH_VERSION:
             raise ValueError(f"Unsupported commit graph version: {version}")
 
-        self.hash_version = struct.unpack(">B", f.read(1))[0]
-        if self.hash_version not in (HASH_VERSION_SHA1, HASH_VERSION_SHA256):
-            raise ValueError(f"Unsupported hash version: {self.hash_version}")
+        hash_version = struct.unpack(">B", f.read(1))[0]
 
-        self._hash_size = 20 if self.hash_version == HASH_VERSION_SHA1 else 32
+        # Set object_format based on hash_version from file
+        from .object_format import SHA1, SHA256
+
+        if hash_version == HASH_VERSION_SHA1:
+            object_format = SHA1
+        elif hash_version == HASH_VERSION_SHA256:
+            object_format = SHA256
+        else:
+            raise ValueError(f"Unsupported hash version: {hash_version}")
+
+        # Create instance with correct object_format
+        graph = cls(object_format=object_format)
+        graph.hash_version = hash_version
 
         num_chunks = struct.unpack(">B", f.read(1))[0]
         struct.unpack(">B", f.read(1))[0]
@@ -211,10 +234,12 @@ class CommitGraph:
 
             f.seek(offset)
             chunk_data = f.read(chunk_size)
-            self.chunks[chunk_id] = CommitGraphChunk(chunk_id, chunk_data)
+            graph.chunks[chunk_id] = CommitGraphChunk(chunk_id, chunk_data)
 
         # Parse chunks
-        self._parse_chunks()
+        graph._parse_chunks()
+
+        return graph
 
     def _parse_chunks(self) -> None:
         """Parse chunk data into entries."""
@@ -225,19 +250,19 @@ class CommitGraph:
 
         # Parse OID lookup chunk
         oid_lookup_data = self.chunks[CHUNK_OID_LOOKUP].data
-        num_commits = len(oid_lookup_data) // self._hash_size
+        num_commits = len(oid_lookup_data) // self.object_format.oid_length
 
         oids = []
         for i in range(num_commits):
-            start = i * self._hash_size
-            end = start + self._hash_size
+            start = i * self.object_format.oid_length
+            end = start + self.object_format.oid_length
             oid = RawObjectID(oid_lookup_data[start:end])
             oids.append(oid)
-            self._oid_to_index[sha_to_hex(oid)] = i
+            self._oid_to_index[oid] = i
 
         # Parse commit data chunk
         commit_data = self.chunks[CHUNK_COMMIT_DATA].data
-        expected_size = num_commits * (self._hash_size + 16)
+        expected_size = num_commits * (self.object_format.oid_length + 16)
         if len(commit_data) != expected_size:
             raise ValueError(
                 f"Invalid commit data chunk size: {len(commit_data)}, expected {expected_size}"
@@ -245,11 +270,11 @@ class CommitGraph:
 
         self.entries = []
         for i in range(num_commits):
-            offset = i * (self._hash_size + 16)
+            offset = i * (self.object_format.oid_length + 16)
 
             # Tree OID
-            tree_id = RawObjectID(commit_data[offset : offset + self._hash_size])
-            offset += self._hash_size
+            tree_id = commit_data[offset : offset + self.object_format.oid_length]
+            offset += self.object_format.oid_length
 
             # Parent positions (2 x 4 bytes)
             parent1_pos, parent2_pos = struct.unpack(
@@ -282,14 +307,16 @@ class CommitGraph:
 
             entry = CommitGraphEntry(
                 commit_id=sha_to_hex(oids[i]),
-                tree_id=sha_to_hex(tree_id),
+                tree_id=sha_to_hex(RawObjectID(tree_id)),
                 parents=[sha_to_hex(p) for p in parents],
                 generation=generation,
                 commit_time=commit_time,
             )
             self.entries.append(entry)
 
-    def _parse_extra_edges(self, offset: int, oids: Sequence[bytes]) -> list[bytes]:
+    def _parse_extra_edges(
+        self, offset: int, oids: Sequence[RawObjectID]
+    ) -> list[RawObjectID]:
         """Parse extra parent edges for commits with 3+ parents."""
         if CHUNK_EXTRA_EDGE_LIST not in self.chunks:
             return []
@@ -314,7 +341,14 @@ class CommitGraph:
 
     def get_entry_by_oid(self, oid: ObjectID) -> CommitGraphEntry | None:
         """Get commit graph entry by commit OID."""
-        index = self._oid_to_index.get(oid)
+        # Convert hex ObjectID to binary if needed for lookup
+        if isinstance(oid, bytes) and len(oid) == self.object_format.hex_length:
+            # Input is hex ObjectID, convert to binary for internal lookup
+            lookup_oid: RawObjectID = hex_to_sha(oid)
+        else:
+            # Input is already binary
+            lookup_oid = RawObjectID(oid)
+        index = self._oid_to_index.get(lookup_oid)
         if index is not None:
             return self.entries[index]
         return None
@@ -472,19 +506,20 @@ def generate_commit_graph(
     Returns:
         CommitGraph object containing the specified commits
     """
-    graph = CommitGraph()
+    graph = CommitGraph(object_format=object_store.object_format)
 
     if not commit_ids:
         return graph
 
     # Ensure all commit_ids are in the correct format for object store access
-    # DiskObjectStore expects hex ObjectIDs (40-byte hex strings)
-    normalized_commit_ids: list[ObjectID] = []
+    hex_length = object_store.object_format.hex_length
+    oid_length = object_store.object_format.oid_length
+    normalized_commit_ids = []
     for commit_id in commit_ids:
-        if isinstance(commit_id, bytes) and len(commit_id) == 40:
+        if isinstance(commit_id, bytes) and len(commit_id) == hex_length:
             # Already hex ObjectID
-            normalized_commit_ids.append(ObjectID(commit_id))
-        elif isinstance(commit_id, bytes) and len(commit_id) == 20:
+            normalized_commit_ids.append(commit_id)
+        elif isinstance(commit_id, bytes) and len(commit_id) == oid_length:
             # Binary SHA, convert to hex ObjectID
             normalized_commit_ids.append(sha_to_hex(RawObjectID(commit_id)))
         else:
@@ -541,20 +576,9 @@ def generate_commit_graph(
         # commit_id is already hex ObjectID from normalized_commit_ids
         commit_hex: ObjectID = commit_id
 
-        # Handle tree ID - might already be hex ObjectID
-        tree_hex: ObjectID
-        if isinstance(commit_obj.tree, bytes) and len(commit_obj.tree) == 40:
-            tree_hex = ObjectID(commit_obj.tree)  # Already hex ObjectID
-        else:
-            tree_hex = sha_to_hex(commit_obj.tree)  # Binary, convert to hex
-
-        # Handle parent IDs - might already be hex ObjectIDs
-        parents_hex: list[ObjectID] = []
-        for parent_id in commit_obj.parents:
-            if isinstance(parent_id, bytes) and len(parent_id) == 40:
-                parents_hex.append(ObjectID(parent_id))  # Already hex ObjectID
-            else:
-                parents_hex.append(sha_to_hex(parent_id))  # Binary, convert to hex
+        # commit_obj.tree and commit_obj.parents are already ObjectIDs
+        tree_hex = commit_obj.tree
+        parents_hex: list[ObjectID] = commit_obj.parents
 
         entry = CommitGraphEntry(
             commit_id=commit_hex,
@@ -568,7 +592,8 @@ def generate_commit_graph(
     # Build the OID to index mapping for lookups
     graph._oid_to_index = {}
     for i, entry in enumerate(graph.entries):
-        graph._oid_to_index[entry.commit_id] = i
+        # Convert hex ObjectID to binary RawObjectID for consistent lookup
+        graph._oid_to_index[hex_to_sha(entry.commit_id)] = i
 
     return graph
 
@@ -622,14 +647,16 @@ def get_reachable_commits(
     reachable: list[ObjectID] = []
     stack: list[ObjectID] = []
 
+    hex_length = object_store.object_format.hex_length
+    oid_length = object_store.object_format.oid_length
+
     # Normalize commit IDs for object store access and tracking
     for commit_id in start_commits:
-        if isinstance(commit_id, bytes) and len(commit_id) == 40:
+        if isinstance(commit_id, bytes) and len(commit_id) == hex_length:
             # Hex ObjectID - use directly for object store access
-            oid = ObjectID(commit_id)
-            if oid not in visited:
-                stack.append(oid)
-        elif isinstance(commit_id, bytes) and len(commit_id) == 20:
+            if commit_id not in visited:
+                stack.append(commit_id)
+        elif isinstance(commit_id, bytes) and len(commit_id) == oid_length:
             # Binary SHA, convert to hex ObjectID for object store access
             hex_id = sha_to_hex(RawObjectID(commit_id))
             if hex_id not in visited:

+ 13 - 0
dulwich/config.py

@@ -709,6 +709,19 @@ class ConfigDict(Config):
 
         self._values.setdefault(section)[name] = value
 
+    def remove(self, section: SectionLike, name: NameLike) -> None:
+        """Remove a configuration setting.
+
+        Args:
+            section: Section name
+            name: Setting name
+
+        Raises:
+            KeyError: If the section or name doesn't exist
+        """
+        section, name = self._check_section_and_name(section, name)
+        del self._values[section][name]
+
     def items(self, section: SectionLike) -> Iterator[tuple[Name, Value]]:
         """Get items in a section."""
         section_bytes, _ = self._check_section_and_name(section, b"")

+ 63 - 17
dulwich/contrib/swift.py

@@ -59,7 +59,10 @@ import zlib
 from collections.abc import Callable, Iterator, Mapping
 from configparser import ConfigParser
 from io import BytesIO
-from typing import Any, BinaryIO, cast
+from typing import TYPE_CHECKING, Any, BinaryIO, cast
+
+if TYPE_CHECKING:
+    from dulwich.object_format import ObjectFormat
 
 from geventhttpclient import HTTPClient
 
@@ -194,12 +197,15 @@ def load_conf(path: str | None = None, file: str | None = None) -> ConfigParser:
     return conf
 
 
-def swift_load_pack_index(scon: "SwiftConnector", filename: str) -> "PackIndex":
+def swift_load_pack_index(
+    scon: "SwiftConnector", filename: str, object_format: "ObjectFormat"
+) -> "PackIndex":
     """Read a pack index file from Swift.
 
     Args:
       scon: a `SwiftConnector` instance
       filename: Path to the index file objectise
+      object_format: Object format for this pack
     Returns: a `PackIndexer` instance
     """
     f = scon.get_object(filename)
@@ -207,7 +213,7 @@ def swift_load_pack_index(scon: "SwiftConnector", filename: str) -> "PackIndex":
         raise Exception(f"Could not retrieve index file {filename}")
     if isinstance(f, bytes):
         f = BytesIO(f)
-    return load_pack_index_file(filename, f)
+    return load_pack_index_file(filename, f, object_format)
 
 
 def pack_info_create(pack_data: "PackData", pack_index: "PackIndex") -> bytes:
@@ -655,13 +661,31 @@ class SwiftPackData(PackData):
     using the Range header feature of Swift.
     """
 
-    def __init__(self, scon: SwiftConnector, filename: str | os.PathLike[str]) -> None:
+    def __init__(
+        self,
+        scon: SwiftConnector,
+        filename: str | os.PathLike[str],
+        object_format: "ObjectFormat | None" = None,
+    ) -> None:
         """Initialize a SwiftPackReader.
 
         Args:
           scon: a `SwiftConnector` instance
           filename: the pack filename
+          object_format: Object format for this pack
         """
+        from dulwich.object_format import DEFAULT_OBJECT_FORMAT
+
+        if object_format is None:
+            import warnings
+
+            warnings.warn(
+                "SwiftPackData() should be called with object_format parameter",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            object_format = DEFAULT_OBJECT_FORMAT
+        self.object_format = object_format
         self.scon = scon
         self._filename = filename
         self._header_size = 12
@@ -693,7 +717,7 @@ class SwiftPackData(PackData):
         assert offset >= self._header_size
         pack_reader = SwiftPackReader(self.scon, str(self._filename), self.pack_length)
         pack_reader.seek(offset)
-        unpacked, _ = unpack_object(pack_reader.read)
+        unpacked, _ = unpack_object(pack_reader.read, self.object_format.hash_func)
         obj_data = unpacked._obj()
         return (unpacked.pack_type_num, obj_data)
 
@@ -733,7 +757,9 @@ class SwiftPack(Pack):
         self._pack_info_load: Callable[[], dict[str, Any] | None] = (
             lambda: load_pack_info(self._pack_info_path, self.scon)
         )
-        self._idx_load = lambda: swift_load_pack_index(self.scon, self._idx_path)
+        self._idx_load = lambda: swift_load_pack_index(
+            self.scon, self._idx_path, self.object_format
+        )
         self._data_load = lambda: SwiftPackData(self.scon, self._data_path)
 
     @property
@@ -774,7 +800,7 @@ class SwiftObjectStore(PackBasedObjectStore):
         ]
         ret = []
         for basename in pack_files:
-            pack = SwiftPack(basename, scon=self.scon)
+            pack = SwiftPack(basename, object_format=self.object_format, scon=self.scon)
             self._pack_cache[basename] = pack
             ret.append(pack)
         return ret
@@ -852,7 +878,9 @@ class SwiftObjectStore(PackBasedObjectStore):
 
             from ..file import _GitFile
 
-            pack = PackData(file=cast(_GitFile, f), filename="")
+            pack = PackData(
+                file=cast(_GitFile, f), filename="", object_format=self.object_format
+            )
             entries = pack.sorted_entries()
             if entries:
                 basename = posixpath.join(
@@ -865,7 +893,9 @@ class SwiftObjectStore(PackBasedObjectStore):
                 f.close()
                 self.scon.put_object(basename + ".idx", index)
                 index.close()
-                final_pack = SwiftPack(basename, scon=self.scon)
+                final_pack = SwiftPack(
+                    basename, object_format=self.object_format, scon=self.scon
+                )
                 final_pack.check_length_and_checksum()
                 self._add_cached_pack(basename, final_pack)
                 return final_pack
@@ -910,9 +940,17 @@ class SwiftObjectStore(PackBasedObjectStore):
         fd, path = tempfile.mkstemp(prefix="tmp_pack_")
         f = os.fdopen(fd, "w+b")
         try:
-            pack_data = PackData(file=cast(_GitFile, f), filename=path)
-            indexer = PackIndexer(cast(BinaryIO, pack_data._file), resolve_ext_ref=None)
-            copier = PackStreamCopier(read_all, read_some, f, delta_iter=None)
+            pack_data = PackData(
+                file=cast(_GitFile, f), filename=path, object_format=self.object_format
+            )
+            indexer = PackIndexer(
+                cast(BinaryIO, pack_data._file),
+                self.object_format.hash_func,
+                resolve_ext_ref=None,
+            )
+            copier = PackStreamCopier(
+                self.object_format.hash_func, read_all, read_some, f, delta_iter=None
+            )
             copier.verify()
             return self._complete_thin_pack(f, path, copier, indexer)
         finally:
@@ -932,14 +970,18 @@ class SwiftObjectStore(PackBasedObjectStore):
         f.flush()
 
         # Rescan the rest of the pack, computing the SHA with the new header.
-        new_sha = compute_file_sha(f, end_ofs=-20)
+        new_sha = compute_file_sha(
+            f,
+            hash_func=self.object_format.hash_func,
+            end_ofs=-self.object_format.oid_length,
+        )
 
         # Must reposition before writing (http://bugs.python.org/issue3207)
         f.seek(0, os.SEEK_CUR)
 
         # Complete the pack.
         for ext_sha in indexer.ext_refs():  # type: ignore
-            assert len(ext_sha) == 20
+            assert len(ext_sha) in (20, 32)  # SHA-1 or SHA-256
             type_num, data = self.get_raw(ext_sha)
             offset = f.tell()
             crc32 = write_pack_object(f, type_num, data, sha=new_sha)  # type: ignore
@@ -964,9 +1006,11 @@ class SwiftObjectStore(PackBasedObjectStore):
 
         # Write pack info.
         f.seek(0)
-        pack_data = PackData(filename="", file=cast(_GitFile, f))
+        pack_data = PackData(
+            filename="", file=cast(_GitFile, f), object_format=self.object_format
+        )
         index_file.seek(0)
-        pack_index = load_pack_index_file("", index_file)
+        pack_index = load_pack_index_file("", index_file, self.object_format)
         serialized_pack_info = pack_info_create(pack_data, pack_index)
         f.close()
         index_file.close()
@@ -976,7 +1020,9 @@ class SwiftObjectStore(PackBasedObjectStore):
         pack_info_file.close()
 
         # Add the pack to the store and return it.
-        final_pack = SwiftPack(pack_base_name, scon=self.scon)
+        final_pack = SwiftPack(
+            pack_base_name, object_format=self.object_format, scon=self.scon
+        )
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
         return final_pack

+ 12 - 4
dulwich/dumb.py

@@ -31,9 +31,12 @@ import tempfile
 import zlib
 from collections.abc import Callable, Iterator, Mapping, Sequence
 from io import BytesIO
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin
 
+if TYPE_CHECKING:
+    from .object_format import ObjectFormat
+
 from .errors import NotGitRepository, ObjectFormatException
 from .object_store import BaseObjectStore
 from .objects import (
@@ -62,6 +65,7 @@ class DumbHTTPObjectStore(BaseObjectStore):
         http_request_func: Callable[
             [str, dict[str, str]], tuple[Any, Callable[..., bytes]]
         ],
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
         """Initialize a DumbHTTPObjectStore.
 
@@ -69,7 +73,9 @@ class DumbHTTPObjectStore(BaseObjectStore):
           base_url: Base URL of the remote repository (e.g. "https://example.com/repo.git/")
           http_request_func: Function to make HTTP requests, should accept (url, headers)
                            and return (response, read_func).
+          object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
         """
+        super().__init__(object_format=object_format)
         self.base_url = base_url.rstrip("/") + "/"
         self._http_request = http_request_func
         self._packs: list[tuple[str, PackIndex | None]] | None = None
@@ -202,7 +208,9 @@ class DumbHTTPObjectStore(BaseObjectStore):
                     # Fetch and cache the index
                     idx_data = self._fetch_url(f"objects/pack/{pack_name}.idx")
 
-                    idx = load_pack_index_file("<http>", BytesIO(idx_data))
+                    idx = load_pack_index_file(
+                        "<http>", BytesIO(idx_data), self.object_format
+                    )
                     if self._packs is not None:
                         self._packs[i] = (name, idx)
                 return idx
@@ -252,7 +260,7 @@ class DumbHTTPObjectStore(BaseObjectStore):
                     f.write(data)
 
             # Open the pack and get the object
-            pack_data = PackData(pack_path)
+            pack_data = PackData(pack_path, object_format=self.object_format)
             pack = Pack.from_objects(pack_data, pack_idx)
             try:
                 return pack.get_raw(binsha)
@@ -441,7 +449,7 @@ class DumbRemoteHTTPRepo:
         """Get the peeled value of a ref."""
         # For dumb HTTP, we don't have peeled refs readily available
         # We would need to fetch and parse tag objects
-        sha = self.get_refs().get(ref, None)
+        sha: ObjectID | None = self.get_refs().get(ref, None)
         return sha if sha is not None else ZERO_SHA
 
     def fetch_pack_data(

+ 2 - 2
dulwich/errors.py

@@ -70,13 +70,13 @@ class ChecksumMismatch(Exception):
             got: The actual checksum value (bytes or hex string).
             extra: Optional additional error information.
         """
-        if isinstance(expected, bytes) and len(expected) == 20:
+        if isinstance(expected, bytes) and len(expected) in (20, 32):
             expected_str = binascii.hexlify(expected).decode("ascii")
         else:
             expected_str = (
                 expected if isinstance(expected, str) else expected.decode("ascii")
             )
-        if isinstance(got, bytes) and len(got) == 20:
+        if isinstance(got, bytes) and len(got) in (20, 32):
             got_str = binascii.hexlify(got).decode("ascii")
         else:
             got_str = got if isinstance(got, str) else got.decode("ascii")

+ 1 - 0
dulwich/fastexport.py

@@ -369,6 +369,7 @@ class GitImportProcessor(processor.ImportProcessor):  # type: ignore[misc,unused
 
     def reset_handler(self, cmd: commands.ResetCommand) -> None:
         """Process a ResetCommand."""
+        from_: ObjectID
         if cmd.from_ is None:
             from_ = ZERO_SHA
         else:

+ 3 - 2
dulwich/notes.py

@@ -174,11 +174,12 @@ class NotesTree:
 
         # If we have files at the root level, check if they're full SHA names
         if has_files and not has_dirs:
-            # Check if any file names are full 40-char hex strings
+            # Check if any file names are full hex strings (40 for SHA-1, 64 for SHA-256)
+            hex_length = self._object_store.object_format.hex_length
             for name, mode, sha in self._tree.items():
                 assert name is not None
                 assert mode is not None
-                if stat.S_ISREG(mode) and len(name) == 40:
+                if stat.S_ISREG(mode) and len(name) == hex_length:
                     try:
                         int(name, 16)  # Verify it's a valid hex string
                         return 0  # No fanout

+ 163 - 0
dulwich/object_format.py

@@ -0,0 +1,163 @@
+# object_format.py -- Object format abstraction layer for Git
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Object format abstraction for Git objects.
+
+This module provides an abstraction layer for different object formats
+used in Git repositories (SHA-1 and SHA-256).
+"""
+
+from collections.abc import Callable
+from hashlib import sha1, sha256
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from _hashlib import HASH
+
+
+class ObjectFormat:
+    """Object format (hash algorithm) used in Git."""
+
+    def __init__(
+        self,
+        name: str,
+        type_num: int,
+        oid_length: int,
+        hex_length: int,
+        hash_func: Callable[[], "HASH"],
+    ) -> None:
+        """Initialize an object format.
+
+        Args:
+            name: Name of the format (e.g., "sha1", "sha256")
+            type_num: Format type number used in Git
+            oid_length: Length of the binary object ID in bytes
+            hex_length: Length of the hexadecimal object ID in characters
+            hash_func: Hash function from hashlib
+        """
+        self.name = name
+        self.type_num = type_num
+        self.oid_length = oid_length
+        self.hex_length = hex_length
+        self.hash_func = hash_func
+
+    def __str__(self) -> str:
+        """Return string representation."""
+        return self.name
+
+    def __repr__(self) -> str:
+        """Return repr."""
+        return f"ObjectFormat({self.name!r})"
+
+    def new_hash(self) -> "HASH":
+        """Create a new hash object."""
+        return self.hash_func()
+
+    def hash_object(self, data: bytes) -> bytes:
+        """Hash data and return the digest.
+
+        Args:
+            data: Data to hash
+
+        Returns:
+            Binary digest
+        """
+        h = self.new_hash()
+        h.update(data)
+        return h.digest()
+
+    def hash_object_hex(self, data: bytes) -> bytes:
+        """Hash data and return the hexadecimal digest.
+
+        Args:
+            data: Data to hash
+
+        Returns:
+            Hexadecimal digest as bytes
+        """
+        h = self.new_hash()
+        h.update(data)
+        return h.hexdigest().encode("ascii")
+
+
+# Define the supported object formats
+SHA1 = ObjectFormat("sha1", type_num=1, oid_length=20, hex_length=40, hash_func=sha1)
+SHA256 = ObjectFormat(
+    "sha256", type_num=2, oid_length=32, hex_length=64, hash_func=sha256
+)
+
+# Map of format names to ObjectFormat instances
+OBJECT_FORMATS = {
+    "sha1": SHA1,
+    "sha256": SHA256,
+}
+
+# Map of format numbers to ObjectFormat instances
+OBJECT_FORMAT_TYPE_NUMS = {
+    1: SHA1,
+    2: SHA256,
+}
+
+# Default format for backward compatibility
+DEFAULT_OBJECT_FORMAT = SHA1
+
+
+def get_object_format(name: str | None = None) -> ObjectFormat:
+    """Get an object format by name.
+
+    Args:
+        name: Format name ("sha1" or "sha256"). If None, returns default.
+
+    Returns:
+        ObjectFormat instance
+
+    Raises:
+        ValueError: If the format name is not supported
+    """
+    if name is None:
+        return DEFAULT_OBJECT_FORMAT
+    try:
+        return OBJECT_FORMATS[name.lower()]
+    except KeyError:
+        raise ValueError(f"Unsupported object format: {name}")
+
+
+def verify_same_object_format(*formats: ObjectFormat) -> ObjectFormat:
+    """Verify that all provided object formats are the same.
+
+    Args:
+        *formats: Object format instances to verify
+
+    Returns:
+        The common object format
+
+    Raises:
+        ValueError: If formats don't match or no formats provided
+    """
+    if not formats:
+        raise ValueError("At least one object format must be provided")
+
+    first = formats[0]
+    for fmt in formats[1:]:
+        if fmt != first:
+            raise ValueError(f"Object format mismatch: {first.name} != {fmt.name}")
+
+    return first

+ 146 - 43
dulwich/object_store.py

@@ -70,12 +70,14 @@ from typing import (
     cast,
 )
 
+if TYPE_CHECKING:
+    from .object_format import ObjectFormat
+
 from .errors import NotTreeError
 from .file import GitFile, _GitFile
 from .midx import MultiPackIndex, load_midx
 from .objects import (
     S_ISGITLINK,
-    ZERO_SHA,
     Blob,
     Commit,
     ObjectID,
@@ -319,6 +321,16 @@ class PackContainer(Protocol):
 class BaseObjectStore:
     """Object store interface."""
 
+    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
+        """Initialize object store.
+
+        Args:
+            object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
+        """
+        from .object_format import DEFAULT_OBJECT_FORMAT
+
+        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT
+
     def determine_wants_all(
         self, refs: Mapping[Ref, ObjectID], depth: int | None = None
     ) -> list[ObjectID]:
@@ -336,7 +348,6 @@ class BaseObjectStore:
             for (ref, sha) in refs.items()
             if (sha not in self or _want_deepen(sha))
             and not ref.endswith(PEELED_TAG_SUFFIX)
-            and not sha == ZERO_SHA
         ]
 
     def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
@@ -371,7 +382,9 @@ class BaseObjectStore:
     def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
         """Obtain an object by SHA1."""
         type_num, uncomp = self.get_raw(sha1)
-        return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
+        return ShaFile.from_raw_string(
+            type_num, uncomp, sha=sha1, object_format=self.object_format
+        )
 
     def __iter__(self) -> Iterator[ObjectID]:
         """Iterate over the SHAs that are present in this store."""
@@ -816,6 +829,8 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         pack_depth: int | None = None,
         pack_threads: int | None = None,
         pack_big_file_threshold: int | None = None,
+        *,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
         """Initialize a PackBasedObjectStore.
 
@@ -828,7 +843,9 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
           pack_depth: Maximum depth for pack deltas
           pack_threads: Number of threads to use for packing
           pack_big_file_threshold: Threshold for treating files as "big"
+          object_format: Hash algorithm to use
         """
+        super().__init__(object_format=object_format)
         self._pack_cache: dict[str, Pack] = {}
         self.pack_compression_level = pack_compression_level
         self.pack_index_version = pack_index_version
@@ -899,6 +916,7 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
                 num_records=count,
                 progress=progress,
                 compression_level=self.pack_compression_level,
+                object_format=self.object_format,
             )
         except BaseException:
             abort()
@@ -1167,13 +1185,12 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
           name: sha for the object.
         Returns: tuple with numeric type and object contents.
         """
-        if name == ZERO_SHA:
-            raise KeyError(name)
-        if len(name) == 40:
-            sha = hex_to_sha(cast(ObjectID, name))
-            hexsha = cast(ObjectID, name)
-        elif len(name) == 20:
-            sha = cast(RawObjectID, name)
+        sha: RawObjectID
+        if len(name) == self.object_format.hex_length:
+            sha = hex_to_sha(ObjectID(name))
+            hexsha = name
+        elif len(name) == self.object_format.oid_length:
+            sha = RawObjectID(name)
             hexsha = None
         else:
             raise AssertionError(f"Invalid object name {name!r}")
@@ -1305,12 +1322,10 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
           sha1: sha for the object.
           include_comp: Whether to include compression metadata.
         """
-        if sha1 == ZERO_SHA:
-            raise KeyError(sha1)
-        if len(sha1) == 40:
+        if len(sha1) == self.object_format.hex_length:
             sha = hex_to_sha(cast(ObjectID, sha1))
             hexsha = cast(ObjectID, sha1)
-        elif len(sha1) == 20:
+        elif len(sha1) == self.object_format.oid_length:
             sha = cast(RawObjectID, sha1)
             hexsha = None
         else:
@@ -1382,6 +1397,7 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         dir_mode: int | None = None,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
         """Open an object store.
 
@@ -1402,7 +1418,11 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
+          object_format: Hash algorithm to use (SHA1 or SHA256)
         """
+        # Import here to avoid circular dependency
+        from .object_format import DEFAULT_OBJECT_FORMAT
+
         super().__init__(
             pack_compression_level=pack_compression_level,
             pack_index_version=pack_index_version,
@@ -1412,6 +1432,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_depth=pack_depth,
             pack_threads=pack_threads,
             pack_big_file_threshold=pack_big_file_threshold,
+            object_format=object_format if object_format else DEFAULT_OBJECT_FORMAT,
         )
         self.path = path
         self.pack_dir = os.path.join(self.path, PACKDIR)
@@ -1540,6 +1561,24 @@ class DiskObjectStore(PackBasedObjectStore):
                 (b"repack",), b"writeBitmaps", False
             )
 
+        # Get hash algorithm from config
+        from .object_format import get_object_format
+
+        object_format = None
+        try:
+            try:
+                version = int(config.get((b"core",), b"repositoryformatversion"))
+            except KeyError:
+                version = 0
+            if version == 1:
+                try:
+                    object_format_name = config.get((b"extensions",), b"objectformat")
+                except KeyError:
+                    object_format_name = b"sha1"
+                object_format = get_object_format(object_format_name.decode("ascii"))
+        except (KeyError, ValueError):
+            pass
+
         instance = cls(
             path,
             loose_compression_level=loose_compression_level,
@@ -1557,6 +1596,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             dir_mode=dir_mode,
+            object_format=object_format,
         )
         instance._use_commit_graph = use_commit_graph
         instance._use_midx = use_midx
@@ -1641,6 +1681,7 @@ class DiskObjectStore(PackBasedObjectStore):
             if f not in self._pack_cache:
                 pack = Pack(
                     os.path.join(self.pack_dir, f),
+                    object_format=self.object_format,
                     delta_window_size=self.pack_delta_window_size,
                     window_memory=self.pack_window_memory,
                     delta_cache_size=self.pack_delta_cache_size,
@@ -1675,6 +1716,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Returns:
             Number of loose objects
         """
+        # Calculate expected filename length for loose
+        # objects (excluding directory)
+        fn_length = self.object_format.hex_length - 2
         count = 0
         if not os.path.exists(self.path):
             return 0
@@ -1683,11 +1727,7 @@ class DiskObjectStore(PackBasedObjectStore):
             subdir = os.path.join(self.path, f"{i:02x}")
             try:
                 count += len(
-                    [
-                        name
-                        for name in os.listdir(subdir)
-                        if len(name) == 38  # 40 - 2 for the prefix
-                    ]
+                    [name for name in os.listdir(subdir) if len(name) == fn_length]
                 )
             except FileNotFoundError:
                 # Directory may have been removed or is inaccessible
@@ -1698,7 +1738,13 @@ class DiskObjectStore(PackBasedObjectStore):
     def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
         path = self._get_shafile_path(sha)
         try:
-            return ShaFile.from_path(path)
+            # Load the object from path with SHA and hash algorithm from object store
+            # Convert to hex ObjectID if needed
+            if len(sha) == self.object_format.oid_length:
+                hex_sha: ObjectID = sha_to_hex(RawObjectID(sha))
+            else:
+                hex_sha = ObjectID(sha)
+            return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
         except FileNotFoundError:
             return None
 
@@ -1800,6 +1846,7 @@ class DiskObjectStore(PackBasedObjectStore):
             get_raw=self.get_raw,
             compression_level=self.pack_compression_level,
             progress=progress,
+            object_format=self.object_format,
         )
         f.flush()
         if self.fsync_object_files:
@@ -1853,7 +1900,9 @@ class DiskObjectStore(PackBasedObjectStore):
             # Load the index we just wrote
             with open(target_index_path, "rb") as idx_file:
                 pack_index = load_pack_index_file(
-                    os.path.basename(target_index_path), idx_file
+                    os.path.basename(target_index_path),
+                    idx_file,
+                    self.object_format,
                 )
 
             # Generate the bitmap
@@ -1879,6 +1928,7 @@ class DiskObjectStore(PackBasedObjectStore):
         # Add the pack to the store and return it.
         final_pack = Pack(
             pack_base_name,
+            object_format=self.object_format,
             delta_window_size=self.pack_delta_window_size,
             window_memory=self.pack_window_memory,
             delta_cache_size=self.pack_delta_cache_size,
@@ -1916,8 +1966,18 @@ class DiskObjectStore(PackBasedObjectStore):
         fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
         with os.fdopen(fd, "w+b") as f:
             os.chmod(path, PACK_MODE)
-            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)  # type: ignore[arg-type]
-            copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)  # type: ignore[arg-type]
+            indexer = PackIndexer(
+                f,
+                self.object_format.hash_func,
+                resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
+            )
+            copier = PackStreamCopier(
+                self.object_format.hash_func,
+                read_all,
+                read_some,
+                f,
+                delta_iter=indexer,  # type: ignore[arg-type]
+            )
             copier.verify(progress=progress)
             return self._complete_pack(f, path, len(copier), indexer, progress=progress)
 
@@ -1941,7 +2001,7 @@ class DiskObjectStore(PackBasedObjectStore):
             if f.tell() > 0:
                 f.seek(0)
 
-                with PackData(path, f) as pd:
+                with PackData(path, file=f, object_format=self.object_format) as pd:
                     indexer = PackIndexer.for_pack_data(
                         pd,
                         resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
@@ -1964,7 +2024,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Args:
           obj: Object to add
         """
-        path = self._get_shafile_path(obj.id)
+        # Use the correct hash algorithm for the object ID
+        obj_id = ObjectID(obj.get_id(self.object_format))
+        path = self._get_shafile_path(obj_id)
         dir = os.path.dirname(path)
         try:
             os.mkdir(dir)
@@ -1987,6 +2049,7 @@ class DiskObjectStore(PackBasedObjectStore):
         *,
         file_mode: int | None = None,
         dir_mode: int | None = None,
+        object_format: "ObjectFormat | None" = None,
     ) -> "DiskObjectStore":
         """Initialize a new disk object store.
 
@@ -1996,6 +2059,7 @@ class DiskObjectStore(PackBasedObjectStore):
           path: Path where the object store should be created
           file_mode: Optional file permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
+          object_format: Hash algorithm to use (SHA1 or SHA256)
 
         Returns:
           New DiskObjectStore instance
@@ -2013,7 +2077,9 @@ class DiskObjectStore(PackBasedObjectStore):
         if dir_mode is not None:
             os.chmod(info_path, dir_mode)
             os.chmod(pack_path, dir_mode)
-        return cls(path, file_mode=file_mode, dir_mode=dir_mode)
+        return cls(
+            path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format
+        )
 
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
         """Iterate over all object SHAs with the given prefix.
@@ -2126,6 +2192,7 @@ class DiskObjectStore(PackBasedObjectStore):
 
         pack = Pack(
             pack_path,
+            object_format=self.object_format,
             delta_window_size=self.pack_delta_window_size,
             window_memory=self.pack_window_memory,
             delta_cache_size=self.pack_delta_cache_size,
@@ -2170,15 +2237,14 @@ class DiskObjectStore(PackBasedObjectStore):
         Raises:
             KeyError: If object not found
         """
-        if name == ZERO_SHA:
-            raise KeyError(name)
-
         sha: RawObjectID
-        if len(name) == 40:
+        if len(name) in (40, 64):
             # name is ObjectID (hex), convert to RawObjectID
+            # Support both SHA1 (40) and SHA256 (64)
             sha = hex_to_sha(cast(ObjectID, name))
-        elif len(name) == 20:
+        elif len(name) in (20, 32):
             # name is already RawObjectID (binary)
+            # Support both SHA1 (20) and SHA256 (32)
             sha = RawObjectID(name)
         else:
             raise AssertionError(f"Invalid object name {name!r}")
@@ -2265,8 +2331,22 @@ class DiskObjectStore(PackBasedObjectStore):
             # Get all reachable commits
             commit_ids = get_reachable_commits(self, all_refs)
         else:
-            # Just use the direct ref targets (already ObjectIDs)
-            commit_ids = all_refs
+            # Just use the direct ref targets - ensure they're hex ObjectIDs
+            commit_ids = []
+            for ref in all_refs:
+                if isinstance(ref, bytes) and len(ref) == self.object_format.hex_length:
+                    # Already hex ObjectID
+                    commit_ids.append(ref)
+                elif (
+                    isinstance(ref, bytes) and len(ref) == self.object_format.oid_length
+                ):
+                    # Binary SHA, convert to hex ObjectID
+                    from .objects import sha_to_hex
+
+                    commit_ids.append(sha_to_hex(RawObjectID(ref)))
+                else:
+                    # Assume it's already correct format
+                    commit_ids.append(ref)
 
         if commit_ids:
             # Write commit graph directly to our object store path
@@ -2367,19 +2447,22 @@ class DiskObjectStore(PackBasedObjectStore):
 class MemoryObjectStore(PackCapableObjectStore):
     """Object store that keeps all objects in memory."""
 
-    def __init__(self) -> None:
+    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
         """Initialize a MemoryObjectStore.
 
         Creates an empty in-memory object store.
+
+        Args:
+            object_format: Hash algorithm to use (defaults to SHA1)
         """
-        super().__init__()
+        super().__init__(object_format=object_format)
         self._data: dict[ObjectID, ShaFile] = {}
         self.pack_compression_level = -1
 
     def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
-        if len(sha) == 40:
+        if len(sha) == self.object_format.hex_length:
             return cast(ObjectID, sha)
-        elif len(sha) == 20:
+        elif len(sha) == self.object_format.oid_length:
             return sha_to_hex(cast(RawObjectID, sha))
         else:
             raise ValueError(f"Invalid sha {sha!r}")
@@ -2465,7 +2548,7 @@ class MemoryObjectStore(PackCapableObjectStore):
             if size > 0:
                 f.seek(0)
 
-                p = PackData.from_file(f, size)
+                p = PackData.from_file(f, self.object_format, size)
                 for obj in PackInflater.for_pack_data(p, self.get_raw):  # type: ignore[arg-type]
                     self.add_object(obj)
                 p.close()
@@ -2504,6 +2587,7 @@ class MemoryObjectStore(PackCapableObjectStore):
                 unpacked_objects,
                 num_records=count,
                 progress=progress,
+                object_format=self.object_format,
             )
         except BaseException:
             abort()
@@ -2513,8 +2597,8 @@ class MemoryObjectStore(PackCapableObjectStore):
 
     def add_thin_pack(
         self,
-        read_all: Callable[[], bytes],
-        read_some: Callable[[int], bytes],
+        read_all: Callable[[int], bytes],
+        read_some: Callable[[int], bytes] | None,
         progress: Callable[[str], None] | None = None,
     ) -> None:
         """Add a new thin pack to this object store.
@@ -2532,7 +2616,12 @@ class MemoryObjectStore(PackCapableObjectStore):
         """
         f, commit, abort = self.add_pack()
         try:
-            copier = PackStreamCopier(read_all, read_some, f)  # type: ignore[arg-type]
+            copier = PackStreamCopier(
+                self.object_format.hash_func,
+                read_all,
+                read_some,
+                f,
+            )
             copier.verify()
         except BaseException:
             abort()
@@ -2871,6 +2960,7 @@ class ObjectStoreGraphWalker:
     def ack(self, sha: ObjectID) -> None:
         """Ack that a revision and its ancestors are present in the source."""
         if len(sha) != 40:
+            # TODO: support SHA256
             raise ValueError(f"unexpected sha {sha!r} received")
         ancestors = {sha}
 
@@ -2986,7 +3076,20 @@ class OverlayObjectStore(BaseObjectStore):
         Args:
           bases: List of base object stores to overlay
           add_store: Optional store to write new objects to
+
+        Raises:
+          ValueError: If stores have different hash algorithms
         """
+        from .object_format import verify_same_object_format
+
+        # Verify all stores use the same hash algorithm
+        store_algorithms = [store.object_format for store in bases]
+        if add_store:
+            store_algorithms.append(add_store.object_format)
+
+        object_format = verify_same_object_format(*store_algorithms)
+
+        super().__init__(object_format=object_format)
         self.bases = bases
         self.add_store = add_store
 
@@ -3250,7 +3353,7 @@ class BucketBasedObjectStore(PackBasedObjectStore):
 
             pf.seek(0)
 
-            p = PackData(pf.name, pf)
+            p = PackData(pf.name, file=pf, object_format=self.object_format)
             entries = p.sorted_entries()
             basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
             idxf = tempfile.SpooledTemporaryFile(
@@ -3259,7 +3362,7 @@ class BucketBasedObjectStore(PackBasedObjectStore):
             checksum = p.get_stored_checksum()
             write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
             idxf.seek(0)
-            idx = load_pack_index_file(basename + ".idx", idxf)
+            idx = load_pack_index_file(basename + ".idx", idxf, self.object_format)
             for pack in self.packs:
                 if pack.get_stored_checksum() == p.get_stored_checksum():
                     p.close()

+ 188 - 43
dulwich/objects.py

@@ -106,12 +106,16 @@ from .errors import (
     ObjectFormatException,
 )
 from .file import GitFile
+from .object_format import DEFAULT_OBJECT_FORMAT, ObjectFormat
 
 if TYPE_CHECKING:
     from _hashlib import HASH
 
     from .file import _GitFile
 
+# Zero-SHA constants (kept for backward compatibility) are defined below as ObjectID
+
+
 # Header fields for commits
 _TREE_HEADER = b"tree"
 _PARENT_HEADER = b"parent"
@@ -175,13 +179,17 @@ def _decompress(string: bytes) -> bytes:
 def sha_to_hex(sha: RawObjectID) -> ObjectID:
     """Takes a string and returns the hex of the sha within."""
     hexsha = binascii.hexlify(sha)
-    assert len(hexsha) == 40, f"Incorrect length of sha1 string: {hexsha!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hexsha) not in (40, 64):
+        raise ValueError(f"Incorrect length of sha string: {hexsha!r}")
     return ObjectID(hexsha)
 
 
 def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
     """Takes a hex sha and returns a binary sha."""
-    assert len(hex) == 40, f"Incorrect length of hexsha: {hex!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hex) not in (40, 64):
+        raise ValueError(f"Incorrect length of hexsha: {hex!r}")
     try:
         return RawObjectID(binascii.unhexlify(hex))
     except TypeError as exc:
@@ -191,15 +199,15 @@ def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
 
 
 def valid_hexsha(hex: bytes | str) -> bool:
-    """Check if a string is a valid hex SHA.
+    """Check if a hex string is a valid SHA1 or SHA256.
 
     Args:
-      hex: Hex string to check
+        hex: Hex string to validate
 
     Returns:
-      True if valid hex SHA, False otherwise
+        True if valid SHA1 (40 chars) or SHA256 (64 chars), False otherwise
     """
-    if len(hex) != 40:
+    if len(hex) not in (40, 64):
         return False
     try:
         binascii.unhexlify(hex)
@@ -443,13 +451,21 @@ else:
 class ShaFile:
     """A git SHA file."""
 
-    __slots__ = ("_chunked_text", "_needs_serialization", "_sha")
+    __slots__ = ("_chunked_text", "_needs_serialization", "_sha", "object_format")
 
     _needs_serialization: bool
     type_name: bytes
     type_num: int
     _chunked_text: list[bytes] | None
     _sha: "FixedSha | None | HASH"
+    object_format: ObjectFormat
+
+    def __init__(self) -> None:
+        """Initialize a ShaFile."""
+        self._sha = None
+        self._chunked_text = None
+        self._needs_serialization = True
+        self.object_format = DEFAULT_OBJECT_FORMAT
 
     @staticmethod
     def _parse_legacy_object_header(
@@ -545,15 +561,23 @@ class ShaFile:
         self.set_raw_chunks([text], sha)
 
     def set_raw_chunks(
-        self, chunks: list[bytes], sha: ObjectID | RawObjectID | None = None
+        self,
+        chunks: list[bytes],
+        sha: ObjectID | RawObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
     ) -> None:
         """Set the contents of this object from a list of chunks."""
         self._chunked_text = chunks
-        self._deserialize(chunks)
+        # Set hash algorithm if provided
+        if object_format is not None:
+            self.object_format = object_format
+        # Set SHA before deserialization so Tree can use hash algorithm
         if sha is None:
             self._sha = None
         else:
             self._sha = FixedSha(sha)
+        self._deserialize(chunks)
         self._needs_serialization = False
 
     @staticmethod
@@ -587,25 +611,28 @@ class ShaFile:
         return (b0 & 0x8F) == 0x08 and (word % 31) == 0
 
     @classmethod
-    def _parse_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile") -> "ShaFile":
+    def _parse_file(
+        cls,
+        f: BufferedIOBase | IO[bytes] | "_GitFile",
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "ShaFile":
         map = f.read()
         if not map:
             raise EmptyFileException("Corrupted empty file detected")
 
         if cls._is_legacy_object(map):
             obj = cls._parse_legacy_object_header(map, f)
+            if object_format is not None:
+                obj.object_format = object_format
             obj._parse_legacy_object(map)
         else:
             obj = cls._parse_object_header(map, f)
+            if object_format is not None:
+                obj.object_format = object_format
             obj._parse_object(map)
         return obj
 
-    def __init__(self) -> None:
-        """Don't call this directly."""
-        self._sha = None
-        self._chunked_text = []
-        self._needs_serialization = True
-
     def _deserialize(self, chunks: list[bytes]) -> None:
         raise NotImplementedError(self._deserialize)
 
@@ -613,24 +640,52 @@ class ShaFile:
         raise NotImplementedError(self._serialize)
 
     @classmethod
-    def from_path(cls, path: str | bytes) -> "ShaFile":
+    def from_path(
+        cls,
+        path: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "ShaFile":
         """Open a SHA file from disk."""
         with GitFile(path, "rb") as f:
-            return cls.from_file(f)
+            return cls.from_file(f, sha, object_format=object_format)
 
     @classmethod
-    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile") -> "ShaFile":
+    def from_file(
+        cls,
+        f: BufferedIOBase | IO[bytes] | "_GitFile",
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "ShaFile":
         """Get the contents of a SHA file on disk."""
         try:
-            obj = cls._parse_file(f)
-            obj._sha = None
+            # Validate SHA length matches hash algorithm if both provided
+            if sha is not None and object_format is not None:
+                expected_len = object_format.hex_length
+                if len(sha) != expected_len:
+                    raise ValueError(
+                        f"SHA length {len(sha)} doesn't match hash algorithm "
+                        f"{object_format.name} (expected {expected_len})"
+                    )
+
+            obj = cls._parse_file(f, object_format=object_format)
+            if sha is not None:
+                obj._sha = FixedSha(sha)
+            else:
+                obj._sha = None
             return obj
         except (IndexError, ValueError) as exc:
             raise ObjectFormatException("invalid object header") from exc
 
     @staticmethod
     def from_raw_string(
-        type_num: int, string: bytes, sha: ObjectID | RawObjectID | None = None
+        type_num: int,
+        string: bytes,
+        sha: ObjectID | RawObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
     ) -> "ShaFile":
         """Creates an object of the indicated type from the raw string given.
 
@@ -638,11 +693,14 @@ class ShaFile:
           type_num: The numeric type of the object.
           string: The raw uncompressed contents.
           sha: Optional known sha for the object
+          object_format: Optional hash algorithm for the object
         """
         cls = object_class(type_num)
         if cls is None:
             raise AssertionError(f"unsupported class type num: {type_num}")
         obj = cls()
+        if object_format is not None:
+            obj.object_format = object_format
         obj.set_raw_string(string, sha)
         return obj
 
@@ -713,8 +771,21 @@ class ShaFile:
         """Returns the length of the raw string of this object."""
         return sum(map(len, self.as_raw_chunks()))
 
-    def sha(self) -> "FixedSha | HASH":
-        """The SHA1 object that is the name of this object."""
+    def sha(self, object_format: ObjectFormat | None = None) -> "FixedSha | HASH":
+        """The SHA object that is the name of this object.
+
+        Args:
+            object_format: ObjectFormat to hash with; None uses the cached SHA1.
+        """
+        # An explicit object_format always recomputes the digest (cache is bypassed)
+        if object_format is not None:
+            new_sha = object_format.new_hash()
+            new_sha.update(self._header())
+            for chunk in self.as_raw_chunks():
+                new_sha.update(chunk)
+            return new_sha
+
+        # Otherwise use cached SHA1 value
         if self._sha is None or self._needs_serialization:
             # this is a local because as_raw_chunks() overwrites self._sha
             new_sha = sha1()
@@ -733,9 +804,32 @@ class ShaFile:
 
     @property
     def id(self) -> ObjectID:
-        """The hex SHA of this object."""
+        """The hex SHA1 of this object.
+
+        For SHA256 repositories, use get_id(object_format) instead.
+        This property always returns SHA1 for backward compatibility.
+        """
         return ObjectID(self.sha().hexdigest().encode("ascii"))
 
+    def get_id(self, object_format: ObjectFormat | None = None) -> bytes:
+        """Get the hex SHA of this object using the specified hash algorithm.
+
+        Args:
+            object_format: Optional ObjectFormat to use. Defaults to SHA1.
+
+        Example:
+            >>> blob = Blob()
+            >>> blob.data = b"Hello, World!"
+            >>> blob.id  # Always returns SHA1 for backward compatibility
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> blob.get_id()  # Same as .id
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> from dulwich.object_format import SHA256
+            >>> blob.get_id(SHA256)  # Get SHA256 hash
+            b'03ba204e2f2e707...'  # 64-character SHA256
+        """
+        return self.sha(object_format).hexdigest().encode("ascii")
+
     def __repr__(self) -> str:
         """Return string representation of this object."""
         return f"<{self.__class__.__name__} {self.id!r}>"
@@ -806,11 +900,19 @@ class Blob(ShaFile):
     )
 
     @classmethod
-    def from_path(cls, path: str | bytes) -> "Blob":
+    def from_path(
+        cls,
+        path: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Blob":
         """Read a blob from a file on disk.
 
         Args:
           path: Path to the blob file
+          sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
         Returns:
           A Blob object
@@ -818,7 +920,7 @@ class Blob(ShaFile):
         Raises:
           NotBlobError: If the file is not a blob
         """
-        blob = ShaFile.from_path(path)
+        blob = ShaFile.from_path(path, sha, object_format=object_format)
         if not isinstance(blob, cls):
             raise NotBlobError(_path_to_bytes(path))
         return blob
@@ -967,11 +1069,19 @@ class Tag(ShaFile):
         self._signature: bytes | None = None
 
     @classmethod
-    def from_path(cls, filename: str | bytes) -> "Tag":
+    def from_path(
+        cls,
+        filename: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Tag":
         """Read a tag from a file on disk.
 
         Args:
           filename: Path to the tag file
+          sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
         Returns:
           A Tag object
@@ -979,7 +1089,7 @@ class Tag(ShaFile):
         Raises:
           NotTagError: If the file is not a tag
         """
-        tag = ShaFile.from_path(filename)
+        tag = ShaFile.from_path(filename, sha, object_format=object_format)
         if not isinstance(tag, cls):
             raise NotTagError(_path_to_bytes(filename))
         return tag
@@ -1247,13 +1357,14 @@ class TreeEntry(NamedTuple):
 
 
 def parse_tree(
-    text: bytes, strict: bool = False
-) -> Iterator[tuple[bytes, int, ObjectID]]:
+    text: bytes, sha_len: int | None = None, *, strict: bool = False
+) -> Iterator[tuple[bytes, int, bytes]]:
     """Parse a tree text.
 
     Args:
       text: Serialized text to parse
-      strict: If True, enforce strict validation
+      sha_len: Raw object ID length in bytes (20 for SHA1, 32 for SHA256); required
+      strict: Whether to be strict about format
     Returns: iterator of tuples of (name, mode, sha)
 
     Raises:
@@ -1261,6 +1372,7 @@ def parse_tree(
     """
     count = 0
     length = len(text)
+
     while count < length:
         mode_end = text.index(b" ", count)
         mode_text = text[count:mode_end]
@@ -1272,10 +1384,20 @@ def parse_tree(
             raise ObjectFormatException(f"Invalid mode {mode_text!r}") from exc
         name_end = text.index(b"\0", mode_end)
         name = text[mode_end + 1 : name_end]
-        count = name_end + 21
+
+        if sha_len is None:
+            raise ObjectFormatException("sha_len must be specified")
+        count = name_end + 1 + sha_len
+        if count > length:
+            raise ObjectFormatException(
+                f"Tree entry extends beyond tree length: {count} > {length}"
+            )
+
         sha = text[name_end + 1 : count]
-        if len(sha) != 20:
-            raise ObjectFormatException("Sha has invalid length")
+        if len(sha) != sha_len:
+            raise ObjectFormatException(
+                f"Sha has invalid length: {len(sha)} != {sha_len}"
+            )
         hexsha = sha_to_hex(RawObjectID(sha))
         yield (name, mode, hexsha)
 
@@ -1387,11 +1509,19 @@ class Tree(ShaFile):
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
 
     @classmethod
-    def from_path(cls, filename: str | bytes) -> "Tree":
+    def from_path(
+        cls,
+        filename: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Tree":
         """Read a tree from a file on disk.
 
         Args:
           filename: Path to the tree file
+          sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
         Returns:
           A Tree object
@@ -1399,7 +1529,7 @@ class Tree(ShaFile):
         Raises:
           NotTreeError: If the file is not a tree
         """
-        tree = ShaFile.from_path(filename)
+        tree = ShaFile.from_path(filename, sha, object_format=object_format)
         if not isinstance(tree, cls):
             raise NotTreeError(_path_to_bytes(filename))
         return tree
@@ -1470,13 +1600,16 @@ class Tree(ShaFile):
     def _deserialize(self, chunks: list[bytes]) -> None:
         """Grab the entries in the tree."""
         try:
-            parsed_entries = parse_tree(b"".join(chunks))
+            parsed_entries = parse_tree(
+                b"".join(chunks),
+                sha_len=self.object_format.oid_length,
+            )
         except ValueError as exc:
             raise ObjectFormatException(exc) from exc
         # TODO: list comprehension is for efficiency in the common (small)
         # case; if memory efficiency in the large case is a concern, use a
         # genexp.
-        self._entries = {n: (m, s) for n, m, s in parsed_entries}
+        self._entries = {n: (m, ObjectID(s)) for n, m, s in parsed_entries}
 
     def check(self) -> None:
         """Check this object for internal consistency.
@@ -1496,7 +1629,11 @@ class Tree(ShaFile):
             # TODO: optionally exclude as in git fsck --strict
             stat.S_IFREG | 0o664,
         )
-        for name, mode, sha in parse_tree(b"".join(self._chunked_text), True):
+        for name, mode, sha in parse_tree(
+            b"".join(self._chunked_text),
+            strict=True,
+            sha_len=self.object_format.oid_length,
+        ):
             check_hexsha(sha, f"invalid sha {sha!r}")
             if b"/" in name or name in (b"", b".", b"..", b".git"):
                 raise ObjectFormatException(
@@ -1506,7 +1643,7 @@ class Tree(ShaFile):
             if mode not in allowed_modes:
                 raise ObjectFormatException(f"invalid mode {mode:06o}")
 
-            entry = (name, (mode, sha))
+            entry = (name, (mode, ObjectID(sha)))
             if last:
                 if key_entry(last) > key_entry(entry):
                     raise ObjectFormatException("entries not sorted")
@@ -1768,11 +1905,19 @@ class Commit(ShaFile):
         self._commit_timezone_neg_utc: bool | None = False
 
     @classmethod
-    def from_path(cls, path: str | bytes) -> "Commit":
+    def from_path(
+        cls,
+        path: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Commit":
         """Read a commit from a file on disk.
 
         Args:
           path: Path to the commit file
+          sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
         Returns:
           A Commit object
@@ -1780,7 +1925,7 @@ class Commit(ShaFile):
         Raises:
           NotCommitError: If the file is not a commit
         """
-        commit = ShaFile.from_path(path)
+        commit = ShaFile.from_path(path, sha, object_format=object_format)
         if not isinstance(commit, cls):
             raise NotCommitError(_path_to_bytes(path))
         return commit

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 429 - 97
dulwich/pack.py


+ 6 - 3
dulwich/porcelain/__init__.py

@@ -1310,6 +1310,7 @@ def init(
     *,
     bare: bool = False,
     symlinks: bool | None = None,
+    object_format: str | None = None,
 ) -> Repo:
     """Create a new git repository.
 
@@ -1317,15 +1318,16 @@ def init(
       path: Path to repository.
       bare: Whether to create a bare repository.
       symlinks: Whether to create actual symlinks (defaults to autodetect)
+      object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
     Returns: A Repo instance
     """
     if not os.path.exists(path):
         os.mkdir(path)
 
     if bare:
-        return Repo.init_bare(path)
+        return Repo.init_bare(path, object_format=object_format)
     else:
-        return Repo.init(path, symlinks=symlinks)
+        return Repo.init(path, symlinks=symlinks, object_format=object_format)
 
 
 def _filter_transport_kwargs(**kwargs: object) -> TransportKwargs:
@@ -4552,6 +4554,7 @@ def pack_objects(
             deltify=deltify,
             delta_window_size=delta_window_size,
             reuse_deltas=reuse_deltas,
+            object_format=r.object_format,
         )
     if idxf is not None:
         index_entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
@@ -6146,7 +6149,7 @@ def unpack_objects(
 
     with open_repo_closing(target) as r:
         pack_basename = os.path.splitext(pack_path)[0]
-        with Pack(pack_basename) as pack:
+        with Pack(pack_basename, object_format=r.object_store.object_format) as pack:
             count = 0
             for unpacked in pack.iter_unpacked():
                 obj = unpacked.sha_file()

+ 15 - 0
dulwich/protocol.py

@@ -39,6 +39,7 @@ __all__ = [
     "CAPABILITY_MULTI_ACK_DETAILED",
     "CAPABILITY_NO_DONE",
     "CAPABILITY_NO_PROGRESS",
+    "CAPABILITY_OBJECT_FORMAT",
     "CAPABILITY_OFS_DELTA",
     "CAPABILITY_QUIET",
     "CAPABILITY_REPORT_STATUS",
@@ -78,6 +79,7 @@ __all__ = [
     "ack_type",
     "agent_string",
     "capability_agent",
+    "capability_object_format",
     "capability_symref",
     "extract_capabilities",
     "extract_capability_names",
@@ -171,6 +173,7 @@ CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
 CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
 CAPABILITY_FETCH = b"fetch"
 CAPABILITY_FILTER = b"filter"
+CAPABILITY_OBJECT_FORMAT = b"object-format"
 
 # Magic ref that is used to attach capabilities to when
 # there are no refs. Should always be ste to ZERO_SHA.
@@ -233,6 +236,18 @@ def capability_agent() -> bytes:
     return CAPABILITY_AGENT + b"=" + agent_string()
 
 
+def capability_object_format(fmt: str) -> bytes:
+    """Generate the object-format capability string.
+
+    Args:
+      fmt: Object format name (e.g., "sha1" or "sha256")
+
+    Returns:
+      Object-format capability with format name
+    """
+    return CAPABILITY_OBJECT_FORMAT + b"=" + fmt.encode("ascii")
+
+
 def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
     """Generate a symref capability string.
 

+ 5 - 3
dulwich/refs.py

@@ -1000,7 +1000,7 @@ class DiskRefsContainer(RefsContainer):
         """Read a reference file and return its contents.
 
         If the reference file a symbolic reference, only read the first line of
-        the file. Otherwise, only read the first 40 bytes.
+        the file. Otherwise, read the whole first line, which holds the hex hash (40 characters for SHA1, 64 for SHA256).
 
         Args:
           name: the refname to read, relative to refpath
@@ -1018,8 +1018,10 @@ class DiskRefsContainer(RefsContainer):
                     # Read only the first line
                     return header + next(iter(f)).rstrip(b"\r\n")
                 else:
-                    # Read only the first 40 bytes
-                    return header + f.read(40 - len(SYMREF))
+                    # Read the entire line to get the full hash (handles both SHA1 and SHA256)
+                    f.seek(0)
+                    line = f.readline().rstrip(b"\r\n")
+                    return line
         except (OSError, UnicodeError):
             # don't assume anything specific about the error; in
             # particular, invalid or forbidden paths can raise weird

+ 152 - 13
dulwich/repo.py

@@ -83,6 +83,7 @@ if TYPE_CHECKING:
     from .filters import FilterBlobNormalizer, FilterContext
     from .index import Index
     from .notes import Notes
+    from .object_format import ObjectFormat
     from .object_store import BaseObjectStore, GraphWalker
     from .pack import UnpackedObject
     from .rebase import RebaseStateManager
@@ -499,7 +500,10 @@ class BaseRepo:
     """
 
     def __init__(
-        self, object_store: "PackCapableObjectStore", refs: RefsContainer
+        self,
+        object_store: "PackCapableObjectStore",
+        refs: RefsContainer,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
         """Open a repository.
 
@@ -509,12 +513,17 @@ class BaseRepo:
         Args:
           object_store: Object store to use
           refs: Refs container to use
+          object_format: Object format to use (if None, the object store's format is used)
         """
         self.object_store = object_store
         self.refs = refs
 
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self.hooks: dict[str, Hook] = {}
+        if object_format is None:
+            self.object_format: ObjectFormat = object_store.object_format
+        else:
+            self.object_format = object_format
 
     def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
@@ -537,6 +546,7 @@ class BaseRepo:
         symlinks: bool | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> None:
         """Initialize a default set of named files."""
         from .config import ConfigFile
@@ -544,11 +554,35 @@ class BaseRepo:
         self._put_named_file("description", b"Unnamed repository")
         f = BytesIO()
         cf = ConfigFile()
-        if format is None:
-            format = 0
+
+        # Determine the appropriate format version
+        if object_format == "sha256":
+            # SHA256 requires format version 1
+            if format is None:
+                format = 1
+            elif format != 1:
+                raise ValueError(
+                    "SHA256 object format requires repository format version 1"
+                )
+        else:
+            # SHA1 (default) can use format 0 or 1
+            if format is None:
+                format = 0
+
         if format not in (0, 1):
             raise ValueError(f"Unsupported repository format version: {format}")
+
         cf.set("core", "repositoryformatversion", str(format))
+
+        # Set object format extension if using SHA256
+        if object_format == "sha256":
+            cf.set("extensions", "objectformat", "sha256")
+
+        # Set hash algorithm based on object format
+        from .object_format import get_object_format
+
+        self.object_format = get_object_format(object_format)
+
         if self._determine_file_mode():
             cf.set("core", "filemode", True)
         else:
@@ -574,6 +608,19 @@ class BaseRepo:
         self._put_named_file("config", f.getvalue())
         self._put_named_file(os.path.join("info", "exclude"), b"")
 
+        # Allow subclasses to handle config initialization
+        self._init_config(cf)
+
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration.
+
+        This method can be overridden by subclasses to handle config initialization.
+
+        Args:
+            config: The ConfigFile object that was just created
+        """
+        # Default implementation does nothing
+
     def get_named_file(self, path: str) -> BinaryIO | None:
         """Get a file from the control dir with a specific name.
 
@@ -609,6 +656,46 @@ class BaseRepo:
         """
         raise NotImplementedError(self.open_index)
 
+    def _change_object_format(self, object_format_name: str) -> None:
+        """Change the object format of this repository.
+
+        This can only be done if the object store is empty (no objects written yet).
+
+        Args:
+          object_format_name: Name of the new object format (e.g., "sha1", "sha256")
+
+        Raises:
+          AssertionError: If the object store is not empty
+        """
+        # Check if object store has any objects
+        for _ in self.object_store:
+            raise AssertionError(
+                "Cannot change object format: repository already contains objects"
+            )
+
+        # Update the object format
+        from .object_format import get_object_format
+
+        new_format = get_object_format(object_format_name)
+        self.object_format = new_format
+        self.object_store.object_format = new_format
+
+        # Update config file
+        config = self.get_config()
+
+        if object_format_name == "sha1":
+            # For SHA-1, explicitly remove objectformat extension if present
+            try:
+                config.remove("extensions", "objectformat")
+            except KeyError:
+                pass
+        else:
+            # For non-SHA-1 formats, set repositoryformatversion to 1 and objectformat extension
+            config.set("core", "repositoryformatversion", "1")
+            config.set("extensions", "objectformat", object_format_name)
+
+        config.write_to_path()
+
     def fetch(
         self,
         target: "BaseRepo",
@@ -627,6 +714,11 @@ class BaseRepo:
           depth: Optional shallow fetch depth
         Returns: The local refs
         """
+        # Fix object format if needed
+        if self.object_format != target.object_format:
+            # Switch the target to our format; this raises AssertionError unless the target's object store is empty
+            target._change_object_format(self.object_format.name)
+
         if determine_wants is None:
             determine_wants = target.object_store.determine_wants_all
         count, pack_data = self.fetch_pack_data(
@@ -850,10 +942,12 @@ class BaseRepo:
         # TODO: move this method to WorkTree
         return self.refs[HEADREF]
 
-    def _get_object(self, sha: bytes, cls: type[T]) -> T:
-        assert len(sha) in (20, 40)
-        obj_id = ObjectID(sha) if len(sha) == 40 else RawObjectID(sha)
-        ret = self.get_object(obj_id)
+    def _get_object(self, sha: ObjectID | RawObjectID, cls: type[T]) -> T:
+        assert len(sha) in (
+            self.object_format.oid_length,
+            self.object_format.hex_length,
+        )
+        ret = self.get_object(sha)
         if not isinstance(ret, cls):
             if cls is Commit:
                 raise NotCommitError(ret.id)
@@ -1103,7 +1197,7 @@ class BaseRepo:
         """
         if not isinstance(name, bytes):
             raise TypeError(f"'name' must be bytestring, not {type(name).__name__:.80}")
-        if len(name) in (20, 40):
+        if len(name) in (20, 32, 40, 64):  # Support both SHA1 and SHA256
             try:
                 # Try as ObjectID/RawObjectID
                 return self.object_store[
@@ -1120,12 +1214,17 @@ class BaseRepo:
         """Check if a specific Git object or ref is present.
 
         Args:
-          name: Git object SHA1 or ref name
+          name: Git object SHA1/SHA256 or ref name
         """
         if len(name) == 20:
             return RawObjectID(name) in self.object_store or Ref(name) in self.refs
         elif len(name) == 40 and valid_hexsha(name):
             return ObjectID(name) in self.object_store or Ref(name) in self.refs
+        # Check if it's a binary or hex SHA
+        if len(name) == self.object_format.oid_length:
+            return RawObjectID(name) in self.object_store or Ref(name) in self.refs
+        elif len(name) == self.object_format.hex_length and valid_hexsha(name):
+            return ObjectID(name) in self.object_store or Ref(name) in self.refs
         else:
             return Ref(name) in self.refs
 
@@ -1424,7 +1523,7 @@ class Repo(BaseRepo):
                     has_reftable_extension = True
                 else:
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
-            elif extension.lower() not in (b"worktreeconfig",):
+            elif extension.lower() not in (b"worktreeconfig", b"objectformat"):
                 raise UnsupportedExtension(extension.decode("utf-8"))
 
         if object_store is None:
@@ -1451,6 +1550,21 @@ class Repo(BaseRepo):
             self.worktrees = WorkTreeContainer(self)
         BaseRepo.__init__(self, object_store, self.refs)
 
+        # Determine hash algorithm from config if not already set
+        if self.object_format is None:
+            from .object_format import DEFAULT_OBJECT_FORMAT, get_object_format
+
+            if format_version == 1:
+                try:
+                    object_format = config.get((b"extensions",), b"objectformat")
+                    self.object_format = get_object_format(
+                        object_format.decode("ascii")
+                    )
+                except KeyError:
+                    self.object_format = DEFAULT_OBJECT_FORMAT
+            else:
+                self.object_format = DEFAULT_OBJECT_FORMAT
+
         self._graftpoints = {}
         graft_file = self.get_named_file(
             os.path.join("info", "grafts"), basedir=self.commondir()
@@ -2055,6 +2169,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
         path = os.fspath(path)
         if isinstance(path, bytes):
@@ -2076,11 +2191,17 @@ class Repo(BaseRepo):
             if dir_mode is not None:
                 os.chmod(dir_path, dir_mode)
 
+        # Determine hash algorithm
+        from .object_format import get_object_format
+
+        hash_alg = get_object_format(object_format)
+
         if object_store is None:
             object_store = DiskObjectStore.init(
                 os.path.join(controldir, OBJECTDIR),
                 file_mode=file_mode,
                 dir_mode=dir_mode,
+                object_format=hash_alg,
             )
         ret = cls(path, bare=bare, object_store=object_store)
         if default_branch is None:
@@ -2098,6 +2219,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             format=format,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         return ret
 
@@ -2112,6 +2234,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
         """Create a new repository.
 
@@ -2123,6 +2246,7 @@ class Repo(BaseRepo):
           symlinks: Whether to support symlinks
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: `Repo` instance
         """
         path = os.fspath(path)
@@ -2142,6 +2266,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             format=format,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
 
     @classmethod
@@ -2213,6 +2338,7 @@ class Repo(BaseRepo):
         default_branch: bytes | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
         """Create a new bare repository.
 
@@ -2226,6 +2352,7 @@ class Repo(BaseRepo):
           default_branch: Default branch name
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: a `Repo` instance
         """
         path = os.fspath(path)
@@ -2242,6 +2369,7 @@ class Repo(BaseRepo):
             default_branch=default_branch,
             format=format,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
 
     create = init_bare
@@ -2452,6 +2580,7 @@ class MemoryRepo(BaseRepo):
     def __init__(self) -> None:
         """Create a new repository in memory."""
         from .config import ConfigFile
+        from .object_format import DEFAULT_OBJECT_FORMAT
 
         self._reflog: list[Any] = []
         refs_container = DictRefsContainer({}, logger=self._append_reflog)
@@ -2461,6 +2590,8 @@ class MemoryRepo(BaseRepo):
         self._config = ConfigFile()
         self._description: bytes | None = None
         self.filter_context = None
+        # A new MemoryRepo starts out with the default object format
+        self.object_format = DEFAULT_OBJECT_FORMAT
 
     def _append_reflog(
         self,
@@ -2551,6 +2682,10 @@ class MemoryRepo(BaseRepo):
         """
         raise NoIndexPresent
 
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration for MemoryRepo."""
+        self._config = config
+
     def get_config(self) -> "ConfigFile":
         """Retrieve the config object.
 
@@ -2651,8 +2786,10 @@ class MemoryRepo(BaseRepo):
             raise ValueError("tree must be specified for MemoryRepo")
 
         c = Commit()
-        if len(tree) != 40:
-            raise ValueError("tree must be a 40-byte hex sha string")
+        if len(tree) != self.object_format.hex_length:
+            raise ValueError(
+                f"tree must be a {self.object_format.hex_length}-character hex sha string"
+            )
         c.tree = tree
 
         config = self.get_config_stack()
@@ -2739,6 +2876,7 @@ class MemoryRepo(BaseRepo):
         objects: Iterable[ShaFile],
         refs: Mapping[Ref, ObjectID],
         format: int | None = None,
+        object_format: str | None = None,
     ) -> "MemoryRepo":
         """Create a new bare repository in memory.
 
@@ -2748,11 +2886,12 @@ class MemoryRepo(BaseRepo):
           refs: Refs as dictionary, mapping names
             to object SHA1s
           format: Repository format version (defaults to 0)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         """
         ret = cls()
         for obj in objects:
             ret.object_store.add_object(obj)
         for refname, sha in refs.items():
             ret.refs.add_if_new(refname, sha)
-        ret._init_files(bare=True, format=format)
+        ret._init_files(bare=True, format=format, object_format=object_format)
         return ret

+ 24 - 16
dulwich/server.py

@@ -81,6 +81,7 @@ from typing import IO, TYPE_CHECKING
 from typing import Protocol as TypingProtocol
 
 if TYPE_CHECKING:
+    from .object_format import ObjectFormat
     from .object_store import BaseObjectStore
     from .repo import BaseRepo
 
@@ -129,12 +130,12 @@ from .protocol import (
     SIDE_BAND_CHANNEL_PROGRESS,
     SINGLE_ACK,
     TCP_GIT_PORT,
-    ZERO_SHA,
     BufferedPktLineWriter,
     Protocol,
     ReceivableProtocol,
     ack_type,
     capability_agent,
+    capability_object_format,
     extract_capabilities,
     extract_want_line_capabilities,
     format_ack_line,
@@ -174,6 +175,7 @@ class BackendRepo(TypingProtocol):
 
     object_store: PackBasedObjectStore
     refs: RefsContainer
+    object_format: "ObjectFormat"
 
     def get_refs(self) -> dict[bytes, bytes]:
         """Get all the refs in the repository.
@@ -339,10 +341,9 @@ class PackHandler(Handler):
         self._done_received = False
         self.advertise_refs = False
 
-    @classmethod
-    def capabilities(cls) -> Iterable[bytes]:
+    def capabilities(self) -> Iterable[bytes]:
         """Return a list of capabilities supported by this handler."""
-        raise NotImplementedError(cls.capabilities)
+        raise NotImplementedError(self.capabilities)
 
     @classmethod
     def innocuous_capabilities(cls) -> Iterable[bytes]:
@@ -438,9 +439,12 @@ class UploadPackHandler(PackHandler):
         # data (such as side-band, see the progress method here).
         self._processing_have_lines = False
 
-    @classmethod
-    def capabilities(cls) -> list[bytes]:
-        """Return the list of capabilities supported by upload-pack."""
+    def capabilities(self) -> list[bytes]:
+        """Return the list of capabilities supported by upload-pack.
+
+        Returns:
+            List of capabilities including object-format for the repository
+        """
         return [
             CAPABILITY_MULTI_ACK_DETAILED,
             CAPABILITY_MULTI_ACK,
@@ -451,6 +455,7 @@ class UploadPackHandler(PackHandler):
             CAPABILITY_INCLUDE_TAG,
             CAPABILITY_SHALLOW,
             CAPABILITY_NO_DONE,
+            capability_object_format(self.repo.object_format.name),
         ]
 
     @classmethod
@@ -585,6 +590,7 @@ class UploadPackHandler(PackHandler):
             self.write_pack_data,
             self.repo.object_store,
             object_ids,
+            object_format=self.repo.object_format,
         )
         # we are done
         self.proto.write_pkt_line(None)
@@ -871,12 +877,12 @@ class _ProtocolGraphWalker:
         """Acknowledge a have reference.
 
         Args:
-          have_ref: SHA to acknowledge (40 bytes hex)
+          have_ref: SHA to acknowledge (40 bytes hex for SHA-1, 64 bytes for SHA-256)
 
         Raises:
-          ValueError: If have_ref is not 40 bytes
+          ValueError: If have_ref is not a valid length
         """
-        if len(have_ref) != 40:
+        if len(have_ref) not in (40, 64):
             raise ValueError(f"invalid sha {have_ref!r}")
         assert self._impl is not None
         return self._impl.ack(have_ref)
@@ -1294,12 +1300,11 @@ class ReceivePackHandler(PackHandler):
         self.repo = backend.open_repository(args[0])
         self.advertise_refs = advertise_refs
 
-    @classmethod
-    def capabilities(cls) -> Iterable[bytes]:
+    def capabilities(self) -> Iterable[bytes]:
         """Return supported capabilities.
 
         Returns:
-            List of capability names
+            List of capability names including object-format for the repository
         """
         return [
             CAPABILITY_REPORT_STATUS,
@@ -1308,6 +1313,7 @@ class ReceivePackHandler(PackHandler):
             CAPABILITY_OFS_DELTA,
             CAPABILITY_SIDE_BAND_64K,
             CAPABILITY_NO_DONE,
+            capability_object_format(self.repo.object_format.name),
         ]
 
     def _apply_pack(
@@ -1332,9 +1338,10 @@ class ReceivePackHandler(PackHandler):
             ObjectFormatException,
         )
         will_send_pack = False
+        zero_sha = ObjectID(b"0" * self.repo.object_format.hex_length)
 
         for command in refs:
-            if command[1] != ZERO_SHA:
+            if command[1] != zero_sha:
                 will_send_pack = True
 
         if will_send_pack:
@@ -1356,7 +1363,7 @@ class ReceivePackHandler(PackHandler):
         for oldsha, sha, ref in refs:
             ref_status = b"ok"
             try:
-                if sha == ZERO_SHA:
+                if sha == zero_sha:
                     if CAPABILITY_DELETE_REFS not in self.capabilities():
                         raise GitProtocolError(
                             "Attempted to delete refs without delete-refs capability."
@@ -1429,7 +1436,8 @@ class ReceivePackHandler(PackHandler):
             symrefs = sorted(self.repo.refs.get_symrefs().items())
 
             if not refs:
-                refs = [(CAPABILITIES_REF, ZERO_SHA)]
+                zero_sha = ObjectID(b"0" * self.repo.object_format.hex_length)
+                refs = [(CAPABILITIES_REF, zero_sha)]
             logger.info("Sending capabilities: %s", self.capabilities())
             self.proto.write_pkt_line(
                 format_ref_line(

+ 0 - 9
dulwich/tests/test_object_store.py

@@ -79,15 +79,6 @@ class ObjectStoreTests:
             ),
         )
 
-    def test_determine_wants_all_zero(self) -> None:
-        """Test determine_wants_all with zero ref."""
-        self.assertEqual(
-            [],
-            self.store.determine_wants_all(
-                {Ref(b"refs/heads/foo"): ObjectID(b"0" * 40)}
-            ),
-        )
-
     def test_determine_wants_all_depth(self) -> None:
         """Test determine_wants_all with depth parameter."""
         self.store.add_object(testobject)

+ 7 - 4
dulwich/tests/utils.py

@@ -35,8 +35,9 @@ from typing import Any, BinaryIO, TypeVar
 from unittest import SkipTest
 
 from dulwich.index import commit_tree
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.object_store import BaseObjectStore
-from dulwich.objects import Commit, FixedSha, ShaFile, Tag, object_class
+from dulwich.objects import ZERO_SHA, Commit, FixedSha, ShaFile, Tag, object_class
 from dulwich.pack import (
     DELTA_TYPES,
     OFS_DELTA,
@@ -114,7 +115,7 @@ def make_object(cls: type[T], **attrs: Any) -> T:
         if name == "id":
             # id property is read-only, so we overwrite sha instead.
             sha = FixedSha(value)
-            obj.sha = lambda: sha
+            obj.sha = lambda hash_algorithm=None: sha
         else:
             setattr(obj, name, value)
     return obj
@@ -137,7 +138,7 @@ def make_commit(**attrs: Any) -> Commit:
         "commit_timezone": 0,
         "message": b"Test message.",
         "parents": [],
-        "tree": b"0" * 40,
+        "tree": ZERO_SHA,
     }
     all_attrs.update(attrs)
     return make_object(Commit, **all_attrs)
@@ -276,7 +277,9 @@ def build_pack(
                 base = obj_sha(base_type_num, base_data)
             obj = (base, list(create_delta(base_data, data)))
 
-        crc32 = write_pack_object(sf.write, type_num, obj)
+        crc32 = write_pack_object(
+            sf.write, type_num, obj, object_format=DEFAULT_OBJECT_FORMAT
+        )
         offsets[i] = offset
         crc32s[i] = crc32
 

+ 2 - 0
tests/compat/__init__.py

@@ -41,6 +41,8 @@ def test_suite() -> unittest.TestSuite:
         "reftable",
         "repository",
         "server",
+        "sha256",
+        "sha256_packs",
         "utils",
         "web",
     ]

+ 2 - 7
tests/compat/server_utils.py

@@ -350,13 +350,8 @@ class ServerTests:
 class NoSideBand64kReceivePackHandler(ReceivePackHandler):
     """ReceivePackHandler that does not support side-band-64k."""
 
-    @classmethod
-    def capabilities(cls):
-        return [
-            c
-            for c in ReceivePackHandler.capabilities()
-            if c != CAPABILITY_SIDE_BAND_64K
-        ]
+    def capabilities(self):
+        return [c for c in super().capabilities() if c != CAPABILITY_SIDE_BAND_64K]
 
 
 def ignore_error(error):

+ 4 - 3
tests/compat/test_bitmap.py

@@ -33,6 +33,7 @@ from dulwich.bitmap import (
     PackBitmap,
     write_bitmap,
 )
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.pack import Pack
 from dulwich.repo import Repo
 
@@ -99,7 +100,7 @@ class BitmapCompatTests(TestCase):
         )
 
         # Try to load the bitmap using Dulwich
-        with Pack(pack_path) as pack:
+        with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
             bitmap = pack.bitmap
 
             # Basic checks
@@ -196,7 +197,7 @@ class BitmapCompatTests(TestCase):
         pack_path = os.path.join(pack_dir, pack_basename)
 
         # Load the pack and create bitmap data, then close before writing
-        with Pack(pack_path) as pack:
+        with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
             # Create a simple bitmap for testing
             # Git requires BITMAP_OPT_FULL_DAG flag
             bitmap = PackBitmap(
@@ -261,7 +262,7 @@ class BitmapCompatTests(TestCase):
         bitmap_name = bitmap_files[0]
         pack_basename = bitmap_name.replace(".bitmap", "")
         pack_path = os.path.join(pack_dir, pack_basename)
-        with Pack(pack_path) as pack:
+        with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
             bitmap = pack.bitmap
 
             self.assertIsNotNone(bitmap)

+ 27 - 12
tests/compat/test_pack.py

@@ -29,6 +29,7 @@ import tempfile
 from typing import NoReturn
 
 from dulwich.file import GitFile
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.objects import Blob
 from dulwich.pack import (
     PackData,
@@ -70,7 +71,9 @@ class TestPack(PackTests):
         with self.get_pack(pack1_sha) as origpack:
             self.assertSucceeds(origpack.index.check)
             pack_path = os.path.join(self._tempdir, "Elch")
-            write_pack(pack_path, origpack.pack_tuples())
+            write_pack(
+                pack_path, origpack.pack_tuples(), object_format=DEFAULT_OBJECT_FORMAT
+            )
             output = run_git_or_fail(["verify-pack", "-v", pack_path])
             orig_shas = {o.id for o in origpack.iterobjects()}
             self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
@@ -84,7 +87,9 @@ class TestPack(PackTests):
                 (new_blob, None)
             ]
         pack_path = os.path.join(self._tempdir, "pack_with_deltas")
-        write_pack(pack_path, all_to_pack, deltify=True)
+        write_pack(
+            pack_path, all_to_pack, deltify=True, object_format=DEFAULT_OBJECT_FORMAT
+        )
         output = run_git_or_fail(["verify-pack", "-v", pack_path])
         self.assertEqual(
             {x[0].id for x in all_to_pack},
@@ -115,7 +120,12 @@ class TestPack(PackTests):
                 (new_blob_2, None),
             ]
             pack_path = os.path.join(self._tempdir, "pack_with_deltas")
-            write_pack(pack_path, all_to_pack, deltify=True)
+            write_pack(
+                pack_path,
+                all_to_pack,
+                deltify=True,
+                object_format=DEFAULT_OBJECT_FORMAT,
+            )
         output = run_git_or_fail(["verify-pack", "-v", pack_path])
         self.assertEqual(
             {x[0].id for x in all_to_pack},
@@ -155,7 +165,12 @@ class TestPack(PackTests):
                 (new_blob_2, None),
             ]
             pack_path = os.path.join(self._tempdir, "pack_with_deltas")
-            write_pack(pack_path, all_to_pack, deltify=True)
+            write_pack(
+                pack_path,
+                all_to_pack,
+                deltify=True,
+                object_format=DEFAULT_OBJECT_FORMAT,
+            )
         output = run_git_or_fail(["verify-pack", "-v", pack_path])
         self.assertEqual(
             {x[0].id for x in all_to_pack},
@@ -189,10 +204,10 @@ class TestPackIndexCompat(PackTests):
 
         pack_path = os.path.join(self._tempdir, "test_pack")
         entries = [(blob, None)]
-        write_pack(pack_path, entries)
+        write_pack(pack_path, entries, object_format=DEFAULT_OBJECT_FORMAT)
 
         # Load the pack and create v2 index (most compatible)
-        pack_data = PackData(pack_path + ".pack")
+        pack_data = PackData(pack_path + ".pack", object_format=DEFAULT_OBJECT_FORMAT)
         try:
             pack_data.create_index(pack_path + ".idx", version=2)
         finally:
@@ -210,13 +225,13 @@ class TestPackIndexCompat(PackTests):
 
         pack_path = os.path.join(self._tempdir, "git_pack")
         entries = [(blob, None)]
-        write_pack(pack_path, entries)
+        write_pack(pack_path, entries, object_format=DEFAULT_OBJECT_FORMAT)
 
         # Create index with git
         run_git_or_fail(["index-pack", pack_path + ".pack"])
 
         # Load with dulwich
-        idx = load_pack_index(pack_path + ".idx")
+        idx = load_pack_index(pack_path + ".idx", DEFAULT_OBJECT_FORMAT)
 
         # Verify it works
         self.assertIn(blob.id, idx)
@@ -232,16 +247,16 @@ class TestPackIndexCompat(PackTests):
 
         v3_path = os.path.join(self._tempdir, "v3_test.idx")
         with GitFile(v3_path, "wb") as f:
-            write_pack_index_v3(f, entries, b"x" * 20, hash_algorithm=1)
+            write_pack_index_v3(f, entries, b"x" * 20, hash_format=1)
 
         # Load and verify structure
-        idx = load_pack_index(v3_path)
+        idx = load_pack_index(v3_path, DEFAULT_OBJECT_FORMAT)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
-        self.assertEqual(idx.hash_algorithm, 1)  # SHA-1
+        self.assertEqual(idx.hash_format, 1)  # SHA-1
         self.assertEqual(idx.hash_size, 20)
 
         # Verify SHA-256 would raise NotImplementedError
         with self.assertRaises(NotImplementedError):
             with GitFile(v3_path + ".sha256", "wb") as f:
-                write_pack_index_v3(f, entries, b"x" * 32, hash_algorithm=2)
+                write_pack_index_v3(f, entries, b"x" * 32, hash_format=2)

+ 122 - 6
tests/compat/test_server.py

@@ -27,14 +27,19 @@ Warning: these tests should be fairly stable, but when writing/debugging new
 """
 
 import os
+import shutil
 import sys
+import tempfile
 import threading
 
+from dulwich.object_format import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.repo import Repo
 from dulwich.server import DictBackend, TCPGitServer
 
 from .. import skipIf
 from .server_utils import NoSideBand64kReceivePackHandler, ServerTests
-from .utils import CompatTestCase, require_git_version
+from .utils import CompatTestCase, require_git_version, run_git_or_fail
 
 
 @skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
@@ -49,15 +54,23 @@ class GitServerTestCase(ServerTests, CompatTestCase):
     def _handlers(self):
         return {b"git-receive-pack": NoSideBand64kReceivePackHandler}
 
-    def _check_server(self, dul_server) -> None:
+    def _check_server(self, dul_server, repo) -> None:
+        from dulwich.protocol import Protocol
+
         receive_pack_handler_cls = dul_server.handlers[b"git-receive-pack"]
-        caps = receive_pack_handler_cls.capabilities()
+        # Create a handler instance to check capabilities
+        handler = receive_pack_handler_cls(
+            dul_server.backend,
+            [b"/"],
+            Protocol(lambda x: b"", lambda x: None),
+        )
+        caps = handler.capabilities()
         self.assertNotIn(b"side-band-64k", caps)
 
     def _start_server(self, repo):
         backend = DictBackend({b"/": repo})
         dul_server = TCPGitServer(backend, b"localhost", 0, handlers=self._handlers())
-        self._check_server(dul_server)
+        self._check_server(dul_server, repo)
 
         # Start server in a thread
         server_thread = threading.Thread(target=dul_server.serve)
@@ -95,7 +108,110 @@ class GitServerSideBand64kTestCase(GitServerTestCase):
     def _handlers(self) -> None:
         return None  # default handlers include side-band-64k
 
-    def _check_server(self, server) -> None:
+    def _check_server(self, server, repo) -> None:
+        from dulwich.protocol import Protocol
+
         receive_pack_handler_cls = server.handlers[b"git-receive-pack"]
-        caps = receive_pack_handler_cls.capabilities()
+        # Create a handler instance to check capabilities
+        handler = receive_pack_handler_cls(
+            server.backend,
+            [b"/"],
+            Protocol(lambda x: b"", lambda x: None),
+        )
+        caps = handler.capabilities()
         self.assertIn(b"side-band-64k", caps)
+
+
+@skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
+class GitServerSHA256TestCase(CompatTestCase):
+    """Tests for SHA-256 repository server compatibility with git client."""
+
+    protocol = "git"
+    # SHA-256 support was introduced in git 2.29.0
+    min_git_version = (2, 29, 0)
+
+    def setUp(self) -> None:
+        super().setUp()
+        require_git_version(self.min_git_version)
+
+    def _start_server(self, repo):
+        backend = DictBackend({b"/": repo})
+        dul_server = TCPGitServer(backend, b"localhost", 0)
+
+        # Start server in a thread
+        server_thread = threading.Thread(target=dul_server.serve)
+        server_thread.daemon = True
+        server_thread.start()
+
+        # Add cleanup
+        def cleanup_server():
+            dul_server.shutdown()
+            dul_server.server_close()
+            server_thread.join(timeout=1.0)
+
+        self.addCleanup(cleanup_server)
+        self._server = dul_server
+        _, port = self._server.socket.getsockname()
+        return port
+
+    def url(self, port) -> str:
+        return f"{self.protocol}://localhost:{port}/"
+
+    def test_clone_sha256_repo_from_dulwich_server(self) -> None:
+        """Test that git client can clone SHA-256 repo from dulwich server."""
+        # Create SHA-256 repository with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+        source_repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create test content
+        blob = Blob.from_string(b"Test SHA-256 content from dulwich server")
+        tree = Tree()
+        tree.add(b"test.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test SHA-256 commit"
+
+        # Add objects to repo
+        source_repo.object_store.add_object(blob)
+        source_repo.object_store.add_object(tree)
+        source_repo.object_store.add_object(commit)
+
+        # Set master ref
+        source_repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+
+        # Start dulwich server
+        port = self._start_server(source_repo)
+
+        # Clone with git client
+        clone_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_path)
+        clone_dir = os.path.join(clone_path, "cloned_repo")
+
+        run_git_or_fail(["clone", self.url(port), clone_dir], cwd=clone_path)
+
+        # Verify cloned repo is SHA-256
+        cloned_repo = Repo(clone_dir)
+        self.addCleanup(cloned_repo.close)
+        self.assertEqual(cloned_repo.object_format, SHA256)
+
+        # Verify object format config
+        output = run_git_or_fail(
+            ["config", "--get", "extensions.objectformat"], cwd=clone_dir
+        )
+        self.assertEqual(output.strip(), b"sha256")
+
+        # Verify commit was cloned
+        cloned_head = cloned_repo.refs[b"refs/heads/master"]
+        self.assertEqual(len(cloned_head), 64)  # SHA-256 length
+        self.assertEqual(cloned_head, commit.get_id(SHA256))
+
+        # Verify git can read the commit
+        log_output = run_git_or_fail(["log", "--format=%s", "-n", "1"], cwd=clone_dir)
+        self.assertEqual(log_output.strip(), b"Test SHA-256 commit")
+
+        source_repo.close()

+ 409 - 0
tests/compat/test_sha256.py

@@ -0,0 +1,409 @@
+# test_sha256.py -- Compatibility tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 support with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.object_format import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
+
+
+class GitSHA256CompatibilityTests(CompatTestCase):
+    """Test SHA256 compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_sha256_repo_creation_compat(self):
+        """Test that dulwich-created SHA256 repos are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Add a blob and tree using dulwich
+        blob = Blob.from_string(b"Hello SHA256 world!")
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.get_id(SHA256))
+
+        # Create objects in the repository
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+
+        repo.close()
+
+        # Verify git can read the repository
+        config_output = self._run_git(
+            ["config", "--get", "extensions.objectformat"], cwd=repo_path
+        )
+        self.assertEqual(config_output.strip(), b"sha256")
+
+        # Verify git recognizes it as a SHA256 repository
+        rev_parse_output = self._run_git(
+            ["rev-parse", "--show-object-format"], cwd=repo_path
+        )
+        self.assertEqual(rev_parse_output.strip(), b"sha256")
+
+    def test_git_created_sha256_repo_readable(self):
+        """Test that git-created SHA256 repos are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a file and commit with git
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("Test SHA256 content")
+
+        self._run_git(["add", "test.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test SHA256 commit"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Verify dulwich detects SHA256
+        self.assertEqual(repo.object_format, SHA256)
+
+        # Verify dulwich can read objects
+        # Try both main and master branches (git default changed over time)
+        try:
+            head_ref = repo.refs[b"refs/heads/main"]
+        except KeyError:
+            head_ref = repo.refs[b"refs/heads/master"]
+        self.assertEqual(len(head_ref), 64)  # SHA256 length
+
+        # Read the commit object
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+        self.assertEqual(len(commit.tree), 64)  # SHA256 tree ID
+
+        repo.close()
+
+    def test_object_hashing_consistency(self):
+        """Test that object hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file with known content
+        test_content = b"Test content for SHA256 hashing consistency"
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Get git's hash for the content
+        git_hash = self._run_git(["hash-object", "test.txt"], cwd=repo_path)
+        git_hash = git_hash.strip().decode("ascii")
+
+        # Create same blob with dulwich
+        blob = Blob.from_string(test_content)
+        dulwich_hash = blob.get_id(SHA256).decode("ascii")
+
+        # Hashes should match
+        self.assertEqual(git_hash, dulwich_hash)
+
+    def test_tree_hashing_consistency(self):
+        """Test that tree hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file and add to index
+        test_content = b"Tree test content"
+        test_file = os.path.join(repo_path, "tree_test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        self._run_git(["add", "tree_test.txt"], cwd=repo_path)
+
+        # Get git's tree hash
+        git_tree_hash = self._run_git(["write-tree"], cwd=repo_path)
+        git_tree_hash = git_tree_hash.strip().decode("ascii")
+
+        # Create same tree with dulwich
+        blob = Blob.from_string(test_content)
+        tree = Tree()
+        tree.add(b"tree_test.txt", 0o100644, blob.get_id(SHA256))
+
+        dulwich_tree_hash = tree.get_id(SHA256).decode("ascii")
+
+        # Tree hashes should match
+        self.assertEqual(git_tree_hash, dulwich_tree_hash)
+
+    def test_commit_creation_interop(self):
+        """Test commit creation interoperability between dulwich and git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects with dulwich
+        blob = Blob.from_string(b"Interop test content")
+        tree = Tree()
+        tree.add(b"interop.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test SHA256 commit from dulwich"
+
+        # Add objects to repo
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        # Update HEAD
+        commit_id = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = commit_id
+        repo.close()
+
+        # Verify git can read the commit
+        commit_hash = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        commit_hash = commit_hash.strip().decode("ascii")
+        self.assertEqual(len(commit_hash), 64)  # SHA256 length
+
+        # Verify git can show the commit
+        commit_message = self._run_git(["log", "--format=%s", "-n", "1"], cwd=repo_path)
+        self.assertEqual(commit_message.strip(), b"Test SHA256 commit from dulwich")
+
+        # Verify git can list the tree
+        tree_content = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"interop.txt", tree_content)
+
+    def test_ref_updates_interop(self):
+        """Test that ref updates work between dulwich and git."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial commit with git
+        test_file = os.path.join(repo_path, "initial.txt")
+        with open(test_file, "w") as f:
+            f.write("Initial content")
+
+        self._run_git(["add", "initial.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=repo_path)
+
+        initial_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        initial_commit = initial_commit.strip()
+
+        # Update ref with dulwich
+        repo = Repo(repo_path)
+
+        # Create new commit with dulwich
+        blob = Blob.from_string(b"New content from dulwich")
+        tree = Tree()
+        tree.add(b"dulwich.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.parents = [initial_commit]
+        commit.author = commit.committer = b"Dulwich User <dulwich@example.com>"
+        commit.commit_time = commit.author_time = 1234567891
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Commit from dulwich"
+
+        # Add objects and update ref
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        new_commit_hash = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = new_commit_hash
+        repo.close()
+
+        # Verify git sees the update
+        current_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        current_commit = current_commit.strip().decode("ascii")
+        self.assertEqual(current_commit, new_commit_hash.decode("ascii"))
+
+        # Verify git can access the new tree
+        tree_listing = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"dulwich.txt", tree_listing)
+
+    def test_clone_sha256_repo_git_to_dulwich(self):
+        """Test cloning a git SHA256 repository with dulwich."""
+        # Create source repo with git
+        source_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, source_path)
+        self._run_git(["init", "--object-format=sha256", source_path])
+
+        # Add content
+        test_file = os.path.join(source_path, "clone_test.txt")
+        with open(test_file, "w") as f:
+            f.write("Content to be cloned")
+
+        self._run_git(["add", "clone_test.txt"], cwd=source_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=source_path)
+
+        # Clone with dulwich
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, target_path)
+
+        target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
+
+        # Copy objects (simplified clone)
+        source_repo = Repo(source_path)
+
+        # Copy all objects
+        for obj_id in source_repo.object_store:
+            obj = source_repo.object_store[obj_id]
+            target_repo.object_store.add_object(obj)
+
+        # Copy refs
+        for ref_name in source_repo.refs.keys():
+            ref_id = source_repo.refs[ref_name]
+            target_repo.refs[ref_name] = ref_id
+
+        # Set HEAD
+        target_repo.refs.set_symbolic_ref(b"HEAD", b"refs/heads/master")
+
+        source_repo.close()
+        target_repo.close()
+
+        # Verify with git
+        output = self._run_git(["rev-parse", "--show-object-format"], cwd=target_path)
+        self.assertEqual(output.strip(), b"sha256")
+
+        # Verify content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=target_path)
+        cloned_file = os.path.join(target_path, "clone_test.txt")
+        with open(cloned_file) as f:
+            content = f.read()
+        self.assertEqual(content, "Content to be cloned")
+
+    def test_fsck_sha256_repo(self):
+        """Test that git fsck works on dulwich-created SHA256 repos."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a more complex object graph
+        # Multiple blobs
+        blobs = []
+        for i in range(5):
+            blob = Blob.from_string(f"Blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Multiple trees
+        subtree = Tree()
+        subtree.add(b"subfile1.txt", 0o100644, blobs[0].get_id(SHA256))
+        subtree.add(b"subfile2.txt", 0o100644, blobs[1].get_id(SHA256))
+        repo.object_store.add_object(subtree)
+
+        main_tree = Tree()
+        main_tree.add(b"file1.txt", 0o100644, blobs[2].get_id(SHA256))
+        main_tree.add(b"file2.txt", 0o100644, blobs[3].get_id(SHA256))
+        main_tree.add(b"subdir", 0o040000, subtree.get_id(SHA256))
+        repo.object_store.add_object(main_tree)
+
+        # Create commits
+        commit1 = Commit()
+        commit1.tree = main_tree.get_id(SHA256)
+        commit1.author = commit1.committer = b"Test <test@example.com>"
+        commit1.commit_time = commit1.author_time = 1234567890
+        commit1.commit_timezone = commit1.author_timezone = 0
+        commit1.message = b"First commit"
+        repo.object_store.add_object(commit1)
+
+        commit2 = Commit()
+        commit2.tree = main_tree.get_id(SHA256)
+        commit2.parents = [commit1.get_id(SHA256)]
+        commit2.author = commit2.committer = b"Test <test@example.com>"
+        commit2.commit_time = commit2.author_time = 1234567891
+        commit2.commit_timezone = commit2.author_timezone = 0
+        commit2.message = b"Second commit"
+        repo.object_store.add_object(commit2)
+
+        # Set refs
+        repo.refs[b"refs/heads/master"] = commit2.get_id(SHA256)
+        repo.refs[b"refs/heads/branch1"] = commit1.get_id(SHA256)
+
+        repo.close()
+
+        # Run git fsck
+        fsck_output = self._run_git(["fsck", "--full"], cwd=repo_path)
+        # fsck should not report any errors (empty output or success message)
+        self.assertNotIn(b"error", fsck_output.lower())
+        self.assertNotIn(b"missing", fsck_output.lower())
+        self.assertNotIn(b"broken", fsck_output.lower())
+
+    def test_dulwich_clone_sha256_repo(self):
+        """Test that dulwich's clone() auto-detects SHA-256 format from git repo."""
+        from dulwich.client import LocalGitClient
+
+        # Create source SHA-256 repo with git
+        source_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, source_path)
+        self._run_git(
+            ["init", "--object-format=sha256", "--initial-branch=main", source_path]
+        )
+
+        # Add content and commit
+        test_file = os.path.join(source_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("SHA-256 clone test")
+
+        self._run_git(["add", "test.txt"], cwd=source_path)
+        self._run_git(["commit", "-m", "Test commit"], cwd=source_path)
+
+        # Clone with dulwich LocalGitClient
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, target_path)
+
+        client = LocalGitClient()
+        cloned_repo = client.clone(source_path, target_path, mkdir=False)
+        self.addCleanup(cloned_repo.close)
+
+        # Verify the cloned repo is SHA-256
+        self.assertEqual(cloned_repo.object_format, SHA256)
+
+        # Verify config has correct objectformat extension
+        config = cloned_repo.get_config()
+        self.assertEqual(b"sha256", config.get((b"extensions",), b"objectformat"))
+
+        # Verify git also sees it as SHA-256
+        output = self._run_git(["rev-parse", "--show-object-format"], cwd=target_path)
+        self.assertEqual(output.strip(), b"sha256")
+
+        # Verify objects were cloned correctly
+        source_head = self._run_git(["rev-parse", "refs/heads/main"], cwd=source_path)
+        cloned_head = cloned_repo.refs[b"refs/heads/main"]
+        self.assertEqual(source_head.strip(), cloned_head)

+ 285 - 0
tests/compat/test_sha256_packs.py

@@ -0,0 +1,285 @@
+# test_sha256_packs.py -- Compatibility tests for SHA256 pack files
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 pack files with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.object_format import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.pack import load_pack_index_file
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
+
+
+class GitSHA256PackCompatibilityTests(CompatTestCase):
+    """Test SHA256 pack file compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Invoke git with ``args`` (optionally from ``cwd``) via run_git_or_fail.
+
+        Returns whatever run_git_or_fail returns.
+        """
+        result = run_git_or_fail(args, cwd=cwd)
+        return result
+
+    def test_git_pack_readable_by_dulwich(self):
+        """Test that git-created SHA256 pack files are readable by dulwich.
+
+        git creates, commits to and repacks a SHA-256 repository; dulwich then
+        loads each pack index directly and resolves the commit, its tree and
+        the blobs through the Repo interface.
+        """
+        # Create SHA256 repo with git. The initial branch is pinned so that the
+        # refs/heads/master lookup below does not depend on init.defaultBranch.
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        self._run_git(
+            ["init", "--object-format=sha256", "--initial-branch=master", repo_path]
+        )
+
+        # Create multiple files to ensure pack creation
+        for i in range(20):
+            test_file = os.path.join(repo_path, f"file{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Content for file {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
+
+        # Force pack creation
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Open with dulwich. Cleanups run last-in-first-out, so the repo is
+        # closed before the directory is removed, even if an assertion fails.
+        repo = Repo(repo_path)
+        self.addCleanup(repo.close)
+        self.assertEqual(repo.object_format, SHA256)
+
+        # Find pack files
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0, "No pack files created")
+
+        # Read pack with dulwich
+        for pack_file in pack_files:
+            pack_path = os.path.join(pack_dir, pack_file)
+            # Swap the ".pack" suffix for ".idx" to get the matching index.
+            idx_path = pack_path[:-5] + ".idx"
+
+            # Load pack index with SHA256 algorithm
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(idx_path, f, SHA256)
+
+            # Verify it's detected as SHA256
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate objects
+            obj_count = 0
+            for sha, _offset, _crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)  # SHA256
+                obj_count += 1
+
+            self.assertGreater(obj_count, 20)  # At least our files + trees + commit
+
+        # Verify we can read all objects through the repo interface
+        head_ref = repo.refs[b"refs/heads/master"]
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+
+        # Read the tree
+        tree = repo[commit.tree]
+        self.assertIsInstance(tree, Tree)
+
+        # Verify all files are there
+        file_count = 0
+        for name, _mode, sha in tree.items():
+            if name.startswith(b"file") and name.endswith(b".txt"):
+                file_count += 1
+                # Read the blob
+                blob = repo[sha]
+                self.assertIsInstance(blob, Blob)
+
+        self.assertEqual(file_count, 20)
+
+    def test_dulwich_objects_readable_by_git(self):
+        """Test that dulwich-created SHA256 objects are readable by git.
+
+        dulwich writes ten blobs, a tree and a commit into a SHA-256 repo and
+        points refs/heads/master at the commit; git must then resolve HEAD,
+        list the tree and check the files out with their original content.
+        """
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+        try:
+            # Create objects
+            blobs = []
+            for i in range(10):
+                blob = Blob.from_string(f"Dulwich blob content {i}".encode())
+                repo.object_store.add_object(blob)
+                blobs.append(blob)
+
+            # Create a tree with all blobs
+            tree = Tree()
+            for i, blob in enumerate(blobs):
+                tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+            repo.object_store.add_object(tree)
+
+            # Create a commit
+            commit = Commit()
+            commit.tree = tree.get_id(SHA256)
+            commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
+            commit.commit_time = commit.author_time = 1234567890
+            commit.commit_timezone = commit.author_timezone = 0
+            commit.message = b"Test commit with blobs"
+            repo.object_store.add_object(commit)
+
+            # Update HEAD
+            repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        finally:
+            # Close the repo even if writing an object failed, so the handle
+            # is not leaked into the cleanup that removes the directory.
+            repo.close()
+
+        # Verify git can read all objects
+        output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        self.assertEqual(len(output.strip()), 64)  # SHA256
+
+        # List tree contents
+        tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        # Count lines instead of occurrences of "blob" since "blob" appears twice per line
+        lines = tree_output.strip().split(b"\n")
+        self.assertEqual(len(lines), 10)
+
+        # Verify git can check out the content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
+
+        # Verify files exist with correct content
+        for i in range(10):
+            file_path = os.path.join(repo_path, f"blob{i}.txt")
+            self.assertTrue(os.path.exists(file_path))
+            with open(file_path, "rb") as f:
+                content = f.read()
+                self.assertEqual(content, f"Dulwich blob content {i}".encode())
+
+    def test_large_pack_interop(self):
+        """Test large pack file interoperability.
+
+        dulwich writes ten large, near-identical blobs into a SHA-256 repo, git
+        repacks them with ``gc --aggressive``, and dulwich then reads every
+        blob back and compares it with what was written.
+        """
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+
+        # Large, similar payloads intended to give git something to deltify.
+        large_content = b"A" * 10000
+        contents = [large_content + f" variation {i}".encode() for i in range(10)]
+
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+        try:
+            blobs = []
+            for content in contents:
+                blob = Blob.from_string(content)
+                repo.object_store.add_object(blob)
+                blobs.append(blob)
+
+            # Create tree
+            tree = Tree()
+            for i, blob in enumerate(blobs):
+                tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+            repo.object_store.add_object(tree)
+
+            # Create commit
+            commit = Commit()
+            commit.tree = tree.get_id(SHA256)
+            commit.author = commit.committer = b"Test <test@example.com>"
+            commit.commit_time = commit.author_time = 1234567890
+            commit.commit_timezone = commit.author_timezone = 0
+            commit.message = b"Large files for delta compression test"
+            repo.object_store.add_object(commit)
+
+            repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        finally:
+            # Close the writer handle even if object creation failed.
+            repo.close()
+
+        # Run git gc to create packs with delta compression
+        self._run_git(["gc", "--aggressive"], cwd=repo_path)
+
+        # Verify git created a pack
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0)
+
+        # Re-open with dulwich and verify we can read everything. Cleanups run
+        # last-in-first-out, so this close happens before the directory removal.
+        repo = Repo(repo_path)
+        self.addCleanup(repo.close)
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+        tree = repo[commit.tree]
+
+        # Read all blobs and compare with the payloads written above
+        for i, expected in enumerate(contents):
+            name = f"large{i}.txt".encode()
+            _mode, sha = tree[name]
+            blob = repo[sha]
+            self.assertEqual(blob.data, expected)
+
+    def test_mixed_loose_packed_objects(self):
+        """Test repositories with both loose and packed objects.
+
+        git makes one commit and runs ``gc``, then makes a second commit that
+        is not repacked; dulwich must be able to walk both commits and read
+        each commit's tree.
+        """
+        # Create repo with git. The initial branch is pinned so that the
+        # refs/heads/master lookup below does not depend on init.defaultBranch.
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, repo_path)
+        self._run_git(
+            ["init", "--object-format=sha256", "--initial-branch=master", repo_path]
+        )
+
+        # Create initial objects that will be packed
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"packed{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will be packed {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Create more objects that will remain loose
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"loose{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will stay loose {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
+
+        # Open with dulwich. Cleanups run last-in-first-out, so the repo is
+        # closed before the directory is removed, even if an assertion fails.
+        repo = Repo(repo_path)
+        self.addCleanup(repo.close)
+
+        # Make sure the gc above actually left at least one pack file behind
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
+        self.assertGreater(pack_count, 0)
+
+        # Verify we can read all objects
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+
+        # Walk the first-parent history down to and including the root commit,
+        # so both the newer commit and the gc'ed one have their trees read.
+        commit_count = 0
+        while True:
+            commit_count += 1
+            tree = repo[commit.tree]
+            # Verify we can read the tree
+            self.assertGreater(len(tree), 0)
+
+            if not commit.parents:
+                break
+            commit = repo[commit.parents[0]]
+
+        self.assertEqual(commit_count, 2)  # We made 2 commits total
+
+
+if __name__ == "__main__":
+    # Allow running this test module directly.
+    from unittest import main as run_tests
+
+    run_tests()

+ 34 - 10
tests/compat/test_web.py

@@ -85,8 +85,16 @@ class SmartWebTestCase(WebTests, CompatTestCase):
         return {b"git-receive-pack": NoSideBand64kReceivePackHandler}
 
     def _check_app(self, app) -> None:
+        from dulwich.protocol import Protocol
+
         receive_pack_handler_cls = app.handlers[b"git-receive-pack"]
-        caps = receive_pack_handler_cls.capabilities()
+        # Create a handler instance to check capabilities
+        handler = receive_pack_handler_cls(
+            app.backend,
+            [b"/"],
+            Protocol(lambda x: b"", lambda x: None),
+        )
+        caps = handler.capabilities()
         self.assertNotIn(b"side-band-64k", caps)
 
     def _make_app(self, backend):
@@ -101,16 +109,16 @@ class SmartWebTestCase(WebTests, CompatTestCase):
 
 def patch_capabilities(handler, caps_removed):
     # Patch a handler's capabilities by specifying a list of them to be
-    # removed, and return the original classmethod for restoration.
+    # removed, and return the original method for restoration.
     original_capabilities = handler.capabilities
-    filtered_capabilities = [
-        i for i in original_capabilities() if i not in caps_removed
-    ]
 
-    def capabilities(cls):
-        return filtered_capabilities
+    def capabilities(self):
+        # Call original to get base capabilities (including object-format)
+        base_caps = original_capabilities(self)
+        # Filter out the capabilities we want to remove
+        return [i for i in base_caps if i not in caps_removed]
 
-    handler.capabilities = classmethod(capabilities)
+    handler.capabilities = capabilities
     return original_capabilities
 
 
@@ -135,8 +143,16 @@ class SmartWebSideBand64kTestCase(SmartWebTestCase):
         return None  # default handlers include side-band-64k
 
     def _check_app(self, app) -> None:
+        from dulwich.protocol import Protocol
+
         receive_pack_handler_cls = app.handlers[b"git-receive-pack"]
-        caps = receive_pack_handler_cls.capabilities()
+        # Create a handler instance to check capabilities
+        handler = receive_pack_handler_cls(
+            app.backend,
+            [b"/"],
+            Protocol(lambda x: b"", lambda x: None),
+        )
+        caps = handler.capabilities()
         self.assertIn(b"side-band-64k", caps)
         self.assertNotIn(b"no-done", caps)
 
@@ -153,8 +169,16 @@ class SmartWebSideBand64kNoDoneTestCase(SmartWebTestCase):
         return None  # default handlers include side-band-64k
 
     def _check_app(self, app) -> None:
+        from dulwich.protocol import Protocol
+
         receive_pack_handler_cls = app.handlers[b"git-receive-pack"]
-        caps = receive_pack_handler_cls.capabilities()
+        # Create a handler instance to check capabilities
+        handler = receive_pack_handler_cls(
+            app.backend,
+            [b"/"],
+            Protocol(lambda x: b"", lambda x: None),
+        )
+        caps = handler.capabilities()
         self.assertIn(b"side-band-64k", caps)
         self.assertIn(b"no-done", caps)
 

+ 2 - 2
tests/porcelain/__init__.py

@@ -6938,9 +6938,9 @@ class ReceivePackTests(PorcelainTestCase):
         outlines = outf.getvalue().splitlines()
         self.assertEqual(
             [
-                b"0091319b56ce3aee2d489f759736a79cc552c9bb86d9 HEAD\x00 report-status "
+                b"00a4319b56ce3aee2d489f759736a79cc552c9bb86d9 HEAD\x00 report-status "
                 b"delete-refs quiet ofs-delta side-band-64k "
-                b"no-done symref=HEAD:refs/heads/master",
+                b"no-done object-format=sha1 symref=HEAD:refs/heads/master",
                 b"003f319b56ce3aee2d489f759736a79cc552c9bb86d9 refs/heads/master",
                 b"0000",
             ],

+ 2 - 1
tests/porcelain/test_merge.py

@@ -27,6 +27,7 @@ import tempfile
 import unittest
 
 from dulwich import porcelain
+from dulwich.objects import ZERO_SHA
 from dulwich.repo import Repo
 
 from .. import DependencyMissing, TestCase
@@ -682,7 +683,7 @@ class PorcelainMergeTreeTests(TestCase):
                 tmpdir,
                 None,
                 commit_id,
-                "0" * 40,  # Invalid SHA
+                ZERO_SHA,
             )
 
 

+ 31 - 28
tests/test_bundle.py

@@ -26,6 +26,7 @@ import tempfile
 from io import BytesIO
 
 from dulwich.bundle import Bundle, create_bundle_from_repo, read_bundle, write_bundle
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.objects import Blob, Commit, Tree
 from dulwich.pack import PackData, write_pack_objects
 from dulwich.repo import MemoryRepo
@@ -49,9 +50,9 @@ class BundleTests(TestCase):
 
         # Create a simple pack data
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b.seek(0)
-        bundle.pack_data = PackData.from_file(b)
+        bundle.pack_data = PackData.from_file(b, object_format=DEFAULT_OBJECT_FORMAT)
 
         # Check the repr output
         rep = repr(bundle)
@@ -70,9 +71,9 @@ class BundleTests(TestCase):
         bundle1.references = {b"refs/heads/master": b"ab" * 20}
 
         b1 = BytesIO()
-        write_pack_objects(b1.write, [])
+        write_pack_objects(b1.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b1.seek(0)
-        bundle1.pack_data = PackData.from_file(b1)
+        bundle1.pack_data = PackData.from_file(b1, object_format=DEFAULT_OBJECT_FORMAT)
 
         bundle2 = Bundle()
         bundle2.version = 3
@@ -81,9 +82,9 @@ class BundleTests(TestCase):
         bundle2.references = {b"refs/heads/master": b"ab" * 20}
 
         b2 = BytesIO()
-        write_pack_objects(b2.write, [])
+        write_pack_objects(b2.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b2.seek(0)
-        bundle2.pack_data = PackData.from_file(b2)
+        bundle2.pack_data = PackData.from_file(b2, object_format=DEFAULT_OBJECT_FORMAT)
 
         # Test equality
         self.assertEqual(bundle1, bundle2)
@@ -95,9 +96,9 @@ class BundleTests(TestCase):
         bundle3.prerequisites = [(b"cc" * 20, "comment")]
         bundle3.references = {b"refs/heads/master": b"ab" * 20}
         b3 = BytesIO()
-        write_pack_objects(b3.write, [])
+        write_pack_objects(b3.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b3.seek(0)
-        bundle3.pack_data = PackData.from_file(b3)
+        bundle3.pack_data = PackData.from_file(b3, object_format=DEFAULT_OBJECT_FORMAT)
         self.assertNotEqual(bundle1, bundle3)
 
         bundle4 = Bundle()
@@ -106,9 +107,9 @@ class BundleTests(TestCase):
         bundle4.prerequisites = [(b"cc" * 20, "comment")]
         bundle4.references = {b"refs/heads/master": b"ab" * 20}
         b4 = BytesIO()
-        write_pack_objects(b4.write, [])
+        write_pack_objects(b4.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b4.seek(0)
-        bundle4.pack_data = PackData.from_file(b4)
+        bundle4.pack_data = PackData.from_file(b4, object_format=DEFAULT_OBJECT_FORMAT)
         self.assertNotEqual(bundle1, bundle4)
 
         bundle5 = Bundle()
@@ -117,9 +118,9 @@ class BundleTests(TestCase):
         bundle5.prerequisites = [(b"dd" * 20, "different")]  # Different prerequisites
         bundle5.references = {b"refs/heads/master": b"ab" * 20}
         b5 = BytesIO()
-        write_pack_objects(b5.write, [])
+        write_pack_objects(b5.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b5.seek(0)
-        bundle5.pack_data = PackData.from_file(b5)
+        bundle5.pack_data = PackData.from_file(b5, object_format=DEFAULT_OBJECT_FORMAT)
         self.assertNotEqual(bundle1, bundle5)
 
         bundle6 = Bundle()
@@ -130,9 +131,9 @@ class BundleTests(TestCase):
             b"refs/heads/different": b"ab" * 20
         }  # Different references
         b6 = BytesIO()
-        write_pack_objects(b6.write, [])
+        write_pack_objects(b6.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b6.seek(0)
-        bundle6.pack_data = PackData.from_file(b6)
+        bundle6.pack_data = PackData.from_file(b6, object_format=DEFAULT_OBJECT_FORMAT)
         self.assertNotEqual(bundle1, bundle6)
 
         # Test inequality with different type
@@ -147,7 +148,7 @@ class BundleTests(TestCase):
         f.write(b"\n")
         # Add pack data
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         f.write(b.getvalue())
         f.seek(0)
 
@@ -168,7 +169,7 @@ class BundleTests(TestCase):
         f.write(b"\n")
         # Add pack data
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         f.write(b.getvalue())
         f.seek(0)
 
@@ -199,9 +200,9 @@ class BundleTests(TestCase):
 
         # Create a simple pack data
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b.seek(0)
-        bundle.pack_data = PackData.from_file(b)
+        bundle.pack_data = PackData.from_file(b, object_format=DEFAULT_OBJECT_FORMAT)
 
         # Write the bundle
         f = BytesIO()
@@ -225,9 +226,9 @@ class BundleTests(TestCase):
 
         # Create a simple pack data
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b.seek(0)
-        bundle.pack_data = PackData.from_file(b)
+        bundle.pack_data = PackData.from_file(b, object_format=DEFAULT_OBJECT_FORMAT)
 
         # Write the bundle
         f = BytesIO()
@@ -253,9 +254,9 @@ class BundleTests(TestCase):
         bundle1.references = {b"refs/heads/master": b"ab" * 20}
 
         b1 = BytesIO()
-        write_pack_objects(b1.write, [])
+        write_pack_objects(b1.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b1.seek(0)
-        bundle1.pack_data = PackData.from_file(b1)
+        bundle1.pack_data = PackData.from_file(b1, object_format=DEFAULT_OBJECT_FORMAT)
 
         f1 = BytesIO()
         write_bundle(f1, bundle1)
@@ -271,9 +272,9 @@ class BundleTests(TestCase):
         bundle2.references = {b"refs/heads/master": b"ab" * 20}
 
         b2 = BytesIO()
-        write_pack_objects(b2.write, [])
+        write_pack_objects(b2.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b2.seek(0)
-        bundle2.pack_data = PackData.from_file(b2)
+        bundle2.pack_data = PackData.from_file(b2, object_format=DEFAULT_OBJECT_FORMAT)
 
         f2 = BytesIO()
         write_bundle(f2, bundle2)
@@ -290,9 +291,9 @@ class BundleTests(TestCase):
         bundle.references = {}
 
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b.seek(0)
-        bundle.pack_data = PackData.from_file(b)
+        bundle.pack_data = PackData.from_file(b, object_format=DEFAULT_OBJECT_FORMAT)
 
         f = BytesIO()
         with self.assertRaises(AssertionError):
@@ -305,9 +306,11 @@ class BundleTests(TestCase):
         origbundle.references = {b"refs/heads/master": b"ab" * 20}
         origbundle.prerequisites = [(b"cc" * 20, b"comment")]
         b = BytesIO()
-        write_pack_objects(b.write, [])
+        write_pack_objects(b.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         b.seek(0)
-        origbundle.pack_data = PackData.from_file(b)
+        origbundle.pack_data = PackData.from_file(
+            b, object_format=DEFAULT_OBJECT_FORMAT
+        )
         with tempfile.TemporaryDirectory() as td:
             with open(os.path.join(td, "foo"), "wb") as f:
                 write_bundle(f, origbundle)

+ 44 - 0
tests/test_cli.py

@@ -126,6 +126,50 @@ class InitCommandTest(DulwichCliTestCase):
         self.assertTrue(os.path.exists(os.path.join(bare_repo_path, "HEAD")))
         self.assertFalse(os.path.exists(os.path.join(bare_repo_path, ".git")))
 
+    def test_init_objectformat_sha256(self) -> None:
+        """``init --objectformat=sha256`` records sha256 in the repository config."""
+        target = os.path.join(self.test_dir, "sha256_repo")
+        _result, _stdout, _stderr = self._run_cli(
+            "init", "--objectformat=sha256", target
+        )
+        git_dir = os.path.join(target, ".git")
+        self.assertTrue(os.path.exists(git_dir))
+
+        # Re-open the repository and inspect extensions.objectformat.
+        repo = Repo(target)
+        self.addCleanup(repo.close)
+        recorded = repo.get_config().get((b"extensions",), b"objectformat")
+        self.assertEqual(b"sha256", recorded)
+
+    def test_init_objectformat_sha1(self) -> None:
+        """``init --objectformat=sha1`` must not mark the repository as sha256."""
+        # Create a new directory for init with SHA-1
+        new_repo_path = os.path.join(self.test_dir, "sha1_repo")
+        _result, _stdout, _stderr = self._run_cli(
+            "init", "--objectformat=sha1", new_repo_path
+        )
+        self.assertTrue(os.path.exists(os.path.join(new_repo_path, ".git")))
+        # SHA-1 is the default, so objectformat should not be set
+        repo = Repo(new_repo_path)
+        self.addCleanup(repo.close)
+        config = repo.get_config()
+        # The extensions section, or just the objectformat key inside it, may
+        # be absent for SHA-1; a missing value surfaces as KeyError and is as
+        # acceptable as any value other than sha256.
+        try:
+            object_format = config.get((b"extensions",), b"objectformat")
+        except KeyError:
+            object_format = None
+        self.assertNotEqual(b"sha256", object_format)
+
+    def test_init_bare_objectformat_sha256(self) -> None:
+        """``init --bare --objectformat=sha256`` makes a bare sha256 repository."""
+        target = os.path.join(self.test_dir, "bare_sha256_repo")
+        _result, _stdout, _stderr = self._run_cli(
+            "init", "--bare", "--objectformat=sha256", target
+        )
+
+        # A bare layout has HEAD at the top level and no nested .git directory.
+        head_file = os.path.join(target, "HEAD")
+        nested_git_dir = os.path.join(target, ".git")
+        self.assertTrue(os.path.exists(head_file))
+        self.assertFalse(os.path.exists(nested_git_dir))
+
+        # Re-open the repository and inspect extensions.objectformat.
+        repo = Repo(target)
+        self.addCleanup(repo.close)
+        recorded = repo.get_config().get((b"extensions",), b"objectformat")
+        self.assertEqual(b"sha256", recorded)
+
 
 class HelperFunctionsTest(TestCase):
     """Tests for CLI helper functions."""

+ 137 - 6
tests/test_client.py

@@ -62,7 +62,8 @@ from dulwich.client import (
     parse_rsync_url,
 )
 from dulwich.config import ConfigDict
-from dulwich.objects import Blob, Commit, Tree
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
+from dulwich.objects import ZERO_SHA, Blob, Commit, Tree
 from dulwich.pack import pack_objects_to_data, write_pack_data, write_pack_objects
 from dulwich.protocol import DEFAULT_GIT_PROTOCOL_VERSION_FETCH, TCP_GIT_PORT, Protocol
 from dulwich.repo import MemoryRepo, Repo
@@ -404,7 +405,7 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def update_refs(refs):
-            return {b"refs/heads/master": b"0" * 40}
+            return {b"refs/heads/master": ZERO_SHA}
 
         def generate_pack_data(have, want, *, ofs_delta=False, progress=None):
             return 0, []
@@ -428,7 +429,7 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def update_refs(refs):
-            return {b"refs/heads/master": b"0" * 40}
+            return {b"refs/heads/master": ZERO_SHA}
 
         def generate_pack_data(have, want, *, ofs_delta=False, progress=None):
             return 0, []
@@ -461,7 +462,7 @@ class GitClientTests(TestCase):
             return 0, []
 
         f = BytesIO()
-        write_pack_objects(f.write, [])
+        write_pack_objects(f.write, [], object_format=DEFAULT_OBJECT_FORMAT)
         self.client.send_pack("/", update_refs, generate_pack_data)
         self.assertEqual(
             self.rout.getvalue(),
@@ -507,7 +508,11 @@ class GitClientTests(TestCase):
 
         f = BytesIO()
         count, records = generate_pack_data(None, None)
-        write_pack_data(f.write, records, num_records=count)
+        from dulwich.object_format import DEFAULT_OBJECT_FORMAT
+
+        write_pack_data(
+            f.write, records, num_records=count, object_format=DEFAULT_OBJECT_FORMAT
+        )
         self.client.send_pack(b"/", update_refs, generate_pack_data)
         self.assertEqual(
             self.rout.getvalue(),
@@ -532,7 +537,7 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def update_refs(refs):
-            return {b"refs/heads/master": b"0" * 40}
+            return {b"refs/heads/master": ZERO_SHA}
 
         def generate_pack_data(have, want, *, ofs_delta=False, progress=None):
             return 0, []
@@ -1110,6 +1115,54 @@ class LocalGitClientTests(TestCase):
         expected[b"refs/remotes/origin/master"] = expected[b"refs/heads/master"]
         self.assertEqual(expected, result_repo.get_refs())
 
+    def test_clone_sha256_local(self) -> None:
+        """A local clone of a SHA-256 repository is itself SHA-256."""
+        client = LocalGitClient()
+
+        # Origin: a freshly initialised SHA-256 repository.
+        origin_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, origin_dir)
+        origin = Repo.init(origin_dir, object_format="sha256")
+        origin_format = origin.object_format.name
+        self.assertEqual("sha256", origin_format)
+        origin.close()
+
+        # Clone into an already existing, empty directory.
+        clone_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_dir)
+        clone = client.clone(origin_dir, clone_dir, mkdir=False)
+        self.addCleanup(clone.close)
+
+        # The clone reports SHA-256 ...
+        clone_format = clone.object_format.name
+        self.assertEqual("sha256", clone_format)
+
+        # ... and its config carries the matching extensions.objectformat value.
+        recorded = clone.get_config().get((b"extensions",), b"objectformat")
+        self.assertEqual(b"sha256", recorded)
+
+    def test_clone_sha1_local(self) -> None:
+        """A local clone of a SHA-1 repository is itself SHA-1."""
+        client = LocalGitClient()
+
+        # Origin: a freshly initialised SHA-1 repository.
+        origin_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, origin_dir)
+        origin = Repo.init(origin_dir, object_format="sha1")
+        origin_format = origin.object_format.name
+        self.assertEqual("sha1", origin_format)
+        origin.close()
+
+        # Clone into an already existing, empty directory.
+        clone_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_dir)
+        clone = client.clone(origin_dir, clone_dir, mkdir=False)
+        self.addCleanup(clone.close)
+
+        # The clone reports SHA-1 as well.
+        clone_format = clone.object_format.name
+        self.assertEqual("sha1", clone_format)
+
     def test_fetch_empty(self) -> None:
         c = LocalGitClient()
         s = open_repo("a.git")
@@ -1193,6 +1246,84 @@ class LocalGitClientTests(TestCase):
         # Check that symrefs are detected correctly
         self.assertIn(b"HEAD", result.symrefs)
 
+    def test_fetch_object_format_mismatch_sha256_to_sha1(self) -> None:
+        """Fetching SHA-256 history into a non-empty SHA-1 repository is refused."""
+        from dulwich.objects import Blob
+
+        client = LocalGitClient()
+
+        # Source side: SHA-256.
+        sha256_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, sha256_path)
+        sha256_repo = Repo.init(sha256_path, object_format="sha256")
+        self.addCleanup(sha256_repo.close)
+
+        # Target side: SHA-1.
+        sha1_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, sha1_path)
+        sha1_repo = Repo.init(sha1_path, object_format="sha1")
+        self.addCleanup(sha1_repo.close)
+
+        # Store one object in the target so it is no longer empty and its
+        # object format cannot simply be switched.
+        sha1_repo.object_store.add_object(Blob.from_string(b"test content"))
+
+        # The fetch is expected to fail with an AssertionError.
+        with self.assertRaises(AssertionError) as caught:
+            client.fetch(sha256_path, sha1_repo)
+
+        message = str(caught.exception)
+        self.assertIn("Cannot change object format", message)
+        self.assertIn("already contains objects", message)
+
+    def test_fetch_object_format_mismatch_sha1_to_sha256(self) -> None:
+        """Fetching SHA-1 history into a non-empty SHA-256 repository is refused."""
+        from dulwich.objects import Blob
+
+        client = LocalGitClient()
+
+        # Source side: SHA-1.
+        sha1_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, sha1_path)
+        sha1_repo = Repo.init(sha1_path, object_format="sha1")
+        self.addCleanup(sha1_repo.close)
+
+        # Target side: SHA-256.
+        sha256_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, sha256_path)
+        sha256_repo = Repo.init(sha256_path, object_format="sha256")
+        self.addCleanup(sha256_repo.close)
+
+        # Store one object in the target so it is no longer empty and its
+        # object format cannot simply be switched.
+        sha256_repo.object_store.add_object(Blob.from_string(b"test content"))
+
+        # The fetch is expected to fail with an AssertionError.
+        with self.assertRaises(AssertionError) as caught:
+            client.fetch(sha1_path, sha256_repo)
+
+        message = str(caught.exception)
+        self.assertIn("Cannot change object format", message)
+        self.assertIn("already contains objects", message)
+
+    def test_fetch_object_format_same(self) -> None:
+        """Fetching between two SHA-256 repositories completes and returns a result."""
+        client = LocalGitClient()
+
+        # Source side: SHA-256.
+        sha256_src = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, sha256_src)
+        src_repo = Repo.init(sha256_src, object_format="sha256")
+        self.addCleanup(src_repo.close)
+
+        # Target side: SHA-256 as well.
+        sha256_dst = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, sha256_dst)
+        dst_repo = Repo.init(sha256_dst, object_format="sha256")
+        self.addCleanup(dst_repo.close)
+
+        # With matching formats the fetch must not raise.
+        fetch_result = client.fetch(sha256_src, dst_repo)
+        self.assertIsNotNone(fetch_result)
+
     def send_and_verify(self, branch, local, target) -> None:
         """Send branch from local to remote repository and verify it worked."""
         client = LocalGitClient()

+ 1 - 1
tests/test_cloud_gcs.py

@@ -140,7 +140,7 @@ class GcsObjectStoreTests(unittest.TestCase):
             args = mock_pack_cls.from_lazy_objects.call_args[0]
             self.assertEqual(2, len(args))
 
-            # They should be callables
+            # Both should be callables (lazy loaders for data and index)
             self.assertTrue(callable(args[0]))
             self.assertTrue(callable(args[1]))
 

+ 50 - 8
tests/test_commit_graph.py

@@ -30,6 +30,7 @@ from dulwich.commit_graph import (
     get_reachable_commits,
     read_commit_graph,
 )
+from dulwich.object_format import SHA1
 
 
 class CommitGraphEntryTests(unittest.TestCase):
@@ -76,13 +77,13 @@ class CommitGraphTests(unittest.TestCase):
     """Tests for CommitGraph."""
 
     def test_init(self) -> None:
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         self.assertEqual(graph.hash_version, HASH_VERSION_SHA1)
         self.assertEqual(len(graph.entries), 0)
         self.assertEqual(len(graph.chunks), 0)
 
     def test_len(self) -> None:
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         self.assertEqual(len(graph), 0)
 
         # Add a dummy entry
@@ -91,7 +92,7 @@ class CommitGraphTests(unittest.TestCase):
         self.assertEqual(len(graph), 1)
 
     def test_iter(self) -> None:
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         entry1 = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
         entry2 = CommitGraphEntry(b"c" * 40, b"d" * 40, [], 2, 2000)
         graph.entries.extend([entry1, entry2])
@@ -102,17 +103,17 @@ class CommitGraphTests(unittest.TestCase):
         self.assertEqual(entries[1], entry2)
 
     def test_get_entry_by_oid_missing(self) -> None:
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         result = graph.get_entry_by_oid(b"f" * 40)
         self.assertIsNone(result)
 
     def test_get_generation_number_missing(self) -> None:
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         result = graph.get_generation_number(b"f" * 40)
         self.assertIsNone(result)
 
     def test_get_parents_missing(self) -> None:
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         result = graph.get_parents(b"f" * 40)
         self.assertIsNone(result)
 
@@ -262,7 +263,7 @@ class CommitGraphTests(unittest.TestCase):
 
     def test_write_empty_graph_raises(self) -> None:
         """Test that writing empty graph raises ValueError."""
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         f = io.BytesIO()
 
         with self.assertRaises(ValueError):
@@ -271,7 +272,7 @@ class CommitGraphTests(unittest.TestCase):
     def test_write_and_read_round_trip(self) -> None:
         """Test writing and reading a commit graph."""
         # Create a simple commit graph
-        graph = CommitGraph()
+        graph = CommitGraph(object_format=SHA1)
         entry = CommitGraphEntry(
             commit_id=b"aa" + b"00" * 19,
             tree_id=b"bb" + b"00" * 19,
@@ -434,6 +435,47 @@ class CommitGraphGenerationTests(unittest.TestCase):
         self.assertEqual(entry.generation, 1)
         self.assertEqual(entry.commit_time, 1234567890)
 
+    def test_generate_commit_graph_oid_index_uses_binary_keys(self) -> None:
+        """Test that generated commit graph _oid_to_index uses binary RawObjectID keys."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Commit, Tree, hex_to_sha
+
+        object_store = MemoryObjectStore()
+
+        # Create a tree and commit
+        tree = Tree()
+        object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = b"Test Author <test@example.com>"
+        commit.committer = b"Test Author <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit"
+        object_store.add_object(commit)
+
+        # Generate graph
+        graph = generate_commit_graph(object_store, [commit.id])
+
+        # Verify _oid_to_index uses binary keys (RawObjectID)
+        # commit.id is hex (ObjectID), so we need to convert to binary for lookup
+        binary_commit_id = hex_to_sha(commit.id)
+        self.assertIn(binary_commit_id, graph._oid_to_index)
+        self.assertEqual(graph._oid_to_index[binary_commit_id], 0)
+
+        # Verify lookup with hex ObjectID works via get_entry_by_oid
+        entry = graph.get_entry_by_oid(commit.id)
+        self.assertIsNotNone(entry)
+        assert entry is not None
+        self.assertEqual(entry.commit_id, commit.id)
+
+        # Verify lookup with binary RawObjectID also works
+        entry_binary = graph.get_entry_by_oid(binary_commit_id)
+        self.assertIsNotNone(entry_binary)
+        assert entry_binary is not None
+        self.assertEqual(entry_binary.commit_id, commit.id)
+
     def test_get_reachable_commits(self) -> None:
         """Test getting reachable commits."""
         from dulwich.object_store import MemoryObjectStore

+ 6 - 0
tests/test_config.py

@@ -616,6 +616,7 @@ who\"
         handler = logging.StreamHandler(log_capture)
         handler.setLevel(logging.DEBUG)
         logger = logging.getLogger("dulwich.config")
+        old_level = logger.level
         logger.addHandler(handler)
         logger.setLevel(logging.DEBUG)
 
@@ -636,6 +637,7 @@ who\"
                 self.assertIn("nonexistent.config", log_output)
         finally:
             logger.removeHandler(handler)
+            logger.setLevel(old_level)
 
     def test_invalid_include_path_logging(self) -> None:
         """Test that invalid include paths are logged but don't cause failure."""
@@ -647,6 +649,7 @@ who\"
         handler = logging.StreamHandler(log_capture)
         handler.setLevel(logging.DEBUG)
         logger = logging.getLogger("dulwich.config")
+        old_level = logger.level
         logger.addHandler(handler)
         logger.setLevel(logging.DEBUG)
 
@@ -667,6 +670,7 @@ who\"
                 self.assertIn("Invalid include path", log_output)
         finally:
             logger.removeHandler(handler)
+            logger.setLevel(old_level)
 
     def test_unknown_includeif_condition_logging(self) -> None:
         """Test that unknown includeIf conditions are logged."""
@@ -678,6 +682,7 @@ who\"
         handler = logging.StreamHandler(log_capture)
         handler.setLevel(logging.DEBUG)
         logger = logging.getLogger("dulwich.config")
+        old_level = logger.level
         logger.addHandler(handler)
         logger.setLevel(logging.DEBUG)
 
@@ -700,6 +705,7 @@ who\"
                 self.assertIn("futurefeature:value", log_output)
         finally:
             logger.removeHandler(handler)
+            logger.setLevel(old_level)
 
     def test_custom_file_opener_with_include_depth(self) -> None:
         """Test that custom file opener is passed through include chain."""

+ 2 - 2
tests/test_dumb.py

@@ -28,7 +28,7 @@ from unittest.mock import Mock
 
 from dulwich.dumb import DumbHTTPObjectStore, DumbRemoteHTTPRepo
 from dulwich.errors import NotGitRepository
-from dulwich.objects import Blob, Commit, ShaFile, Tag, Tree, sha_to_hex
+from dulwich.objects import ZERO_SHA, Blob, Commit, ShaFile, Tag, Tree, sha_to_hex
 
 
 class MockResponse:
@@ -171,7 +171,7 @@ P pack-abcdef1234567890abcdef1234567890abcdef12.pack
         self._add_response(path, self._make_object(blob))
 
         self.assertTrue(self.store.contains_loose(hex_sha))
-        self.assertFalse(self.store.contains_loose(b"0" * 40))
+        self.assertFalse(self.store.contains_loose(ZERO_SHA))
 
     def test_add_object_not_implemented(self) -> None:
         blob = Blob()

+ 3 - 3
tests/test_filter_branch.py

@@ -25,7 +25,7 @@ import unittest
 
 from dulwich.filter_branch import CommitFilter, filter_refs
 from dulwich.object_store import MemoryObjectStore
-from dulwich.objects import Commit, Tree
+from dulwich.objects import ZERO_SHA, Commit, Tree
 from dulwich.refs import DictRefsContainer
 
 
@@ -179,7 +179,7 @@ class FilterRefsTests(unittest.TestCase):
     def test_filter_refs_already_filtered(self):
         """Test error when refs already filtered."""
         # Set up an "already filtered" state
-        self.refs[b"refs/original/refs/heads/master"] = b"0" * 40
+        self.refs[b"refs/original/refs/heads/master"] = ZERO_SHA
 
         filter = CommitFilter(self.store)
         with self.assertRaises(ValueError) as cm:
@@ -194,7 +194,7 @@ class FilterRefsTests(unittest.TestCase):
     def test_filter_refs_force(self):
         """Test force filtering."""
         # Set up an "already filtered" state
-        self.refs[b"refs/original/refs/heads/master"] = b"0" * 40
+        self.refs[b"refs/original/refs/heads/master"] = ZERO_SHA
 
         filter = CommitFilter(self.store)
         # Should not raise with force=True

+ 3 - 3
tests/test_filters.py

@@ -22,6 +22,8 @@
 """Tests for filters."""
 
 import os
+import shutil
+import sys
 import tempfile
 import threading
 from collections.abc import Iterator
@@ -711,11 +713,9 @@ class FilterContextTests(TestCase):
 
     def test_filter_context_with_real_process_filter(self):
         """Test FilterContext with real ProcessFilterDriver instances."""
-        import sys
-
         # Use existing test filter from ProcessFilterDriverTests
         test_dir = tempfile.mkdtemp()
-        self.addCleanup(lambda: __import__("shutil").rmtree(test_dir))
+        self.addCleanup(shutil.rmtree, test_dir)
 
         # Create a simple test filter that just passes data through
         filter_script = _PASSTHROUGH_FILTER_SCRIPT

+ 2 - 2
tests/test_gc.py

@@ -17,7 +17,7 @@ from dulwich.gc import (
     prune_unreachable_objects,
     should_run_gc,
 )
-from dulwich.objects import Blob, Commit, Tag, Tree
+from dulwich.objects import ZERO_SHA, Blob, Commit, Tag, Tree
 from dulwich.repo import MemoryRepo, Repo
 
 
@@ -357,7 +357,7 @@ class GCTestCase(TestCase):
         self.repo.refs[b"HEAD"] = commit.id
 
         # Create a broken ref pointing to non-existent object
-        broken_sha = b"0" * 40
+        broken_sha = ZERO_SHA
         self.repo.refs[b"refs/heads/broken"] = broken_sha
 
         # Track progress to see warning

+ 5 - 5
tests/test_index.py

@@ -69,7 +69,7 @@ from dulwich.index import (
     write_index_dict,
 )
 from dulwich.object_store import MemoryObjectStore
-from dulwich.objects import S_IFGITLINK, Blob, Tree, TreeEntry
+from dulwich.objects import S_IFGITLINK, ZERO_SHA, Blob, Tree, TreeEntry
 from dulwich.repo import Repo
 from dulwich.tests.utils import make_commit
 
@@ -1578,7 +1578,7 @@ class TestManyFilesFeature(TestCase):
             uid=1000,
             gid=1000,
             size=5,
-            sha=b"0" * 40,
+            sha=ZERO_SHA,
         )
         index[b"test.txt"] = entry
 
@@ -1607,7 +1607,7 @@ class TestManyFilesFeature(TestCase):
             uid=1000,
             gid=1000,
             size=5,
-            sha=b"0" * 40,
+            sha=ZERO_SHA,
         )
         index[b"test.txt"] = entry
 
@@ -1638,7 +1638,7 @@ class TestManyFilesFeature(TestCase):
                 uid=1000,
                 gid=1000,
                 size=5,
-                sha=b"0" * 40,
+                sha=ZERO_SHA,
                 flags=0,
                 extended_flags=0,
             ),
@@ -1866,7 +1866,7 @@ class TestPathPrefixCompression(TestCase):
                 uid=1000,
                 gid=1000,
                 size=10,
-                sha=b"0" * 40,
+                sha=ZERO_SHA,
             )
             index_v2[path] = entry
         index_v2.write()

+ 12 - 5
tests/test_object_store.py

@@ -31,6 +31,7 @@ from io import BytesIO
 
 from dulwich.errors import NotTreeError
 from dulwich.index import commit_tree
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.object_store import (
     DiskObjectStore,
     MemoryObjectStore,
@@ -75,7 +76,9 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
         f, commit, abort = o.add_pack()
         try:
             b = make_object(Blob, data=b"more yummy data")
-            write_pack_objects(f.write, [(b, None)])
+            write_pack_objects(
+                f.write, [(b, None)], object_format=DEFAULT_OBJECT_FORMAT
+            )
         except BaseException:
             abort()
             raise
@@ -299,7 +302,9 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         f, commit, abort = o.add_pack()
         try:
             b = make_object(Blob, data=b"more yummy data")
-            write_pack_objects(f.write, [(b, None)])
+            write_pack_objects(
+                f.write, [(b, None)], object_format=DEFAULT_OBJECT_FORMAT
+            )
         except BaseException:
             abort()
             raise
@@ -382,7 +387,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
 
         # Load and verify it's version 1
         idx_path = os.path.join(pack_dir, idx_files[0])
-        idx = load_pack_index(idx_path)
+        idx = load_pack_index(idx_path, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(1, idx.version)
 
         # Test version 3
@@ -411,7 +416,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         self.assertEqual(1, len(idx_files2))
 
         idx_path2 = os.path.join(pack_dir2, idx_files2[0])
-        idx2 = load_pack_index(idx_path2)
+        idx2 = load_pack_index(idx_path2, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(3, idx2.version)
 
     def test_prune_orphaned_tempfiles(self) -> None:
@@ -839,7 +844,9 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         with patch("os.fsync") as mock_fsync:
             f, commit, abort = store.add_pack()
             try:
-                write_pack_objects(f.write, [(blob, None)])
+                write_pack_objects(
+                    f.write, [(blob, None)], object_format=DEFAULT_OBJECT_FORMAT
+                )
             except BaseException:
                 abort()
                 raise

+ 7 - 6
tests/test_objects.py

@@ -33,6 +33,7 @@ from itertools import permutations
 from dulwich.errors import ObjectFormatException
 from dulwich.objects import (
     MAX_TIME,
+    ZERO_SHA,
     Blob,
     Commit,
     ShaFile,
@@ -994,7 +995,7 @@ class TreeTests(ShaFileCheckTests):
         o = Tree.from_path(hex_to_filename(dir, tree_sha))
         self.assertEqual(
             [(b"a", 0o100644, a_sha), (b"b", 0o100644, b_sha)],
-            list(parse_tree(o.as_raw_string())),
+            list(parse_tree(o.as_raw_string(), 20)),
         )
         # test a broken tree that has a leading 0 on the file mode
         broken_tree = b"0100644 foo\0" + hex_to_sha(a_sha)
@@ -1002,9 +1003,9 @@ class TreeTests(ShaFileCheckTests):
         def eval_parse_tree(*args, **kwargs):
             return list(parse_tree(*args, **kwargs))
 
-        self.assertEqual([(b"foo", 0o100644, a_sha)], eval_parse_tree(broken_tree))
+        self.assertEqual([(b"foo", 0o100644, a_sha)], eval_parse_tree(broken_tree, 20))
         self.assertRaises(
-            ObjectFormatException, eval_parse_tree, broken_tree, strict=True
+            ObjectFormatException, eval_parse_tree, broken_tree, 20, strict=True
         )
 
     test_parse_tree = functest_builder(_do_test_parse_tree, _parse_tree_py)
@@ -1665,7 +1666,7 @@ class ShaFileCopyTests(TestCase):
             tagger=b"Tagger <test@example.com>",
             tag_time=12345,
             tag_timezone=0,
-            object=(Commit, b"0" * 40),
+            object=(Commit, ZERO_SHA),
         )
         self.assert_copy(tag)
 
@@ -1734,7 +1735,7 @@ class ShaFileSerializeTests(TestCase):
             tagger=b"Tagger <test@example.com>",
             tag_time=12345,
             tag_timezone=0,
-            object=(Commit, b"0" * 40),
+            object=(Commit, ZERO_SHA),
         )
 
         with self.assert_serialization_on_change(tag):
@@ -1747,7 +1748,7 @@ class ShaFileSerializeTests(TestCase):
                 name=b"tag",
                 message=b"some message",
                 tagger=b"Tagger <test@example.com> 1174773719+0000",
-                object=(Commit, b"0" * 40),
+                object=(Commit, ZERO_SHA),
             )
             tag._deserialize(tag._serialize())
 

+ 106 - 49
tests/test_pack.py

@@ -33,6 +33,7 @@ from typing import NoReturn
 
 from dulwich.errors import ApplyDeltaError, ChecksumMismatch
 from dulwich.file import GitFile
+from dulwich.object_format import DEFAULT_OBJECT_FORMAT
 from dulwich.object_store import MemoryObjectStore
 from dulwich.objects import Blob, Commit, Tree, hex_to_sha, sha_to_hex
 from dulwich.pack import (
@@ -100,17 +101,22 @@ class PackTests(TestCase):
     def get_pack_index(self, sha):
         """Returns a PackIndex from the datadir with the given sha."""
         return load_pack_index(
-            os.path.join(self.datadir, "pack-{}.idx".format(sha.decode("ascii")))
+            os.path.join(self.datadir, "pack-{}.idx".format(sha.decode("ascii"))),
+            DEFAULT_OBJECT_FORMAT,
         )
 
     def get_pack_data(self, sha):
         """Returns a PackData object from the datadir with the given sha."""
         return PackData(
-            os.path.join(self.datadir, "pack-{}.pack".format(sha.decode("ascii")))
+            os.path.join(self.datadir, "pack-{}.pack".format(sha.decode("ascii"))),
+            object_format=DEFAULT_OBJECT_FORMAT,
         )
 
     def get_pack(self, sha):
-        return Pack(os.path.join(self.datadir, "pack-{}".format(sha.decode("ascii"))))
+        return Pack(
+            os.path.join(self.datadir, "pack-{}".format(sha.decode("ascii"))),
+            object_format=DEFAULT_OBJECT_FORMAT,
+        )
 
     def assertSucceeds(self, func, *args, **kwargs) -> None:
         try:
@@ -472,7 +478,7 @@ class TestPackData(PackTests):
             self.datadir, "pack-{}.pack".format(pack1_sha.decode("ascii"))
         )
         with open(path, "rb") as f:
-            PackData.from_file(f, os.path.getsize(path))
+            PackData.from_file(f, DEFAULT_OBJECT_FORMAT, os.path.getsize(path))
 
     def test_pack_len(self) -> None:
         with self.get_pack_data(pack1_sha) as p:
@@ -565,7 +571,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "v1test.idx")
             p.create_index_v1(filename)
-            idx1 = load_pack_index(filename)
+            idx1 = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             idx2 = self.get_pack_index(pack1_sha)
             self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
             self.assertEqual(idx1, idx2)
@@ -574,7 +580,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "v2test.idx")
             p.create_index_v2(filename)
-            idx1 = load_pack_index(filename)
+            idx1 = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             idx2 = self.get_pack_index(pack1_sha)
             self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
             self.assertEqual(idx1, idx2)
@@ -583,7 +589,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "v3test.idx")
             p.create_index_v3(filename)
-            idx1 = load_pack_index(filename)
+            idx1 = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             idx2 = self.get_pack_index(pack1_sha)
             self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
             self.assertEqual(idx1, idx2)
@@ -594,7 +600,7 @@ class TestPackData(PackTests):
         with self.get_pack_data(pack1_sha) as p:
             filename = os.path.join(self.tempdir, "version3test.idx")
             p.create_index(filename, version=3)
-            idx = load_pack_index(filename)
+            idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
             self.assertIsInstance(idx, PackIndex3)
             self.assertEqual(idx.version, 3)
 
@@ -602,23 +608,32 @@ class TestPackData(PackTests):
         f = BytesIO(b"abcd1234wxyz")
         try:
             self.assertEqual(
-                sha1(b"abcd1234wxyz").hexdigest(), compute_file_sha(f).hexdigest()
+                sha1(b"abcd1234wxyz").hexdigest(),
+                compute_file_sha(f, DEFAULT_OBJECT_FORMAT.hash_func).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"abcd1234wxyz").hexdigest(),
-                compute_file_sha(f, buffer_size=5).hexdigest(),
+                compute_file_sha(
+                    f, DEFAULT_OBJECT_FORMAT.hash_func, buffer_size=5
+                ).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"abcd1234").hexdigest(),
-                compute_file_sha(f, end_ofs=-4).hexdigest(),
+                compute_file_sha(
+                    f, DEFAULT_OBJECT_FORMAT.hash_func, end_ofs=-4
+                ).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"1234wxyz").hexdigest(),
-                compute_file_sha(f, start_ofs=4).hexdigest(),
+                compute_file_sha(
+                    f, DEFAULT_OBJECT_FORMAT.hash_func, start_ofs=4
+                ).hexdigest(),
             )
             self.assertEqual(
                 sha1(b"1234").hexdigest(),
-                compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest(),
+                compute_file_sha(
+                    f, DEFAULT_OBJECT_FORMAT.hash_func, start_ofs=4, end_ofs=-4
+                ).hexdigest(),
             )
         finally:
             f.close()
@@ -626,10 +641,28 @@ class TestPackData(PackTests):
     def test_compute_file_sha_short_file(self) -> None:
         f = BytesIO(b"abcd1234wxyz")
         try:
-            self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20)
-            self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20)
             self.assertRaises(
-                AssertionError, compute_file_sha, f, start_ofs=10, end_ofs=-12
+                AssertionError,
+                compute_file_sha,
+                f,
+                DEFAULT_OBJECT_FORMAT.hash_func,
+                end_ofs=-20,  # keyword needed: next positional slot is start_ofs
+            )
+            self.assertRaises(
+                AssertionError,
+                compute_file_sha,
+                f,
+                DEFAULT_OBJECT_FORMAT.hash_func,
+                0,
+                20,
+            )
+            self.assertRaises(
+                AssertionError,
+                compute_file_sha,
+                f,
+                DEFAULT_OBJECT_FORMAT.hash_func,
+                10,
+                -12,
             )
         finally:
             f.close()
@@ -684,9 +717,11 @@ class TestPack(PackTests):
         with self.get_pack(pack1_sha) as origpack:
             self.assertSucceeds(origpack.index.check)
             basename = os.path.join(self.tempdir, "Elch")
-            write_pack(basename, origpack.pack_tuples())
+            write_pack(
+                basename, origpack.pack_tuples(), object_format=DEFAULT_OBJECT_FORMAT
+            )
 
-            with Pack(basename) as newpack:
+            with Pack(basename, object_format=DEFAULT_OBJECT_FORMAT) as newpack:
                 self.assertEqual(origpack, newpack)
                 self.assertSucceeds(newpack.index.check)
                 self.assertEqual(origpack.name(), newpack.name())
@@ -711,8 +746,10 @@ class TestPack(PackTests):
 
     def _copy_pack(self, origpack):
         basename = os.path.join(self.tempdir, "somepack")
-        write_pack(basename, origpack.pack_tuples())
-        return Pack(basename)
+        write_pack(
+            basename, origpack.pack_tuples(), object_format=DEFAULT_OBJECT_FORMAT
+        )
+        return Pack(basename, object_format=DEFAULT_OBJECT_FORMAT)
 
     def test_keep_no_message(self) -> None:
         with self.get_pack(pack1_sha) as p:
@@ -758,7 +795,7 @@ class TestPack(PackTests):
             write_pack_header(bad_file.write, 9999)
             bad_file.write(data._file.read())
             bad_file = BytesIO(bad_file.getvalue())
-            bad_data = PackData("", file=bad_file)
+            bad_data = PackData("", file=bad_file, object_format=DEFAULT_OBJECT_FORMAT)
             bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
             self.assertRaises(AssertionError, lambda: bad_pack.data)
             self.assertRaises(AssertionError, bad_pack.check_length_and_checksum)
@@ -770,7 +807,7 @@ class TestPack(PackTests):
 
             data._file.seek(0)
             bad_file = BytesIO(data._file.read()[:-20] + (b"\xff" * 20))
-            bad_data = PackData("", file=bad_file)
+            bad_data = PackData("", file=bad_file, object_format=DEFAULT_OBJECT_FORMAT)
             bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
             self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
             self.assertRaises(ChecksumMismatch, bad_pack.check_length_and_checksum)
@@ -846,7 +883,7 @@ class TestThinPack(PackTests):
 
         # Index the new pack.
         with self.make_pack(True) as pack:
-            with PackData(pack._data_path) as data:
+            with PackData(pack._data_path, object_format=DEFAULT_OBJECT_FORMAT) as data:
                 data.create_index(
                     self.pack_prefix + ".idx", resolve_ext_ref=pack.resolve_ext_ref
                 )
@@ -856,6 +893,7 @@ class TestThinPack(PackTests):
     def make_pack(self, resolve_ext_ref):
         return Pack(
             self.pack_prefix,
+            object_format=DEFAULT_OBJECT_FORMAT,
             resolve_ext_ref=self.store.get_raw if resolve_ext_ref else None,
         )
 
@@ -919,12 +957,16 @@ class WritePackTests(TestCase):
         try:
             f.write(b"header")
             offset = f.tell()
-            crc32 = write_pack_object(f.write, Blob.type_num, b"blob")
+            crc32 = write_pack_object(
+                f.write, Blob.type_num, b"blob", object_format=DEFAULT_OBJECT_FORMAT
+            )
             self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xFFFFFFFF)
 
             f.write(b"x")  # unpack_object needs extra trailing data.
             f.seek(offset)
-            unpacked, unused = unpack_object(f.read, compute_crc32=True)
+            unpacked, unused = unpack_object(
+                f.read, DEFAULT_OBJECT_FORMAT.hash_func, compute_crc32=True
+            )
             self.assertEqual(Blob.type_num, unpacked.pack_type_num)
             self.assertEqual(Blob.type_num, unpacked.obj_type_num)
             self.assertEqual([b"blob"], unpacked.decomp_chunks)
@@ -939,7 +981,13 @@ class WritePackTests(TestCase):
         offset = f.tell()
         sha_a = sha1(b"foo")
         sha_b = sha_a.copy()
-        write_pack_object(f.write, Blob.type_num, b"blob", sha=sha_a)
+        write_pack_object(
+            f.write,
+            Blob.type_num,
+            b"blob",
+            sha=sha_a,
+            object_format=DEFAULT_OBJECT_FORMAT,
+        )
         self.assertNotEqual(sha_a.digest(), sha_b.digest())
         sha_b.update(f.getvalue()[offset:])
         self.assertEqual(sha_a.digest(), sha_b.digest())
@@ -951,7 +999,12 @@ class WritePackTests(TestCase):
         sha_a = sha1(b"foo")
         sha_b = sha_a.copy()
         write_pack_object(
-            f.write, Blob.type_num, b"blob", sha=sha_a, compression_level=6
+            f.write,
+            Blob.type_num,
+            b"blob",
+            sha=sha_a,
+            compression_level=6,
+            object_format=DEFAULT_OBJECT_FORMAT,
         )
         self.assertNotEqual(sha_a.digest(), sha_b.digest())
         sha_b.update(f.getvalue()[offset:])
@@ -1032,7 +1085,7 @@ class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
     def index(self, filename, entries, pack_checksum):
         path = os.path.join(self.tempdir, filename)
         self.writeIndex(path, entries, pack_checksum)
-        idx = load_pack_index(path)
+        idx = load_pack_index(path, DEFAULT_OBJECT_FORMAT)
         self.assertSucceeds(idx.check)
         self.assertEqual(idx.version, self._expected_version)
         return idx
@@ -1050,7 +1103,9 @@ class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
         self._supports_large = True
 
     def index(self, filename, entries, pack_checksum):
-        return MemoryPackIndex(entries, pack_checksum)
+        from dulwich.object_format import DEFAULT_OBJECT_FORMAT
+
+        return MemoryPackIndex(entries, DEFAULT_OBJECT_FORMAT, pack_checksum)
 
     def tearDown(self) -> None:
         TestCase.tearDown(self)
@@ -1102,10 +1157,10 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         entries = [(b"abcd" * 5, 0, zlib.crc32(b""))]
         filename = os.path.join(self.tempdir, "test.idx")
         self.writeIndex(filename, entries, b"1234567890" * 2)
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertIsInstance(idx, PackIndex3)
         self.assertEqual(idx.version, 3)
-        self.assertEqual(idx.hash_algorithm, 1)  # SHA-1
+        self.assertEqual(idx.hash_format, 1)  # SHA-1
         self.assertEqual(idx.hash_size, 20)
         self.assertEqual(idx.shortened_oid_len, 20)
 
@@ -1115,9 +1170,9 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         filename = os.path.join(self.tempdir, "test_hash.idx")
         # Write v3 index with SHA-1 (algorithm=1)
         with GitFile(filename, "wb") as f:
-            write_pack_index_v3(f, entries, b"1" * 20, hash_algorithm=1)
-        idx = load_pack_index(filename)
-        self.assertEqual(idx.hash_algorithm, 1)
+            write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
+        self.assertEqual(idx.hash_format, 1)
         self.assertEqual(idx.hash_size, 20)
 
     def test_v3_sha256_length(self) -> None:
@@ -1128,7 +1183,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         # SHA-256 should raise NotImplementedError
         with self.assertRaises(NotImplementedError) as cm:
             with GitFile(filename, "wb") as f:
-                write_pack_index_v3(f, entries, b"1" * 32, hash_algorithm=2)
+                write_pack_index_v3(f, entries, b"1" * 32, hash_format=2)
         self.assertIn("SHA-256", str(cm.exception))
 
     def test_v3_invalid_hash_algorithm(self) -> None:
@@ -1138,7 +1193,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         # Invalid hash algorithm should raise ValueError
         with self.assertRaises(ValueError) as cm:
             with GitFile(filename, "wb") as f:
-                write_pack_index_v3(f, entries, b"1" * 20, hash_algorithm=99)
+                write_pack_index_v3(f, entries, b"1" * 20, hash_format=99)
         self.assertIn("Unknown hash algorithm", str(cm.exception))
 
     def test_v3_wrong_hash_length(self) -> None:
@@ -1148,7 +1203,7 @@ class TestPackIndexWritingv3(TestCase, BaseTestFilePackIndexWriting):
         filename = os.path.join(self.tempdir, "test_wrong_len.idx")
         with self.assertRaises(ValueError) as cm:
             with GitFile(filename, "wb") as f:
-                write_pack_index_v3(f, entries, b"1" * 20, hash_algorithm=1)
+                write_pack_index_v3(f, entries, b"1" * 20, hash_format=1)
         self.assertIn("wrong length", str(cm.exception))
 
 
@@ -1179,7 +1234,7 @@ class WritePackIndexTests(TestCase):
         with GitFile(filename, "wb") as f:
             write_pack_index(f, entries, b"P" * 20)
 
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(DEFAULT_PACK_INDEX_VERSION, idx.version)
 
     def test_write_pack_index_version_1(self) -> None:
@@ -1196,7 +1251,7 @@ class WritePackIndexTests(TestCase):
         with GitFile(filename, "wb") as f:
             write_pack_index(f, entries, b"P" * 20, version=1)
 
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(1, idx.version)
 
     def test_write_pack_index_version_3(self) -> None:
@@ -1213,7 +1268,7 @@ class WritePackIndexTests(TestCase):
         with GitFile(filename, "wb") as f:
             write_pack_index(f, entries, b"P" * 20, version=3)
 
-        idx = load_pack_index(filename)
+        idx = load_pack_index(filename, DEFAULT_OBJECT_FORMAT)
         self.assertEqual(3, idx.version)
 
     def test_write_pack_index_invalid_version(self) -> None:
@@ -1385,7 +1440,7 @@ class TestPackStreamReader(TestCase):
     def test_read_objects_emtpy(self) -> None:
         f = BytesIO()
         build_pack(f, [])
-        reader = PackStreamReader(f.read)
+        reader = PackStreamReader(DEFAULT_OBJECT_FORMAT.hash_func, f.read)
         self.assertEqual(0, len(list(reader.read_objects())))
 
     def test_read_objects(self) -> None:
@@ -1397,7 +1452,7 @@ class TestPackStreamReader(TestCase):
                 (OFS_DELTA, (0, b"blob1")),
             ],
         )
-        reader = PackStreamReader(f.read)
+        reader = PackStreamReader(DEFAULT_OBJECT_FORMAT.hash_func, f.read)
         objects = list(reader.read_objects(compute_crc32=True))
         self.assertEqual(2, len(objects))
 
@@ -1430,11 +1485,13 @@ class TestPackStreamReader(TestCase):
                 (OFS_DELTA, (0, b"blob1")),
             ],
         )
-        reader = PackStreamReader(f.read, zlib_bufsize=4)
+        reader = PackStreamReader(
+            DEFAULT_OBJECT_FORMAT.hash_func, f.read, zlib_bufsize=4
+        )
         self.assertEqual(2, len(list(reader.read_objects())))
 
     def test_read_objects_empty(self) -> None:
-        reader = PackStreamReader(BytesIO().read)
+        reader = PackStreamReader(DEFAULT_OBJECT_FORMAT.hash_func, BytesIO().read)
         self.assertRaises(AssertionError, list, reader.read_objects())
 
 
@@ -1490,14 +1547,14 @@ class DeltaChainIteratorTests(TestCase):
         if thin is None:
             thin = bool(list(self.store))
         resolve_ext_ref = (thin and self.get_raw_no_repeat) or None
-        data = PackData("test.pack", file=f)
+        data = PackData("test.pack", file=f, object_format=DEFAULT_OBJECT_FORMAT)
         return TestPackIterator.for_pack_data(data, resolve_ext_ref=resolve_ext_ref)
 
     def make_pack_iter_subset(self, f, subset, thin=None):
         if thin is None:
             thin = bool(list(self.store))
         resolve_ext_ref = (thin and self.get_raw_no_repeat) or None
-        data = PackData("test.pack", file=f)
+        data = PackData("test.pack", file=f, object_format=DEFAULT_OBJECT_FORMAT)
         assert data
         index = MemoryPackIndex.for_pack(data)
         pack = Pack.from_objects(data, index)
@@ -1764,7 +1821,7 @@ class DeltaChainIteratorTests(TestCase):
         )
         fsize = f.tell()
         f.seek(0)
-        packdata = PackData.from_file(f, fsize)
+        packdata = PackData.from_file(f, DEFAULT_OBJECT_FORMAT, fsize)
         td = tempfile.mkdtemp()
         idx_path = os.path.join(td, "test.idx")
         self.addCleanup(shutil.rmtree, td)
@@ -1773,13 +1830,13 @@ class DeltaChainIteratorTests(TestCase):
             version=2,
             resolve_ext_ref=self.get_raw_no_repeat,
         )
-        packindex = load_pack_index(idx_path)
+        packindex = load_pack_index(idx_path, DEFAULT_OBJECT_FORMAT)
         pack = Pack.from_objects(packdata, packindex)
         try:
             # Attempting to open this REF_DELTA object would loop forever
             pack[b1.id]
         except UnresolvedDeltas as e:
-            self.assertEqual([hex_to_sha(b1.id)], e.shas)
+            self.assertEqual([b1.id], [sha_to_hex(sha) for sha in e.shas])
 
 
 class DeltaEncodeSizeTests(TestCase):

+ 2 - 2
tests/test_patch.py

@@ -25,7 +25,7 @@ from io import BytesIO, StringIO
 from typing import NoReturn
 
 from dulwich.object_store import MemoryObjectStore
-from dulwich.objects import S_IFGITLINK, Blob, Commit, Tree
+from dulwich.objects import S_IFGITLINK, ZERO_SHA, Blob, Commit, Tree
 from dulwich.patch import (
     DiffAlgorithmNotAvailable,
     commit_patch_id,
@@ -875,7 +875,7 @@ index 3b0f961..a116b51 644
         # Create a commit
         commit = Commit()
         commit.tree = tree2.id
-        commit.parents = [b"0" * 40]  # Fake parent
+        commit.parents = [ZERO_SHA]  # Fake parent
         commit.author = commit.committer = b"Test <test@example.com>"
         commit.author_time = commit.commit_time = 1234567890
         commit.author_timezone = commit.commit_timezone = 0

+ 194 - 0
tests/test_sha256.py

@@ -0,0 +1,194 @@
+# test_sha256.py -- Tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 support in Dulwich."""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from dulwich.object_format import SHA1, SHA256, get_object_format
+from dulwich.objects import Blob, Tree, valid_hexsha
+from dulwich.repo import MemoryRepo, Repo
+
+
+class HashAlgorithmTests(unittest.TestCase):
+    """Tests for the hash algorithm abstraction."""
+
+    def test_sha1_properties(self):
+        """Test SHA1 algorithm properties."""
+        alg = SHA1
+        self.assertEqual(alg.name, "sha1")
+        self.assertEqual(alg.oid_length, 20)
+        self.assertEqual(alg.hex_length, 40)
+
+    def test_sha256_properties(self):
+        """Test SHA256 algorithm properties."""
+        alg = SHA256
+        self.assertEqual(alg.name, "sha256")
+        self.assertEqual(alg.oid_length, 32)
+        self.assertEqual(alg.hex_length, 64)
+
+    def test_get_hash_algorithm(self):
+        """Test getting hash algorithms by name."""
+        self.assertEqual(get_object_format("sha1"), SHA1)
+        self.assertEqual(get_object_format("sha256"), SHA256)
+        self.assertEqual(get_object_format(None), SHA1)  # Default
+
+        with self.assertRaises(ValueError):
+            get_object_format("invalid")
+
+
+class ObjectHashingTests(unittest.TestCase):
+    """Tests for object hashing with different algorithms."""
+
+    def test_blob_sha1(self):
+        """Test blob hashing with SHA1."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Default should be SHA1
+        sha1_id = blob.id
+        self.assertEqual(len(sha1_id), 40)
+        self.assertTrue(valid_hexsha(sha1_id))
+
+    def test_blob_sha256(self):
+        """Test blob hashing with SHA256."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Get SHA256 hash
+        sha256_id = blob.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+        self.assertTrue(valid_hexsha(sha256_id))
+
+        # SHA256 ID should be different from SHA1
+        sha1_id = blob.id
+        self.assertNotEqual(sha1_id, sha256_id)
+
+        # Verify .id property returns SHA1 for backward compatibility
+        self.assertEqual(blob.id, sha1_id)
+        self.assertEqual(blob.get_id(), sha1_id)  # Default should be SHA1
+
+    def test_tree_sha256(self):
+        """Test tree hashing with SHA256."""
+        tree = Tree()
+        tree.add(b"file.txt", 0o100644, b"a" * 40)  # SHA1 hex
+
+        # Get SHA1 (default)
+        sha1_id = tree.id
+        self.assertEqual(len(sha1_id), 40)
+
+        # Get SHA256
+        sha256_id = tree.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+
+        # Verify they're different
+        self.assertNotEqual(sha1_id, sha256_id)
+
+    def test_valid_hexsha(self):
+        """Test hex SHA validation for both algorithms."""
+        # Valid SHA1
+        self.assertTrue(valid_hexsha(b"1234567890abcdef1234567890abcdef12345678"))
+
+        # Valid SHA256
+        self.assertTrue(
+            valid_hexsha(
+                b"1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
+            )
+        )
+
+        # Invalid lengths
+        self.assertFalse(valid_hexsha(b"1234"))
+        self.assertFalse(
+            valid_hexsha(b"1234567890abcdef1234567890abcdef123456")
+        )  # 38 chars
+
+        # Invalid characters
+        self.assertFalse(valid_hexsha(b"123456789gabcdef1234567890abcdef12345678"))
+
+
+class RepositorySHA256Tests(unittest.TestCase):
+    """Tests for SHA256 repository support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_init_sha256_repo(self):
+        """Test initializing a SHA256 repository."""
+        repo_path = os.path.join(self.test_dir, "sha256_repo")
+        repo = Repo.init(repo_path, mkdir=True, object_format="sha256")
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"1")
+
+        # Check object format extension
+        self.assertEqual(config.get(("extensions",), "objectformat"), b"sha256")
+
+        # Check hash algorithm detection
+        self.assertEqual(repo.object_format, SHA256)
+
+        repo.close()
+
+    def test_init_sha1_repo(self):
+        """Test initializing a SHA1 repository (default)."""
+        repo_path = os.path.join(self.test_dir, "sha1_repo")
+        repo = Repo.init(repo_path, mkdir=True)
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"0")
+
+        # Object format extension should not exist
+        with self.assertRaises(KeyError):
+            config.get(("extensions",), "objectformat")
+
+        # Check hash algorithm detection
+        self.assertEqual(repo.object_format, SHA1)
+
+        repo.close()
+
+    def test_format_version_validation(self):
+        """Test format version validation for SHA256."""
+        repo_path = os.path.join(self.test_dir, "invalid_repo")
+
+        # SHA256 with format version 0 should fail
+        with self.assertRaises(ValueError) as cm:
+            Repo.init(repo_path, mkdir=True, format=0, object_format="sha256")
+        self.assertIn("SHA256", str(cm.exception))
+
+    def test_memory_repo_sha256(self):
+        """Test SHA256 support in memory repository."""
+        repo = MemoryRepo.init_bare([], {}, object_format="sha256")
+
+        # Check hash algorithm
+        self.assertEqual(repo.object_format, SHA256)
+
+
+if __name__ == "__main__":
+    unittest.main()

+ 128 - 0
tests/test_sha256_pack.py

@@ -0,0 +1,128 @@
+# test_sha256_pack.py -- Tests for SHA256 pack support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 pack support in Dulwich."""
+
+import shutil
+import tempfile
+import unittest
+from io import BytesIO
+
+from dulwich.object_format import SHA256
+from dulwich.pack import (
+    load_pack_index_file,
+    write_pack_index_v2,
+)
+
+
+class SHA256PackTests(unittest.TestCase):
+    """Tests for SHA256 pack support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_pack_index_v2_with_sha256(self):
+        """Test that pack index v2 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually (simulating what would happen in a SHA256 repo)
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = i  # Fake CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Write SHA256 pack index with a 20-byte SHA1 digest as a stand-in pack checksum (NOTE: Git itself uses SHA-256 pack checksums in SHA-256 repositories)
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake pack data").digest()
+        write_pack_index_v2(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf, SHA256)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 2)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+    def test_pack_index_v1_with_sha256(self):
+        """Test that pack index v1 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test v1 object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = None  # v1 doesn't store CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Import write_pack_index_v1
+        from dulwich.pack import write_pack_index_v1
+
+        # Write SHA256 pack index v1 with SHA1 pack checksum
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake v1 pack data").digest()
+        write_pack_index_v1(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf, SHA256)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 1)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+
+if __name__ == "__main__":
+    unittest.main()

+ 1 - 1
tests/test_submodule.py

@@ -85,7 +85,7 @@ class SubmoduleTests(TestCase):
         binary_file_sha = bytes.fromhex(file_sha.decode("ascii"))
 
         # Generate a valid SHA for the submodule
-        submodule_sha = b"1" * 40
+        submodule_sha = b"1" * repo.object_format.hex_length
         binary_submodule_sha = bytes.fromhex(submodule_sha.decode("ascii"))
 
         # Create raw tree data

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio