Explorar o código

Add SHA256 support

Jelmer Vernooij, 7 months ago
parent
commit
a7e3b91227

+ 8 - 2
NEWS

@@ -531,6 +531,13 @@ compatible.
 
 
  * Add ``gc`` command to ``dulwich.porcelain.`` (Jelmer Vernooij, #92)
  * Add ``gc`` command to ``dulwich.porcelain.`` (Jelmer Vernooij, #92)
 
 
+ * Add initial support for SHA256 repositories. Dulwich can now read and write Git
+   repositories using SHA256 object format. This includes support for loose
+   objects, pack files (v1 and v2 indexes), and tree parsing with SHA256 hashes.
+   The Rust extensions have been updated to support variable hash lengths.
+   SHA256 repositories require format version 1 and the objectFormat extension.
+   (Jelmer Vernooij, #1115)
+
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
    into loose objects in the repository. This command extracts all objects
    into loose objects in the repository. This command extracts all objects
    from a pack file and writes them to the object store as individual files.
    from a pack file and writes them to the object store as individual files.
@@ -550,8 +557,7 @@ compatible.
  * Add support for pack index format version 3. This format supports variable
  * Add support for pack index format version 3. This format supports variable
    hash sizes to enable future SHA-256 support. The implementation includes
    hash sizes to enable future SHA-256 support. The implementation includes
    reading and writing v3 indexes with proper hash algorithm identification
    reading and writing v3 indexes with proper hash algorithm identification
-   (1 for SHA-1, 2 for SHA-256). Note that SHA-256 support itself is not yet
-   implemented and will raise NotImplementedError. (Jelmer Vernooij)
+   (1 for SHA-1, 2 for SHA-256). (Jelmer Vernooij)
 
 
  * Fix ``LocalGitClient`` assertion error when fetching externally cloned repositories
  * Fix ``LocalGitClient`` assertion error when fetching externally cloned repositories
    into ``MemoryRepo``. Previously, the client would fail with an AssertionError
    into ``MemoryRepo``. Previously, the client would fail with an AssertionError

+ 41 - 10
crates/objects/src/lib.rs

@@ -49,15 +49,13 @@ fn sha_to_pyhex(py: Python, sha: &[u8]) -> PyResult<Py<PyAny>> {
     Ok(PyBytes::new(py, hexsha.as_slice()).into())
     Ok(PyBytes::new(py, hexsha.as_slice()).into())
 }
 }
 
 
-#[pyfunction]
-#[pyo3(signature = (text, strict=None))]
-fn parse_tree(
+fn parse_tree_with_length(
     py: Python,
     py: Python,
     mut text: &[u8],
     mut text: &[u8],
-    strict: Option<bool>,
-) -> PyResult<Vec<(Py<PyAny>, u32, Py<PyAny>)>> {
+    strict: bool,
+    hash_len: usize,
+) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
     let mut entries = Vec::new();
     let mut entries = Vec::new();
-    let strict = strict.unwrap_or(false);
     while !text.is_empty() {
     while !text.is_empty() {
         let mode_end = memchr(b' ', text)
         let mode_end = memchr(b' ', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
             .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
@@ -73,21 +71,54 @@ fn parse_tree(
         let namelen = memchr(b'\0', text)
         let namelen = memchr(b'\0', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
             .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
         let name = &text[..namelen];
         let name = &text[..namelen];
-        if namelen + 20 >= text.len() {
+
+        // Skip name and null terminator
+        text = &text[namelen + 1..];
+
+        // Check if we have enough bytes for the hash
+        if text.len() < hash_len {
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
         }
         }
-        text = &text[namelen + 1..];
-        let sha = &text[..20];
+
+        let sha = &text[..hash_len];
         entries.push((
         entries.push((
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             mode,
             mode,
             sha_to_pyhex(py, sha)?,
             sha_to_pyhex(py, sha)?,
         ));
         ));
-        text = &text[20..];
+        text = &text[hash_len..];
     }
     }
     Ok(entries)
     Ok(entries)
 }
 }
 
 
+#[pyfunction]
+#[pyo3(signature = (text, strict=None, hash_algorithm=None))]
+fn parse_tree(
+    py: Python,
+    text: &[u8],
+    strict: Option<bool>,
+    hash_algorithm: Option<PyObject>,
+) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
+    let strict = strict.unwrap_or(false);
+
+    // Determine hash length from hash_algorithm if provided
+    if let Some(algo) = hash_algorithm {
+        // Get oid_length attribute from hash algorithm object
+        let oid_length: usize = algo.getattr(py, "oid_length")?.extract(py)?;
+        parse_tree_with_length(py, text, strict, oid_length)
+    } else {
+        // Try to auto-detect by attempting to parse with both lengths
+        // We'll attempt to parse with SHA1 first (20 bytes), then SHA256 (32 bytes)
+        match parse_tree_with_length(py, text, strict, 20) {
+            Ok(entries) => Ok(entries),
+            Err(_) => {
+                // SHA1 failed, try SHA256
+                parse_tree_with_length(py, text, strict, 32)
+            }
+        }
+    }
+}
+
 fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
 fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
     let len = std::cmp::min(a.1.len(), b.1.len());
     let len = std::cmp::min(a.1.len(), b.1.len());
     let cmp = a.1[..len].cmp(&b.1[..len]);
     let cmp = a.1[..len].cmp(&b.1[..len]);

+ 8 - 5
crates/pack/src/lib.rs

@@ -30,8 +30,9 @@ pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
 fn py_is_sha(sha: &Py<PyAny>, py: Python) -> PyResult<bool> {
 fn py_is_sha(sha: &Py<PyAny>, py: Python) -> PyResult<bool> {
     // Check if the object is a bytes object
     // Check if the object is a bytes object
     if sha.bind(py).is_instance_of::<PyBytes>() {
     if sha.bind(py).is_instance_of::<PyBytes>() {
-        // Check if the bytes object has a size of 20
-        if sha.extract::<&[u8]>(py)?.len() == 20 {
+        // Check if the bytes object has a size of 20 (SHA1) or 32 (SHA256)
+        let len = sha.extract::<&[u8]>(py)?.len();
+        if len == 20 || len == 32 {
             Ok(true)
             Ok(true)
         } else {
         } else {
             Ok(false)
             Ok(false)
@@ -53,9 +54,11 @@ fn bisect_find_sha(
     let sha = sha.as_bytes(py);
     let sha = sha.as_bytes(py);
     let sha_len = sha.len();
     let sha_len = sha.len();
 
 
-    // Check if sha is 20 bytes long
-    if sha_len != 20 {
-        return Err(PyValueError::new_err("Sha is not 20 bytes long"));
+    // Check if sha is 20 bytes (SHA1) or 32 bytes (SHA256)
+    if sha_len != 20 && sha_len != 32 {
+        return Err(PyValueError::new_err(
+            "Sha must be 20 (SHA1) or 32 (SHA256) bytes long",
+        ));
     }
     }
 
 
     // Check if start > end
     // Check if start > end

+ 121 - 0
dulwich/hash.py

@@ -0,0 +1,121 @@
+# hash.py -- Hash algorithm abstraction layer for Git
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Hash algorithm abstraction for Git objects.
+
+This module provides an abstraction layer for different hash algorithms
+used in Git repositories (SHA-1 and SHA-256).
+"""
+
+from hashlib import sha1, sha256
+from typing import Callable, Optional
+
+
class HashAlgorithm:
    """Description of a hash algorithm usable for Git object IDs."""

    def __init__(
        self, name: str, oid_length: int, hex_length: int, hash_func: Callable
    ) -> None:
        """Initialize a hash algorithm.

        Args:
            name: Name of the algorithm (e.g., "sha1", "sha256")
            oid_length: Length of the binary object ID in bytes
            hex_length: Length of the hexadecimal object ID in characters
            hash_func: Hash function from hashlib
        """
        self.name = name
        self.oid_length = oid_length
        self.hex_length = hex_length
        self.hash_func = hash_func
        # Hex all-zero OID and its binary counterpart, precomputed once.
        self.zero_oid = b"0" * hex_length
        self.zero_oid_bin = b"\x00" * oid_length

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"HashAlgorithm({self.name!r})"

    def new_hash(self):
        """Create a new hash object for incremental hashing."""
        return self.hash_func()

    def hash_object(self, data: bytes) -> bytes:
        """Hash data and return the binary digest.

        Args:
            data: Data to hash

        Returns:
            Binary digest (oid_length bytes)
        """
        h = self.new_hash()
        h.update(data)
        return h.digest()

    def hash_object_hex(self, data: bytes) -> bytes:
        """Hash data and return the hexadecimal digest.

        Args:
            data: Data to hash

        Returns:
            Hexadecimal digest as ASCII bytes (hex_length characters)
        """
        h = self.new_hash()
        h.update(data)
        return h.hexdigest().encode("ascii")


# The two object formats Git supports.
SHA1 = HashAlgorithm("sha1", 20, 40, sha1)
SHA256 = HashAlgorithm("sha256", 32, 64, sha256)

# Map of algorithm names to HashAlgorithm instances
HASH_ALGORITHMS = {
    "sha1": SHA1,
    "sha256": SHA256,
}

# Default algorithm for backward compatibility
DEFAULT_HASH_ALGORITHM = SHA1


def get_hash_algorithm(name: Optional[str] = None) -> HashAlgorithm:
    """Get a hash algorithm by name.

    Args:
        name: Algorithm name ("sha1" or "sha256"), case-insensitive. Bytes
            are accepted and decoded as ASCII, since config values (e.g. the
            ``extensions.objectFormat`` setting) arrive as bytes. If None,
            returns the default (SHA-1).

    Returns:
        HashAlgorithm instance

    Raises:
        ValueError: If the algorithm name is not supported
    """
    if name is None:
        return DEFAULT_HASH_ALGORITHM
    if isinstance(name, bytes):
        # Accept raw config values directly instead of forcing callers
        # to decode first.
        name = name.decode("ascii")
    try:
        return HASH_ALGORITHMS[name.lower()]
    except KeyError:
        # Suppress the KeyError context: it adds no information for callers.
        raise ValueError(f"Unsupported hash algorithm: {name}") from None

+ 39 - 5
dulwich/object_store.py

@@ -1169,10 +1169,10 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         """
         """
         if name == ZERO_SHA:
         if name == ZERO_SHA:
             raise KeyError(name)
             raise KeyError(name)
-        if len(name) == 40:
+        if len(name) in (40, 64):  # Support both SHA1 (40) and SHA256 (64) hex
             sha = hex_to_sha(cast(ObjectID, name))
             sha = hex_to_sha(cast(ObjectID, name))
             hexsha = cast(ObjectID, name)
             hexsha = cast(ObjectID, name)
-        elif len(name) == 20:
+        elif len(name) in (20, 32):  # Support both SHA1 (20) and SHA256 (32) binary
             sha = cast(RawObjectID, name)
             sha = cast(RawObjectID, name)
             hexsha = None
             hexsha = None
         else:
         else:
@@ -1382,6 +1382,7 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_write_bitmap_lookup_table: bool = True,
         pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         file_mode: int | None = None,
         dir_mode: int | None = None,
         dir_mode: int | None = None,
+        hash_algorithm=None,
     ) -> None:
     ) -> None:
         """Open an object store.
         """Open an object store.
 
 
@@ -1402,6 +1403,7 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           file_mode: File permission mask for shared repository
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
+          hash_algorithm: Hash algorithm to use (SHA1 or SHA256)
         """
         """
         super().__init__(
         super().__init__(
             pack_compression_level=pack_compression_level,
             pack_compression_level=pack_compression_level,
@@ -1426,6 +1428,11 @@ class DiskObjectStore(PackBasedObjectStore):
         self.file_mode = file_mode
         self.file_mode = file_mode
         self.dir_mode = dir_mode
         self.dir_mode = dir_mode
 
 
+        # Import here to avoid circular dependency
+        from .hash import get_hash_algorithm
+
+        self.hash_algorithm = hash_algorithm if hash_algorithm else get_hash_algorithm()
+
         # Commit graph support - lazy loaded
         # Commit graph support - lazy loaded
         self._commit_graph = None
         self._commit_graph = None
         self._use_commit_graph = True  # Default to true
         self._use_commit_graph = True  # Default to true
@@ -1540,6 +1547,24 @@ class DiskObjectStore(PackBasedObjectStore):
                 (b"repack",), b"writeBitmaps", False
                 (b"repack",), b"writeBitmaps", False
             )
             )
 
 
+        # Get hash algorithm from config
+        from .hash import get_hash_algorithm
+
+        hash_algorithm = None
+        try:
+            try:
+                version = int(config.get((b"core",), b"repositoryformatversion"))
+            except KeyError:
+                version = 0
+            if version == 1:
+                try:
+                    object_format = config.get((b"extensions",), b"objectformat")
+                except KeyError:
+                    object_format = b"sha1"
+                hash_algorithm = get_hash_algorithm(object_format.decode("ascii"))
+        except (KeyError, ValueError):
+            pass
+
         instance = cls(
         instance = cls(
             path,
             path,
             loose_compression_level=loose_compression_level,
             loose_compression_level=loose_compression_level,
@@ -1557,6 +1582,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             file_mode=file_mode,
             dir_mode=dir_mode,
             dir_mode=dir_mode,
+            hash_algorithm=hash_algorithm,
         )
         )
         instance._use_commit_graph = use_commit_graph
         instance._use_commit_graph = use_commit_graph
         instance._use_midx = use_midx
         instance._use_midx = use_midx
@@ -1647,6 +1673,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     depth=self.pack_depth,
                     depth=self.pack_depth,
                     threads=self.pack_threads,
                     threads=self.pack_threads,
                     big_file_threshold=self.pack_big_file_threshold,
                     big_file_threshold=self.pack_big_file_threshold,
+                    hash_algorithm=self.hash_algorithm,
                 )
                 )
                 new_packs.append(pack)
                 new_packs.append(pack)
                 self._pack_cache[f] = pack
                 self._pack_cache[f] = pack
@@ -1698,7 +1725,9 @@ class DiskObjectStore(PackBasedObjectStore):
     def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
     def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
         path = self._get_shafile_path(sha)
         path = self._get_shafile_path(sha)
         try:
         try:
-            return ShaFile.from_path(path)
+            # Load the object from path with SHA for hash algorithm detection
+            # sha parameter here is already hex, so pass it directly
+            return ShaFile.from_path(path, sha)
         except FileNotFoundError:
         except FileNotFoundError:
             return None
             return None
 
 
@@ -1885,6 +1914,7 @@ class DiskObjectStore(PackBasedObjectStore):
             depth=self.pack_depth,
             depth=self.pack_depth,
             threads=self.pack_threads,
             threads=self.pack_threads,
             big_file_threshold=self.pack_big_file_threshold,
             big_file_threshold=self.pack_big_file_threshold,
+            hash_algorithm=self.hash_algorithm,
         )
         )
         final_pack.check_length_and_checksum()
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
         self._add_cached_pack(pack_base_name, final_pack)
@@ -1964,7 +1994,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Args:
         Args:
           obj: Object to add
           obj: Object to add
         """
         """
-        path = self._get_shafile_path(obj.id)
+        # Use the correct hash algorithm for the object ID
+        obj_id = obj.get_id(self.hash_algorithm)
+        path = self._get_shafile_path(obj_id)
         dir = os.path.dirname(path)
         dir = os.path.dirname(path)
         try:
         try:
             os.mkdir(dir)
             os.mkdir(dir)
@@ -1987,6 +2019,7 @@ class DiskObjectStore(PackBasedObjectStore):
         *,
         *,
         file_mode: int | None = None,
         file_mode: int | None = None,
         dir_mode: int | None = None,
         dir_mode: int | None = None,
+        hash_algorithm=None,
     ) -> "DiskObjectStore":
     ) -> "DiskObjectStore":
         """Initialize a new disk object store.
         """Initialize a new disk object store.
 
 
@@ -1996,6 +2029,7 @@ class DiskObjectStore(PackBasedObjectStore):
           path: Path where the object store should be created
           path: Path where the object store should be created
           file_mode: Optional file permission mask for shared repository
           file_mode: Optional file permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
+          hash_algorithm: Hash algorithm to use (SHA1 or SHA256)
 
 
         Returns:
         Returns:
           New DiskObjectStore instance
           New DiskObjectStore instance
@@ -2013,7 +2047,7 @@ class DiskObjectStore(PackBasedObjectStore):
         if dir_mode is not None:
         if dir_mode is not None:
             os.chmod(info_path, dir_mode)
             os.chmod(info_path, dir_mode)
             os.chmod(pack_path, dir_mode)
             os.chmod(pack_path, dir_mode)
-        return cls(path, file_mode=file_mode, dir_mode=dir_mode)
+        return cls(path, file_mode=file_mode, dir_mode=dir_mode, hash_algorithm=hash_algorithm)
 
 
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
         """Iterate over all object SHAs with the given prefix.
         """Iterate over all object SHAs with the given prefix.

+ 141 - 25
dulwich/objects.py

@@ -112,6 +112,24 @@ if TYPE_CHECKING:
 
 
     from .file import _GitFile
     from .file import _GitFile
 
 
# Hex all-zero SHA at SHA-1 width; kept for backward compatibility.
ZERO_SHA = b"0" * 40


def zero_sha_for(hash_algorithm=None):
    """Return the all-zero object ID for a given hash algorithm.

    Args:
        hash_algorithm: HashAlgorithm instance, or None for the SHA-1 default.

    Returns:
        Zero SHA as hex bytes (40 characters for SHA-1, 64 for SHA-256).
    """
    return ZERO_SHA if hash_algorithm is None else hash_algorithm.zero_oid
+
+
 # Header fields for commits
 # Header fields for commits
 _TREE_HEADER = b"tree"
 _TREE_HEADER = b"tree"
 _PARENT_HEADER = b"parent"
 _PARENT_HEADER = b"parent"
@@ -175,13 +193,17 @@ def _decompress(string: bytes) -> bytes:
 def sha_to_hex(sha: RawObjectID) -> ObjectID:
 def sha_to_hex(sha: RawObjectID) -> ObjectID:
     """Takes a string and returns the hex of the sha within."""
     """Takes a string and returns the hex of the sha within."""
     hexsha = binascii.hexlify(sha)
     hexsha = binascii.hexlify(sha)
-    assert len(hexsha) == 40, f"Incorrect length of sha1 string: {hexsha!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hexsha) not in (40, 64):
+        raise ValueError(f"Incorrect length of sha string: {hexsha!r}")
     return ObjectID(hexsha)
     return ObjectID(hexsha)
 
 
 
 
 def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
 def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
     """Takes a hex sha and returns a binary sha."""
     """Takes a hex sha and returns a binary sha."""
-    assert len(hex) == 40, f"Incorrect length of hexsha: {hex!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hex) not in (40, 64):
+        raise ValueError(f"Incorrect length of hexsha: {hex}")
     try:
     try:
         return RawObjectID(binascii.unhexlify(hex))
         return RawObjectID(binascii.unhexlify(hex))
     except TypeError as exc:
     except TypeError as exc:
@@ -191,15 +213,15 @@ def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
 
 
 
 
 def valid_hexsha(hex: bytes | str) -> bool:
 def valid_hexsha(hex: bytes | str) -> bool:
-    """Check if a string is a valid hex SHA.
+    """Check if a hex string is a valid SHA1 or SHA256.
 
 
     Args:
     Args:
-      hex: Hex string to check
+        hex: Hex string to validate
 
 
     Returns:
     Returns:
-      True if valid hex SHA, False otherwise
+        True if valid SHA1 (40 chars) or SHA256 (64 chars), False otherwise
     """
     """
-    if len(hex) != 40:
+    if len(hex) not in (40, 64):
         return False
         return False
     try:
     try:
         binascii.unhexlify(hex)
         binascii.unhexlify(hex)
@@ -549,11 +571,12 @@ class ShaFile:
     ) -> None:
     ) -> None:
         """Set the contents of this object from a list of chunks."""
         """Set the contents of this object from a list of chunks."""
         self._chunked_text = chunks
         self._chunked_text = chunks
-        self._deserialize(chunks)
+        # Set SHA before deserialization so Tree can detect hash algorithm
         if sha is None:
         if sha is None:
             self._sha = None
             self._sha = None
         else:
         else:
-            self._sha = FixedSha(sha)
+            self._sha = FixedSha(sha)  # type: ignore
+        self._deserialize(chunks)
         self._needs_serialization = False
         self._needs_serialization = False
 
 
     @staticmethod
     @staticmethod
@@ -613,17 +636,21 @@ class ShaFile:
         raise NotImplementedError(self._serialize)
         raise NotImplementedError(self._serialize)
 
 
     @classmethod
     @classmethod
-    def from_path(cls, path: str | bytes) -> "ShaFile":
+    def from_path(cls, path: str | bytes, sha: ObjectID | None = None) -> "ShaFile":
         """Open a SHA file from disk."""
         """Open a SHA file from disk."""
         with GitFile(path, "rb") as f:
         with GitFile(path, "rb") as f:
-            return cls.from_file(f)
+            return cls.from_file(f, sha)
 
 
     @classmethod
     @classmethod
-    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile") -> "ShaFile":
+    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile", sha: ObjectID | None = None) -> "ShaFile":
         """Get the contents of a SHA file on disk."""
         """Get the contents of a SHA file on disk."""
         try:
         try:
             obj = cls._parse_file(f)
             obj = cls._parse_file(f)
-            obj._sha = None
+            # Set SHA after parsing but before any further processing
+            if sha is not None:
+                obj._sha = FixedSha(sha)
+            else:
+                obj._sha = None
             return obj
             return obj
         except (IndexError, ValueError) as exc:
         except (IndexError, ValueError) as exc:
             raise ObjectFormatException("invalid object header") from exc
             raise ObjectFormatException("invalid object header") from exc
@@ -713,8 +740,21 @@ class ShaFile:
         """Returns the length of the raw string of this object."""
         """Returns the length of the raw string of this object."""
         return sum(map(len, self.as_raw_chunks()))
         return sum(map(len, self.as_raw_chunks()))
 
 
-    def sha(self) -> "FixedSha | HASH":
-        """The SHA1 object that is the name of this object."""
+    def sha(self, hash_algorithm=None) -> "FixedSha | HASH":
+        """The SHA object that is the name of this object.
+
+        Args:
+            hash_algorithm: Optional HashAlgorithm to use. Defaults to SHA1.
+        """
+        # If using a different hash algorithm, always recalculate
+        if hash_algorithm is not None:
+            new_sha = hash_algorithm.new_hash()
+            new_sha.update(self._header())
+            for chunk in self.as_raw_chunks():
+                new_sha.update(chunk)
+            return new_sha
+
+        # Otherwise use cached SHA1 value
         if self._sha is None or self._needs_serialization:
         if self._sha is None or self._needs_serialization:
             # this is a local because as_raw_chunks() overwrites self._sha
             # this is a local because as_raw_chunks() overwrites self._sha
             new_sha = sha1()
             new_sha = sha1()
@@ -733,9 +773,32 @@ class ShaFile:
 
 
     @property
     @property
     def id(self) -> ObjectID:
     def id(self) -> ObjectID:
-        """The hex SHA of this object."""
+        """The hex SHA1 of this object.
+
+        For SHA256 repositories, use get_id(hash_algorithm) instead.
+        This property always returns SHA1 for backward compatibility.
+        """
         return ObjectID(self.sha().hexdigest().encode("ascii"))
         return ObjectID(self.sha().hexdigest().encode("ascii"))
 
 
+    def get_id(self, hash_algorithm=None):
+        """Get the hex SHA of this object using the specified hash algorithm.
+
+        Args:
+            hash_algorithm: Optional HashAlgorithm to use. Defaults to SHA1.
+
+        Example:
+            >>> blob = Blob()
+            >>> blob.data = b"Hello, World!"
+            >>> blob.id  # Always returns SHA1 for backward compatibility
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> blob.get_id()  # Same as .id
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> from dulwich.hash import SHA256
+            >>> blob.get_id(SHA256)  # Get SHA256 hash
+            b'03ba204e2f2e707...'  # 64-character SHA256
+        """
+        return self.sha(hash_algorithm).hexdigest().encode("ascii")
+
     def __repr__(self) -> str:
     def __repr__(self) -> str:
         """Return string representation of this object."""
         """Return string representation of this object."""
         return f"<{self.__class__.__name__} {self.id!r}>"
         return f"<{self.__class__.__name__} {self.id!r}>"
@@ -1247,20 +1310,37 @@ class TreeEntry(NamedTuple):
 
 
 
 
 def parse_tree(
 def parse_tree(
-    text: bytes, strict: bool = False
+    text: bytes, strict: bool = False, hash_algorithm=None
 ) -> Iterator[tuple[bytes, int, ObjectID]]:
 ) -> Iterator[tuple[bytes, int, ObjectID]]:
     """Parse a tree text.
     """Parse a tree text.
 
 
     Args:
     Args:
       text: Serialized text to parse
       text: Serialized text to parse
-      strict: If True, enforce strict validation
+      strict: Whether to be strict about format
+      hash_algorithm: Hash algorithm object (SHA1 or SHA256) - if None, auto-detect
     Returns: iterator of tuples of (name, mode, sha)
     Returns: iterator of tuples of (name, mode, sha)
 
 
     Raises:
     Raises:
       ObjectFormatException: if the object was malformed in some way
       ObjectFormatException: if the object was malformed in some way
     """
     """
+    if hash_algorithm is not None:
+        sha_len = hash_algorithm.oid_length
+        return _parse_tree_with_sha_len(text, strict, sha_len)
+
+    # Try both hash lengths and use the one that works
+    try:
+        # Try SHA1 first (more common)
+        return _parse_tree_with_sha_len(text, strict, 20)
+    except ObjectFormatException:
+        # If SHA1 fails, try SHA256
+        return _parse_tree_with_sha_len(text, strict, 32)
+
+
+def _parse_tree_with_sha_len(text, strict, sha_len):
+    """Helper function to parse tree with a specific hash length."""
     count = 0
     count = 0
     length = len(text)
     length = len(text)
+
     while count < length:
     while count < length:
         mode_end = text.index(b" ", count)
         mode_end = text.index(b" ", count)
         mode_text = text[count:mode_end]
         mode_text = text[count:mode_end]
@@ -1272,10 +1352,18 @@ def parse_tree(
             raise ObjectFormatException(f"Invalid mode {mode_text!r}") from exc
             raise ObjectFormatException(f"Invalid mode {mode_text!r}") from exc
         name_end = text.index(b"\0", mode_end)
         name_end = text.index(b"\0", mode_end)
         name = text[mode_end + 1 : name_end]
         name = text[mode_end + 1 : name_end]
-        count = name_end + 21
+
+        count = name_end + 1 + sha_len
+        if count > length:
+            raise ObjectFormatException(
+                f"Tree entry extends beyond tree length: {count} > {length}"
+            )
+
         sha = text[name_end + 1 : count]
         sha = text[name_end + 1 : count]
-        if len(sha) != 20:
-            raise ObjectFormatException("Sha has invalid length")
+        if len(sha) != sha_len:
+            raise ObjectFormatException(
+                f"Sha has invalid length: {len(sha)} != {sha_len}"
+            )
         hexsha = sha_to_hex(RawObjectID(sha))
         hexsha = sha_to_hex(RawObjectID(sha))
         yield (name, mode, hexsha)
         yield (name, mode, hexsha)
 
 
@@ -1386,12 +1474,34 @@ class Tree(ShaFile):
         super().__init__()
         super().__init__()
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
 
 
+    def _get_hash_algorithm(self):
+        """Get the hash algorithm based on the object's SHA."""
+        if not hasattr(self, "_sha") or self._sha is None:
+            return None
+
+        # Get the raw SHA bytes
+        sha = self._sha.digest() if hasattr(self._sha, "digest") else self._sha
+        if not isinstance(sha, bytes):
+            return None
+
+        # Import hash modules lazily to avoid circular imports
+        if len(sha) == 32:
+            from .hash import SHA256
+
+            return SHA256
+        elif len(sha) == 20:
+            from .hash import SHA1
+
+            return SHA1
+        return None
+
     @classmethod
     @classmethod
-    def from_path(cls, filename: str | bytes) -> "Tree":
+    def from_path(cls, filename: str | bytes, sha: ObjectID | None = None) -> "Tree":
         """Read a tree from a file on disk.
         """Read a tree from a file on disk.
 
 
         Args:
         Args:
           filename: Path to the tree file
           filename: Path to the tree file
+          sha: Optional known SHA for the object
 
 
         Returns:
         Returns:
           A Tree object
           A Tree object
@@ -1399,7 +1509,7 @@ class Tree(ShaFile):
         Raises:
         Raises:
           NotTreeError: If the file is not a tree
           NotTreeError: If the file is not a tree
         """
         """
-        tree = ShaFile.from_path(filename)
+        tree = ShaFile.from_path(filename, sha)
         if not isinstance(tree, cls):
         if not isinstance(tree, cls):
             raise NotTreeError(_path_to_bytes(filename))
             raise NotTreeError(_path_to_bytes(filename))
         return tree
         return tree
@@ -1470,7 +1580,9 @@ class Tree(ShaFile):
     def _deserialize(self, chunks: list[bytes]) -> None:
     def _deserialize(self, chunks: list[bytes]) -> None:
         """Grab the entries in the tree."""
         """Grab the entries in the tree."""
         try:
         try:
-            parsed_entries = parse_tree(b"".join(chunks))
+            parsed_entries = parse_tree(
+                b"".join(chunks), hash_algorithm=self._get_hash_algorithm()
+            )
         except ValueError as exc:
         except ValueError as exc:
             raise ObjectFormatException(exc) from exc
             raise ObjectFormatException(exc) from exc
         # TODO: list comprehension is for efficiency in the common (small)
         # TODO: list comprehension is for efficiency in the common (small)
@@ -1496,8 +1608,12 @@ class Tree(ShaFile):
             # TODO: optionally exclude as in git fsck --strict
             # TODO: optionally exclude as in git fsck --strict
             stat.S_IFREG | 0o664,
             stat.S_IFREG | 0o664,
         )
         )
-        for name, mode, sha in parse_tree(b"".join(self._chunked_text), True):
-            check_hexsha(sha, f"invalid sha {sha!r}")
+        for name, mode, sha in parse_tree(
+            b"".join(self._chunked_text),
+            strict=True,
+            hash_algorithm=self._get_hash_algorithm(),
+        ):
+            check_hexsha(sha, f"invalid sha {sha}")
             if b"/" in name or name in (b"", b".", b"..", b".git"):
             if b"/" in name or name in (b"", b".", b"..", b".git"):
                 raise ObjectFormatException(
                 raise ObjectFormatException(
                     "invalid name {}".format(name.decode("utf-8", "replace"))
                     "invalid name {}".format(name.decode("utf-8", "replace"))

+ 84 - 33
dulwich/pack.py

@@ -530,15 +530,16 @@ def iter_sha1(iter: Iterable[bytes]) -> bytes:
     return sha.hexdigest().encode("ascii")
     return sha.hexdigest().encode("ascii")
 
 
 
 
-def load_pack_index(path: str | os.PathLike[str]) -> "PackIndex":
+def load_pack_index(path: str | os.PathLike[str], hash_algorithm: int | None = None) -> "PackIndex":
     """Load an index file by path.
     """Load an index file by path.
 
 
     Args:
     Args:
       path: Path to the index file
       path: Path to the index file
+      hash_algorithm: Hash algorithm used by the repository
     Returns: A PackIndex loaded from the given path
     Returns: A PackIndex loaded from the given path
     """
     """
     with GitFile(path, "rb") as f:
     with GitFile(path, "rb") as f:
-        return load_pack_index_file(path, f)
+        return load_pack_index_file(path, f, hash_algorithm=hash_algorithm)
 
 
 
 
 def _load_file_contents(
 def _load_file_contents(
@@ -574,25 +575,35 @@ def _load_file_contents(
 
 
 def load_pack_index_file(
 def load_pack_index_file(
     path: str | os.PathLike[str], f: IO[bytes] | _GitFile
     path: str | os.PathLike[str], f: IO[bytes] | _GitFile
+    hash_algorithm: int | None = None,
 ) -> "PackIndex":
 ) -> "PackIndex":
     """Load an index file from a file-like object.
     """Load an index file from a file-like object.
 
 
     Args:
     Args:
       path: Path for the index file
       path: Path for the index file
       f: File-like object
       f: File-like object
+      hash_algorithm: Hash algorithm used by the repository
     Returns: A PackIndex loaded from the given file
     Returns: A PackIndex loaded from the given file
     """
     """
     contents, size = _load_file_contents(f)
     contents, size = _load_file_contents(f)
     if contents[:4] == b"\377tOc":
     if contents[:4] == b"\377tOc":
         version = struct.unpack(b">L", contents[4:8])[0]
         version = struct.unpack(b">L", contents[4:8])[0]
         if version == 2:
         if version == 2:
-            return PackIndex2(path, file=f, contents=contents, size=size)
+            return PackIndex2(
+                path,
+                file=f,
+                contents=contents,
+                size=size,
+                hash_algorithm=hash_algorithm,
+            )
         elif version == 3:
         elif version == 3:
             return PackIndex3(path, file=f, contents=contents, size=size)
             return PackIndex3(path, file=f, contents=contents, size=size)
         else:
         else:
             raise KeyError(f"Unknown pack index format {version}")
             raise KeyError(f"Unknown pack index format {version}")
     else:
     else:
-        return PackIndex1(path, file=f, contents=contents, size=size)
+        return PackIndex1(
+            path, file=f, contents=contents, size=size, hash_algorithm=hash_algorithm
+        )
 
 
 
 
 def bisect_find_sha(
 def bisect_find_sha(
@@ -777,7 +788,7 @@ class MemoryPackIndex(PackIndex):
           sha: SHA to look up (binary or hex)
           sha: SHA to look up (binary or hex)
         Returns: Offset in the pack file
         Returns: Offset in the pack file
         """
         """
-        if len(sha) == 40:
+        if len(sha) in (40, 64):  # Hex string (SHA1 or SHA256)
             sha = hex_to_sha(cast(ObjectID, sha))
             sha = hex_to_sha(cast(ObjectID, sha))
         return self._by_sha[cast(RawObjectID, sha)]
         return self._by_sha[cast(RawObjectID, sha)]
 
 
@@ -976,7 +987,8 @@ class FilePackIndex(PackIndex):
         Args:
         Args:
           sha: A *binary* SHA string. (20 characters long)_
           sha: A *binary* SHA string. (20 characters long)_
         """
         """
-        assert len(sha) == 20
+        hash_size = getattr(self, "hash_size", 20)  # Default to SHA1 for v1
+        assert len(sha) == hash_size
         idx = ord(sha[:1])
         idx = ord(sha[:1])
         if idx == 0:
         if idx == 0:
             start = 0
             start = 0
@@ -1020,6 +1032,7 @@ class PackIndex1(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         contents: bytes | None = None,
         size: int | None = None,
         size: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
     ) -> None:
         """Initialize a version 1 pack index.
         """Initialize a version 1 pack index.
 
 
@@ -1028,24 +1041,35 @@ class PackIndex1(FilePackIndex):
             file: Optional file object
             file: Optional file object
             contents: Optional mmap'd contents
             contents: Optional mmap'd contents
             size: Optional size of the index
             size: Optional size of the index
+            hash_algorithm: Hash algorithm used by the repository
         """
         """
         super().__init__(filename, file, contents, size)
         super().__init__(filename, file, contents, size)
         self.version = 1
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
         self._fan_out_table = self._read_fan_out_table(0)
+        # Use provided hash algorithm if available, otherwise default to SHA1
+        if hash_algorithm:
+            self.hash_size = hash_algorithm.oid_length
+        else:
+            self.hash_size = 20  # Default to SHA1
+
+        self._entry_size = 4 + self.hash_size
 
 
     def _unpack_entry(self, i: int) -> tuple[RawObjectID, int, None]:
     def _unpack_entry(self, i: int) -> tuple[RawObjectID, int, None]:
-        (offset, name) = unpack_from(">L20s", self._contents, (0x100 * 4) + (i * 24))
+        base_offset = (0x100 * 4) + (i * self._entry_size)
+        if self.hash_size == 20:
+            (offset, name) = unpack_from(">L20s", self._contents, base_offset)
+        else:  # SHA256
+            offset = unpack_from(">L", self._contents, base_offset)[0]
+            name = self._contents[base_offset + 4 : base_offset + 4 + self.hash_size]
         return (RawObjectID(name), offset, None)
         return (RawObjectID(name), offset, None)
 
 
     def _unpack_name(self, i: int) -> bytes:
     def _unpack_name(self, i: int) -> bytes:
-        offset = (0x100 * 4) + (i * 24) + 4
-        return self._contents[offset : offset + 20]
+        offset = (0x100 * 4) + (i * self._entry_size) + 4
+        return self._contents[offset : offset + self.hash_size]
 
 
     def _unpack_offset(self, i: int) -> int:
     def _unpack_offset(self, i: int) -> int:
-        offset = (0x100 * 4) + (i * 24)
-        result = unpack_from(">L", self._contents, offset)[0]
-        assert isinstance(result, int)
-        return result
+        offset = (0x100 * 4) + (i * self._entry_size)
+        return unpack_from(">L", self._contents, offset)[0]
 
 
     def _unpack_crc32_checksum(self, i: int) -> None:
     def _unpack_crc32_checksum(self, i: int) -> None:
         # Not stored in v1 index files
         # Not stored in v1 index files
@@ -1061,6 +1085,7 @@ class PackIndex2(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         contents: bytes | None = None,
         size: int | None = None,
         size: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
     ) -> None:
         """Initialize a version 2 pack index.
         """Initialize a version 2 pack index.
 
 
@@ -1069,6 +1094,7 @@ class PackIndex2(FilePackIndex):
             file: Optional file object
             file: Optional file object
             contents: Optional mmap'd contents
             contents: Optional mmap'd contents
             size: Optional size of the index
             size: Optional size of the index
+            hash_algorithm: Hash algorithm used by the repository
         """
         """
         super().__init__(filename, file, contents, size)
         super().__init__(filename, file, contents, size)
         if self._contents[:4] != b"\377tOc":
         if self._contents[:4] != b"\377tOc":
@@ -1077,8 +1103,15 @@ class PackIndex2(FilePackIndex):
         if self.version != 2:
         if self.version != 2:
             raise AssertionError(f"Version was {self.version}")
             raise AssertionError(f"Version was {self.version}")
         self._fan_out_table = self._read_fan_out_table(8)
         self._fan_out_table = self._read_fan_out_table(8)
+
+        # Use provided hash algorithm if available, otherwise default to SHA1
+        if hash_algorithm:
+            self.hash_size = hash_algorithm.oid_length
+        else:
+            self.hash_size = 20  # Default to SHA1
+
         self._name_table_offset = 8 + 0x100 * 4
         self._name_table_offset = 8 + 0x100 * 4
-        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
+        self._crc32_table_offset = self._name_table_offset + self.hash_size * len(self)
         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
         self._pack_offset_largetable_offset = self._pack_offset_table_offset + 4 * len(
         self._pack_offset_largetable_offset = self._pack_offset_table_offset + 4 * len(
             self
             self
@@ -1092,25 +1125,27 @@ class PackIndex2(FilePackIndex):
         )
         )
 
 
     def _unpack_name(self, i: int) -> bytes:
     def _unpack_name(self, i: int) -> bytes:
-        offset = self._name_table_offset + i * 20
-        return self._contents[offset : offset + 20]
+        offset = self._name_table_offset + i * self.hash_size
+        return self._contents[offset : offset + self.hash_size]
 
 
     def _unpack_offset(self, i: int) -> int:
     def _unpack_offset(self, i: int) -> int:
-        offset_pos = self._pack_offset_table_offset + i * 4
-        offset = unpack_from(">L", self._contents, offset_pos)[0]
-        assert isinstance(offset, int)
+        offset = self._pack_offset_table_offset + i * 4
+        offset = unpack_from(">L", self._contents, offset)[0]
         if offset & (2**31):
         if offset & (2**31):
-            large_offset_pos = (
-                self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
-            )
-            offset = unpack_from(">Q", self._contents, large_offset_pos)[0]
-            assert isinstance(offset, int)
+            offset = self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
+            offset = unpack_from(">Q", self._contents, offset)[0]
         return offset
         return offset
 
 
     def _unpack_crc32_checksum(self, i: int) -> int:
     def _unpack_crc32_checksum(self, i: int) -> int:
-        result = unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
-        assert isinstance(result, int)
-        return result
+        return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+
+    def get_pack_checksum(self) -> bytes:
+        """Return the checksum stored for the corresponding packfile.
+
+        Returns: binary digest (always 20 bytes - SHA1)
+        """
+        # Pack checksums are always SHA1, even in SHA256 repositories
+        return bytes(self._contents[-40:-20])
 
 
 
 
 class PackIndex3(FilePackIndex):
 class PackIndex3(FilePackIndex):
@@ -3172,6 +3207,8 @@ def write_pack_index_v1(
         f.write(struct.pack(">L", fan_out_table[i]))
         f.write(struct.pack(">L", fan_out_table[i]))
         fan_out_table[i + 1] += fan_out_table[i]
         fan_out_table[i + 1] += fan_out_table[i]
     for name, offset, _entry_checksum in entries:
     for name, offset, _entry_checksum in entries:
+        if len(name) != 20:
+            raise TypeError("pack index v1 only supports SHA-1 names")
         if not (offset <= 0xFFFFFFFF):
         if not (offset <= 0xFFFFFFFF):
             raise TypeError("pack format 1 only supports offsets < 2Gb")
             raise TypeError("pack format 1 only supports offsets < 2Gb")
         f.write(struct.pack(">L20s", offset, name))
         f.write(struct.pack(">L20s", offset, name))
@@ -3250,11 +3287,11 @@ def _create_delta_py(base_buf: bytes, target_buf: bytes) -> Iterator[bytes]:
             o = j1
             o = j1
             while s > 127:
             while s > 127:
                 yield bytes([127])
                 yield bytes([127])
-                yield bytes(memoryview(target_buf)[o : o + 127])
+                yield memoryview(target_buf)[o : o + 127]
                 s -= 127
                 s -= 127
                 o += 127
                 o += 127
             yield bytes([s])
             yield bytes([s])
-            yield bytes(memoryview(target_buf)[o : o + s])
+            yield memoryview(target_buf)[o : o + s]
 
 
 
 
 # Default to pure Python implementation
 # Default to pure Python implementation
@@ -3357,12 +3394,20 @@ def write_pack_index_v2(
     fan_out_table: dict[int, int] = defaultdict(lambda: 0)
     fan_out_table: dict[int, int] = defaultdict(lambda: 0)
     for name, offset, entry_checksum in entries:
     for name, offset, entry_checksum in entries:
         fan_out_table[ord(name[:1])] += 1
         fan_out_table[ord(name[:1])] += 1
+    try:
+        hash_size = len(next(iter(entries))[0])
+    except StopIteration:
+        hash_size = 20  # Default to SHA-1 size if no entries
     # Fan-out table
     # Fan-out table
     largetable: list[int] = []
     largetable: list[int] = []
     for i in range(0x100):
     for i in range(0x100):
         f.write(struct.pack(b">L", fan_out_table[i]))
         f.write(struct.pack(b">L", fan_out_table[i]))
         fan_out_table[i + 1] += fan_out_table[i]
         fan_out_table[i + 1] += fan_out_table[i]
     for name, offset, entry_checksum in entries:
     for name, offset, entry_checksum in entries:
+        if len(name) != hash_size:
+            raise TypeError(
+                f"Object name has wrong length: expected {hash_size}, got {len(name)}"
+            )
         f.write(name)
         f.write(name)
     for name, offset, entry_checksum in entries:
     for name, offset, entry_checksum in entries:
         f.write(struct.pack(b">L", entry_checksum))
         f.write(struct.pack(b">L", entry_checksum))
@@ -3512,6 +3557,7 @@ class Pack:
         depth: int | None = None,
         depth: int | None = None,
         threads: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
         big_file_threshold: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
     ) -> None:
         """Initialize a Pack object.
         """Initialize a Pack object.
 
 
@@ -3524,6 +3570,7 @@ class Pack:
           depth: Maximum depth for delta chains
           depth: Maximum depth for delta chains
           threads: Number of threads to use for operations
           threads: Number of threads to use for operations
           big_file_threshold: Size threshold for big file handling
           big_file_threshold: Size threshold for big file handling
+          hash_algorithm: Hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
         """
         """
         self._basename = basename
         self._basename = basename
         self._data = None
         self._data = None
@@ -3549,21 +3596,25 @@ class Pack:
         )
         )
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self.resolve_ext_ref = resolve_ext_ref
         self.resolve_ext_ref = resolve_ext_ref
+        self.hash_algorithm = (
+            hash_algorithm if hash_algorithm is not None else DEFAULT_HASH_ALGORITHM
+        )
 
 
     @classmethod
     @classmethod
     def from_lazy_objects(
     def from_lazy_objects(
-        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex]
+        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex],
+        hash_algorithm: int | None = None
     ) -> "Pack":
     ) -> "Pack":
         """Create a new pack object from callables to load pack data and index objects."""
         """Create a new pack object from callables to load pack data and index objects."""
-        ret = cls("")
+        ret = cls("", hash_algorithm=hash_algorithm)
         ret._data_load = data_fn
         ret._data_load = data_fn
         ret._idx_load = idx_fn
         ret._idx_load = idx_fn
         return ret
         return ret
 
 
     @classmethod
     @classmethod
-    def from_objects(cls, data: PackData, idx: PackIndex) -> "Pack":
+    def from_objects(cls, data: PackData, idx: PackIndex, hash_algorithm: int | None = None) -> "Pack":
         """Create a new pack object from pack data and index objects."""
         """Create a new pack object from pack data and index objects."""
-        ret = cls("")
+        ret = cls("", hash_algorithm=hash_algorithm)
         ret._data = data
         ret._data = data
         ret._data_load = None
         ret._data_load = None
         ret._idx = idx
         ret._idx = idx

+ 5 - 3
dulwich/refs.py

@@ -1000,7 +1000,7 @@ class DiskRefsContainer(RefsContainer):
         """Read a reference file and return its contents.
         """Read a reference file and return its contents.
 
 
         If the reference file a symbolic reference, only read the first line of
         If the reference file a symbolic reference, only read the first line of
-        the file. Otherwise, only read the first 40 bytes.
+        the file. Otherwise, read the hash (40 bytes for SHA1, 64 bytes for SHA256).
 
 
         Args:
         Args:
           name: the refname to read, relative to refpath
           name: the refname to read, relative to refpath
@@ -1018,8 +1018,10 @@ class DiskRefsContainer(RefsContainer):
                     # Read only the first line
                     # Read only the first line
                     return header + next(iter(f)).rstrip(b"\r\n")
                     return header + next(iter(f)).rstrip(b"\r\n")
                 else:
                 else:
-                    # Read only the first 40 bytes
-                    return header + f.read(40 - len(SYMREF))
+                    # Read the entire line to get the full hash (handles both SHA1 and SHA256)
+                    f.seek(0)
+                    line = f.readline().rstrip(b"\r\n")
+                    return line
         except (OSError, UnicodeError):
         except (OSError, UnicodeError):
             # don't assume anything specific about the error; in
             # don't assume anything specific about the error; in
             # particular, invalid or forbidden paths can raise weird
             # particular, invalid or forbidden paths can raise weird

+ 96 - 5
dulwich/repo.py

@@ -515,6 +515,7 @@ class BaseRepo:
 
 
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self.hooks: dict[str, Hook] = {}
         self.hooks: dict[str, Hook] = {}
+        self._hash_algorithm = None  # Cached hash algorithm
 
 
     def _determine_file_mode(self) -> bool:
     def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
         """Probe the file-system to determine whether permissions can be trusted.
@@ -537,6 +538,7 @@ class BaseRepo:
         symlinks: bool | None = None,
         symlinks: bool | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> None:
     ) -> None:
         """Initialize a default set of named files."""
         """Initialize a default set of named files."""
         from .config import ConfigFile
         from .config import ConfigFile
@@ -544,11 +546,30 @@ class BaseRepo:
         self._put_named_file("description", b"Unnamed repository")
         self._put_named_file("description", b"Unnamed repository")
         f = BytesIO()
         f = BytesIO()
         cf = ConfigFile()
         cf = ConfigFile()
-        if format is None:
-            format = 0
+
+        # Determine the appropriate format version
+        if object_format == "sha256":
+            # SHA256 requires format version 1
+            if format is None:
+                format = 1
+            elif format != 1:
+                raise ValueError(
+                    "SHA256 object format requires repository format version 1"
+                )
+        else:
+            # SHA1 (default) can use format 0 or 1
+            if format is None:
+                format = 0
+
         if format not in (0, 1):
         if format not in (0, 1):
             raise ValueError(f"Unsupported repository format version: {format}")
             raise ValueError(f"Unsupported repository format version: {format}")
+
         cf.set("core", "repositoryformatversion", str(format))
         cf.set("core", "repositoryformatversion", str(format))
+
+        # Set object format extension if using SHA256
+        if object_format == "sha256":
+            cf.set("extensions", "objectformat", "sha256")
+
         if self._determine_file_mode():
         if self._determine_file_mode():
             cf.set("core", "filemode", True)
             cf.set("core", "filemode", True)
         else:
         else:
@@ -574,6 +595,19 @@ class BaseRepo:
         self._put_named_file("config", f.getvalue())
         self._put_named_file("config", f.getvalue())
         self._put_named_file(os.path.join("info", "exclude"), b"")
         self._put_named_file(os.path.join("info", "exclude"), b"")
 
 
+        # Allow subclasses to handle config initialization
+        self._init_config(cf)
+
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration.
+
+        This method can be overridden by subclasses to handle config initialization.
+
+        Args:
+            config: The ConfigFile object that was just created
+        """
+        # Default implementation does nothing
+
     def get_named_file(self, path: str) -> BinaryIO | None:
     def get_named_file(self, path: str) -> BinaryIO | None:
         """Get a file from the control dir with a specific name.
         """Get a file from the control dir with a specific name.
 
 
@@ -912,6 +946,42 @@ class BaseRepo:
         """
         """
         raise NotImplementedError(self.get_config)
         raise NotImplementedError(self.get_config)
 
 
+    def get_hash_algorithm(self):
+        """Get the hash algorithm used by this repository.
+
+        Returns: HashAlgorithm instance (SHA1 or SHA256)
+        """
+        if self._hash_algorithm is None:
+            from .hash import get_hash_algorithm
+
+            # Check if repository uses SHA256
+            try:
+                config = self.get_config()
+                try:
+                    version = int(config.get(("core",), "repositoryformatversion"))
+                except KeyError:
+                    version = 0  # Default version is 0
+
+                if version == 1:
+                    # Check for SHA256 extension
+                    try:
+                        object_format = config.get(("extensions",), "objectformat")
+                        if object_format == b"sha256":
+                            self._hash_algorithm = get_hash_algorithm("sha256")
+                        else:
+                            self._hash_algorithm = get_hash_algorithm("sha1")
+                    except KeyError:
+                        # No objectformat extension, default to SHA1
+                        self._hash_algorithm = get_hash_algorithm("sha1")
+                else:
+                    # Version 0 always uses SHA1
+                    self._hash_algorithm = get_hash_algorithm("sha1")
+            except (KeyError, ValueError):
+                # If we can't read config, default to SHA1
+                self._hash_algorithm = get_hash_algorithm("sha1")
+
+        return self._hash_algorithm
+
     def get_worktree_config(self) -> "ConfigFile":
     def get_worktree_config(self) -> "ConfigFile":
         """Retrieve the worktree config object."""
         """Retrieve the worktree config object."""
         raise NotImplementedError(self.get_worktree_config)
         raise NotImplementedError(self.get_worktree_config)
@@ -1103,7 +1173,7 @@ class BaseRepo:
         """
         """
         if not isinstance(name, bytes):
         if not isinstance(name, bytes):
             raise TypeError(f"'name' must be bytestring, not {type(name).__name__:.80}")
             raise TypeError(f"'name' must be bytestring, not {type(name).__name__:.80}")
-        if len(name) in (20, 40):
+        if len(name) in (20, 32, 40, 64):  # Support both SHA1 and SHA256
             try:
             try:
                 # Try as ObjectID/RawObjectID
                 # Try as ObjectID/RawObjectID
                 return self.object_store[
                 return self.object_store[
@@ -1424,7 +1494,7 @@ class Repo(BaseRepo):
                     has_reftable_extension = True
                     has_reftable_extension = True
                 else:
                 else:
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
-            elif extension.lower() not in (b"worktreeconfig",):
+            elif extension.lower() not in (b"worktreeconfig", b"objectformat"):
                 raise UnsupportedExtension(extension.decode("utf-8"))
                 raise UnsupportedExtension(extension.decode("utf-8"))
 
 
         if object_store is None:
         if object_store is None:
@@ -2055,6 +2125,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         symlinks: bool | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
     ) -> "Repo":
         path = os.fspath(path)
         path = os.fspath(path)
         if isinstance(path, bytes):
         if isinstance(path, bytes):
@@ -2077,10 +2148,17 @@ class Repo(BaseRepo):
                 os.chmod(dir_path, dir_mode)
                 os.chmod(dir_path, dir_mode)
 
 
         if object_store is None:
         if object_store is None:
+            # Get hash algorithm for object store
+            from .hash import get_hash_algorithm
+
+            hash_alg = get_hash_algorithm(
+                "sha256" if object_format == "sha256" else "sha1"
+            )
             object_store = DiskObjectStore.init(
             object_store = DiskObjectStore.init(
                 os.path.join(controldir, OBJECTDIR),
                 os.path.join(controldir, OBJECTDIR),
                 file_mode=file_mode,
                 file_mode=file_mode,
                 dir_mode=dir_mode,
                 dir_mode=dir_mode,
+                hash_algorithm=hash_alg,
             )
             )
         ret = cls(path, bare=bare, object_store=object_store)
         ret = cls(path, bare=bare, object_store=object_store)
         if default_branch is None:
         if default_branch is None:
@@ -2098,6 +2176,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             symlinks=symlinks,
             format=format,
             format=format,
             shared_repository=shared_repository,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         )
         return ret
         return ret
 
 
@@ -2112,6 +2191,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         symlinks: bool | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
     ) -> "Repo":
         """Create a new repository.
         """Create a new repository.
 
 
@@ -2123,6 +2203,7 @@ class Repo(BaseRepo):
           symlinks: Whether to support symlinks
           symlinks: Whether to support symlinks
           format: Repository format version (defaults to 0)
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: `Repo` instance
         Returns: `Repo` instance
         """
         """
         path = os.fspath(path)
         path = os.fspath(path)
@@ -2142,6 +2223,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             symlinks=symlinks,
             format=format,
             format=format,
             shared_repository=shared_repository,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         )
 
 
     @classmethod
     @classmethod
@@ -2213,6 +2295,7 @@ class Repo(BaseRepo):
         default_branch: bytes | None = None,
         default_branch: bytes | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
     ) -> "Repo":
         """Create a new bare repository.
         """Create a new bare repository.
 
 
@@ -2226,6 +2309,7 @@ class Repo(BaseRepo):
           default_branch: Default branch name
           default_branch: Default branch name
           format: Repository format version (defaults to 0)
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: a `Repo` instance
         Returns: a `Repo` instance
         """
         """
         path = os.fspath(path)
         path = os.fspath(path)
@@ -2242,6 +2326,7 @@ class Repo(BaseRepo):
             default_branch=default_branch,
             default_branch=default_branch,
             format=format,
             format=format,
             shared_repository=shared_repository,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         )
 
 
     create = init_bare
     create = init_bare
@@ -2551,6 +2636,10 @@ class MemoryRepo(BaseRepo):
         """
         """
         raise NoIndexPresent
         raise NoIndexPresent
 
 
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration for MemoryRepo."""
+        self._config = config
+
     def get_config(self) -> "ConfigFile":
     def get_config(self) -> "ConfigFile":
         """Retrieve the config object.
         """Retrieve the config object.
 
 
@@ -2739,6 +2828,7 @@ class MemoryRepo(BaseRepo):
         objects: Iterable[ShaFile],
         objects: Iterable[ShaFile],
         refs: Mapping[Ref, ObjectID],
         refs: Mapping[Ref, ObjectID],
         format: int | None = None,
         format: int | None = None,
+        object_format: str | None = None,
     ) -> "MemoryRepo":
     ) -> "MemoryRepo":
         """Create a new bare repository in memory.
         """Create a new bare repository in memory.
 
 
@@ -2748,11 +2838,12 @@ class MemoryRepo(BaseRepo):
           refs: Refs as dictionary, mapping names
           refs: Refs as dictionary, mapping names
             to object SHA1s
             to object SHA1s
           format: Repository format version (defaults to 0)
           format: Repository format version (defaults to 0)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         """
         """
         ret = cls()
         ret = cls()
         for obj in objects:
         for obj in objects:
             ret.object_store.add_object(obj)
             ret.object_store.add_object(obj)
         for refname, sha in refs.items():
         for refname, sha in refs.items():
             ret.refs.add_if_new(refname, sha)
             ret.refs.add_if_new(refname, sha)
-        ret._init_files(bare=True, format=format)
+        ret._init_files(bare=True, format=format, object_format=object_format)
         return ret
         return ret

+ 1 - 1
dulwich/tests/utils.py

@@ -114,7 +114,7 @@ def make_object(cls: type[T], **attrs: Any) -> T:
         if name == "id":
         if name == "id":
             # id property is read-only, so we overwrite sha instead.
             # id property is read-only, so we overwrite sha instead.
             sha = FixedSha(value)
             sha = FixedSha(value)
-            obj.sha = lambda: sha
+            obj.sha = lambda hash_algorithm=None: sha
         else:
         else:
             setattr(obj, name, value)
             setattr(obj, name, value)
     return obj
     return obj

+ 2 - 0
tests/compat/__init__.py

@@ -41,6 +41,8 @@ def test_suite() -> unittest.TestSuite:
         "reftable",
         "reftable",
         "repository",
         "repository",
         "server",
         "server",
+        "sha256",
+        "sha256_packs",
         "utils",
         "utils",
         "web",
         "web",
     ]
     ]

+ 367 - 0
tests/compat/test_sha256.py

@@ -0,0 +1,367 @@
+# test_sha256.py -- Compatibility tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 support with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.hash import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class GitSHA256CompatibilityTests(CompatTestCase):
+    """Test SHA256 compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_sha256_repo_creation_compat(self):
+        """Test that dulwich-created SHA256 repos are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Add a blob and tree using dulwich
+        blob = Blob.from_string(b"Hello SHA256 world!")
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.get_id(SHA256))
+
+        # Create objects in the repository
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+
+        repo.close()
+
+        # Verify git can read the repository
+        config_output = self._run_git(
+            ["config", "--get", "extensions.objectformat"], cwd=repo_path
+        )
+        self.assertEqual(config_output.strip(), b"sha256")
+
+        # Verify git recognizes it as a SHA256 repository
+        rev_parse_output = self._run_git(
+            ["rev-parse", "--show-object-format"], cwd=repo_path
+        )
+        self.assertEqual(rev_parse_output.strip(), b"sha256")
+
+    def test_git_created_sha256_repo_readable(self):
+        """Test that git-created SHA256 repos are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a file and commit with git
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("Test SHA256 content")
+
+        self._run_git(["add", "test.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test SHA256 commit"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Verify dulwich detects SHA256
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+        # Verify dulwich can read objects
+        # Try both main and master branches (git default changed over time)
+        try:
+            head_ref = repo.refs[b"refs/heads/main"]
+        except KeyError:
+            head_ref = repo.refs[b"refs/heads/master"]
+        self.assertEqual(len(head_ref), 64)  # SHA256 length
+
+        # Read the commit object
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+        self.assertEqual(len(commit.tree), 64)  # SHA256 tree ID
+
+        repo.close()
+
+    def test_object_hashing_consistency(self):
+        """Test that object hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file with known content
+        test_content = b"Test content for SHA256 hashing consistency"
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Get git's hash for the content
+        git_hash = self._run_git(["hash-object", "test.txt"], cwd=repo_path)
+        git_hash = git_hash.strip().decode("ascii")
+
+        # Create same blob with dulwich
+        blob = Blob.from_string(test_content)
+        dulwich_hash = blob.get_id(SHA256).decode("ascii")
+
+        # Hashes should match
+        self.assertEqual(git_hash, dulwich_hash)
+
+    def test_tree_hashing_consistency(self):
+        """Test that tree hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file and add to index
+        test_content = b"Tree test content"
+        test_file = os.path.join(repo_path, "tree_test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        self._run_git(["add", "tree_test.txt"], cwd=repo_path)
+
+        # Get git's tree hash
+        git_tree_hash = self._run_git(["write-tree"], cwd=repo_path)
+        git_tree_hash = git_tree_hash.strip().decode("ascii")
+
+        # Create same tree with dulwich
+        blob = Blob.from_string(test_content)
+        tree = Tree()
+        tree.add(b"tree_test.txt", 0o100644, blob.get_id(SHA256))
+
+        dulwich_tree_hash = tree.get_id(SHA256).decode("ascii")
+
+        # Tree hashes should match
+        self.assertEqual(git_tree_hash, dulwich_tree_hash)
+
+    def test_commit_creation_interop(self):
+        """Test commit creation interoperability between dulwich and git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects with dulwich
+        blob = Blob.from_string(b"Interop test content")
+        tree = Tree()
+        tree.add(b"interop.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test SHA256 commit from dulwich"
+
+        # Add objects to repo
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        # Update HEAD
+        commit_id = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = commit_id
+        repo.close()
+
+        # Verify git can read the commit
+        commit_hash = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        commit_hash = commit_hash.strip().decode("ascii")
+        self.assertEqual(len(commit_hash), 64)  # SHA256 length
+
+        # Verify git can show the commit
+        commit_message = self._run_git(["log", "--format=%s", "-n", "1"], cwd=repo_path)
+        self.assertEqual(commit_message.strip(), b"Test SHA256 commit from dulwich")
+
+        # Verify git can list the tree
+        tree_content = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"interop.txt", tree_content)
+
+    def test_ref_updates_interop(self):
+        """Test that ref updates work between dulwich and git."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial commit with git
+        test_file = os.path.join(repo_path, "initial.txt")
+        with open(test_file, "w") as f:
+            f.write("Initial content")
+
+        self._run_git(["add", "initial.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=repo_path)
+
+        initial_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        initial_commit = initial_commit.strip()
+
+        # Update ref with dulwich
+        repo = Repo(repo_path)
+
+        # Create new commit with dulwich
+        blob = Blob.from_string(b"New content from dulwich")
+        tree = Tree()
+        tree.add(b"dulwich.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.parents = [initial_commit]
+        commit.author = commit.committer = b"Dulwich User <dulwich@example.com>"
+        commit.commit_time = commit.author_time = 1234567891
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Commit from dulwich"
+
+        # Add objects and update ref
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        new_commit_hash = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = new_commit_hash
+        repo.close()
+
+        # Verify git sees the update
+        current_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        current_commit = current_commit.strip().decode("ascii")
+        self.assertEqual(current_commit, new_commit_hash.decode("ascii"))
+
+        # Verify git can access the new tree
+        tree_listing = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"dulwich.txt", tree_listing)
+
+    def test_clone_sha256_repo_git_to_dulwich(self):
+        """Test cloning a git SHA256 repository with dulwich."""
+        # Create source repo with git
+        source_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(source_path))
+        self._run_git(["init", "--object-format=sha256", source_path])
+
+        # Add content
+        test_file = os.path.join(source_path, "clone_test.txt")
+        with open(test_file, "w") as f:
+            f.write("Content to be cloned")
+
+        self._run_git(["add", "clone_test.txt"], cwd=source_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=source_path)
+
+        # Clone with dulwich
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(target_path))
+
+        target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
+
+        # Copy objects (simplified clone)
+        source_repo = Repo(source_path)
+
+        # Copy all objects
+        for obj_id in source_repo.object_store:
+            obj = source_repo.object_store[obj_id]
+            target_repo.object_store.add_object(obj)
+
+        # Copy refs
+        for ref_name in source_repo.refs.keys():
+            ref_id = source_repo.refs[ref_name]
+            target_repo.refs[ref_name] = ref_id
+
+        # Set HEAD
+        target_repo.refs.set_symbolic_ref(b"HEAD", b"refs/heads/master")
+
+        source_repo.close()
+        target_repo.close()
+
+        # Verify with git
+        output = self._run_git(["rev-parse", "--show-object-format"], cwd=target_path)
+        self.assertEqual(output.strip(), b"sha256")
+
+        # Verify content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=target_path)
+        cloned_file = os.path.join(target_path, "clone_test.txt")
+        with open(cloned_file) as f:
+            content = f.read()
+        self.assertEqual(content, "Content to be cloned")
+
+    def test_fsck_sha256_repo(self):
+        """Test that git fsck works on dulwich-created SHA256 repos."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a more complex object graph
+        # Multiple blobs
+        blobs = []
+        for i in range(5):
+            blob = Blob.from_string(f"Blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Multiple trees
+        subtree = Tree()
+        subtree.add(b"subfile1.txt", 0o100644, blobs[0].get_id(SHA256))
+        subtree.add(b"subfile2.txt", 0o100644, blobs[1].get_id(SHA256))
+        repo.object_store.add_object(subtree)
+
+        main_tree = Tree()
+        main_tree.add(b"file1.txt", 0o100644, blobs[2].get_id(SHA256))
+        main_tree.add(b"file2.txt", 0o100644, blobs[3].get_id(SHA256))
+        main_tree.add(b"subdir", 0o040000, subtree.get_id(SHA256))
+        repo.object_store.add_object(main_tree)
+
+        # Create commits
+        commit1 = Commit()
+        commit1.tree = main_tree.get_id(SHA256)
+        commit1.author = commit1.committer = b"Test <test@example.com>"
+        commit1.commit_time = commit1.author_time = 1234567890
+        commit1.commit_timezone = commit1.author_timezone = 0
+        commit1.message = b"First commit"
+        repo.object_store.add_object(commit1)
+
+        commit2 = Commit()
+        commit2.tree = main_tree.get_id(SHA256)
+        commit2.parents = [commit1.get_id(SHA256)]
+        commit2.author = commit2.committer = b"Test <test@example.com>"
+        commit2.commit_time = commit2.author_time = 1234567891
+        commit2.commit_timezone = commit2.author_timezone = 0
+        commit2.message = b"Second commit"
+        repo.object_store.add_object(commit2)
+
+        # Set refs
+        repo.refs[b"refs/heads/master"] = commit2.get_id(SHA256)
+        repo.refs[b"refs/heads/branch1"] = commit1.get_id(SHA256)
+
+        repo.close()
+
+        # Run git fsck
+        fsck_output = self._run_git(["fsck", "--full"], cwd=repo_path)
+        # fsck should not report any errors (empty output or success message)
+        self.assertNotIn(b"error", fsck_output.lower())
+        self.assertNotIn(b"missing", fsck_output.lower())
+        self.assertNotIn(b"broken", fsck_output.lower())

+ 330 - 0
tests/compat/test_sha256_packs.py

@@ -0,0 +1,330 @@
+# test_sha256_packs.py -- Compatibility tests for SHA256 pack files
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 pack files with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.hash import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.pack import load_pack_index_file
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class GitSHA256PackCompatibilityTests(CompatTestCase):
+    """Test SHA256 pack file compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_git_pack_readable_by_dulwich(self):
+        """Test that git-created SHA256 pack files are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create multiple files to ensure pack creation
+        for i in range(20):
+            test_file = os.path.join(repo_path, f"file{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Content for file {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
+
+        # Force pack creation
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Open with dulwich
+        repo = Repo(repo_path)
+        self.assertEqual(repo.get_hash_algorithm(), SHA256)
+
+        # Find pack files
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0, "No pack files created")
+
+        # Read pack with dulwich
+        for pack_file in pack_files:
+            pack_path = os.path.join(pack_dir, pack_file)
+            idx_path = pack_path[:-5] + ".idx"
+
+            # Load pack index with SHA256 algorithm
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(
+                    idx_path, f, hash_algorithm=repo.get_hash_algorithm()
+                )
+
+            # Verify it's detected as SHA256
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate objects
+            obj_count = 0
+            for sha, offset, crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)  # SHA256
+                obj_count += 1
+
+            self.assertGreater(obj_count, 20)  # At least our files + trees + commit
+
+        # Verify we can read all objects through the repo interface
+        head_ref = repo.refs[b"refs/heads/master"]
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+
+        # Read the tree
+        tree = repo[commit.tree]
+        self.assertIsInstance(tree, Tree)
+
+        # Verify all files are there
+        file_count = 0
+        for name, mode, sha in tree.items():
+            if name.startswith(b"file") and name.endswith(b".txt"):
+                file_count += 1
+                # Read the blob
+                blob = repo[sha]
+                self.assertIsInstance(blob, Blob)
+
+        self.assertEqual(file_count, 20)
+        repo.close()
+
+    def test_dulwich_objects_readable_by_git(self):
+        """Test that dulwich-created SHA256 objects are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects
+        blobs = []
+        for i in range(10):
+            blob = Blob.from_string(f"Dulwich blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Create a tree with all blobs
+        tree = Tree()
+        for i, blob in enumerate(blobs):
+            tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+        repo.object_store.add_object(tree)
+
+        # Create a commit
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit with blobs"
+        repo.object_store.add_object(commit)
+
+        # Update HEAD
+        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        repo.close()
+
+        # Verify git can read all objects
+        output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        self.assertEqual(len(output.strip()), 64)  # SHA256
+
+        # List tree contents
+        tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        # Count lines instead of occurrences of "blob" since "blob" appears twice per line
+        lines = tree_output.strip().split(b"\n")
+        self.assertEqual(len(lines), 10)
+
+        # Verify git can check out the content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
+
+        # Verify files exist with correct content
+        for i in range(10):
+            file_path = os.path.join(repo_path, f"blob{i}.txt")
+            self.assertTrue(os.path.exists(file_path))
+            with open(file_path, "rb") as f:
+                content = f.read()
+                self.assertEqual(content, f"Dulwich blob content {i}".encode())
+
+    def test_pack_index_v1_interop(self):
+        """Test pack index v1 interoperability with SHA256."""
+        # Create repo with git using pack index v1
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+        self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
+
+        # Create files
+        for i in range(10):
+            test_file = os.path.join(repo_path, f"v1test{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Pack v1 test {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test pack v1"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Find pack index
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        idx_files = [f for f in os.listdir(pack_dir) if f.endswith(".idx")]
+
+        for idx_file in idx_files:
+            idx_path = os.path.join(pack_dir, idx_file)
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(
+                    idx_path, f, hash_algorithm=repo.get_hash_algorithm()
+                )
+
+            # Verify it's v1 with SHA256
+            self.assertEqual(pack_idx.version, 1)
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate
+            for sha, offset, crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)
+                self.assertIsNone(crc32)  # v1 doesn't store CRC32
+
+        repo.close()
+
+    def test_large_pack_interop(self):
+        """Test large pack file interoperability."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a large file that will use delta compression
+        large_content = b"A" * 10000
+        blobs = []
+
+        # Create similar blobs to trigger delta compression
+        for i in range(10):
+            content = large_content + f" variation {i}".encode()
+            blob = Blob.from_string(content)
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Create tree
+        tree = Tree()
+        for i, blob in enumerate(blobs):
+            tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+        repo.object_store.add_object(tree)
+
+        # Create commit
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Large files for delta compression test"
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        repo.close()
+
+        # Run git gc to create packs with delta compression
+        self._run_git(["gc", "--aggressive"], cwd=repo_path)
+
+        # Verify git created a pack
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0)
+
+        # Re-open with dulwich and verify we can read everything
+        repo = Repo(repo_path)
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+        tree = repo[commit.tree]
+
+        # Read all blobs
+        for i in range(10):
+            name = f"large{i}.txt".encode()
+            mode, sha = tree[name]
+            blob = repo[sha]
+            expected = large_content + f" variation {i}".encode()
+            self.assertEqual(blob.data, expected)
+
+        repo.close()
+
+    def test_mixed_loose_packed_objects(self):
+        """Test repositories with both loose and packed objects."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial objects that will be packed
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"packed{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will be packed {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Create more objects that will remain loose
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"loose{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will stay loose {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
+
+        # Open with dulwich
+        repo = Repo(repo_path)
+
+        # Count objects in packs vs loose
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
+        self.assertGreater(pack_count, 0)
+
+        # Verify we can read all objects
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+
+        # Walk the commit history
+        commit_count = 0
+        while commit.parents:
+            commit_count += 1
+            tree = repo[commit.tree]
+            # Verify we can read the tree
+            self.assertGreater(len(tree), 0)
+
+            if commit.parents:
+                commit = repo[commit.parents[0]]
+            else:
+                break
+
+        self.assertEqual(commit_count, 1)  # Only commits with parents are counted, so 1 of the 2 commits
+        repo.close()
+
+
+if __name__ == "__main__":
+    import unittest
+
+    unittest.main()

+ 213 - 0
tests/test_sha256.py

@@ -0,0 +1,213 @@
+# test_sha256.py -- Tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 support in Dulwich."""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from dulwich.hash import SHA1, SHA256, get_hash_algorithm
+from dulwich.objects import Blob, Tree, valid_hexsha, zero_sha_for
+from dulwich.repo import MemoryRepo, Repo
+
+
+class HashAlgorithmTests(unittest.TestCase):
+    """Tests for the hash algorithm abstraction."""
+
+    def test_sha1_properties(self):
+        """Test SHA1 algorithm properties."""
+        alg = SHA1
+        self.assertEqual(alg.name, "sha1")
+        self.assertEqual(alg.oid_length, 20)
+        self.assertEqual(alg.hex_length, 40)
+        self.assertEqual(len(alg.zero_oid), 40)
+        self.assertEqual(len(alg.zero_oid_bin), 20)
+
+    def test_sha256_properties(self):
+        """Test SHA256 algorithm properties."""
+        alg = SHA256
+        self.assertEqual(alg.name, "sha256")
+        self.assertEqual(alg.oid_length, 32)
+        self.assertEqual(alg.hex_length, 64)
+        self.assertEqual(len(alg.zero_oid), 64)
+        self.assertEqual(len(alg.zero_oid_bin), 32)
+
+    def test_get_hash_algorithm(self):
+        """Test getting hash algorithms by name."""
+        self.assertEqual(get_hash_algorithm("sha1"), SHA1)
+        self.assertEqual(get_hash_algorithm("sha256"), SHA256)
+        self.assertEqual(get_hash_algorithm(None), SHA1)  # Default
+
+        with self.assertRaises(ValueError):
+            get_hash_algorithm("invalid")
+
+
+class ObjectHashingTests(unittest.TestCase):
+    """Tests for object hashing with different algorithms."""
+
+    def test_blob_sha1(self):
+        """Test blob hashing with SHA1."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Default should be SHA1
+        sha1_id = blob.id
+        self.assertEqual(len(sha1_id), 40)
+        self.assertTrue(valid_hexsha(sha1_id))
+
+    def test_blob_sha256(self):
+        """Test blob hashing with SHA256."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Get SHA256 hash
+        sha256_id = blob.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+        self.assertTrue(valid_hexsha(sha256_id))
+
+        # SHA256 ID should be different from SHA1
+        sha1_id = blob.id
+        self.assertNotEqual(sha1_id, sha256_id)
+
+        # Verify .id property returns SHA1 for backward compatibility
+        self.assertEqual(blob.id, sha1_id)
+        self.assertEqual(blob.get_id(), sha1_id)  # Default should be SHA1
+
+    def test_tree_sha256(self):
+        """Test tree hashing with SHA256."""
+        tree = Tree()
+        tree.add(b"file.txt", 0o100644, b"a" * 40)  # SHA1 hex
+
+        # Get SHA1 (default)
+        sha1_id = tree.id
+        self.assertEqual(len(sha1_id), 40)
+
+        # Get SHA256
+        sha256_id = tree.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+
+        # Verify they're different
+        self.assertNotEqual(sha1_id, sha256_id)
+
+    def test_valid_hexsha(self):
+        """Test hex SHA validation for both algorithms."""
+        # Valid SHA1
+        self.assertTrue(valid_hexsha(b"1234567890abcdef1234567890abcdef12345678"))
+
+        # Valid SHA256
+        self.assertTrue(
+            valid_hexsha(
+                b"1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
+            )
+        )
+
+        # Invalid lengths
+        self.assertFalse(valid_hexsha(b"1234"))
+        self.assertFalse(
+            valid_hexsha(b"1234567890abcdef1234567890abcdef123456")
+        )  # 38 chars
+
+        # Invalid characters
+        self.assertFalse(valid_hexsha(b"123456789gabcdef1234567890abcdef12345678"))
+
+    def test_zero_sha_for(self):
+        """Test getting zero SHA for different algorithms."""
+        # Default (SHA1)
+        self.assertEqual(zero_sha_for(), b"0" * 40)
+        self.assertEqual(zero_sha_for(None), b"0" * 40)
+
+        # SHA1 explicit
+        self.assertEqual(zero_sha_for(SHA1), b"0" * 40)
+
+        # SHA256
+        self.assertEqual(zero_sha_for(SHA256), b"0" * 64)
+
+
+class RepositorySHA256Tests(unittest.TestCase):
+    """Tests for SHA256 repository support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_init_sha256_repo(self):
+        """Test initializing a SHA256 repository."""
+        repo_path = os.path.join(self.test_dir, "sha256_repo")
+        repo = Repo.init(repo_path, mkdir=True, object_format="sha256")
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"1")
+
+        # Check object format extension
+        self.assertEqual(config.get(("extensions",), "objectformat"), b"sha256")
+
+        # Check hash algorithm detection
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+        repo.close()
+
+    def test_init_sha1_repo(self):
+        """Test initializing a SHA1 repository (default)."""
+        repo_path = os.path.join(self.test_dir, "sha1_repo")
+        repo = Repo.init(repo_path, mkdir=True)
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"0")
+
+        # Object format extension should not exist
+        with self.assertRaises(KeyError):
+            config.get(("extensions",), "objectformat")
+
+        # Check hash algorithm detection
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA1)
+
+        repo.close()
+
+    def test_format_version_validation(self):
+        """Test format version validation for SHA256."""
+        repo_path = os.path.join(self.test_dir, "invalid_repo")
+
+        # SHA256 with format version 0 should fail
+        with self.assertRaises(ValueError) as cm:
+            Repo.init(repo_path, mkdir=True, format=0, object_format="sha256")
+        self.assertIn("SHA256", str(cm.exception))
+
+    def test_memory_repo_sha256(self):
+        """Test SHA256 support in memory repository."""
+        repo = MemoryRepo.init_bare([], {}, object_format="sha256")
+
+        # Check hash algorithm
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()

+ 128 - 0
tests/test_sha256_pack.py

@@ -0,0 +1,128 @@
+# test_sha256_pack.py -- Tests for SHA256 pack support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 pack support in Dulwich."""
+
+import shutil
+import tempfile
+import unittest
+from io import BytesIO
+
+from dulwich.hash import SHA256
+from dulwich.pack import (
+    load_pack_index_file,
+    write_pack_index_v2,
+)
+
+
+class SHA256PackTests(unittest.TestCase):
+    """Tests for SHA256 pack support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_pack_index_v2_with_sha256(self):
+        """Test that pack index v2 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually (simulating what would happen in a SHA256 repo)
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = i  # Fake CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Write SHA256 pack index with SHA1 pack checksum (Git always uses SHA1 for pack checksums)
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake pack data").digest()
+        write_pack_index_v2(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 2)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+    def test_pack_index_v1_with_sha256(self):
+        """Test that pack index v1 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test v1 object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = None  # v1 doesn't store CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Import write_pack_index_v1
+        from dulwich.pack import write_pack_index_v1
+
+        # Write SHA256 pack index v1 with SHA1 pack checksum
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake v1 pack data").digest()
+        write_pack_index_v1(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 1)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()