Explorar o código

Add SHA256 support

Jelmer Vernooij, 7 months ago
parent
commit
a7e3b91227

+ 8 - 2
NEWS

@@ -531,6 +531,13 @@ compatible.
 
 
  * Add ``gc`` command to ``dulwich.porcelain.`` (Jelmer Vernooij, #92)
  * Add ``gc`` command to ``dulwich.porcelain.`` (Jelmer Vernooij, #92)
 
 
+ * Add initial support for SHA256 repositories. Dulwich can now read and write Git
+   repositories using SHA256 object format. This includes support for loose
+   objects, pack files (v1 and v2 indexes), and tree parsing with SHA256 hashes.
+   The Rust extensions have been updated to support variable hash lengths.
+   SHA256 repositories require format version 1 and the objectFormat extension.
+   (Jelmer Vernooij, #1115)
+
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
    into loose objects in the repository. This command extracts all objects
    into loose objects in the repository. This command extracts all objects
    from a pack file and writes them to the object store as individual files.
    from a pack file and writes them to the object store as individual files.
@@ -550,8 +557,7 @@ compatible.
  * Add support for pack index format version 3. This format supports variable
  * Add support for pack index format version 3. This format supports variable
    hash sizes to enable future SHA-256 support. The implementation includes
    hash sizes to enable future SHA-256 support. The implementation includes
    reading and writing v3 indexes with proper hash algorithm identification
    reading and writing v3 indexes with proper hash algorithm identification
-   (1 for SHA-1, 2 for SHA-256). Note that SHA-256 support itself is not yet
-   implemented and will raise NotImplementedError. (Jelmer Vernooij)
+   (1 for SHA-1, 2 for SHA-256). (Jelmer Vernooij)
 
 
  * Fix ``LocalGitClient`` assertion error when fetching externally cloned repositories
  * Fix ``LocalGitClient`` assertion error when fetching externally cloned repositories
    into ``MemoryRepo``. Previously, the client would fail with an AssertionError
    into ``MemoryRepo``. Previously, the client would fail with an AssertionError

+ 41 - 10
crates/objects/src/lib.rs

@@ -49,15 +49,13 @@ fn sha_to_pyhex(py: Python, sha: &[u8]) -> PyResult<Py<PyAny>> {
     Ok(PyBytes::new(py, hexsha.as_slice()).into())
     Ok(PyBytes::new(py, hexsha.as_slice()).into())
 }
 }
 
 
-#[pyfunction]
-#[pyo3(signature = (text, strict=None))]
-fn parse_tree(
+fn parse_tree_with_length(
     py: Python,
     py: Python,
     mut text: &[u8],
     mut text: &[u8],
-    strict: Option<bool>,
-) -> PyResult<Vec<(Py<PyAny>, u32, Py<PyAny>)>> {
+    strict: bool,
+    hash_len: usize,
+) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
     let mut entries = Vec::new();
     let mut entries = Vec::new();
-    let strict = strict.unwrap_or(false);
     while !text.is_empty() {
     while !text.is_empty() {
         let mode_end = memchr(b' ', text)
         let mode_end = memchr(b' ', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
             .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
@@ -73,21 +71,54 @@ fn parse_tree(
         let namelen = memchr(b'\0', text)
         let namelen = memchr(b'\0', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
             .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
         let name = &text[..namelen];
         let name = &text[..namelen];
-        if namelen + 20 >= text.len() {
+
+        // Skip name and null terminator
+        text = &text[namelen + 1..];
+
+        // Check if we have enough bytes for the hash
+        if text.len() < hash_len {
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
         }
         }
-        text = &text[namelen + 1..];
-        let sha = &text[..20];
+
+        let sha = &text[..hash_len];
         entries.push((
         entries.push((
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             mode,
             mode,
             sha_to_pyhex(py, sha)?,
             sha_to_pyhex(py, sha)?,
         ));
         ));
-        text = &text[20..];
+        text = &text[hash_len..];
     }
     }
     Ok(entries)
     Ok(entries)
 }
 }
 
 
+#[pyfunction]
+#[pyo3(signature = (text, strict=None, hash_algorithm=None))]
+fn parse_tree(
+    py: Python,
+    text: &[u8],
+    strict: Option<bool>,
+    hash_algorithm: Option<PyObject>,
+) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
+    let strict = strict.unwrap_or(false);
+
+    // Determine hash length from hash_algorithm if provided
+    if let Some(algo) = hash_algorithm {
+        // Get oid_length attribute from hash algorithm object
+        let oid_length: usize = algo.getattr(py, "oid_length")?.extract(py)?;
+        parse_tree_with_length(py, text, strict, oid_length)
+    } else {
+        // Try to auto-detect by attempting to parse with both lengths
+        // We'll attempt to parse with SHA1 first (20 bytes), then SHA256 (32 bytes)
+        match parse_tree_with_length(py, text, strict, 20) {
+            Ok(entries) => Ok(entries),
+            Err(_) => {
+                // SHA1 failed, try SHA256
+                parse_tree_with_length(py, text, strict, 32)
+            }
+        }
+    }
+}
+
 fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
 fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
     let len = std::cmp::min(a.1.len(), b.1.len());
     let len = std::cmp::min(a.1.len(), b.1.len());
     let cmp = a.1[..len].cmp(&b.1[..len]);
     let cmp = a.1[..len].cmp(&b.1[..len]);

+ 8 - 5
crates/pack/src/lib.rs

@@ -30,8 +30,9 @@ pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
 fn py_is_sha(sha: &Py<PyAny>, py: Python) -> PyResult<bool> {
 fn py_is_sha(sha: &Py<PyAny>, py: Python) -> PyResult<bool> {
     // Check if the object is a bytes object
     // Check if the object is a bytes object
     if sha.bind(py).is_instance_of::<PyBytes>() {
     if sha.bind(py).is_instance_of::<PyBytes>() {
-        // Check if the bytes object has a size of 20
-        if sha.extract::<&[u8]>(py)?.len() == 20 {
+        // Check if the bytes object has a size of 20 (SHA1) or 32 (SHA256)
+        let len = sha.extract::<&[u8]>(py)?.len();
+        if len == 20 || len == 32 {
             Ok(true)
             Ok(true)
         } else {
         } else {
             Ok(false)
             Ok(false)
@@ -53,9 +54,11 @@ fn bisect_find_sha(
     let sha = sha.as_bytes(py);
     let sha = sha.as_bytes(py);
     let sha_len = sha.len();
     let sha_len = sha.len();
 
 
-    // Check if sha is 20 bytes long
-    if sha_len != 20 {
-        return Err(PyValueError::new_err("Sha is not 20 bytes long"));
+    // Check if sha is 20 bytes (SHA1) or 32 bytes (SHA256)
+    if sha_len != 20 && sha_len != 32 {
+        return Err(PyValueError::new_err(
+            "Sha must be 20 (SHA1) or 32 (SHA256) bytes long",
+        ));
     }
     }
 
 
     // Check if start > end
     // Check if start > end

+ 121 - 0
dulwich/hash.py

@@ -0,0 +1,121 @@
+# hash.py -- Hash algorithm abstraction layer for Git
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Hash algorithm abstraction for Git objects.
+
+This module provides an abstraction layer for different hash algorithms
+used in Git repositories (SHA-1 and SHA-256).
+"""
+
+from hashlib import sha1, sha256
+from typing import Callable, Optional
+
+
class HashAlgorithm:
    """Description of a hash algorithm usable for Git object IDs."""

    def __init__(
        self, name: str, oid_length: int, hex_length: int, hash_func: Callable
    ) -> None:
        """Initialize a hash algorithm.

        Args:
            name: Name of the algorithm (e.g., "sha1", "sha256")
            oid_length: Length of the binary object ID in bytes
            hex_length: Length of the hexadecimal object ID in characters
            hash_func: Hash function from hashlib
        """
        self.name = name
        self.oid_length = oid_length
        self.hex_length = hex_length
        self.hash_func = hash_func
        # Hex all-zero OID and its binary counterpart, precomputed once.
        self.zero_oid = b"0" * hex_length
        self.zero_oid_bin = b"\x00" * oid_length

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"HashAlgorithm({self.name!r})"

    def new_hash(self):
        """Create a new hash object for incremental hashing."""
        return self.hash_func()

    def hash_object(self, data: bytes) -> bytes:
        """Hash data and return the binary digest.

        Args:
            data: Data to hash

        Returns:
            Binary digest (oid_length bytes)
        """
        h = self.new_hash()
        h.update(data)
        return h.digest()

    def hash_object_hex(self, data: bytes) -> bytes:
        """Hash data and return the hexadecimal digest.

        Args:
            data: Data to hash

        Returns:
            Hexadecimal digest as ASCII bytes (hex_length characters)
        """
        h = self.new_hash()
        h.update(data)
        return h.hexdigest().encode("ascii")


# The two object formats Git supports.
SHA1 = HashAlgorithm("sha1", 20, 40, sha1)
SHA256 = HashAlgorithm("sha256", 32, 64, sha256)

# Map of algorithm names to HashAlgorithm instances
HASH_ALGORITHMS = {
    "sha1": SHA1,
    "sha256": SHA256,
}

# Default algorithm for backward compatibility
DEFAULT_HASH_ALGORITHM = SHA1


def get_hash_algorithm(name: Optional[str] = None) -> HashAlgorithm:
    """Get a hash algorithm by name.

    Args:
        name: Algorithm name ("sha1" or "sha256"), case-insensitive. Bytes
            are accepted and decoded as ASCII, since config values (e.g. the
            ``extensions.objectFormat`` setting) arrive as bytes. If None,
            returns the default (SHA-1).

    Returns:
        HashAlgorithm instance

    Raises:
        ValueError: If the algorithm name is not supported
    """
    if name is None:
        return DEFAULT_HASH_ALGORITHM
    if isinstance(name, bytes):
        # Accept raw config values directly instead of forcing callers
        # to decode first.
        name = name.decode("ascii")
    try:
        return HASH_ALGORITHMS[name.lower()]
    except KeyError:
        # Suppress the KeyError context: it adds no information for callers.
        raise ValueError(f"Unsupported hash algorithm: {name}") from None

+ 39 - 5
dulwich/object_store.py

@@ -1169,10 +1169,10 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         """
         """
         if name == ZERO_SHA:
         if name == ZERO_SHA:
             raise KeyError(name)
             raise KeyError(name)
-        if len(name) == 40:
+        if len(name) in (40, 64):  # Support both SHA1 (40) and SHA256 (64) hex
             sha = hex_to_sha(cast(ObjectID, name))
             sha = hex_to_sha(cast(ObjectID, name))
             hexsha = cast(ObjectID, name)
             hexsha = cast(ObjectID, name)
-        elif len(name) == 20:
+        elif len(name) in (20, 32):  # Support both SHA1 (20) and SHA256 (32) binary
             sha = cast(RawObjectID, name)
             sha = cast(RawObjectID, name)
             hexsha = None
             hexsha = None
         else:
         else:
@@ -1382,6 +1382,7 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_write_bitmap_lookup_table: bool = True,
         pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         file_mode: int | None = None,
         dir_mode: int | None = None,
         dir_mode: int | None = None,
+        hash_algorithm=None,
     ) -> None:
     ) -> None:
         """Open an object store.
         """Open an object store.
 
 
@@ -1402,6 +1403,7 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           file_mode: File permission mask for shared repository
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
+          hash_algorithm: Hash algorithm to use (SHA1 or SHA256)
         """
         """
         super().__init__(
         super().__init__(
             pack_compression_level=pack_compression_level,
             pack_compression_level=pack_compression_level,
@@ -1426,6 +1428,11 @@ class DiskObjectStore(PackBasedObjectStore):
         self.file_mode = file_mode
         self.file_mode = file_mode
         self.dir_mode = dir_mode
         self.dir_mode = dir_mode
 
 
+        # Import here to avoid circular dependency
+        from .hash import get_hash_algorithm
+
+        self.hash_algorithm = hash_algorithm if hash_algorithm else get_hash_algorithm()
+
         # Commit graph support - lazy loaded
         # Commit graph support - lazy loaded
         self._commit_graph = None
         self._commit_graph = None
         self._use_commit_graph = True  # Default to true
         self._use_commit_graph = True  # Default to true
@@ -1540,6 +1547,24 @@ class DiskObjectStore(PackBasedObjectStore):
                 (b"repack",), b"writeBitmaps", False
                 (b"repack",), b"writeBitmaps", False
             )
             )
 
 
+        # Get hash algorithm from config
+        from .hash import get_hash_algorithm
+
+        hash_algorithm = None
+        try:
+            try:
+                version = int(config.get((b"core",), b"repositoryformatversion"))
+            except KeyError:
+                version = 0
+            if version == 1:
+                try:
+                    object_format = config.get((b"extensions",), b"objectformat")
+                except KeyError:
+                    object_format = b"sha1"
+                hash_algorithm = get_hash_algorithm(object_format.decode("ascii"))
+        except (KeyError, ValueError):
+            pass
+
         instance = cls(
         instance = cls(
             path,
             path,
             loose_compression_level=loose_compression_level,
             loose_compression_level=loose_compression_level,
@@ -1557,6 +1582,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             file_mode=file_mode,
             dir_mode=dir_mode,
             dir_mode=dir_mode,
+            hash_algorithm=hash_algorithm,
         )
         )
         instance._use_commit_graph = use_commit_graph
         instance._use_commit_graph = use_commit_graph
         instance._use_midx = use_midx
         instance._use_midx = use_midx
@@ -1647,6 +1673,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     depth=self.pack_depth,
                     depth=self.pack_depth,
                     threads=self.pack_threads,
                     threads=self.pack_threads,
                     big_file_threshold=self.pack_big_file_threshold,
                     big_file_threshold=self.pack_big_file_threshold,
+                    hash_algorithm=self.hash_algorithm,
                 )
                 )
                 new_packs.append(pack)
                 new_packs.append(pack)
                 self._pack_cache[f] = pack
                 self._pack_cache[f] = pack
@@ -1698,7 +1725,9 @@ class DiskObjectStore(PackBasedObjectStore):
     def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
     def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
         path = self._get_shafile_path(sha)
         path = self._get_shafile_path(sha)
         try:
         try:
-            return ShaFile.from_path(path)
+            # Load the object from path with SHA for hash algorithm detection
+            # sha parameter here is already hex, so pass it directly
+            return ShaFile.from_path(path, sha)
         except FileNotFoundError:
         except FileNotFoundError:
             return None
             return None
 
 
@@ -1885,6 +1914,7 @@ class DiskObjectStore(PackBasedObjectStore):
             depth=self.pack_depth,
             depth=self.pack_depth,
             threads=self.pack_threads,
             threads=self.pack_threads,
             big_file_threshold=self.pack_big_file_threshold,
             big_file_threshold=self.pack_big_file_threshold,
+            hash_algorithm=self.hash_algorithm,
         )
         )
         final_pack.check_length_and_checksum()
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
         self._add_cached_pack(pack_base_name, final_pack)
@@ -1964,7 +1994,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Args:
         Args:
           obj: Object to add
           obj: Object to add
         """
         """
-        path = self._get_shafile_path(obj.id)
+        # Use the correct hash algorithm for the object ID
+        obj_id = obj.get_id(self.hash_algorithm)
+        path = self._get_shafile_path(obj_id)
         dir = os.path.dirname(path)
         dir = os.path.dirname(path)
         try:
         try:
             os.mkdir(dir)
             os.mkdir(dir)
@@ -1987,6 +2019,7 @@ class DiskObjectStore(PackBasedObjectStore):
         *,
         *,
         file_mode: int | None = None,
         file_mode: int | None = None,
         dir_mode: int | None = None,
         dir_mode: int | None = None,
+        hash_algorithm=None,
     ) -> "DiskObjectStore":
     ) -> "DiskObjectStore":
         """Initialize a new disk object store.
         """Initialize a new disk object store.
 
 
@@ -1996,6 +2029,7 @@ class DiskObjectStore(PackBasedObjectStore):
           path: Path where the object store should be created
           path: Path where the object store should be created
           file_mode: Optional file permission mask for shared repository
           file_mode: Optional file permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
+          hash_algorithm: Hash algorithm to use (SHA1 or SHA256)
 
 
         Returns:
         Returns:
           New DiskObjectStore instance
           New DiskObjectStore instance
@@ -2013,7 +2047,7 @@ class DiskObjectStore(PackBasedObjectStore):
         if dir_mode is not None:
         if dir_mode is not None:
             os.chmod(info_path, dir_mode)
             os.chmod(info_path, dir_mode)
             os.chmod(pack_path, dir_mode)
             os.chmod(pack_path, dir_mode)
-        return cls(path, file_mode=file_mode, dir_mode=dir_mode)
+        return cls(path, file_mode=file_mode, dir_mode=dir_mode, hash_algorithm=hash_algorithm)
 
 
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
         """Iterate over all object SHAs with the given prefix.
         """Iterate over all object SHAs with the given prefix.

+ 141 - 25
dulwich/objects.py

@@ -112,6 +112,24 @@ if TYPE_CHECKING:
 
 
     from .file import _GitFile
     from .file import _GitFile
 
 
# Hex all-zero SHA at SHA-1 width; kept for backward compatibility.
ZERO_SHA = b"0" * 40


def zero_sha_for(hash_algorithm=None):
    """Return the all-zero object ID for a given hash algorithm.

    Args:
        hash_algorithm: HashAlgorithm instance, or None for the SHA-1 default.

    Returns:
        Zero SHA as hex bytes (40 characters for SHA-1, 64 for SHA-256).
    """
    return ZERO_SHA if hash_algorithm is None else hash_algorithm.zero_oid
+
+
 # Header fields for commits
 # Header fields for commits
 _TREE_HEADER = b"tree"
 _TREE_HEADER = b"tree"
 _PARENT_HEADER = b"parent"
 _PARENT_HEADER = b"parent"
@@ -175,13 +193,17 @@ def _decompress(string: bytes) -> bytes:
 def sha_to_hex(sha: RawObjectID) -> ObjectID:
 def sha_to_hex(sha: RawObjectID) -> ObjectID:
     """Takes a string and returns the hex of the sha within."""
     """Takes a string and returns the hex of the sha within."""
     hexsha = binascii.hexlify(sha)
     hexsha = binascii.hexlify(sha)
-    assert len(hexsha) == 40, f"Incorrect length of sha1 string: {hexsha!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hexsha) not in (40, 64):
+        raise ValueError(f"Incorrect length of sha string: {hexsha!r}")
     return ObjectID(hexsha)
     return ObjectID(hexsha)
 
 
 
 
 def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
 def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
     """Takes a hex sha and returns a binary sha."""
     """Takes a hex sha and returns a binary sha."""
-    assert len(hex) == 40, f"Incorrect length of hexsha: {hex!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hex) not in (40, 64):
+        raise ValueError(f"Incorrect length of hexsha: {hex}")
     try:
     try:
         return RawObjectID(binascii.unhexlify(hex))
         return RawObjectID(binascii.unhexlify(hex))
     except TypeError as exc:
     except TypeError as exc:
@@ -191,15 +213,15 @@ def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
 
 
 
 
 def valid_hexsha(hex: bytes | str) -> bool:
 def valid_hexsha(hex: bytes | str) -> bool:
-    """Check if a string is a valid hex SHA.
+    """Check if a hex string is a valid SHA1 or SHA256.
 
 
     Args:
     Args:
-      hex: Hex string to check
+        hex: Hex string to validate
 
 
     Returns:
     Returns:
-      True if valid hex SHA, False otherwise
+        True if valid SHA1 (40 chars) or SHA256 (64 chars), False otherwise
     """
     """
-    if len(hex) != 40:
+    if len(hex) not in (40, 64):
         return False
         return False
     try:
     try:
         binascii.unhexlify(hex)
         binascii.unhexlify(hex)
@@ -549,11 +571,12 @@ class ShaFile:
     ) -> None:
     ) -> None:
         """Set the contents of this object from a list of chunks."""
         """Set the contents of this object from a list of chunks."""
         self._chunked_text = chunks
         self._chunked_text = chunks
-        self._deserialize(chunks)
+        # Set SHA before deserialization so Tree can detect hash algorithm
         if sha is None:
         if sha is None:
             self._sha = None
             self._sha = None
         else:
         else:
-            self._sha = FixedSha(sha)
+            self._sha = FixedSha(sha)  # type: ignore
+        self._deserialize(chunks)
         self._needs_serialization = False
         self._needs_serialization = False
 
 
     @staticmethod
     @staticmethod
@@ -613,17 +636,21 @@ class ShaFile:
         raise NotImplementedError(self._serialize)
         raise NotImplementedError(self._serialize)
 
 
     @classmethod
     @classmethod
-    def from_path(cls, path: str | bytes) -> "ShaFile":
+    def from_path(cls, path: str | bytes, sha: ObjectID | None = None) -> "ShaFile":
         """Open a SHA file from disk."""
         """Open a SHA file from disk."""
         with GitFile(path, "rb") as f:
         with GitFile(path, "rb") as f:
-            return cls.from_file(f)
+            return cls.from_file(f, sha)
 
 
     @classmethod
     @classmethod
-    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile") -> "ShaFile":
+    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile", sha: ObjectID | None = None) -> "ShaFile":
         """Get the contents of a SHA file on disk."""
         """Get the contents of a SHA file on disk."""
         try:
         try:
             obj = cls._parse_file(f)
             obj = cls._parse_file(f)
-            obj._sha = None
+            # Set SHA after parsing but before any further processing
+            if sha is not None:
+                obj._sha = FixedSha(sha)
+            else:
+                obj._sha = None
             return obj
             return obj
         except (IndexError, ValueError) as exc:
         except (IndexError, ValueError) as exc:
             raise ObjectFormatException("invalid object header") from exc
             raise ObjectFormatException("invalid object header") from exc
@@ -713,8 +740,21 @@ class ShaFile:
         """Returns the length of the raw string of this object."""
         """Returns the length of the raw string of this object."""
         return sum(map(len, self.as_raw_chunks()))
         return sum(map(len, self.as_raw_chunks()))
 
 
-    def sha(self) -> "FixedSha | HASH":
-        """The SHA1 object that is the name of this object."""
+    def sha(self, hash_algorithm=None) -> "FixedSha | HASH":
+        """The SHA object that is the name of this object.
+
+        Args:
+            hash_algorithm: Optional HashAlgorithm to use. Defaults to SHA1.
+        """
+        # If using a different hash algorithm, always recalculate
+        if hash_algorithm is not None:
+            new_sha = hash_algorithm.new_hash()
+            new_sha.update(self._header())
+            for chunk in self.as_raw_chunks():
+                new_sha.update(chunk)
+            return new_sha
+
+        # Otherwise use cached SHA1 value
         if self._sha is None or self._needs_serialization:
         if self._sha is None or self._needs_serialization:
             # this is a local because as_raw_chunks() overwrites self._sha
             # this is a local because as_raw_chunks() overwrites self._sha
             new_sha = sha1()
             new_sha = sha1()
@@ -733,9 +773,32 @@ class ShaFile:
 
 
     @property
     @property
     def id(self) -> ObjectID:
     def id(self) -> ObjectID:
-        """The hex SHA of this object."""
+        """The hex SHA1 of this object.
+
+        For SHA256 repositories, use get_id(hash_algorithm) instead.
+        This property always returns SHA1 for backward compatibility.
+        """
         return ObjectID(self.sha().hexdigest().encode("ascii"))
         return ObjectID(self.sha().hexdigest().encode("ascii"))
 
 
+    def get_id(self, hash_algorithm=None):
+        """Get the hex SHA of this object using the specified hash algorithm.
+
+        Args:
+            hash_algorithm: Optional HashAlgorithm to use. Defaults to SHA1.
+
+        Example:
+            >>> blob = Blob()
+            >>> blob.data = b"Hello, World!"
+            >>> blob.id  # Always returns SHA1 for backward compatibility
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> blob.get_id()  # Same as .id
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> from dulwich.hash import SHA256
+            >>> blob.get_id(SHA256)  # Get SHA256 hash
+            b'03ba204e2f2e707...'  # 64-character SHA256
+        """
+        return self.sha(hash_algorithm).hexdigest().encode("ascii")
+
     def __repr__(self) -> str:
     def __repr__(self) -> str:
         """Return string representation of this object."""
         """Return string representation of this object."""
         return f"<{self.__class__.__name__} {self.id!r}>"
         return f"<{self.__class__.__name__} {self.id!r}>"
@@ -1247,20 +1310,37 @@ class TreeEntry(NamedTuple):
 
 
 
 
 def parse_tree(
 def parse_tree(
-    text: bytes, strict: bool = False
+    text: bytes, strict: bool = False, hash_algorithm=None
 ) -> Iterator[tuple[bytes, int, ObjectID]]:
 ) -> Iterator[tuple[bytes, int, ObjectID]]:
     """Parse a tree text.
     """Parse a tree text.
 
 
     Args:
     Args:
       text: Serialized text to parse
       text: Serialized text to parse
-      strict: If True, enforce strict validation
+      strict: Whether to be strict about format
+      hash_algorithm: Hash algorithm object (SHA1 or SHA256) - if None, auto-detect
     Returns: iterator of tuples of (name, mode, sha)
     Returns: iterator of tuples of (name, mode, sha)
 
 
     Raises:
     Raises:
       ObjectFormatException: if the object was malformed in some way
       ObjectFormatException: if the object was malformed in some way
     """
     """
+    if hash_algorithm is not None:
+        sha_len = hash_algorithm.oid_length
+        return _parse_tree_with_sha_len(text, strict, sha_len)
+
+    # Try both hash lengths and use the one that works
+    try:
+        # Try SHA1 first (more common)
+        return _parse_tree_with_sha_len(text, strict, 20)
+    except ObjectFormatException:
+        # If SHA1 fails, try SHA256
+        return _parse_tree_with_sha_len(text, strict, 32)
+
+
+def _parse_tree_with_sha_len(text, strict, sha_len):
+    """Helper function to parse tree with a specific hash length."""
     count = 0
     count = 0
     length = len(text)
     length = len(text)
+
     while count < length:
     while count < length:
         mode_end = text.index(b" ", count)
         mode_end = text.index(b" ", count)
         mode_text = text[count:mode_end]
         mode_text = text[count:mode_end]
@@ -1272,10 +1352,18 @@ def parse_tree(
             raise ObjectFormatException(f"Invalid mode {mode_text!r}") from exc
             raise ObjectFormatException(f"Invalid mode {mode_text!r}") from exc
         name_end = text.index(b"\0", mode_end)
         name_end = text.index(b"\0", mode_end)
         name = text[mode_end + 1 : name_end]
         name = text[mode_end + 1 : name_end]
-        count = name_end + 21
+
+        count = name_end + 1 + sha_len
+        if count > length:
+            raise ObjectFormatException(
+                f"Tree entry extends beyond tree length: {count} > {length}"
+            )
+
         sha = text[name_end + 1 : count]
         sha = text[name_end + 1 : count]
-        if len(sha) != 20:
-            raise ObjectFormatException("Sha has invalid length")
+        if len(sha) != sha_len:
+            raise ObjectFormatException(
+                f"Sha has invalid length: {len(sha)} != {sha_len}"
+            )
         hexsha = sha_to_hex(RawObjectID(sha))
         hexsha = sha_to_hex(RawObjectID(sha))
         yield (name, mode, hexsha)
         yield (name, mode, hexsha)
 
 
@@ -1386,12 +1474,34 @@ class Tree(ShaFile):
         super().__init__()
         super().__init__()
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
 
 
+    def _get_hash_algorithm(self):
+        """Get the hash algorithm based on the object's SHA."""
+        if not hasattr(self, "_sha") or self._sha is None:
+            return None
+
+        # Get the raw SHA bytes
+        sha = self._sha.digest() if hasattr(self._sha, "digest") else self._sha
+        if not isinstance(sha, bytes):
+            return None
+
+        # Import hash modules lazily to avoid circular imports
+        if len(sha) == 32:
+            from .hash import SHA256
+
+            return SHA256
+        elif len(sha) == 20:
+            from .hash import SHA1
+
+            return SHA1
+        return None
+
     @classmethod
     @classmethod
-    def from_path(cls, filename: str | bytes) -> "Tree":
+    def from_path(cls, filename: str | bytes, sha: ObjectID | None = None) -> "Tree":
         """Read a tree from a file on disk.
         """Read a tree from a file on disk.
 
 
         Args:
         Args:
           filename: Path to the tree file
           filename: Path to the tree file
+          sha: Optional known SHA for the object
 
 
         Returns:
         Returns:
           A Tree object
           A Tree object
@@ -1399,7 +1509,7 @@ class Tree(ShaFile):
         Raises:
         Raises:
           NotTreeError: If the file is not a tree
           NotTreeError: If the file is not a tree
         """
         """
-        tree = ShaFile.from_path(filename)
+        tree = ShaFile.from_path(filename, sha)
         if not isinstance(tree, cls):
         if not isinstance(tree, cls):
             raise NotTreeError(_path_to_bytes(filename))
             raise NotTreeError(_path_to_bytes(filename))
         return tree
         return tree
@@ -1470,7 +1580,9 @@ class Tree(ShaFile):
     def _deserialize(self, chunks: list[bytes]) -> None:
     def _deserialize(self, chunks: list[bytes]) -> None:
         """Grab the entries in the tree."""
         """Grab the entries in the tree."""
         try:
         try:
-            parsed_entries = parse_tree(b"".join(chunks))
+            parsed_entries = parse_tree(
+                b"".join(chunks), hash_algorithm=self._get_hash_algorithm()
+            )
         except ValueError as exc:
         except ValueError as exc:
             raise ObjectFormatException(exc) from exc
             raise ObjectFormatException(exc) from exc
         # TODO: list comprehension is for efficiency in the common (small)
         # TODO: list comprehension is for efficiency in the common (small)
@@ -1496,8 +1608,12 @@ class Tree(ShaFile):
             # TODO: optionally exclude as in git fsck --strict
             # TODO: optionally exclude as in git fsck --strict
             stat.S_IFREG | 0o664,
             stat.S_IFREG | 0o664,
         )
         )
-        for name, mode, sha in parse_tree(b"".join(self._chunked_text), True):
-            check_hexsha(sha, f"invalid sha {sha!r}")
+        for name, mode, sha in parse_tree(
+            b"".join(self._chunked_text),
+            strict=True,
+            hash_algorithm=self._get_hash_algorithm(),
+        ):
+            check_hexsha(sha, f"invalid sha {sha}")
             if b"/" in name or name in (b"", b".", b"..", b".git"):
             if b"/" in name or name in (b"", b".", b"..", b".git"):
                 raise ObjectFormatException(
                 raise ObjectFormatException(
                     "invalid name {}".format(name.decode("utf-8", "replace"))
                     "invalid name {}".format(name.decode("utf-8", "replace"))

+ 84 - 33
dulwich/pack.py

@@ -530,15 +530,16 @@ def iter_sha1(iter: Iterable[bytes]) -> bytes:
     return sha.hexdigest().encode("ascii")
     return sha.hexdigest().encode("ascii")
 
 
 
 
-def load_pack_index(path: str | os.PathLike[str]) -> "PackIndex":
+def load_pack_index(path: str | os.PathLike[str], hash_algorithm: int | None = None) -> "PackIndex":
     """Load an index file by path.
     """Load an index file by path.
 
 
     Args:
     Args:
       path: Path to the index file
       path: Path to the index file
+      hash_algorithm: Hash algorithm used by the repository
     Returns: A PackIndex loaded from the given path
     Returns: A PackIndex loaded from the given path
     """
     """
     with GitFile(path, "rb") as f:
     with GitFile(path, "rb") as f:
-        return load_pack_index_file(path, f)
+        return load_pack_index_file(path, f, hash_algorithm=hash_algorithm)
 
 
 
 
 def _load_file_contents(
 def _load_file_contents(
@@ -574,25 +575,35 @@ def _load_file_contents(
 
 
 def load_pack_index_file(
 def load_pack_index_file(
     path: str | os.PathLike[str], f: IO[bytes] | _GitFile
     path: str | os.PathLike[str], f: IO[bytes] | _GitFile
+    hash_algorithm: int | None = None,
 ) -> "PackIndex":
 ) -> "PackIndex":
     """Load an index file from a file-like object.
     """Load an index file from a file-like object.
 
 
     Args:
     Args:
       path: Path for the index file
       path: Path for the index file
       f: File-like object
       f: File-like object
+      hash_algorithm: Hash algorithm used by the repository
     Returns: A PackIndex loaded from the given file
     Returns: A PackIndex loaded from the given file
     """
     """
     contents, size = _load_file_contents(f)
     contents, size = _load_file_contents(f)
     if contents[:4] == b"\377tOc":
     if contents[:4] == b"\377tOc":
         version = struct.unpack(b">L", contents[4:8])[0]
         version = struct.unpack(b">L", contents[4:8])[0]
         if version == 2:
         if version == 2:
-            return PackIndex2(path, file=f, contents=contents, size=size)
+            return PackIndex2(
+                path,
+                file=f,
+                contents=contents,
+                size=size,
+                hash_algorithm=hash_algorithm,
+            )
         elif version == 3:
         elif version == 3:
             return PackIndex3(path, file=f, contents=contents, size=size)
             return PackIndex3(path, file=f, contents=contents, size=size)
         else:
         else:
             raise KeyError(f"Unknown pack index format {version}")
             raise KeyError(f"Unknown pack index format {version}")
     else:
     else:
-        return PackIndex1(path, file=f, contents=contents, size=size)
+        return PackIndex1(
+            path, file=f, contents=contents, size=size, hash_algorithm=hash_algorithm
+        )
 
 
 
 
 def bisect_find_sha(
 def bisect_find_sha(
@@ -777,7 +788,7 @@ class MemoryPackIndex(PackIndex):
           sha: SHA to look up (binary or hex)
           sha: SHA to look up (binary or hex)
         Returns: Offset in the pack file
         Returns: Offset in the pack file
         """
         """
-        if len(sha) == 40:
+        if len(sha) in (40, 64):  # Hex string (SHA1 or SHA256)
             sha = hex_to_sha(cast(ObjectID, sha))
             sha = hex_to_sha(cast(ObjectID, sha))
         return self._by_sha[cast(RawObjectID, sha)]
         return self._by_sha[cast(RawObjectID, sha)]
 
 
@@ -976,7 +987,8 @@ class FilePackIndex(PackIndex):
         Args:
         Args:
           sha: A *binary* SHA string. (20 characters long)_
           sha: A *binary* SHA string. (20 characters long)_
         """
         """
-        assert len(sha) == 20
+        hash_size = getattr(self, "hash_size", 20)  # Default to SHA1 for v1
+        assert len(sha) == hash_size
         idx = ord(sha[:1])
         idx = ord(sha[:1])
         if idx == 0:
         if idx == 0:
             start = 0
             start = 0
@@ -1020,6 +1032,7 @@ class PackIndex1(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         contents: bytes | None = None,
         size: int | None = None,
         size: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
     ) -> None:
         """Initialize a version 1 pack index.
         """Initialize a version 1 pack index.
 
 
@@ -1028,24 +1041,35 @@ class PackIndex1(FilePackIndex):
             file: Optional file object
             file: Optional file object
             contents: Optional mmap'd contents
             contents: Optional mmap'd contents
             size: Optional size of the index
             size: Optional size of the index
+            hash_algorithm: Hash algorithm used by the repository
         """
         """
         super().__init__(filename, file, contents, size)
         super().__init__(filename, file, contents, size)
         self.version = 1
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
         self._fan_out_table = self._read_fan_out_table(0)
+        # Use provided hash algorithm if available, otherwise default to SHA1
+        if hash_algorithm:
+            self.hash_size = hash_algorithm.oid_length
+        else:
+            self.hash_size = 20  # Default to SHA1
+
+        self._entry_size = 4 + self.hash_size
 
 
     def _unpack_entry(self, i: int) -> tuple[RawObjectID, int, None]:
     def _unpack_entry(self, i: int) -> tuple[RawObjectID, int, None]:
-        (offset, name) = unpack_from(">L20s", self._contents, (0x100 * 4) + (i * 24))
+        base_offset = (0x100 * 4) + (i * self._entry_size)
+        if self.hash_size == 20:
+            (offset, name) = unpack_from(">L20s", self._contents, base_offset)
+        else:  # SHA256
+            offset = unpack_from(">L", self._contents, base_offset)[0]
+            name = self._contents[base_offset + 4 : base_offset + 4 + self.hash_size]
         return (RawObjectID(name), offset, None)
         return (RawObjectID(name), offset, None)
 
 
     def _unpack_name(self, i: int) -> bytes:
     def _unpack_name(self, i: int) -> bytes:
-        offset = (0x100 * 4) + (i * 24) + 4
-        return self._contents[offset : offset + 20]
+        offset = (0x100 * 4) + (i * self._entry_size) + 4
+        return self._contents[offset : offset + self.hash_size]
 
 
     def _unpack_offset(self, i: int) -> int:
     def _unpack_offset(self, i: int) -> int:
-        offset = (0x100 * 4) + (i * 24)
-        result = unpack_from(">L", self._contents, offset)[0]
-        assert isinstance(result, int)
-        return result
+        offset = (0x100 * 4) + (i * self._entry_size)
+        return unpack_from(">L", self._contents, offset)[0]
 
 
     def _unpack_crc32_checksum(self, i: int) -> None:
     def _unpack_crc32_checksum(self, i: int) -> None:
         # Not stored in v1 index files
         # Not stored in v1 index files
@@ -1061,6 +1085,7 @@ class PackIndex2(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         contents: bytes | None = None,
         size: int | None = None,
         size: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
     ) -> None:
         """Initialize a version 2 pack index.
         """Initialize a version 2 pack index.
 
 
@@ -1069,6 +1094,7 @@ class PackIndex2(FilePackIndex):
             file: Optional file object
             file: Optional file object
             contents: Optional mmap'd contents
             contents: Optional mmap'd contents
             size: Optional size of the index
             size: Optional size of the index
+            hash_algorithm: Hash algorithm used by the repository
         """
         """
         super().__init__(filename, file, contents, size)
         super().__init__(filename, file, contents, size)
         if self._contents[:4] != b"\377tOc":
         if self._contents[:4] != b"\377tOc":
@@ -1077,8 +1103,15 @@ class PackIndex2(FilePackIndex):
         if self.version != 2:
         if self.version != 2:
             raise AssertionError(f"Version was {self.version}")
             raise AssertionError(f"Version was {self.version}")
         self._fan_out_table = self._read_fan_out_table(8)
         self._fan_out_table = self._read_fan_out_table(8)
+
+        # Use provided hash algorithm if available, otherwise default to SHA1
+        if hash_algorithm:
+            self.hash_size = hash_algorithm.oid_length
+        else:
+            self.hash_size = 20  # Default to SHA1
+
         self._name_table_offset = 8 + 0x100 * 4
         self._name_table_offset = 8 + 0x100 * 4
-        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
+        self._crc32_table_offset = self._name_table_offset + self.hash_size * len(self)
         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
         self._pack_offset_largetable_offset = self._pack_offset_table_offset + 4 * len(
         self._pack_offset_largetable_offset = self._pack_offset_table_offset + 4 * len(
             self
             self
@@ -1092,25 +1125,27 @@ class PackIndex2(FilePackIndex):
         )
         )
 
 
     def _unpack_name(self, i: int) -> bytes:
     def _unpack_name(self, i: int) -> bytes:
-        offset = self._name_table_offset + i * 20
-        return self._contents[offset : offset + 20]
+        offset = self._name_table_offset + i * self.hash_size
+        return self._contents[offset : offset + self.hash_size]
 
 
     def _unpack_offset(self, i: int) -> int:
     def _unpack_offset(self, i: int) -> int:
-        offset_pos = self._pack_offset_table_offset + i * 4
-        offset = unpack_from(">L", self._contents, offset_pos)[0]
-        assert isinstance(offset, int)
+        offset = self._pack_offset_table_offset + i * 4
+        offset = unpack_from(">L", self._contents, offset)[0]
         if offset & (2**31):
         if offset & (2**31):
-            large_offset_pos = (
-                self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
-            )
-            offset = unpack_from(">Q", self._contents, large_offset_pos)[0]
-            assert isinstance(offset, int)
+            offset = self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
+            offset = unpack_from(">Q", self._contents, offset)[0]
         return offset
         return offset
 
 
     def _unpack_crc32_checksum(self, i: int) -> int:
     def _unpack_crc32_checksum(self, i: int) -> int:
-        result = unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
-        assert isinstance(result, int)
-        return result
+        return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+
+    def get_pack_checksum(self) -> bytes:
+        """Return the checksum stored for the corresponding packfile.
+
+        Returns: binary digest (always 20 bytes - SHA1)
+        """
+        # Pack checksums are always SHA1, even in SHA256 repositories
+        return bytes(self._contents[-40:-20])
 
 
 
 
 class PackIndex3(FilePackIndex):
 class PackIndex3(FilePackIndex):
@@ -3172,6 +3207,8 @@ def write_pack_index_v1(
         f.write(struct.pack(">L", fan_out_table[i]))
         f.write(struct.pack(">L", fan_out_table[i]))
         fan_out_table[i + 1] += fan_out_table[i]
         fan_out_table[i + 1] += fan_out_table[i]
     for name, offset, _entry_checksum in entries:
     for name, offset, _entry_checksum in entries:
+        if len(name) != 20:
+            raise TypeError("pack index v1 only supports SHA-1 names")
         if not (offset <= 0xFFFFFFFF):
         if not (offset <= 0xFFFFFFFF):
             raise TypeError("pack format 1 only supports offsets < 2Gb")
             raise TypeError("pack format 1 only supports offsets < 2Gb")
         f.write(struct.pack(">L20s", offset, name))
         f.write(struct.pack(">L20s", offset, name))
@@ -3250,11 +3287,11 @@ def _create_delta_py(base_buf: bytes, target_buf: bytes) -> Iterator[bytes]:
             o = j1
             o = j1
             while s > 127:
             while s > 127:
                 yield bytes([127])
                 yield bytes([127])
-                yield bytes(memoryview(target_buf)[o : o + 127])
+                yield memoryview(target_buf)[o : o + 127]
                 s -= 127
                 s -= 127
                 o += 127
                 o += 127
             yield bytes([s])
             yield bytes([s])
-            yield bytes(memoryview(target_buf)[o : o + s])
+            yield memoryview(target_buf)[o : o + s]
 
 
 
 
 # Default to pure Python implementation
 # Default to pure Python implementation
@@ -3357,12 +3394,20 @@ def write_pack_index_v2(
     fan_out_table: dict[int, int] = defaultdict(lambda: 0)
     fan_out_table: dict[int, int] = defaultdict(lambda: 0)
     for name, offset, entry_checksum in entries:
     for name, offset, entry_checksum in entries:
         fan_out_table[ord(name[:1])] += 1
         fan_out_table[ord(name[:1])] += 1
+    try:
+        hash_size = len(next(iter(entries))[0])
+    except StopIteration:
+        hash_size = 20  # Default to SHA-1 size if no entries
     # Fan-out table
     # Fan-out table
     largetable: list[int] = []
     largetable: list[int] = []
     for i in range(0x100):
     for i in range(0x100):
         f.write(struct.pack(b">L", fan_out_table[i]))
         f.write(struct.pack(b">L", fan_out_table[i]))
         fan_out_table[i + 1] += fan_out_table[i]
         fan_out_table[i + 1] += fan_out_table[i]
     for name, offset, entry_checksum in entries:
     for name, offset, entry_checksum in entries:
+        if len(name) != hash_size:
+            raise TypeError(
+                f"Object name has wrong length: expected {hash_size}, got {len(name)}"
+            )
         f.write(name)
         f.write(name)
     for name, offset, entry_checksum in entries:
     for name, offset, entry_checksum in entries:
         f.write(struct.pack(b">L", entry_checksum))
         f.write(struct.pack(b">L", entry_checksum))
@@ -3512,6 +3557,7 @@ class Pack:
         depth: int | None = None,
         depth: int | None = None,
         threads: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
         big_file_threshold: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
     ) -> None:
         """Initialize a Pack object.
         """Initialize a Pack object.
 
 
@@ -3524,6 +3570,7 @@ class Pack:
           depth: Maximum depth for delta chains
           depth: Maximum depth for delta chains
           threads: Number of threads to use for operations
           threads: Number of threads to use for operations
           big_file_threshold: Size threshold for big file handling
           big_file_threshold: Size threshold for big file handling
+          hash_algorithm: Hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
         """
         """
         self._basename = basename
         self._basename = basename
         self._data = None
         self._data = None
@@ -3549,21 +3596,25 @@ class Pack:
         )
         )
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self.resolve_ext_ref = resolve_ext_ref
         self.resolve_ext_ref = resolve_ext_ref
+        self.hash_algorithm = (
+            hash_algorithm if hash_algorithm is not None else DEFAULT_HASH_ALGORITHM
+        )
 
 
     @classmethod
     @classmethod
     def from_lazy_objects(
     def from_lazy_objects(
-        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex]
+        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex],
+        hash_algorithm: int | None = None
     ) -> "Pack":
     ) -> "Pack":
         """Create a new pack object from callables to load pack data and index objects."""
         """Create a new pack object from callables to load pack data and index objects."""
-        ret = cls("")
+        ret = cls("", hash_algorithm=hash_algorithm)
         ret._data_load = data_fn
         ret._data_load = data_fn
         ret._idx_load = idx_fn
         ret._idx_load = idx_fn
         return ret
         return ret
 
 
     @classmethod
     @classmethod
-    def from_objects(cls, data: PackData, idx: PackIndex) -> "Pack":
+    def from_objects(cls, data: PackData, idx: PackIndex, hash_algorithm: int | None = None) -> "Pack":
         """Create a new pack object from pack data and index objects."""
         """Create a new pack object from pack data and index objects."""
-        ret = cls("")
+        ret = cls("", hash_algorithm=hash_algorithm)
         ret._data = data
         ret._data = data
         ret._data_load = None
         ret._data_load = None
         ret._idx = idx
         ret._idx = idx

+ 5 - 3
dulwich/refs.py

@@ -1000,7 +1000,7 @@ class DiskRefsContainer(RefsContainer):
         """Read a reference file and return its contents.
         """Read a reference file and return its contents.
 
 
         If the reference file a symbolic reference, only read the first line of
         If the reference file a symbolic reference, only read the first line of
-        the file. Otherwise, only read the first 40 bytes.
+        the file. Otherwise, read the hash (40 bytes for SHA1, 64 bytes for SHA256).
 
 
         Args:
         Args:
           name: the refname to read, relative to refpath
           name: the refname to read, relative to refpath
@@ -1018,8 +1018,10 @@ class DiskRefsContainer(RefsContainer):
                     # Read only the first line
                     # Read only the first line
                     return header + next(iter(f)).rstrip(b"\r\n")
                     return header + next(iter(f)).rstrip(b"\r\n")
                 else:
                 else:
-                    # Read only the first 40 bytes
-                    return header + f.read(40 - len(SYMREF))
+                    # Read the entire line to get the full hash (handles both SHA1 and SHA256)
+                    f.seek(0)
+                    line = f.readline().rstrip(b"\r\n")
+                    return line
         except (OSError, UnicodeError):
         except (OSError, UnicodeError):
             # don't assume anything specific about the error; in
             # don't assume anything specific about the error; in
             # particular, invalid or forbidden paths can raise weird
             # particular, invalid or forbidden paths can raise weird

+ 96 - 5
dulwich/repo.py

@@ -515,6 +515,7 @@ class BaseRepo:
 
 
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self.hooks: dict[str, Hook] = {}
         self.hooks: dict[str, Hook] = {}
+        self._hash_algorithm = None  # Cached hash algorithm
 
 
     def _determine_file_mode(self) -> bool:
     def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
         """Probe the file-system to determine whether permissions can be trusted.
@@ -537,6 +538,7 @@ class BaseRepo:
         symlinks: bool | None = None,
         symlinks: bool | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> None:
     ) -> None:
         """Initialize a default set of named files."""
         """Initialize a default set of named files."""
         from .config import ConfigFile
         from .config import ConfigFile
@@ -544,11 +546,30 @@ class BaseRepo:
         self._put_named_file("description", b"Unnamed repository")
         self._put_named_file("description", b"Unnamed repository")
         f = BytesIO()
         f = BytesIO()
         cf = ConfigFile()
         cf = ConfigFile()
-        if format is None:
-            format = 0
+
+        # Determine the appropriate format version
+        if object_format == "sha256":
+            # SHA256 requires format version 1
+            if format is None:
+                format = 1
+            elif format != 1:
+                raise ValueError(
+                    "SHA256 object format requires repository format version 1"
+                )
+        else:
+            # SHA1 (default) can use format 0 or 1
+            if format is None:
+                format = 0
+
         if format not in (0, 1):
         if format not in (0, 1):
             raise ValueError(f"Unsupported repository format version: {format}")
             raise ValueError(f"Unsupported repository format version: {format}")
+
         cf.set("core", "repositoryformatversion", str(format))
         cf.set("core", "repositoryformatversion", str(format))
+
+        # Set object format extension if using SHA256
+        if object_format == "sha256":
+            cf.set("extensions", "objectformat", "sha256")
+
         if self._determine_file_mode():
         if self._determine_file_mode():
             cf.set("core", "filemode", True)
             cf.set("core", "filemode", True)
         else:
         else:
@@ -574,6 +595,19 @@ class BaseRepo:
         self._put_named_file("config", f.getvalue())
         self._put_named_file("config", f.getvalue())
         self._put_named_file(os.path.join("info", "exclude"), b"")
         self._put_named_file(os.path.join("info", "exclude"), b"")
 
 
+        # Allow subclasses to handle config initialization
+        self._init_config(cf)
+
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration.
+
+        This method can be overridden by subclasses to handle config initialization.
+
+        Args:
+            config: The ConfigFile object that was just created
+        """
+        # Default implementation does nothing
+
     def get_named_file(self, path: str) -> BinaryIO | None:
     def get_named_file(self, path: str) -> BinaryIO | None:
         """Get a file from the control dir with a specific name.
         """Get a file from the control dir with a specific name.
 
 
@@ -912,6 +946,42 @@ class BaseRepo:
         """
         """
         raise NotImplementedError(self.get_config)
         raise NotImplementedError(self.get_config)
 
 
+    def get_hash_algorithm(self):
+        """Get the hash algorithm used by this repository.
+
+        Returns: HashAlgorithm instance (SHA1 or SHA256)
+        """
+        if self._hash_algorithm is None:
+            from .hash import get_hash_algorithm
+
+            # Check if repository uses SHA256
+            try:
+                config = self.get_config()
+                try:
+                    version = int(config.get(("core",), "repositoryformatversion"))
+                except KeyError:
+                    version = 0  # Default version is 0
+
+                if version == 1:
+                    # Check for SHA256 extension
+                    try:
+                        object_format = config.get(("extensions",), "objectformat")
+                        if object_format == b"sha256":
+                            self._hash_algorithm = get_hash_algorithm("sha256")
+                        else:
+                            self._hash_algorithm = get_hash_algorithm("sha1")
+                    except KeyError:
+                        # No objectformat extension, default to SHA1
+                        self._hash_algorithm = get_hash_algorithm("sha1")
+                else:
+                    # Version 0 always uses SHA1
+                    self._hash_algorithm = get_hash_algorithm("sha1")
+            except (KeyError, ValueError):
+                # If we can't read config, default to SHA1
+                self._hash_algorithm = get_hash_algorithm("sha1")
+
+        return self._hash_algorithm
+
     def get_worktree_config(self) -> "ConfigFile":
     def get_worktree_config(self) -> "ConfigFile":
         """Retrieve the worktree config object."""
         """Retrieve the worktree config object."""
         raise NotImplementedError(self.get_worktree_config)
         raise NotImplementedError(self.get_worktree_config)
@@ -1103,7 +1173,7 @@ class BaseRepo:
         """
         """
         if not isinstance(name, bytes):
         if not isinstance(name, bytes):
             raise TypeError(f"'name' must be bytestring, not {type(name).__name__:.80}")
             raise TypeError(f"'name' must be bytestring, not {type(name).__name__:.80}")
-        if len(name) in (20, 40):
+        if len(name) in (20, 32, 40, 64):  # Support both SHA1 and SHA256
             try:
             try:
                 # Try as ObjectID/RawObjectID
                 # Try as ObjectID/RawObjectID
                 return self.object_store[
                 return self.object_store[
@@ -1424,7 +1494,7 @@ class Repo(BaseRepo):
                     has_reftable_extension = True
                     has_reftable_extension = True
                 else:
                 else:
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
-            elif extension.lower() not in (b"worktreeconfig",):
+            elif extension.lower() not in (b"worktreeconfig", b"objectformat"):
                 raise UnsupportedExtension(extension.decode("utf-8"))
                 raise UnsupportedExtension(extension.decode("utf-8"))
 
 
         if object_store is None:
         if object_store is None:
@@ -2055,6 +2125,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         symlinks: bool | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
     ) -> "Repo":
         path = os.fspath(path)
         path = os.fspath(path)
         if isinstance(path, bytes):
         if isinstance(path, bytes):
@@ -2077,10 +2148,17 @@ class Repo(BaseRepo):
                 os.chmod(dir_path, dir_mode)
                 os.chmod(dir_path, dir_mode)
 
 
         if object_store is None:
         if object_store is None:
+            # Get hash algorithm for object store
+            from .hash import get_hash_algorithm
+
+            hash_alg = get_hash_algorithm(
+                "sha256" if object_format == "sha256" else "sha1"
+            )
             object_store = DiskObjectStore.init(
             object_store = DiskObjectStore.init(
                 os.path.join(controldir, OBJECTDIR),
                 os.path.join(controldir, OBJECTDIR),
                 file_mode=file_mode,
                 file_mode=file_mode,
                 dir_mode=dir_mode,
                 dir_mode=dir_mode,
+                hash_algorithm=hash_alg,
             )
             )
         ret = cls(path, bare=bare, object_store=object_store)
         ret = cls(path, bare=bare, object_store=object_store)
         if default_branch is None:
         if default_branch is None:
@@ -2098,6 +2176,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             symlinks=symlinks,
             format=format,
             format=format,
             shared_repository=shared_repository,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         )
         return ret
         return ret
 
 
@@ -2112,6 +2191,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         symlinks: bool | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
     ) -> "Repo":
         """Create a new repository.
         """Create a new repository.
 
 
@@ -2123,6 +2203,7 @@ class Repo(BaseRepo):
           symlinks: Whether to support symlinks
           symlinks: Whether to support symlinks
           format: Repository format version (defaults to 0)
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: `Repo` instance
         Returns: `Repo` instance
         """
         """
         path = os.fspath(path)
         path = os.fspath(path)
@@ -2142,6 +2223,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             symlinks=symlinks,
             format=format,
             format=format,
             shared_repository=shared_repository,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         )
 
 
     @classmethod
     @classmethod
@@ -2213,6 +2295,7 @@ class Repo(BaseRepo):
         default_branch: bytes | None = None,
         default_branch: bytes | None = None,
         format: int | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
     ) -> "Repo":
         """Create a new bare repository.
         """Create a new bare repository.
 
 
@@ -2226,6 +2309,7 @@ class Repo(BaseRepo):
           default_branch: Default branch name
           default_branch: Default branch name
           format: Repository format version (defaults to 0)
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: a `Repo` instance
         Returns: a `Repo` instance
         """
         """
         path = os.fspath(path)
         path = os.fspath(path)
@@ -2242,6 +2326,7 @@ class Repo(BaseRepo):
             default_branch=default_branch,
             default_branch=default_branch,
             format=format,
             format=format,
             shared_repository=shared_repository,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         )
 
 
     create = init_bare
     create = init_bare
@@ -2551,6 +2636,10 @@ class MemoryRepo(BaseRepo):
         """
         """
         raise NoIndexPresent
         raise NoIndexPresent
 
 
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration for MemoryRepo."""
+        self._config = config
+
     def get_config(self) -> "ConfigFile":
     def get_config(self) -> "ConfigFile":
         """Retrieve the config object.
         """Retrieve the config object.
 
 
@@ -2739,6 +2828,7 @@ class MemoryRepo(BaseRepo):
         objects: Iterable[ShaFile],
         objects: Iterable[ShaFile],
         refs: Mapping[Ref, ObjectID],
         refs: Mapping[Ref, ObjectID],
         format: int | None = None,
         format: int | None = None,
+        object_format: str | None = None,
     ) -> "MemoryRepo":
     ) -> "MemoryRepo":
         """Create a new bare repository in memory.
         """Create a new bare repository in memory.
 
 
@@ -2748,11 +2838,12 @@ class MemoryRepo(BaseRepo):
           refs: Refs as dictionary, mapping names
           refs: Refs as dictionary, mapping names
             to object SHA1s
             to object SHA1s
           format: Repository format version (defaults to 0)
           format: Repository format version (defaults to 0)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         """
         """
         ret = cls()
         ret = cls()
         for obj in objects:
         for obj in objects:
             ret.object_store.add_object(obj)
             ret.object_store.add_object(obj)
         for refname, sha in refs.items():
         for refname, sha in refs.items():
             ret.refs.add_if_new(refname, sha)
             ret.refs.add_if_new(refname, sha)
-        ret._init_files(bare=True, format=format)
+        ret._init_files(bare=True, format=format, object_format=object_format)
         return ret
         return ret

+ 1 - 1
dulwich/tests/utils.py

@@ -114,7 +114,7 @@ def make_object(cls: type[T], **attrs: Any) -> T:
         if name == "id":
         if name == "id":
             # id property is read-only, so we overwrite sha instead.
             # id property is read-only, so we overwrite sha instead.
             sha = FixedSha(value)
             sha = FixedSha(value)
-            obj.sha = lambda: sha
+            obj.sha = lambda hash_algorithm=None: sha
         else:
         else:
             setattr(obj, name, value)
             setattr(obj, name, value)
     return obj
     return obj

+ 2 - 0
tests/compat/__init__.py

@@ -41,6 +41,8 @@ def test_suite() -> unittest.TestSuite:
         "reftable",
         "reftable",
         "repository",
         "repository",
         "server",
         "server",
+        "sha256",
+        "sha256_packs",
         "utils",
         "utils",
         "web",
         "web",
     ]
     ]

+ 367 - 0
tests/compat/test_sha256.py

@@ -0,0 +1,367 @@
+# test_sha256.py -- Compatibility tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 support with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.hash import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class GitSHA256CompatibilityTests(CompatTestCase):
+    """Test SHA256 compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_sha256_repo_creation_compat(self):
+        """Test that dulwich-created SHA256 repos are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Add a blob and tree using dulwich
+        blob = Blob.from_string(b"Hello SHA256 world!")
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.get_id(SHA256))
+
+        # Create objects in the repository
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+
+        repo.close()
+
+        # Verify git can read the repository
+        config_output = self._run_git(
+            ["config", "--get", "extensions.objectformat"], cwd=repo_path
+        )
+        self.assertEqual(config_output.strip(), b"sha256")
+
+        # Verify git recognizes it as a SHA256 repository
+        rev_parse_output = self._run_git(
+            ["rev-parse", "--show-object-format"], cwd=repo_path
+        )
+        self.assertEqual(rev_parse_output.strip(), b"sha256")
+
+    def test_git_created_sha256_repo_readable(self):
+        """Test that git-created SHA256 repos are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a file and commit with git
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("Test SHA256 content")
+
+        self._run_git(["add", "test.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test SHA256 commit"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Verify dulwich detects SHA256
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+        # Verify dulwich can read objects
+        # Try both main and master branches (git default changed over time)
+        try:
+            head_ref = repo.refs[b"refs/heads/main"]
+        except KeyError:
+            head_ref = repo.refs[b"refs/heads/master"]
+        self.assertEqual(len(head_ref), 64)  # SHA256 length
+
+        # Read the commit object
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+        self.assertEqual(len(commit.tree), 64)  # SHA256 tree ID
+
+        repo.close()
+
+    def test_object_hashing_consistency(self):
+        """Test that object hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file with known content
+        test_content = b"Test content for SHA256 hashing consistency"
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Get git's hash for the content
+        git_hash = self._run_git(["hash-object", "test.txt"], cwd=repo_path)
+        git_hash = git_hash.strip().decode("ascii")
+
+        # Create same blob with dulwich
+        blob = Blob.from_string(test_content)
+        dulwich_hash = blob.get_id(SHA256).decode("ascii")
+
+        # Hashes should match
+        self.assertEqual(git_hash, dulwich_hash)
+
+    def test_tree_hashing_consistency(self):
+        """Test that tree hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file and add to index
+        test_content = b"Tree test content"
+        test_file = os.path.join(repo_path, "tree_test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        self._run_git(["add", "tree_test.txt"], cwd=repo_path)
+
+        # Get git's tree hash
+        git_tree_hash = self._run_git(["write-tree"], cwd=repo_path)
+        git_tree_hash = git_tree_hash.strip().decode("ascii")
+
+        # Create same tree with dulwich
+        blob = Blob.from_string(test_content)
+        tree = Tree()
+        tree.add(b"tree_test.txt", 0o100644, blob.get_id(SHA256))
+
+        dulwich_tree_hash = tree.get_id(SHA256).decode("ascii")
+
+        # Tree hashes should match
+        self.assertEqual(git_tree_hash, dulwich_tree_hash)
+
+    def test_commit_creation_interop(self):
+        """Test commit creation interoperability between dulwich and git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects with dulwich
+        blob = Blob.from_string(b"Interop test content")
+        tree = Tree()
+        tree.add(b"interop.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test SHA256 commit from dulwich"
+
+        # Add objects to repo
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        # Update HEAD
+        commit_id = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = commit_id
+        repo.close()
+
+        # Verify git can read the commit
+        commit_hash = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        commit_hash = commit_hash.strip().decode("ascii")
+        self.assertEqual(len(commit_hash), 64)  # SHA256 length
+
+        # Verify git can show the commit
+        commit_message = self._run_git(["log", "--format=%s", "-n", "1"], cwd=repo_path)
+        self.assertEqual(commit_message.strip(), b"Test SHA256 commit from dulwich")
+
+        # Verify git can list the tree
+        tree_content = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"interop.txt", tree_content)
+
+    def test_ref_updates_interop(self):
+        """Test that ref updates work between dulwich and git."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial commit with git
+        test_file = os.path.join(repo_path, "initial.txt")
+        with open(test_file, "w") as f:
+            f.write("Initial content")
+
+        self._run_git(["add", "initial.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=repo_path)
+
+        initial_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        initial_commit = initial_commit.strip()
+
+        # Update ref with dulwich
+        repo = Repo(repo_path)
+
+        # Create new commit with dulwich
+        blob = Blob.from_string(b"New content from dulwich")
+        tree = Tree()
+        tree.add(b"dulwich.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.parents = [initial_commit]
+        commit.author = commit.committer = b"Dulwich User <dulwich@example.com>"
+        commit.commit_time = commit.author_time = 1234567891
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Commit from dulwich"
+
+        # Add objects and update ref
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        new_commit_hash = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = new_commit_hash
+        repo.close()
+
+        # Verify git sees the update
+        current_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        current_commit = current_commit.strip().decode("ascii")
+        self.assertEqual(current_commit, new_commit_hash.decode("ascii"))
+
+        # Verify git can access the new tree
+        tree_listing = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"dulwich.txt", tree_listing)
+
+    def test_clone_sha256_repo_git_to_dulwich(self):
+        """Test cloning a git SHA256 repository with dulwich."""
+        # Create source repo with git
+        source_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(source_path))
+        self._run_git(["init", "--object-format=sha256", source_path])
+
+        # Add content
+        test_file = os.path.join(source_path, "clone_test.txt")
+        with open(test_file, "w") as f:
+            f.write("Content to be cloned")
+
+        self._run_git(["add", "clone_test.txt"], cwd=source_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=source_path)
+
+        # Clone with dulwich
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(target_path))
+
+        target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
+
+        # Copy objects (simplified clone)
+        source_repo = Repo(source_path)
+
+        # Copy all objects
+        for obj_id in source_repo.object_store:
+            obj = source_repo.object_store[obj_id]
+            target_repo.object_store.add_object(obj)
+
+        # Copy refs
+        for ref_name in source_repo.refs.keys():
+            ref_id = source_repo.refs[ref_name]
+            target_repo.refs[ref_name] = ref_id
+
+        # Set HEAD
+        target_repo.refs.set_symbolic_ref(b"HEAD", b"refs/heads/master")
+
+        source_repo.close()
+        target_repo.close()
+
+        # Verify with git
+        output = self._run_git(["rev-parse", "--show-object-format"], cwd=target_path)
+        self.assertEqual(output.strip(), b"sha256")
+
+        # Verify content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=target_path)
+        cloned_file = os.path.join(target_path, "clone_test.txt")
+        with open(cloned_file) as f:
+            content = f.read()
+        self.assertEqual(content, "Content to be cloned")
+
+    def test_fsck_sha256_repo(self):
+        """Test that git fsck works on dulwich-created SHA256 repos."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a more complex object graph
+        # Multiple blobs
+        blobs = []
+        for i in range(5):
+            blob = Blob.from_string(f"Blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Multiple trees
+        subtree = Tree()
+        subtree.add(b"subfile1.txt", 0o100644, blobs[0].get_id(SHA256))
+        subtree.add(b"subfile2.txt", 0o100644, blobs[1].get_id(SHA256))
+        repo.object_store.add_object(subtree)
+
+        main_tree = Tree()
+        main_tree.add(b"file1.txt", 0o100644, blobs[2].get_id(SHA256))
+        main_tree.add(b"file2.txt", 0o100644, blobs[3].get_id(SHA256))
+        main_tree.add(b"subdir", 0o040000, subtree.get_id(SHA256))
+        repo.object_store.add_object(main_tree)
+
+        # Create commits
+        commit1 = Commit()
+        commit1.tree = main_tree.get_id(SHA256)
+        commit1.author = commit1.committer = b"Test <test@example.com>"
+        commit1.commit_time = commit1.author_time = 1234567890
+        commit1.commit_timezone = commit1.author_timezone = 0
+        commit1.message = b"First commit"
+        repo.object_store.add_object(commit1)
+
+        commit2 = Commit()
+        commit2.tree = main_tree.get_id(SHA256)
+        commit2.parents = [commit1.get_id(SHA256)]
+        commit2.author = commit2.committer = b"Test <test@example.com>"
+        commit2.commit_time = commit2.author_time = 1234567891
+        commit2.commit_timezone = commit2.author_timezone = 0
+        commit2.message = b"Second commit"
+        repo.object_store.add_object(commit2)
+
+        # Set refs
+        repo.refs[b"refs/heads/master"] = commit2.get_id(SHA256)
+        repo.refs[b"refs/heads/branch1"] = commit1.get_id(SHA256)
+
+        repo.close()
+
+        # Run git fsck
+        fsck_output = self._run_git(["fsck", "--full"], cwd=repo_path)
+        # fsck should not report any errors (empty output or success message)
+        self.assertNotIn(b"error", fsck_output.lower())
+        self.assertNotIn(b"missing", fsck_output.lower())
+        self.assertNotIn(b"broken", fsck_output.lower())

+ 330 - 0
tests/compat/test_sha256_packs.py

@@ -0,0 +1,330 @@
+# test_sha256_packs.py -- Compatibility tests for SHA256 pack files
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 pack files with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.hash import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.pack import load_pack_index_file
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class GitSHA256PackCompatibilityTests(CompatTestCase):
+    """Test SHA256 pack file compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_git_pack_readable_by_dulwich(self):
+        """Test that git-created SHA256 pack files are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create multiple files to ensure pack creation
+        for i in range(20):
+            test_file = os.path.join(repo_path, f"file{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Content for file {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
+
+        # Force pack creation
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Open with dulwich
+        repo = Repo(repo_path)
+        self.assertEqual(repo.get_hash_algorithm(), SHA256)
+
+        # Find pack files
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0, "No pack files created")
+
+        # Read pack with dulwich
+        for pack_file in pack_files:
+            pack_path = os.path.join(pack_dir, pack_file)
+            idx_path = pack_path[:-5] + ".idx"
+
+            # Load pack index with SHA256 algorithm
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(
+                    idx_path, f, hash_algorithm=repo.get_hash_algorithm()
+                )
+
+            # Verify it's detected as SHA256
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate objects
+            obj_count = 0
+            for sha, offset, crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)  # SHA256
+                obj_count += 1
+
+            self.assertGreater(obj_count, 20)  # At least our files + trees + commit
+
+        # Verify we can read all objects through the repo interface
+        head_ref = repo.refs[b"refs/heads/master"]
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+
+        # Read the tree
+        tree = repo[commit.tree]
+        self.assertIsInstance(tree, Tree)
+
+        # Verify all files are there
+        file_count = 0
+        for name, mode, sha in tree.items():
+            if name.startswith(b"file") and name.endswith(b".txt"):
+                file_count += 1
+                # Read the blob
+                blob = repo[sha]
+                self.assertIsInstance(blob, Blob)
+
+        self.assertEqual(file_count, 20)
+        repo.close()
+
+    def test_dulwich_objects_readable_by_git(self):
+        """Test that dulwich-created SHA256 objects are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects
+        blobs = []
+        for i in range(10):
+            blob = Blob.from_string(f"Dulwich blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Create a tree with all blobs
+        tree = Tree()
+        for i, blob in enumerate(blobs):
+            tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+        repo.object_store.add_object(tree)
+
+        # Create a commit
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit with blobs"
+        repo.object_store.add_object(commit)
+
+        # Update HEAD
+        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        repo.close()
+
+        # Verify git can read all objects
+        output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        self.assertEqual(len(output.strip()), 64)  # SHA256
+
+        # List tree contents
+        tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        # Count lines instead of occurrences of "blob" since "blob" appears twice per line
+        lines = tree_output.strip().split(b"\n")
+        self.assertEqual(len(lines), 10)
+
+        # Verify git can check out the content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
+
+        # Verify files exist with correct content
+        for i in range(10):
+            file_path = os.path.join(repo_path, f"blob{i}.txt")
+            self.assertTrue(os.path.exists(file_path))
+            with open(file_path, "rb") as f:
+                content = f.read()
+                self.assertEqual(content, f"Dulwich blob content {i}".encode())
+
+    def test_pack_index_v1_interop(self):
+        """Test pack index v1 interoperability with SHA256."""
+        # Create repo with git using pack index v1
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+        self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
+
+        # Create files
+        for i in range(10):
+            test_file = os.path.join(repo_path, f"v1test{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Pack v1 test {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test pack v1"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Find pack index
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        idx_files = [f for f in os.listdir(pack_dir) if f.endswith(".idx")]
+
+        for idx_file in idx_files:
+            idx_path = os.path.join(pack_dir, idx_file)
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(
+                    idx_path, f, hash_algorithm=repo.get_hash_algorithm()
+                )
+
+            # Verify it's v1 with SHA256
+            self.assertEqual(pack_idx.version, 1)
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate
+            for sha, offset, crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)
+                self.assertIsNone(crc32)  # v1 doesn't store CRC32
+
+        repo.close()
+
+    def test_large_pack_interop(self):
+        """Test large pack file interoperability."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a large file that will use delta compression
+        large_content = b"A" * 10000
+        blobs = []
+
+        # Create similar blobs to trigger delta compression
+        for i in range(10):
+            content = large_content + f" variation {i}".encode()
+            blob = Blob.from_string(content)
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Create tree
+        tree = Tree()
+        for i, blob in enumerate(blobs):
+            tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+        repo.object_store.add_object(tree)
+
+        # Create commit
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Large files for delta compression test"
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        repo.close()
+
+        # Run git gc to create packs with delta compression
+        self._run_git(["gc", "--aggressive"], cwd=repo_path)
+
+        # Verify git created a pack
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0)
+
+        # Re-open with dulwich and verify we can read everything
+        repo = Repo(repo_path)
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+        tree = repo[commit.tree]
+
+        # Read all blobs
+        for i in range(10):
+            name = f"large{i}.txt".encode()
+            mode, sha = tree[name]
+            blob = repo[sha]
+            expected = large_content + f" variation {i}".encode()
+            self.assertEqual(blob.data, expected)
+
+        repo.close()
+
+    def test_mixed_loose_packed_objects(self):
+        """Test repositories with both loose and packed objects."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial objects that will be packed
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"packed{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will be packed {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Create more objects that will remain loose
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"loose{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will stay loose {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
+
+        # Open with dulwich
+        repo = Repo(repo_path)
+
+        # Count objects in packs vs loose
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
+        self.assertGreater(pack_count, 0)
+
+        # Verify we can read all objects
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+
+        # Walk the commit history
+        commit_count = 0
+        while commit.parents:
+            commit_count += 1
+            tree = repo[commit.tree]
+            # Verify we can read the tree
+            self.assertGreater(len(tree), 0)
+
+            if commit.parents:
+                commit = repo[commit.parents[0]]
+            else:
+                break
+
+        self.assertEqual(commit_count, 1)  # Only commits with parents are counted, so 1 of the 2 commits
+        repo.close()
+
+
+if __name__ == "__main__":
+    import unittest
+
+    unittest.main()

+ 213 - 0
tests/test_sha256.py

@@ -0,0 +1,213 @@
+# test_sha256.py -- Tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 support in Dulwich."""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from dulwich.hash import SHA1, SHA256, get_hash_algorithm
+from dulwich.objects import Blob, Tree, valid_hexsha, zero_sha_for
+from dulwich.repo import MemoryRepo, Repo
+
+
+class HashAlgorithmTests(unittest.TestCase):
+    """Tests for the hash algorithm abstraction."""
+
+    def test_sha1_properties(self):
+        """Test SHA1 algorithm properties."""
+        alg = SHA1
+        self.assertEqual(alg.name, "sha1")
+        self.assertEqual(alg.oid_length, 20)
+        self.assertEqual(alg.hex_length, 40)
+        self.assertEqual(len(alg.zero_oid), 40)
+        self.assertEqual(len(alg.zero_oid_bin), 20)
+
+    def test_sha256_properties(self):
+        """Test SHA256 algorithm properties."""
+        alg = SHA256
+        self.assertEqual(alg.name, "sha256")
+        self.assertEqual(alg.oid_length, 32)
+        self.assertEqual(alg.hex_length, 64)
+        self.assertEqual(len(alg.zero_oid), 64)
+        self.assertEqual(len(alg.zero_oid_bin), 32)
+
+    def test_get_hash_algorithm(self):
+        """Test getting hash algorithms by name."""
+        self.assertEqual(get_hash_algorithm("sha1"), SHA1)
+        self.assertEqual(get_hash_algorithm("sha256"), SHA256)
+        self.assertEqual(get_hash_algorithm(None), SHA1)  # Default
+
+        with self.assertRaises(ValueError):
+            get_hash_algorithm("invalid")
+
+
+class ObjectHashingTests(unittest.TestCase):
+    """Tests for object hashing with different algorithms."""
+
+    def test_blob_sha1(self):
+        """Test blob hashing with SHA1."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Default should be SHA1
+        sha1_id = blob.id
+        self.assertEqual(len(sha1_id), 40)
+        self.assertTrue(valid_hexsha(sha1_id))
+
+    def test_blob_sha256(self):
+        """Test blob hashing with SHA256."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Get SHA256 hash
+        sha256_id = blob.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+        self.assertTrue(valid_hexsha(sha256_id))
+
+        # SHA256 ID should be different from SHA1
+        sha1_id = blob.id
+        self.assertNotEqual(sha1_id, sha256_id)
+
+        # Verify .id property returns SHA1 for backward compatibility
+        self.assertEqual(blob.id, sha1_id)
+        self.assertEqual(blob.get_id(), sha1_id)  # Default should be SHA1
+
+    def test_tree_sha256(self):
+        """Test tree hashing with SHA256."""
+        tree = Tree()
+        tree.add(b"file.txt", 0o100644, b"a" * 40)  # SHA1 hex
+
+        # Get SHA1 (default)
+        sha1_id = tree.id
+        self.assertEqual(len(sha1_id), 40)
+
+        # Get SHA256
+        sha256_id = tree.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+
+        # Verify they're different
+        self.assertNotEqual(sha1_id, sha256_id)
+
+    def test_valid_hexsha(self):
+        """Test hex SHA validation for both algorithms."""
+        # Valid SHA1
+        self.assertTrue(valid_hexsha(b"1234567890abcdef1234567890abcdef12345678"))
+
+        # Valid SHA256
+        self.assertTrue(
+            valid_hexsha(
+                b"1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
+            )
+        )
+
+        # Invalid lengths
+        self.assertFalse(valid_hexsha(b"1234"))
+        self.assertFalse(
+            valid_hexsha(b"1234567890abcdef1234567890abcdef123456")
+        )  # 38 chars
+
+        # Invalid characters
+        self.assertFalse(valid_hexsha(b"123456789gabcdef1234567890abcdef12345678"))
+
+    def test_zero_sha_for(self):
+        """Test getting zero SHA for different algorithms."""
+        # Default (SHA1)
+        self.assertEqual(zero_sha_for(), b"0" * 40)
+        self.assertEqual(zero_sha_for(None), b"0" * 40)
+
+        # SHA1 explicit
+        self.assertEqual(zero_sha_for(SHA1), b"0" * 40)
+
+        # SHA256
+        self.assertEqual(zero_sha_for(SHA256), b"0" * 64)
+
+
+class RepositorySHA256Tests(unittest.TestCase):
+    """Tests for SHA256 repository support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_init_sha256_repo(self):
+        """Test initializing a SHA256 repository."""
+        repo_path = os.path.join(self.test_dir, "sha256_repo")
+        repo = Repo.init(repo_path, mkdir=True, object_format="sha256")
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"1")
+
+        # Check object format extension
+        self.assertEqual(config.get(("extensions",), "objectformat"), b"sha256")
+
+        # Check hash algorithm detection
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+        repo.close()
+
+    def test_init_sha1_repo(self):
+        """Test initializing a SHA1 repository (default)."""
+        repo_path = os.path.join(self.test_dir, "sha1_repo")
+        repo = Repo.init(repo_path, mkdir=True)
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"0")
+
+        # Object format extension should not exist
+        with self.assertRaises(KeyError):
+            config.get(("extensions",), "objectformat")
+
+        # Check hash algorithm detection
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA1)
+
+        repo.close()
+
+    def test_format_version_validation(self):
+        """Test format version validation for SHA256."""
+        repo_path = os.path.join(self.test_dir, "invalid_repo")
+
+        # SHA256 with format version 0 should fail
+        with self.assertRaises(ValueError) as cm:
+            Repo.init(repo_path, mkdir=True, format=0, object_format="sha256")
+        self.assertIn("SHA256", str(cm.exception))
+
+    def test_memory_repo_sha256(self):
+        """Test SHA256 support in memory repository."""
+        repo = MemoryRepo.init_bare([], {}, object_format="sha256")
+
+        # Check hash algorithm
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()

+ 128 - 0
tests/test_sha256_pack.py

@@ -0,0 +1,128 @@
+# test_sha256_pack.py -- Tests for SHA256 pack support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 pack support in Dulwich."""
+
+import shutil
+import tempfile
+import unittest
+from io import BytesIO
+
+from dulwich.hash import SHA256
+from dulwich.pack import (
+    load_pack_index_file,
+    write_pack_index_v2,
+)
+
+
+class SHA256PackTests(unittest.TestCase):
+    """Tests for SHA256 pack support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_pack_index_v2_with_sha256(self):
+        """Test that pack index v2 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually (simulating what would happen in a SHA256 repo)
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = i  # Fake CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Write SHA256 pack index with SHA1 pack checksum (Git always uses SHA1 for pack checksums)
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake pack data").digest()
+        write_pack_index_v2(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 2)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+    def test_pack_index_v1_with_sha256(self):
+        """Test that pack index v1 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test v1 object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = None  # v1 doesn't store CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Import write_pack_index_v1
+        from dulwich.pack import write_pack_index_v1
+
+        # Write SHA256 pack index v1 with SHA1 pack checksum
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake v1 pack data").digest()
+        write_pack_index_v1(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 1)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()