Jelmer Vernooij 7 месяцев назад
Родитель
Commit
a7e3b91227

+ 8 - 2
NEWS

@@ -531,6 +531,13 @@ compatible.
 
  * Add ``gc`` command to ``dulwich.porcelain.`` (Jelmer Vernooij, #92)
 
+ * Add initial support for SHA256 repositories. Dulwich can now read and write Git
+   repositories using SHA256 object format. This includes support for loose
+   objects, pack files (v1 and v2 indexes), and tree parsing with SHA256 hashes.
+   The Rust extensions have been updated to support variable hash lengths.
+   SHA256 repositories require format version 1 and the objectFormat extension.
+   (Jelmer Vernooij, #1115)
+
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
    into loose objects in the repository. This command extracts all objects
    from a pack file and writes them to the object store as individual files.
@@ -550,8 +557,7 @@ compatible.
  * Add support for pack index format version 3. This format supports variable
    hash sizes to enable future SHA-256 support. The implementation includes
    reading and writing v3 indexes with proper hash algorithm identification
-   (1 for SHA-1, 2 for SHA-256). Note that SHA-256 support itself is not yet
-   implemented and will raise NotImplementedError. (Jelmer Vernooij)
+   (1 for SHA-1, 2 for SHA-256). (Jelmer Vernooij)
 
  * Fix ``LocalGitClient`` assertion error when fetching externally cloned repositories
    into ``MemoryRepo``. Previously, the client would fail with an AssertionError

+ 41 - 10
crates/objects/src/lib.rs

@@ -49,15 +49,13 @@ fn sha_to_pyhex(py: Python, sha: &[u8]) -> PyResult<Py<PyAny>> {
     Ok(PyBytes::new(py, hexsha.as_slice()).into())
 }
 
-#[pyfunction]
-#[pyo3(signature = (text, strict=None))]
-fn parse_tree(
+fn parse_tree_with_length(
     py: Python,
     mut text: &[u8],
-    strict: Option<bool>,
-) -> PyResult<Vec<(Py<PyAny>, u32, Py<PyAny>)>> {
+    strict: bool,
+    hash_len: usize,
+) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
     let mut entries = Vec::new();
-    let strict = strict.unwrap_or(false);
     while !text.is_empty() {
         let mode_end = memchr(b' ', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
@@ -73,21 +71,54 @@ fn parse_tree(
         let namelen = memchr(b'\0', text)
             .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
         let name = &text[..namelen];
-        if namelen + 20 >= text.len() {
+
+        // Skip name and null terminator
+        text = &text[namelen + 1..];
+
+        // Check if we have enough bytes for the hash
+        if text.len() < hash_len {
             return Err(ObjectFormatException::new_err(("SHA truncated",)));
         }
-        text = &text[namelen + 1..];
-        let sha = &text[..20];
+
+        let sha = &text[..hash_len];
         entries.push((
             PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
             mode,
             sha_to_pyhex(py, sha)?,
         ));
-        text = &text[20..];
+        text = &text[hash_len..];
     }
     Ok(entries)
 }
 
+#[pyfunction]
+#[pyo3(signature = (text, strict=None, hash_algorithm=None))]
+fn parse_tree(
+    py: Python,
+    text: &[u8],
+    strict: Option<bool>,
+    hash_algorithm: Option<PyObject>,
+) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
+    let strict = strict.unwrap_or(false);
+
+    // Determine hash length from hash_algorithm if provided
+    if let Some(algo) = hash_algorithm {
+        // Get oid_length attribute from hash algorithm object
+        let oid_length: usize = algo.getattr(py, "oid_length")?.extract(py)?;
+        parse_tree_with_length(py, text, strict, oid_length)
+    } else {
+        // Try to auto-detect by attempting to parse with both lengths
+        // We'll attempt to parse with SHA1 first (20 bytes), then SHA256 (32 bytes)
+        match parse_tree_with_length(py, text, strict, 20) {
+            Ok(entries) => Ok(entries),
+            Err(_) => {
+                // SHA1 failed, try SHA256
+                parse_tree_with_length(py, text, strict, 32)
+            }
+        }
+    }
+}
+
 fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
     let len = std::cmp::min(a.1.len(), b.1.len());
     let cmp = a.1[..len].cmp(&b.1[..len]);

+ 8 - 5
crates/pack/src/lib.rs

@@ -30,8 +30,9 @@ pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
 fn py_is_sha(sha: &Py<PyAny>, py: Python) -> PyResult<bool> {
     // Check if the object is a bytes object
     if sha.bind(py).is_instance_of::<PyBytes>() {
-        // Check if the bytes object has a size of 20
-        if sha.extract::<&[u8]>(py)?.len() == 20 {
+        // Check if the bytes object has a size of 20 (SHA1) or 32 (SHA256)
+        let len = sha.extract::<&[u8]>(py)?.len();
+        if len == 20 || len == 32 {
             Ok(true)
         } else {
             Ok(false)
@@ -53,9 +54,11 @@ fn bisect_find_sha(
     let sha = sha.as_bytes(py);
     let sha_len = sha.len();
 
-    // Check if sha is 20 bytes long
-    if sha_len != 20 {
-        return Err(PyValueError::new_err("Sha is not 20 bytes long"));
+    // Check if sha is 20 bytes (SHA1) or 32 bytes (SHA256)
+    if sha_len != 20 && sha_len != 32 {
+        return Err(PyValueError::new_err(
+            "Sha must be 20 (SHA1) or 32 (SHA256) bytes long",
+        ));
     }
 
     // Check if start > end

+ 121 - 0
dulwich/hash.py

@@ -0,0 +1,121 @@
+# hash.py -- Hash algorithm abstraction layer for Git
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Hash algorithm abstraction for Git objects.
+
+This module provides an abstraction layer for different hash algorithms
+used in Git repositories (SHA-1 and SHA-256).
+"""
+
+from hashlib import sha1, sha256
+from typing import Callable, Optional
+
+
+class HashAlgorithm:
+    """Base class for hash algorithms used in Git."""
+
+    def __init__(
+        self, name: str, oid_length: int, hex_length: int, hash_func: Callable
+    ) -> None:
+        """Initialize a hash algorithm.
+
+        Args:
+            name: Name of the algorithm (e.g., "sha1", "sha256")
+            oid_length: Length of the binary object ID in bytes
+            hex_length: Length of the hexadecimal object ID in characters
+            hash_func: Hash function from hashlib
+        """
+        self.name = name
+        self.oid_length = oid_length
+        self.hex_length = hex_length
+        self.hash_func = hash_func
+        self.zero_oid = b"0" * hex_length
+        self.zero_oid_bin = b"\x00" * oid_length
+
+    def __str__(self) -> str:
+        return self.name
+
+    def __repr__(self) -> str:
+        return f"HashAlgorithm({self.name!r})"
+
+    def new_hash(self):
+        """Create a new hash object."""
+        return self.hash_func()
+
+    def hash_object(self, data: bytes) -> bytes:
+        """Hash data and return the digest.
+
+        Args:
+            data: Data to hash
+
+        Returns:
+            Binary digest
+        """
+        h = self.new_hash()
+        h.update(data)
+        return h.digest()
+
+    def hash_object_hex(self, data: bytes) -> bytes:
+        """Hash data and return the hexadecimal digest.
+
+        Args:
+            data: Data to hash
+
+        Returns:
+            Hexadecimal digest as bytes
+        """
+        h = self.new_hash()
+        h.update(data)
+        return h.hexdigest().encode("ascii")
+
+
+# Define the supported hash algorithms
+SHA1 = HashAlgorithm("sha1", 20, 40, sha1)
+SHA256 = HashAlgorithm("sha256", 32, 64, sha256)
+
+# Map of algorithm names to HashAlgorithm instances
+HASH_ALGORITHMS = {
+    "sha1": SHA1,
+    "sha256": SHA256,
+}
+
+# Default algorithm for backward compatibility
+DEFAULT_HASH_ALGORITHM = SHA1
+
+
+def get_hash_algorithm(name: Optional[str] = None) -> HashAlgorithm:
+    """Get a hash algorithm by name.
+
+    Args:
+        name: Algorithm name ("sha1" or "sha256"). If None, returns default.
+
+    Returns:
+        HashAlgorithm instance
+
+    Raises:
+        ValueError: If the algorithm name is not supported
+    """
+    if name is None:
+        return DEFAULT_HASH_ALGORITHM
+    try:
+        return HASH_ALGORITHMS[name.lower()]
+    except KeyError:
+        raise ValueError(f"Unsupported hash algorithm: {name}")

+ 39 - 5
dulwich/object_store.py

@@ -1169,10 +1169,10 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         """
         if name == ZERO_SHA:
             raise KeyError(name)
-        if len(name) == 40:
+        if len(name) in (40, 64):  # Support both SHA1 (40) and SHA256 (64) hex
             sha = hex_to_sha(cast(ObjectID, name))
             hexsha = cast(ObjectID, name)
-        elif len(name) == 20:
+        elif len(name) in (20, 32):  # Support both SHA1 (20) and SHA256 (32) binary
             sha = cast(RawObjectID, name)
             hexsha = None
         else:
@@ -1382,6 +1382,7 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         dir_mode: int | None = None,
+        hash_algorithm=None,
     ) -> None:
         """Open an object store.
 
@@ -1402,6 +1403,7 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
+          hash_algorithm: Hash algorithm to use (SHA1 or SHA256)
         """
         super().__init__(
             pack_compression_level=pack_compression_level,
@@ -1426,6 +1428,11 @@ class DiskObjectStore(PackBasedObjectStore):
         self.file_mode = file_mode
         self.dir_mode = dir_mode
 
+        # Import here to avoid circular dependency
+        from .hash import get_hash_algorithm
+
+        self.hash_algorithm = hash_algorithm if hash_algorithm else get_hash_algorithm()
+
         # Commit graph support - lazy loaded
         self._commit_graph = None
         self._use_commit_graph = True  # Default to true
@@ -1540,6 +1547,24 @@ class DiskObjectStore(PackBasedObjectStore):
                 (b"repack",), b"writeBitmaps", False
             )
 
+        # Get hash algorithm from config
+        from .hash import get_hash_algorithm
+
+        hash_algorithm = None
+        try:
+            try:
+                version = int(config.get((b"core",), b"repositoryformatversion"))
+            except KeyError:
+                version = 0
+            if version == 1:
+                try:
+                    object_format = config.get((b"extensions",), b"objectformat")
+                except KeyError:
+                    object_format = b"sha1"
+                hash_algorithm = get_hash_algorithm(object_format.decode("ascii"))
+        except (KeyError, ValueError):
+            pass
+
         instance = cls(
             path,
             loose_compression_level=loose_compression_level,
@@ -1557,6 +1582,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             dir_mode=dir_mode,
+            hash_algorithm=hash_algorithm,
         )
         instance._use_commit_graph = use_commit_graph
         instance._use_midx = use_midx
@@ -1647,6 +1673,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     depth=self.pack_depth,
                     threads=self.pack_threads,
                     big_file_threshold=self.pack_big_file_threshold,
+                    hash_algorithm=self.hash_algorithm,
                 )
                 new_packs.append(pack)
                 self._pack_cache[f] = pack
@@ -1698,7 +1725,9 @@ class DiskObjectStore(PackBasedObjectStore):
     def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
         path = self._get_shafile_path(sha)
         try:
-            return ShaFile.from_path(path)
+            # Load the object from path with SHA for hash algorithm detection
+            # sha parameter here is already hex, so pass it directly
+            return ShaFile.from_path(path, sha)
         except FileNotFoundError:
             return None
 
@@ -1885,6 +1914,7 @@ class DiskObjectStore(PackBasedObjectStore):
             depth=self.pack_depth,
             threads=self.pack_threads,
             big_file_threshold=self.pack_big_file_threshold,
+            hash_algorithm=self.hash_algorithm,
         )
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
@@ -1964,7 +1994,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Args:
           obj: Object to add
         """
-        path = self._get_shafile_path(obj.id)
+        # Use the correct hash algorithm for the object ID
+        obj_id = obj.get_id(self.hash_algorithm)
+        path = self._get_shafile_path(obj_id)
         dir = os.path.dirname(path)
         try:
             os.mkdir(dir)
@@ -1987,6 +2019,7 @@ class DiskObjectStore(PackBasedObjectStore):
         *,
         file_mode: int | None = None,
         dir_mode: int | None = None,
+        hash_algorithm=None,
     ) -> "DiskObjectStore":
         """Initialize a new disk object store.
 
@@ -1996,6 +2029,7 @@ class DiskObjectStore(PackBasedObjectStore):
           path: Path where the object store should be created
           file_mode: Optional file permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
+          hash_algorithm: Hash algorithm to use (SHA1 or SHA256)
 
         Returns:
           New DiskObjectStore instance
@@ -2013,7 +2047,7 @@ class DiskObjectStore(PackBasedObjectStore):
         if dir_mode is not None:
             os.chmod(info_path, dir_mode)
             os.chmod(pack_path, dir_mode)
-        return cls(path, file_mode=file_mode, dir_mode=dir_mode)
+        return cls(path, file_mode=file_mode, dir_mode=dir_mode, hash_algorithm=hash_algorithm)
 
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
         """Iterate over all object SHAs with the given prefix.

+ 141 - 25
dulwich/objects.py

@@ -112,6 +112,24 @@ if TYPE_CHECKING:
 
     from .file import _GitFile
 
+# Zero SHA constants for backward compatibility
+ZERO_SHA = b"0" * 40  # SHA1 - kept for backward compatibility
+
+
+def zero_sha_for(hash_algorithm=None):
+    """Get the zero SHA for a given hash algorithm.
+
+    Args:
+        hash_algorithm: HashAlgorithm instance. If None, returns SHA1 zero.
+
+    Returns:
+        Zero SHA as hex bytes (40 chars for SHA1, 64 for SHA256)
+    """
+    if hash_algorithm is None:
+        return ZERO_SHA
+    return hash_algorithm.zero_oid
+
+
 # Header fields for commits
 _TREE_HEADER = b"tree"
 _PARENT_HEADER = b"parent"
@@ -175,13 +193,17 @@ def _decompress(string: bytes) -> bytes:
 def sha_to_hex(sha: RawObjectID) -> ObjectID:
     """Takes a string and returns the hex of the sha within."""
     hexsha = binascii.hexlify(sha)
-    assert len(hexsha) == 40, f"Incorrect length of sha1 string: {hexsha!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hexsha) not in (40, 64):
+        raise ValueError(f"Incorrect length of sha string: {hexsha!r}")
     return ObjectID(hexsha)
 
 
 def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
     """Takes a hex sha and returns a binary sha."""
-    assert len(hex) == 40, f"Incorrect length of hexsha: {hex!r}"
+    # Support both SHA1 (40 chars) and SHA256 (64 chars)
+    if len(hex) not in (40, 64):
+        raise ValueError(f"Incorrect length of hexsha: {hex}")
     try:
         return RawObjectID(binascii.unhexlify(hex))
     except TypeError as exc:
@@ -191,15 +213,15 @@ def hex_to_sha(hex: ObjectID | str) -> RawObjectID:
 
 
 def valid_hexsha(hex: bytes | str) -> bool:
-    """Check if a string is a valid hex SHA.
+    """Check if a hex string is a valid SHA1 or SHA256.
 
     Args:
-      hex: Hex string to check
+        hex: Hex string to validate
 
     Returns:
-      True if valid hex SHA, False otherwise
+        True if valid SHA1 (40 chars) or SHA256 (64 chars), False otherwise
     """
-    if len(hex) != 40:
+    if len(hex) not in (40, 64):
         return False
     try:
         binascii.unhexlify(hex)
@@ -549,11 +571,12 @@ class ShaFile:
     ) -> None:
         """Set the contents of this object from a list of chunks."""
         self._chunked_text = chunks
-        self._deserialize(chunks)
+        # Set SHA before deserialization so Tree can detect hash algorithm
         if sha is None:
             self._sha = None
         else:
-            self._sha = FixedSha(sha)
+            self._sha = FixedSha(sha)  # type: ignore
+        self._deserialize(chunks)
         self._needs_serialization = False
 
     @staticmethod
@@ -613,17 +636,21 @@ class ShaFile:
         raise NotImplementedError(self._serialize)
 
     @classmethod
-    def from_path(cls, path: str | bytes) -> "ShaFile":
+    def from_path(cls, path: str | bytes, sha: ObjectID | None = None) -> "ShaFile":
         """Open a SHA file from disk."""
         with GitFile(path, "rb") as f:
-            return cls.from_file(f)
+            return cls.from_file(f, sha)
 
     @classmethod
-    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile") -> "ShaFile":
+    def from_file(cls, f: BufferedIOBase | IO[bytes] | "_GitFile", sha: ObjectID | None = None) -> "ShaFile":
         """Get the contents of a SHA file on disk."""
         try:
             obj = cls._parse_file(f)
-            obj._sha = None
+            # Set SHA after parsing but before any further processing
+            if sha is not None:
+                obj._sha = FixedSha(sha)
+            else:
+                obj._sha = None
             return obj
         except (IndexError, ValueError) as exc:
             raise ObjectFormatException("invalid object header") from exc
@@ -713,8 +740,21 @@ class ShaFile:
         """Returns the length of the raw string of this object."""
         return sum(map(len, self.as_raw_chunks()))
 
-    def sha(self) -> "FixedSha | HASH":
-        """The SHA1 object that is the name of this object."""
+    def sha(self, hash_algorithm=None) -> "FixedSha | HASH":
+        """The SHA object that is the name of this object.
+
+        Args:
+            hash_algorithm: Optional HashAlgorithm to use. Defaults to SHA1.
+        """
+        # If using a different hash algorithm, always recalculate
+        if hash_algorithm is not None:
+            new_sha = hash_algorithm.new_hash()
+            new_sha.update(self._header())
+            for chunk in self.as_raw_chunks():
+                new_sha.update(chunk)
+            return new_sha
+
+        # Otherwise use cached SHA1 value
         if self._sha is None or self._needs_serialization:
             # this is a local because as_raw_chunks() overwrites self._sha
             new_sha = sha1()
@@ -733,9 +773,32 @@ class ShaFile:
 
     @property
     def id(self) -> ObjectID:
-        """The hex SHA of this object."""
+        """The hex SHA1 of this object.
+
+        For SHA256 repositories, use get_id(hash_algorithm) instead.
+        This property always returns SHA1 for backward compatibility.
+        """
         return ObjectID(self.sha().hexdigest().encode("ascii"))
 
+    def get_id(self, hash_algorithm=None):
+        """Get the hex SHA of this object using the specified hash algorithm.
+
+        Args:
+            hash_algorithm: Optional HashAlgorithm to use. Defaults to SHA1.
+
+        Example:
+            >>> blob = Blob()
+            >>> blob.data = b"Hello, World!"
+            >>> blob.id  # Always returns SHA1 for backward compatibility
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> blob.get_id()  # Same as .id
+            b'4ab299c8ad6ed14f31923dd94f8b5f5cb89dfb54'
+            >>> from dulwich.hash import SHA256
+            >>> blob.get_id(SHA256)  # Get SHA256 hash
+            b'03ba204e2f2e707...'  # 64-character SHA256
+        """
+        return self.sha(hash_algorithm).hexdigest().encode("ascii")
+
     def __repr__(self) -> str:
         """Return string representation of this object."""
         return f"<{self.__class__.__name__} {self.id!r}>"
@@ -1247,20 +1310,37 @@ class TreeEntry(NamedTuple):
 
 
 def parse_tree(
-    text: bytes, strict: bool = False
+    text: bytes, strict: bool = False, hash_algorithm=None
 ) -> Iterator[tuple[bytes, int, ObjectID]]:
     """Parse a tree text.
 
     Args:
       text: Serialized text to parse
-      strict: If True, enforce strict validation
+      strict: Whether to be strict about format
+      hash_algorithm: Hash algorithm object (SHA1 or SHA256) - if None, auto-detect
     Returns: iterator of tuples of (name, mode, sha)
 
     Raises:
       ObjectFormatException: if the object was malformed in some way
     """
+    if hash_algorithm is not None:
+        sha_len = hash_algorithm.oid_length
+        return _parse_tree_with_sha_len(text, strict, sha_len)
+
+    # Try both hash lengths and use the one that works
+    try:
+        # Try SHA1 first (more common)
+        return _parse_tree_with_sha_len(text, strict, 20)
+    except ObjectFormatException:
+        # If SHA1 fails, try SHA256
+        return _parse_tree_with_sha_len(text, strict, 32)
+
+
+def _parse_tree_with_sha_len(text, strict, sha_len):
+    """Helper function to parse tree with a specific hash length."""
     count = 0
     length = len(text)
+
     while count < length:
         mode_end = text.index(b" ", count)
         mode_text = text[count:mode_end]
@@ -1272,10 +1352,18 @@ def parse_tree(
             raise ObjectFormatException(f"Invalid mode {mode_text!r}") from exc
         name_end = text.index(b"\0", mode_end)
         name = text[mode_end + 1 : name_end]
-        count = name_end + 21
+
+        count = name_end + 1 + sha_len
+        if count > length:
+            raise ObjectFormatException(
+                f"Tree entry extends beyond tree length: {count} > {length}"
+            )
+
         sha = text[name_end + 1 : count]
-        if len(sha) != 20:
-            raise ObjectFormatException("Sha has invalid length")
+        if len(sha) != sha_len:
+            raise ObjectFormatException(
+                f"Sha has invalid length: {len(sha)} != {sha_len}"
+            )
         hexsha = sha_to_hex(RawObjectID(sha))
         yield (name, mode, hexsha)
 
@@ -1386,12 +1474,34 @@ class Tree(ShaFile):
         super().__init__()
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
 
+    def _get_hash_algorithm(self):
+        """Get the hash algorithm based on the object's SHA."""
+        if not hasattr(self, "_sha") or self._sha is None:
+            return None
+
+        # Get the raw SHA bytes
+        sha = self._sha.digest() if hasattr(self._sha, "digest") else self._sha
+        if not isinstance(sha, bytes):
+            return None
+
+        # Import hash modules lazily to avoid circular imports
+        if len(sha) == 32:
+            from .hash import SHA256
+
+            return SHA256
+        elif len(sha) == 20:
+            from .hash import SHA1
+
+            return SHA1
+        return None
+
     @classmethod
-    def from_path(cls, filename: str | bytes) -> "Tree":
+    def from_path(cls, filename: str | bytes, sha: ObjectID | None = None) -> "Tree":
         """Read a tree from a file on disk.
 
         Args:
           filename: Path to the tree file
+          sha: Optional known SHA for the object
 
         Returns:
           A Tree object
@@ -1399,7 +1509,7 @@ class Tree(ShaFile):
         Raises:
           NotTreeError: If the file is not a tree
         """
-        tree = ShaFile.from_path(filename)
+        tree = ShaFile.from_path(filename, sha)
         if not isinstance(tree, cls):
             raise NotTreeError(_path_to_bytes(filename))
         return tree
@@ -1470,7 +1580,9 @@ class Tree(ShaFile):
     def _deserialize(self, chunks: list[bytes]) -> None:
         """Grab the entries in the tree."""
         try:
-            parsed_entries = parse_tree(b"".join(chunks))
+            parsed_entries = parse_tree(
+                b"".join(chunks), hash_algorithm=self._get_hash_algorithm()
+            )
         except ValueError as exc:
             raise ObjectFormatException(exc) from exc
         # TODO: list comprehension is for efficiency in the common (small)
@@ -1496,8 +1608,12 @@ class Tree(ShaFile):
             # TODO: optionally exclude as in git fsck --strict
             stat.S_IFREG | 0o664,
         )
-        for name, mode, sha in parse_tree(b"".join(self._chunked_text), True):
-            check_hexsha(sha, f"invalid sha {sha!r}")
+        for name, mode, sha in parse_tree(
+            b"".join(self._chunked_text),
+            strict=True,
+            hash_algorithm=self._get_hash_algorithm(),
+        ):
+            check_hexsha(sha, f"invalid sha {sha}")
             if b"/" in name or name in (b"", b".", b"..", b".git"):
                 raise ObjectFormatException(
                     "invalid name {}".format(name.decode("utf-8", "replace"))

+ 84 - 33
dulwich/pack.py

@@ -530,15 +530,16 @@ def iter_sha1(iter: Iterable[bytes]) -> bytes:
     return sha.hexdigest().encode("ascii")
 
 
-def load_pack_index(path: str | os.PathLike[str]) -> "PackIndex":
+def load_pack_index(path: str | os.PathLike[str], hash_algorithm=None) -> "PackIndex":
     """Load an index file by path.
 
     Args:
       path: Path to the index file
+      hash_algorithm: Hash algorithm used by the repository
     Returns: A PackIndex loaded from the given path
     """
     with GitFile(path, "rb") as f:
-        return load_pack_index_file(path, f)
+        return load_pack_index_file(path, f, hash_algorithm=hash_algorithm)
 
 
 def _load_file_contents(
@@ -574,25 +575,35 @@ def _load_file_contents(
 
 def load_pack_index_file(
-    path: str | os.PathLike[str], f: IO[bytes] | _GitFile
+    path: str | os.PathLike[str],
+    f: IO[bytes] | _GitFile,
+    hash_algorithm=None,
 ) -> "PackIndex":
     """Load an index file from a file-like object.
 
     Args:
       path: Path for the index file
       f: File-like object
+      hash_algorithm: Hash algorithm used by the repository
     Returns: A PackIndex loaded from the given file
     """
     contents, size = _load_file_contents(f)
     if contents[:4] == b"\377tOc":
         version = struct.unpack(b">L", contents[4:8])[0]
         if version == 2:
-            return PackIndex2(path, file=f, contents=contents, size=size)
+            return PackIndex2(
+                path,
+                file=f,
+                contents=contents,
+                size=size,
+                hash_algorithm=hash_algorithm,
+            )
         elif version == 3:
             return PackIndex3(path, file=f, contents=contents, size=size)
         else:
             raise KeyError(f"Unknown pack index format {version}")
     else:
-        return PackIndex1(path, file=f, contents=contents, size=size)
+        return PackIndex1(
+            path, file=f, contents=contents, size=size, hash_algorithm=hash_algorithm
+        )
 
 
 def bisect_find_sha(
@@ -777,7 +788,7 @@ class MemoryPackIndex(PackIndex):
           sha: SHA to look up (binary or hex)
         Returns: Offset in the pack file
         """
-        if len(sha) == 40:
+        if len(sha) in (40, 64):  # Hex string (SHA1 or SHA256)
             sha = hex_to_sha(cast(ObjectID, sha))
         return self._by_sha[cast(RawObjectID, sha)]
 
@@ -976,7 +987,8 @@ class FilePackIndex(PackIndex):
         Args:
           sha: A *binary* SHA string. (20 characters long)_
         """
-        assert len(sha) == 20
+        hash_size = getattr(self, "hash_size", 20)  # Default to SHA1 for v1
+        assert len(sha) == hash_size
         idx = ord(sha[:1])
         if idx == 0:
             start = 0
@@ -1020,6 +1032,7 @@ class PackIndex1(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
+        hash_algorithm=None,
     ) -> None:
         """Initialize a version 1 pack index.
 
@@ -1028,24 +1041,35 @@ class PackIndex1(FilePackIndex):
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
+            hash_algorithm: Hash algorithm used by the repository
         """
         super().__init__(filename, file, contents, size)
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
+        # Use provided hash algorithm if available, otherwise default to SHA1
+        if hash_algorithm:
+            self.hash_size = hash_algorithm.oid_length
+        else:
+            self.hash_size = 20  # Default to SHA1
+
+        self._entry_size = 4 + self.hash_size
 
     def _unpack_entry(self, i: int) -> tuple[RawObjectID, int, None]:
-        (offset, name) = unpack_from(">L20s", self._contents, (0x100 * 4) + (i * 24))
+        base_offset = (0x100 * 4) + (i * self._entry_size)
+        if self.hash_size == 20:
+            (offset, name) = unpack_from(">L20s", self._contents, base_offset)
+        else:  # SHA256
+            offset = unpack_from(">L", self._contents, base_offset)[0]
+            name = self._contents[base_offset + 4 : base_offset + 4 + self.hash_size]
         return (RawObjectID(name), offset, None)
 
     def _unpack_name(self, i: int) -> bytes:
-        offset = (0x100 * 4) + (i * 24) + 4
-        return self._contents[offset : offset + 20]
+        offset = (0x100 * 4) + (i * self._entry_size) + 4
+        return self._contents[offset : offset + self.hash_size]
 
     def _unpack_offset(self, i: int) -> int:
-        offset = (0x100 * 4) + (i * 24)
-        result = unpack_from(">L", self._contents, offset)[0]
-        assert isinstance(result, int)
-        return result
+        offset = (0x100 * 4) + (i * self._entry_size)
+        return unpack_from(">L", self._contents, offset)[0]
 
     def _unpack_crc32_checksum(self, i: int) -> None:
         # Not stored in v1 index files
@@ -1061,6 +1085,7 @@ class PackIndex2(FilePackIndex):
         file: IO[bytes] | _GitFile | None = None,
         contents: bytes | None = None,
         size: int | None = None,
+        hash_algorithm=None,
     ) -> None:
         """Initialize a version 2 pack index.
 
@@ -1069,6 +1094,7 @@ class PackIndex2(FilePackIndex):
             file: Optional file object
             contents: Optional mmap'd contents
             size: Optional size of the index
+            hash_algorithm: Hash algorithm used by the repository
         """
         super().__init__(filename, file, contents, size)
         if self._contents[:4] != b"\377tOc":
@@ -1077,8 +1103,15 @@ class PackIndex2(FilePackIndex):
         if self.version != 2:
             raise AssertionError(f"Version was {self.version}")
         self._fan_out_table = self._read_fan_out_table(8)
+
+        # Use provided hash algorithm if available, otherwise default to SHA-1.
+        # NOTE(review): the ``hash_algorithm`` parameter is annotated ``int | None``
+        # but is used here as an object exposing ``oid_length`` — the annotation
+        # looks inaccurate; confirm the intended type.
+        if hash_algorithm:
+            self.hash_size = hash_algorithm.oid_length
+        else:
+            self.hash_size = 20  # Default to SHA1
+
         self._name_table_offset = 8 + 0x100 * 4
-        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
+        self._crc32_table_offset = self._name_table_offset + self.hash_size * len(self)
         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
         self._pack_offset_largetable_offset = self._pack_offset_table_offset + 4 * len(
             self
@@ -1092,25 +1125,27 @@ class PackIndex2(FilePackIndex):
         )
 
     def _unpack_name(self, i: int) -> bytes:
-        offset = self._name_table_offset + i * 20
-        return self._contents[offset : offset + 20]
+        offset = self._name_table_offset + i * self.hash_size
+        return self._contents[offset : offset + self.hash_size]
 
     def _unpack_offset(self, i: int) -> int:
-        offset_pos = self._pack_offset_table_offset + i * 4
-        offset = unpack_from(">L", self._contents, offset_pos)[0]
-        assert isinstance(offset, int)
+        offset = self._pack_offset_table_offset + i * 4
+        offset = unpack_from(">L", self._contents, offset)[0]
         if offset & (2**31):
-            large_offset_pos = (
-                self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
-            )
-            offset = unpack_from(">Q", self._contents, large_offset_pos)[0]
-            assert isinstance(offset, int)
+            offset = self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
+            offset = unpack_from(">Q", self._contents, offset)[0]
         return offset
 
     def _unpack_crc32_checksum(self, i: int) -> int:
-        result = unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
-        assert isinstance(result, int)
-        return result
+        return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+
+    def get_pack_checksum(self) -> bytes:
+        """Return the checksum stored for the corresponding packfile.
+
+        Returns: binary digest of the pack checksum (20 bytes)
+        """
+        # Assumes a 20-byte (SHA-1) trailer checksum. NOTE(review): per git's
+        # hash-function transition design, SHA-256 pack/index files carry
+        # 32-byte trailer checksums, so the [-40:-20] slice below needs
+        # verification for SHA-256 repositories.
+        return bytes(self._contents[-40:-20])
 
 
 class PackIndex3(FilePackIndex):
@@ -3172,6 +3207,8 @@ def write_pack_index_v1(
         f.write(struct.pack(">L", fan_out_table[i]))
         fan_out_table[i + 1] += fan_out_table[i]
     for name, offset, _entry_checksum in entries:
+        if len(name) != 20:
+            raise TypeError("pack index v1 only supports SHA-1 names")
         if not (offset <= 0xFFFFFFFF):
             raise TypeError("pack format 1 only supports offsets < 2Gb")
         f.write(struct.pack(">L20s", offset, name))
@@ -3250,11 +3287,11 @@ def _create_delta_py(base_buf: bytes, target_buf: bytes) -> Iterator[bytes]:
             o = j1
             while s > 127:
                 yield bytes([127])
-                yield bytes(memoryview(target_buf)[o : o + 127])
+                yield memoryview(target_buf)[o : o + 127]
                 s -= 127
                 o += 127
             yield bytes([s])
-            yield bytes(memoryview(target_buf)[o : o + s])
+            yield memoryview(target_buf)[o : o + s]
 
 
 # Default to pure Python implementation
@@ -3357,12 +3394,20 @@ def write_pack_index_v2(
     fan_out_table: dict[int, int] = defaultdict(lambda: 0)
     for name, offset, entry_checksum in entries:
         fan_out_table[ord(name[:1])] += 1
+    try:
+        hash_size = len(next(iter(entries))[0])
+    except StopIteration:
+        hash_size = 20  # Default to SHA-1 size if no entries
     # Fan-out table
     largetable: list[int] = []
     for i in range(0x100):
         f.write(struct.pack(b">L", fan_out_table[i]))
         fan_out_table[i + 1] += fan_out_table[i]
     for name, offset, entry_checksum in entries:
+        if len(name) != hash_size:
+            raise TypeError(
+                f"Object name has wrong length: expected {hash_size}, got {len(name)}"
+            )
         f.write(name)
     for name, offset, entry_checksum in entries:
         f.write(struct.pack(b">L", entry_checksum))
@@ -3512,6 +3557,7 @@ class Pack:
         depth: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
+        hash_algorithm: int | None = None,
     ) -> None:
         """Initialize a Pack object.
 
@@ -3524,6 +3570,7 @@ class Pack:
           depth: Maximum depth for delta chains
           threads: Number of threads to use for operations
           big_file_threshold: Size threshold for big file handling
+          hash_algorithm: Hash algorithm identifier (1 = SHA-1, 2 = SHA-256)
         """
         self._basename = basename
         self._data = None
@@ -3549,21 +3596,25 @@ class Pack:
         )
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self.resolve_ext_ref = resolve_ext_ref
+        self.hash_algorithm = (
+            hash_algorithm if hash_algorithm is not None else DEFAULT_HASH_ALGORITHM
+        )
 
     @classmethod
     def from_lazy_objects(
-        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex]
+        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex],
+        hash_algorithm: int | None = None
     ) -> "Pack":
         """Create a new pack object from callables to load pack data and index objects."""
-        ret = cls("")
+        ret = cls("", hash_algorithm=hash_algorithm)
         ret._data_load = data_fn
         ret._idx_load = idx_fn
         return ret
 
     @classmethod
-    def from_objects(cls, data: PackData, idx: PackIndex) -> "Pack":
+    def from_objects(cls, data: PackData, idx: PackIndex, hash_algorithm: int | None = None) -> "Pack":
         """Create a new pack object from pack data and index objects."""
-        ret = cls("")
+        ret = cls("", hash_algorithm=hash_algorithm)
         ret._data = data
         ret._data_load = None
         ret._idx = idx

+ 5 - 3
dulwich/refs.py

@@ -1000,7 +1000,7 @@ class DiskRefsContainer(RefsContainer):
         """Read a reference file and return its contents.
 
         If the reference file is a symbolic reference, only read the first line of
-        the file. Otherwise, only read the first 40 bytes.
+        the file. Otherwise, read the hash (40 bytes for SHA1, 64 bytes for SHA256).
 
         Args:
           name: the refname to read, relative to refpath
@@ -1018,8 +1018,10 @@ class DiskRefsContainer(RefsContainer):
                     # Read only the first line
                     return header + next(iter(f)).rstrip(b"\r\n")
                 else:
-                    # Read only the first 40 bytes
-                    return header + f.read(40 - len(SYMREF))
+                    # Read the entire line to get the full hash (handles both SHA1 and SHA256)
+                    f.seek(0)
+                    line = f.readline().rstrip(b"\r\n")
+                    return line
         except (OSError, UnicodeError):
             # don't assume anything specific about the error; in
             # particular, invalid or forbidden paths can raise weird

+ 96 - 5
dulwich/repo.py

@@ -515,6 +515,7 @@ class BaseRepo:
 
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self.hooks: dict[str, Hook] = {}
+        self._hash_algorithm = None  # Cached hash algorithm
 
     def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
@@ -537,6 +538,7 @@ class BaseRepo:
         symlinks: bool | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> None:
         """Initialize a default set of named files."""
         from .config import ConfigFile
@@ -544,11 +546,30 @@ class BaseRepo:
         self._put_named_file("description", b"Unnamed repository")
         f = BytesIO()
         cf = ConfigFile()
-        if format is None:
-            format = 0
+
+        # Determine the appropriate format version
+        if object_format == "sha256":
+            # SHA256 requires format version 1
+            if format is None:
+                format = 1
+            elif format != 1:
+                raise ValueError(
+                    "SHA256 object format requires repository format version 1"
+                )
+        else:
+            # SHA1 (default) can use format 0 or 1
+            if format is None:
+                format = 0
+
         if format not in (0, 1):
             raise ValueError(f"Unsupported repository format version: {format}")
+
         cf.set("core", "repositoryformatversion", str(format))
+
+        # Set object format extension if using SHA256
+        if object_format == "sha256":
+            cf.set("extensions", "objectformat", "sha256")
+
         if self._determine_file_mode():
             cf.set("core", "filemode", True)
         else:
@@ -574,6 +595,19 @@ class BaseRepo:
         self._put_named_file("config", f.getvalue())
         self._put_named_file(os.path.join("info", "exclude"), b"")
 
+        # Allow subclasses to handle config initialization
+        self._init_config(cf)
+
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration.
+
+        This method can be overridden by subclasses to handle config initialization.
+
+        Args:
+            config: The ConfigFile object that was just created
+        """
+        # Default implementation does nothing
+
     def get_named_file(self, path: str) -> BinaryIO | None:
         """Get a file from the control dir with a specific name.
 
@@ -912,6 +946,42 @@ class BaseRepo:
         """
         raise NotImplementedError(self.get_config)
 
+    def get_hash_algorithm(self):
+        """Get the hash algorithm used by this repository.
+
+        Returns: HashAlgorithm instance (SHA1 or SHA256)
+        """
+        if self._hash_algorithm is None:
+            from .hash import get_hash_algorithm
+
+            # Check if repository uses SHA256
+            try:
+                config = self.get_config()
+                try:
+                    version = int(config.get(("core",), "repositoryformatversion"))
+                except KeyError:
+                    version = 0  # Default version is 0
+
+                if version == 1:
+                    # Check for SHA256 extension
+                    try:
+                        object_format = config.get(("extensions",), "objectformat")
+                        if object_format == b"sha256":
+                            self._hash_algorithm = get_hash_algorithm("sha256")
+                        else:
+                            self._hash_algorithm = get_hash_algorithm("sha1")
+                    except KeyError:
+                        # No objectformat extension, default to SHA1
+                        self._hash_algorithm = get_hash_algorithm("sha1")
+                else:
+                    # Version 0 always uses SHA1
+                    self._hash_algorithm = get_hash_algorithm("sha1")
+            except (KeyError, ValueError):
+                # If we can't read config, default to SHA1
+                self._hash_algorithm = get_hash_algorithm("sha1")
+
+        return self._hash_algorithm
+
     def get_worktree_config(self) -> "ConfigFile":
         """Retrieve the worktree config object."""
         raise NotImplementedError(self.get_worktree_config)
@@ -1103,7 +1173,7 @@ class BaseRepo:
         """
         if not isinstance(name, bytes):
             raise TypeError(f"'name' must be bytestring, not {type(name).__name__:.80}")
-        if len(name) in (20, 40):
+        if len(name) in (20, 32, 40, 64):  # Support both SHA1 and SHA256
             try:
                 # Try as ObjectID/RawObjectID
                 return self.object_store[
@@ -1424,7 +1494,7 @@ class Repo(BaseRepo):
                     has_reftable_extension = True
                 else:
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
-            elif extension.lower() not in (b"worktreeconfig",):
+            elif extension.lower() not in (b"worktreeconfig", b"objectformat"):
                 raise UnsupportedExtension(extension.decode("utf-8"))
 
         if object_store is None:
@@ -2055,6 +2125,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
         path = os.fspath(path)
         if isinstance(path, bytes):
@@ -2077,10 +2148,17 @@ class Repo(BaseRepo):
                 os.chmod(dir_path, dir_mode)
 
         if object_store is None:
+            # Get hash algorithm for object store
+            from .hash import get_hash_algorithm
+
+            hash_alg = get_hash_algorithm(
+                "sha256" if object_format == "sha256" else "sha1"
+            )
             object_store = DiskObjectStore.init(
                 os.path.join(controldir, OBJECTDIR),
                 file_mode=file_mode,
                 dir_mode=dir_mode,
+                hash_algorithm=hash_alg,
             )
         ret = cls(path, bare=bare, object_store=object_store)
         if default_branch is None:
@@ -2098,6 +2176,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             format=format,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
         return ret
 
@@ -2112,6 +2191,7 @@ class Repo(BaseRepo):
         symlinks: bool | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
         """Create a new repository.
 
@@ -2123,6 +2203,7 @@ class Repo(BaseRepo):
           symlinks: Whether to support symlinks
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: `Repo` instance
         """
         path = os.fspath(path)
@@ -2142,6 +2223,7 @@ class Repo(BaseRepo):
             symlinks=symlinks,
             format=format,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
 
     @classmethod
@@ -2213,6 +2295,7 @@ class Repo(BaseRepo):
         default_branch: bytes | None = None,
         format: int | None = None,
         shared_repository: str | bool | None = None,
+        object_format: str | None = None,
     ) -> "Repo":
         """Create a new bare repository.
 
@@ -2226,6 +2309,7 @@ class Repo(BaseRepo):
           default_branch: Default branch name
           format: Repository format version (defaults to 0)
           shared_repository: Shared repository setting (group, all, umask, or octal)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         Returns: a `Repo` instance
         """
         path = os.fspath(path)
@@ -2242,6 +2326,7 @@ class Repo(BaseRepo):
             default_branch=default_branch,
             format=format,
             shared_repository=shared_repository,
+            object_format=object_format,
         )
 
     create = init_bare
@@ -2551,6 +2636,10 @@ class MemoryRepo(BaseRepo):
         """
         raise NoIndexPresent
 
+    def _init_config(self, config: "ConfigFile") -> None:
+        """Initialize repository configuration for MemoryRepo."""
+        self._config = config
+
     def get_config(self) -> "ConfigFile":
         """Retrieve the config object.
 
@@ -2739,6 +2828,7 @@ class MemoryRepo(BaseRepo):
         objects: Iterable[ShaFile],
         refs: Mapping[Ref, ObjectID],
         format: int | None = None,
+        object_format: str | None = None,
     ) -> "MemoryRepo":
         """Create a new bare repository in memory.
 
@@ -2748,11 +2838,12 @@ class MemoryRepo(BaseRepo):
           refs: Refs as dictionary, mapping names
             to object SHA1s
           format: Repository format version (defaults to 0)
+          object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
         """
         ret = cls()
         for obj in objects:
             ret.object_store.add_object(obj)
         for refname, sha in refs.items():
             ret.refs.add_if_new(refname, sha)
-        ret._init_files(bare=True, format=format)
+        ret._init_files(bare=True, format=format, object_format=object_format)
         return ret

+ 1 - 1
dulwich/tests/utils.py

@@ -114,7 +114,7 @@ def make_object(cls: type[T], **attrs: Any) -> T:
         if name == "id":
             # id property is read-only, so we overwrite sha instead.
             sha = FixedSha(value)
-            obj.sha = lambda: sha
+            obj.sha = lambda hash_algorithm=None: sha
         else:
             setattr(obj, name, value)
     return obj

+ 2 - 0
tests/compat/__init__.py

@@ -41,6 +41,8 @@ def test_suite() -> unittest.TestSuite:
         "reftable",
         "repository",
         "server",
+        "sha256",
+        "sha256_packs",
         "utils",
         "web",
     ]

+ 367 - 0
tests/compat/test_sha256.py

@@ -0,0 +1,367 @@
+# test_sha256.py -- Compatibility tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 support with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.hash import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class GitSHA256CompatibilityTests(CompatTestCase):
+    """Test SHA256 compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_sha256_repo_creation_compat(self):
+        """Test that dulwich-created SHA256 repos are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Add a blob and tree using dulwich
+        blob = Blob.from_string(b"Hello SHA256 world!")
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.get_id(SHA256))
+
+        # Create objects in the repository
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+
+        repo.close()
+
+        # Verify git can read the repository
+        config_output = self._run_git(
+            ["config", "--get", "extensions.objectformat"], cwd=repo_path
+        )
+        self.assertEqual(config_output.strip(), b"sha256")
+
+        # Verify git recognizes it as a SHA256 repository
+        rev_parse_output = self._run_git(
+            ["rev-parse", "--show-object-format"], cwd=repo_path
+        )
+        self.assertEqual(rev_parse_output.strip(), b"sha256")
+
+    def test_git_created_sha256_repo_readable(self):
+        """Test that git-created SHA256 repos are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a file and commit with git
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("Test SHA256 content")
+
+        self._run_git(["add", "test.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test SHA256 commit"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Verify dulwich detects SHA256
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+        # Verify dulwich can read objects
+        # Try both main and master branches (git default changed over time)
+        try:
+            head_ref = repo.refs[b"refs/heads/main"]
+        except KeyError:
+            head_ref = repo.refs[b"refs/heads/master"]
+        self.assertEqual(len(head_ref), 64)  # SHA256 length
+
+        # Read the commit object
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+        self.assertEqual(len(commit.tree), 64)  # SHA256 tree ID
+
+        repo.close()
+
+    def test_object_hashing_consistency(self):
+        """Test that object hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file with known content
+        test_content = b"Test content for SHA256 hashing consistency"
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Get git's hash for the content
+        git_hash = self._run_git(["hash-object", "test.txt"], cwd=repo_path)
+        git_hash = git_hash.strip().decode("ascii")
+
+        # Create same blob with dulwich
+        blob = Blob.from_string(test_content)
+        dulwich_hash = blob.get_id(SHA256).decode("ascii")
+
+        # Hashes should match
+        self.assertEqual(git_hash, dulwich_hash)
+
+    def test_tree_hashing_consistency(self):
+        """Test that tree hashing is consistent between dulwich and git."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create a test file and add to index
+        test_content = b"Tree test content"
+        test_file = os.path.join(repo_path, "tree_test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        self._run_git(["add", "tree_test.txt"], cwd=repo_path)
+
+        # Get git's tree hash
+        git_tree_hash = self._run_git(["write-tree"], cwd=repo_path)
+        git_tree_hash = git_tree_hash.strip().decode("ascii")
+
+        # Create same tree with dulwich
+        blob = Blob.from_string(test_content)
+        tree = Tree()
+        tree.add(b"tree_test.txt", 0o100644, blob.get_id(SHA256))
+
+        dulwich_tree_hash = tree.get_id(SHA256).decode("ascii")
+
+        # Tree hashes should match
+        self.assertEqual(git_tree_hash, dulwich_tree_hash)
+
+    def test_commit_creation_interop(self):
+        """Test commit creation interoperability between dulwich and git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects with dulwich
+        blob = Blob.from_string(b"Interop test content")
+        tree = Tree()
+        tree.add(b"interop.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test SHA256 commit from dulwich"
+
+        # Add objects to repo
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        # Update HEAD
+        commit_id = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = commit_id
+        repo.close()
+
+        # Verify git can read the commit
+        commit_hash = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        commit_hash = commit_hash.strip().decode("ascii")
+        self.assertEqual(len(commit_hash), 64)  # SHA256 length
+
+        # Verify git can show the commit
+        commit_message = self._run_git(["log", "--format=%s", "-n", "1"], cwd=repo_path)
+        self.assertEqual(commit_message.strip(), b"Test SHA256 commit from dulwich")
+
+        # Verify git can list the tree
+        tree_content = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"interop.txt", tree_content)
+
+    def test_ref_updates_interop(self):
+        """Test that ref updates work between dulwich and git."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial commit with git
+        test_file = os.path.join(repo_path, "initial.txt")
+        with open(test_file, "w") as f:
+            f.write("Initial content")
+
+        self._run_git(["add", "initial.txt"], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=repo_path)
+
+        initial_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        initial_commit = initial_commit.strip()
+
+        # Update ref with dulwich
+        repo = Repo(repo_path)
+
+        # Create new commit with dulwich
+        blob = Blob.from_string(b"New content from dulwich")
+        tree = Tree()
+        tree.add(b"dulwich.txt", 0o100644, blob.get_id(SHA256))
+
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.parents = [initial_commit]
+        commit.author = commit.committer = b"Dulwich User <dulwich@example.com>"
+        commit.commit_time = commit.author_time = 1234567891
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Commit from dulwich"
+
+        # Add objects and update ref
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+
+        new_commit_hash = commit.get_id(SHA256)
+        repo.refs[b"refs/heads/master"] = new_commit_hash
+        repo.close()
+
+        # Verify git sees the update
+        current_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        current_commit = current_commit.strip().decode("ascii")
+        self.assertEqual(current_commit, new_commit_hash.decode("ascii"))
+
+        # Verify git can access the new tree
+        tree_listing = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        self.assertIn(b"dulwich.txt", tree_listing)
+
+    def test_clone_sha256_repo_git_to_dulwich(self):
+        """Test cloning a git SHA256 repository with dulwich."""
+        # Create source repo with git
+        source_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(source_path))
+        self._run_git(["init", "--object-format=sha256", source_path])
+
+        # Add content
+        test_file = os.path.join(source_path, "clone_test.txt")
+        with open(test_file, "w") as f:
+            f.write("Content to be cloned")
+
+        self._run_git(["add", "clone_test.txt"], cwd=source_path)
+        self._run_git(["commit", "-m", "Initial commit"], cwd=source_path)
+
+        # Clone with dulwich
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(target_path))
+
+        target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
+
+        # Copy objects (simplified clone)
+        source_repo = Repo(source_path)
+
+        # Copy all objects
+        for obj_id in source_repo.object_store:
+            obj = source_repo.object_store[obj_id]
+            target_repo.object_store.add_object(obj)
+
+        # Copy refs
+        for ref_name in source_repo.refs.keys():
+            ref_id = source_repo.refs[ref_name]
+            target_repo.refs[ref_name] = ref_id
+
+        # Set HEAD
+        target_repo.refs.set_symbolic_ref(b"HEAD", b"refs/heads/master")
+
+        source_repo.close()
+        target_repo.close()
+
+        # Verify with git
+        output = self._run_git(["rev-parse", "--show-object-format"], cwd=target_path)
+        self.assertEqual(output.strip(), b"sha256")
+
+        # Verify content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=target_path)
+        cloned_file = os.path.join(target_path, "clone_test.txt")
+        with open(cloned_file) as f:
+            content = f.read()
+        self.assertEqual(content, "Content to be cloned")
+
+    def test_fsck_sha256_repo(self):
+        """Test that git fsck works on dulwich-created SHA256 repos."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a more complex object graph
+        # Multiple blobs
+        blobs = []
+        for i in range(5):
+            blob = Blob.from_string(f"Blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Multiple trees
+        subtree = Tree()
+        subtree.add(b"subfile1.txt", 0o100644, blobs[0].get_id(SHA256))
+        subtree.add(b"subfile2.txt", 0o100644, blobs[1].get_id(SHA256))
+        repo.object_store.add_object(subtree)
+
+        main_tree = Tree()
+        main_tree.add(b"file1.txt", 0o100644, blobs[2].get_id(SHA256))
+        main_tree.add(b"file2.txt", 0o100644, blobs[3].get_id(SHA256))
+        main_tree.add(b"subdir", 0o040000, subtree.get_id(SHA256))
+        repo.object_store.add_object(main_tree)
+
+        # Create commits
+        commit1 = Commit()
+        commit1.tree = main_tree.get_id(SHA256)
+        commit1.author = commit1.committer = b"Test <test@example.com>"
+        commit1.commit_time = commit1.author_time = 1234567890
+        commit1.commit_timezone = commit1.author_timezone = 0
+        commit1.message = b"First commit"
+        repo.object_store.add_object(commit1)
+
+        commit2 = Commit()
+        commit2.tree = main_tree.get_id(SHA256)
+        commit2.parents = [commit1.get_id(SHA256)]
+        commit2.author = commit2.committer = b"Test <test@example.com>"
+        commit2.commit_time = commit2.author_time = 1234567891
+        commit2.commit_timezone = commit2.author_timezone = 0
+        commit2.message = b"Second commit"
+        repo.object_store.add_object(commit2)
+
+        # Set refs
+        repo.refs[b"refs/heads/master"] = commit2.get_id(SHA256)
+        repo.refs[b"refs/heads/branch1"] = commit1.get_id(SHA256)
+
+        repo.close()
+
+        # Run git fsck
+        fsck_output = self._run_git(["fsck", "--full"], cwd=repo_path)
+        # fsck should not report any errors (empty output or success message)
+        self.assertNotIn(b"error", fsck_output.lower())
+        self.assertNotIn(b"missing", fsck_output.lower())
+        self.assertNotIn(b"broken", fsck_output.lower())

+ 330 - 0
tests/compat/test_sha256_packs.py

@@ -0,0 +1,330 @@
+# test_sha256_packs.py -- Compatibility tests for SHA256 pack files
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for SHA256 pack files with git command line tools."""
+
+import os
+import tempfile
+
+from dulwich.hash import SHA256
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.pack import load_pack_index_file
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class GitSHA256PackCompatibilityTests(CompatTestCase):
+    """Test SHA256 pack file compatibility with git command line tools."""
+
+    min_git_version = (2, 29, 0)
+
+    def _run_git(self, args, cwd=None):
+        """Run git command in the specified directory."""
+        return run_git_or_fail(args, cwd=cwd)
+
+    def test_git_pack_readable_by_dulwich(self):
+        """Test that git-created SHA256 pack files are readable by dulwich."""
+        # Create SHA256 repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create multiple files to ensure pack creation
+        for i in range(20):
+            test_file = os.path.join(repo_path, f"file{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Content for file {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
+
+        # Force pack creation
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Open with dulwich
+        repo = Repo(repo_path)
+        self.assertEqual(repo.get_hash_algorithm(), SHA256)
+
+        # Find pack files
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0, "No pack files created")
+
+        # Read pack with dulwich
+        for pack_file in pack_files:
+            pack_path = os.path.join(pack_dir, pack_file)
+            idx_path = pack_path[:-5] + ".idx"
+
+            # Load pack index with SHA256 algorithm
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(
+                    idx_path, f, hash_algorithm=repo.get_hash_algorithm()
+                )
+
+            # Verify it's detected as SHA256
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate objects
+            obj_count = 0
+            for sha, offset, crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)  # SHA256
+                obj_count += 1
+
+            self.assertGreater(obj_count, 20)  # At least our files + trees + commit
+
+        # Verify we can read all objects through the repo interface
+        head_ref = repo.refs[b"refs/heads/master"]
+        commit = repo[head_ref]
+        self.assertIsInstance(commit, Commit)
+
+        # Read the tree
+        tree = repo[commit.tree]
+        self.assertIsInstance(tree, Tree)
+
+        # Verify all files are there
+        file_count = 0
+        for name, mode, sha in tree.items():
+            if name.startswith(b"file") and name.endswith(b".txt"):
+                file_count += 1
+                # Read the blob
+                blob = repo[sha]
+                self.assertIsInstance(blob, Blob)
+
+        self.assertEqual(file_count, 20)
+        repo.close()
+
+    def test_dulwich_objects_readable_by_git(self):
+        """Test that dulwich-created SHA256 objects are readable by git."""
+        # Create SHA256 repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create objects
+        blobs = []
+        for i in range(10):
+            blob = Blob.from_string(f"Dulwich blob content {i}".encode())
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Create a tree with all blobs
+        tree = Tree()
+        for i, blob in enumerate(blobs):
+            tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+        repo.object_store.add_object(tree)
+
+        # Create a commit
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit with blobs"
+        repo.object_store.add_object(commit)
+
+        # Update HEAD
+        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        repo.close()
+
+        # Verify git can read all objects
+        output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
+        self.assertEqual(len(output.strip()), 64)  # SHA256
+
+        # List tree contents
+        tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
+        # Count lines instead of occurrences of "blob" since "blob" appears twice per line
+        lines = tree_output.strip().split(b"\n")
+        self.assertEqual(len(lines), 10)
+
+        # Verify git can check out the content
+        self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
+
+        # Verify files exist with correct content
+        for i in range(10):
+            file_path = os.path.join(repo_path, f"blob{i}.txt")
+            self.assertTrue(os.path.exists(file_path))
+            with open(file_path, "rb") as f:
+                content = f.read()
+                self.assertEqual(content, f"Dulwich blob content {i}".encode())
+
+    def test_pack_index_v1_interop(self):
+        """Test pack index v1 interoperability with SHA256."""
+        # Create repo with git using pack index v1
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+        self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
+
+        # Create files
+        for i in range(10):
+            test_file = os.path.join(repo_path, f"v1test{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Pack v1 test {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Test pack v1"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Read with dulwich
+        repo = Repo(repo_path)
+
+        # Find pack index
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        idx_files = [f for f in os.listdir(pack_dir) if f.endswith(".idx")]
+
+        for idx_file in idx_files:
+            idx_path = os.path.join(pack_dir, idx_file)
+            with open(idx_path, "rb") as f:
+                pack_idx = load_pack_index_file(
+                    idx_path, f, hash_algorithm=repo.get_hash_algorithm()
+                )
+
+            # Verify it's v1 with SHA256
+            self.assertEqual(pack_idx.version, 1)
+            self.assertEqual(pack_idx.hash_size, 32)
+
+            # Verify we can iterate
+            for sha, offset, crc32 in pack_idx.iterentries():
+                self.assertEqual(len(sha), 32)
+                self.assertIsNone(crc32)  # v1 doesn't store CRC32
+
+        repo.close()
+
+    def test_large_pack_interop(self):
+        """Test large pack file interoperability."""
+        # Create repo with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
+
+        # Create a large file that will use delta compression
+        large_content = b"A" * 10000
+        blobs = []
+
+        # Create similar blobs to trigger delta compression
+        for i in range(10):
+            content = large_content + f" variation {i}".encode()
+            blob = Blob.from_string(content)
+            repo.object_store.add_object(blob)
+            blobs.append(blob)
+
+        # Create tree
+        tree = Tree()
+        for i, blob in enumerate(blobs):
+            tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
+        repo.object_store.add_object(tree)
+
+        # Create commit
+        commit = Commit()
+        commit.tree = tree.get_id(SHA256)
+        commit.author = commit.committer = b"Test <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Large files for delta compression test"
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
+        repo.close()
+
+        # Run git gc to create packs with delta compression
+        self._run_git(["gc", "--aggressive"], cwd=repo_path)
+
+        # Verify git created a pack
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0)
+
+        # Re-open with dulwich and verify we can read everything
+        repo = Repo(repo_path)
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+        tree = repo[commit.tree]
+
+        # Read all blobs
+        for i in range(10):
+            name = f"large{i}.txt".encode()
+            mode, sha = tree[name]
+            blob = repo[sha]
+            expected = large_content + f" variation {i}".encode()
+            self.assertEqual(blob.data, expected)
+
+        repo.close()
+
+    def test_mixed_loose_packed_objects(self):
+        """Test repositories with both loose and packed objects."""
+        # Create repo with git
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
+        self._run_git(["init", "--object-format=sha256", repo_path])
+
+        # Create initial objects that will be packed
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"packed{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will be packed {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
+        self._run_git(["gc"], cwd=repo_path)
+
+        # Create more objects that will remain loose
+        for i in range(5):
+            test_file = os.path.join(repo_path, f"loose{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Will stay loose {i}\n")
+
+        self._run_git(["add", "."], cwd=repo_path)
+        self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
+
+        # Open with dulwich
+        repo = Repo(repo_path)
+
+        # Count objects in packs vs loose
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
+        self.assertGreater(pack_count, 0)
+
+        # Verify we can read all objects
+        head = repo.refs[b"refs/heads/master"]
+        commit = repo[head]
+
+        # Walk the commit history
+        commit_count = 0
+        while commit.parents:
+            commit_count += 1
+            tree = repo[commit.tree]
+            # Verify we can read the tree
+            self.assertGreater(len(tree), 0)
+
+            if commit.parents:
+                commit = repo[commit.parents[0]]
+            else:
+                break
+
+        self.assertEqual(commit_count, 1)  # Only the non-root commit is counted (loop requires parents)
+        repo.close()
+
+
+if __name__ == "__main__":
+    import unittest
+
+    unittest.main()

+ 213 - 0
tests/test_sha256.py

@@ -0,0 +1,213 @@
+# test_sha256.py -- Tests for SHA256 support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 support in Dulwich."""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from dulwich.hash import SHA1, SHA256, get_hash_algorithm
+from dulwich.objects import Blob, Tree, valid_hexsha, zero_sha_for
+from dulwich.repo import MemoryRepo, Repo
+
+
+class HashAlgorithmTests(unittest.TestCase):
+    """Tests for the hash algorithm abstraction."""
+
+    def test_sha1_properties(self):
+        """Test SHA1 algorithm properties."""
+        alg = SHA1
+        self.assertEqual(alg.name, "sha1")
+        self.assertEqual(alg.oid_length, 20)
+        self.assertEqual(alg.hex_length, 40)
+        self.assertEqual(len(alg.zero_oid), 40)
+        self.assertEqual(len(alg.zero_oid_bin), 20)
+
+    def test_sha256_properties(self):
+        """Test SHA256 algorithm properties."""
+        alg = SHA256
+        self.assertEqual(alg.name, "sha256")
+        self.assertEqual(alg.oid_length, 32)
+        self.assertEqual(alg.hex_length, 64)
+        self.assertEqual(len(alg.zero_oid), 64)
+        self.assertEqual(len(alg.zero_oid_bin), 32)
+
+    def test_get_hash_algorithm(self):
+        """Test getting hash algorithms by name."""
+        self.assertEqual(get_hash_algorithm("sha1"), SHA1)
+        self.assertEqual(get_hash_algorithm("sha256"), SHA256)
+        self.assertEqual(get_hash_algorithm(None), SHA1)  # Default
+
+        with self.assertRaises(ValueError):
+            get_hash_algorithm("invalid")
+
+
+class ObjectHashingTests(unittest.TestCase):
+    """Tests for object hashing with different algorithms."""
+
+    def test_blob_sha1(self):
+        """Test blob hashing with SHA1."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Default should be SHA1
+        sha1_id = blob.id
+        self.assertEqual(len(sha1_id), 40)
+        self.assertTrue(valid_hexsha(sha1_id))
+
+    def test_blob_sha256(self):
+        """Test blob hashing with SHA256."""
+        blob = Blob()
+        blob.data = b"Hello, World!"
+
+        # Get SHA256 hash
+        sha256_id = blob.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+        self.assertTrue(valid_hexsha(sha256_id))
+
+        # SHA256 ID should be different from SHA1
+        sha1_id = blob.id
+        self.assertNotEqual(sha1_id, sha256_id)
+
+        # Verify .id property returns SHA1 for backward compatibility
+        self.assertEqual(blob.id, sha1_id)
+        self.assertEqual(blob.get_id(), sha1_id)  # Default should be SHA1
+
+    def test_tree_sha256(self):
+        """Test tree hashing with SHA256."""
+        tree = Tree()
+        tree.add(b"file.txt", 0o100644, b"a" * 40)  # SHA1 hex
+
+        # Get SHA1 (default)
+        sha1_id = tree.id
+        self.assertEqual(len(sha1_id), 40)
+
+        # Get SHA256
+        sha256_id = tree.get_id(SHA256)
+        self.assertEqual(len(sha256_id), 64)
+
+        # Verify they're different
+        self.assertNotEqual(sha1_id, sha256_id)
+
+    def test_valid_hexsha(self):
+        """Test hex SHA validation for both algorithms."""
+        # Valid SHA1
+        self.assertTrue(valid_hexsha(b"1234567890abcdef1234567890abcdef12345678"))
+
+        # Valid SHA256
+        self.assertTrue(
+            valid_hexsha(
+                b"1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
+            )
+        )
+
+        # Invalid lengths
+        self.assertFalse(valid_hexsha(b"1234"))
+        self.assertFalse(
+            valid_hexsha(b"1234567890abcdef1234567890abcdef123456")
+        )  # 38 chars
+
+        # Invalid characters
+        self.assertFalse(valid_hexsha(b"123456789gabcdef1234567890abcdef12345678"))
+
+    def test_zero_sha_for(self):
+        """Test getting zero SHA for different algorithms."""
+        # Default (SHA1)
+        self.assertEqual(zero_sha_for(), b"0" * 40)
+        self.assertEqual(zero_sha_for(None), b"0" * 40)
+
+        # SHA1 explicit
+        self.assertEqual(zero_sha_for(SHA1), b"0" * 40)
+
+        # SHA256
+        self.assertEqual(zero_sha_for(SHA256), b"0" * 64)
+
+
+class RepositorySHA256Tests(unittest.TestCase):
+    """Tests for SHA256 repository support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_init_sha256_repo(self):
+        """Test initializing a SHA256 repository."""
+        repo_path = os.path.join(self.test_dir, "sha256_repo")
+        repo = Repo.init(repo_path, mkdir=True, object_format="sha256")
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"1")
+
+        # Check object format extension
+        self.assertEqual(config.get(("extensions",), "objectformat"), b"sha256")
+
+        # Check hash algorithm detection
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+        repo.close()
+
+    def test_init_sha1_repo(self):
+        """Test initializing a SHA1 repository (default)."""
+        repo_path = os.path.join(self.test_dir, "sha1_repo")
+        repo = Repo.init(repo_path, mkdir=True)
+
+        # Check repository format version
+        config = repo.get_config()
+        self.assertEqual(config.get(("core",), "repositoryformatversion"), b"0")
+
+        # Object format extension should not exist
+        with self.assertRaises(KeyError):
+            config.get(("extensions",), "objectformat")
+
+        # Check hash algorithm detection
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA1)
+
+        repo.close()
+
+    def test_format_version_validation(self):
+        """Test format version validation for SHA256."""
+        repo_path = os.path.join(self.test_dir, "invalid_repo")
+
+        # SHA256 with format version 0 should fail
+        with self.assertRaises(ValueError) as cm:
+            Repo.init(repo_path, mkdir=True, format=0, object_format="sha256")
+        self.assertIn("SHA256", str(cm.exception))
+
+    def test_memory_repo_sha256(self):
+        """Test SHA256 support in memory repository."""
+        repo = MemoryRepo.init_bare([], {}, object_format="sha256")
+
+        # Check hash algorithm
+        hash_alg = repo.get_hash_algorithm()
+        self.assertEqual(hash_alg, SHA256)
+
+
+if __name__ == "__main__":
+    unittest.main()

+ 128 - 0
tests/test_sha256_pack.py

@@ -0,0 +1,128 @@
+# test_sha256_pack.py -- Tests for SHA256 pack support
+# Copyright (C) 2024 The Dulwich contributors
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for SHA256 pack support in Dulwich."""
+
+import shutil
+import tempfile
+import unittest
+from io import BytesIO
+
+from dulwich.hash import SHA256
+from dulwich.pack import (
+    load_pack_index_file,
+    write_pack_index_v2,
+)
+
+
+class SHA256PackTests(unittest.TestCase):
+    """Tests for SHA256 pack support."""
+
+    def setUp(self):
+        """Set up test repository directory."""
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Clean up test repository."""
+        shutil.rmtree(self.test_dir)
+
+    def test_pack_index_v2_with_sha256(self):
+        """Test that pack index v2 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually (simulating what would happen in a SHA256 repo)
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = i  # Fake CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Write SHA256 pack index with a SHA1 pack checksum (NOTE(review): git's hash-function-transition spec uses the repo hash for pack trailers -- confirm SHA1 is intended here)
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake pack data").digest()
+        write_pack_index_v2(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 2)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+    def test_pack_index_v1_with_sha256(self):
+        """Test that pack index v1 correctly handles SHA256 hashes."""
+        # Create SHA256 entries manually
+        entries = []
+        for i in range(5):
+            # Create a fake SHA256 hash
+            sha256_hash = SHA256.hash_func(f"test v1 object {i}".encode()).digest()
+            offset = i * 1000  # Fake offsets
+            crc32 = None  # v1 doesn't store CRC32
+            entries.append((sha256_hash, offset, crc32))
+
+        # Sort entries by SHA (required for pack index)
+        entries.sort(key=lambda e: e[0])
+
+        # Import write_pack_index_v1
+        from dulwich.pack import write_pack_index_v1
+
+        # Write SHA256 pack index v1 with SHA1 pack checksum
+        index_buf = BytesIO()
+        from hashlib import sha1
+
+        pack_checksum = sha1(b"fake v1 pack data").digest()
+        write_pack_index_v1(index_buf, entries, pack_checksum)
+
+        # Load and verify the index
+        index_buf.seek(0)
+        pack_idx = load_pack_index_file("<memory>", index_buf)
+
+        # Check that the index loaded correctly
+        self.assertEqual(len(pack_idx), 5)
+        self.assertEqual(pack_idx.version, 1)
+
+        # Verify hash_size detection
+        self.assertEqual(pack_idx.hash_size, 32)
+
+        # Verify we can look up objects by SHA256
+        for sha256_hash, offset, _ in entries:
+            # This should not raise KeyError
+            found_offset = pack_idx.object_offset(sha256_hash)
+            self.assertEqual(found_offset, offset)
+
+
+if __name__ == "__main__":
+    unittest.main()