Przeglądaj źródła

Add --objectformat option to dulwich init command

Support specifying the object format (sha1 or sha256) when creating
a new repository via the dulwich init command. The chosen value is
passed through porcelain.init() to the existing object_format
parameter of Repo.init() and Repo.init_bare().
Jelmer Vernooij 2 miesiące temu
rodzic
commit
1e0b2788a5

+ 9 - 1
dulwich/bundle.py

@@ -38,6 +38,9 @@ from typing import (
     runtime_checkable,
     runtime_checkable,
 )
 )
 
 
+if TYPE_CHECKING:
+    from .object_format import ObjectFormat
+
 from .objects import ObjectID
 from .objects import ObjectID
 from .pack import PackData, UnpackedObject, write_pack_data
 from .pack import PackData, UnpackedObject, write_pack_data
 from .refs import Ref
 from .refs import Ref
@@ -47,6 +50,8 @@ from .refs import Ref
 class PackDataLike(Protocol):
 class PackDataLike(Protocol):
     """Protocol for objects that behave like PackData."""
     """Protocol for objects that behave like PackData."""
 
 
+    object_format: "ObjectFormat"
+
     def __len__(self) -> int:
     def __len__(self) -> int:
         """Return the number of objects in the pack."""
         """Return the number of objects in the pack."""
         ...
         ...
@@ -311,7 +316,10 @@ def create_bundle_from_repo(
     # For now, create a simple wrapper to hold the data
     # For now, create a simple wrapper to hold the data
     class _BundlePackData:
     class _BundlePackData:
         def __init__(
         def __init__(
-            self, count: int, objects: Iterator[UnpackedObject], object_format
+            self,
+            count: int,
+            objects: Iterator[UnpackedObject],
+            object_format: "ObjectFormat",
         ) -> None:
         ) -> None:
             self._count = count
             self._count = count
             self._objects = list(objects)  # Materialize the iterator
             self._objects = list(objects)  # Materialize the iterator

+ 11 - 1
dulwich/cli.py

@@ -1881,12 +1881,22 @@ class cmd_init(Command):
         parser.add_argument(
         parser.add_argument(
             "--bare", action="store_true", help="Create a bare repository"
             "--bare", action="store_true", help="Create a bare repository"
         )
         )
+        parser.add_argument(
+            "--objectformat",
+            type=str,
+            choices=["sha1", "sha256"],
+            help="Object format to use (sha1 or sha256)",
+        )
         parser.add_argument(
         parser.add_argument(
             "path", nargs="?", default=os.getcwd(), help="Repository path"
             "path", nargs="?", default=os.getcwd(), help="Repository path"
         )
         )
         parsed_args = parser.parse_args(args)
         parsed_args = parser.parse_args(args)
 
 
-        porcelain.init(parsed_args.path, bare=parsed_args.bare)
+        porcelain.init(
+            parsed_args.path,
+            bare=parsed_args.bare,
+            object_format=parsed_args.objectformat,
+        )
 
 
 
 
 class cmd_clone(Command):
 class cmd_clone(Command):

+ 3 - 2
dulwich/commit_graph.py

@@ -65,6 +65,7 @@ from collections.abc import Iterator, Sequence
 from typing import TYPE_CHECKING, BinaryIO
 from typing import TYPE_CHECKING, BinaryIO
 
 
 from .file import _GitFile
 from .file import _GitFile
+from .object_format import ObjectFormat
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
     from .object_store import BaseObjectStore
     from .object_store import BaseObjectStore
@@ -156,7 +157,7 @@ class CommitGraphChunk:
 class CommitGraph:
 class CommitGraph:
     """Git commit graph file reader/writer."""
     """Git commit graph file reader/writer."""
 
 
-    def __init__(self, *, object_format=None) -> None:
+    def __init__(self, *, object_format: ObjectFormat | None = None) -> None:
         """Initialize CommitGraph.
         """Initialize CommitGraph.
 
 
         Args:
         Args:
@@ -257,7 +258,7 @@ class CommitGraph:
             end = start + self.object_format.oid_length
             end = start + self.object_format.oid_length
             oid = oid_lookup_data[start:end]
             oid = oid_lookup_data[start:end]
             oids.append(oid)
             oids.append(oid)
-            self._oid_to_index[sha_to_hex(oid)] = i
+            self._oid_to_index[oid] = i
 
 
         # Parse commit data chunk
         # Parse commit data chunk
         commit_data = self.chunks[CHUNK_COMMIT_DATA].data
         commit_data = self.chunks[CHUNK_COMMIT_DATA].data

+ 5 - 2
dulwich/dumb.py

@@ -31,9 +31,12 @@ import tempfile
 import zlib
 import zlib
 from collections.abc import Callable, Iterator, Mapping, Sequence
 from collections.abc import Callable, Iterator, Mapping, Sequence
 from io import BytesIO
 from io import BytesIO
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin
 from urllib.parse import urljoin
 
 
+if TYPE_CHECKING:
+    from .object_format import ObjectFormat
+
 from .errors import NotGitRepository, ObjectFormatException
 from .errors import NotGitRepository, ObjectFormatException
 from .object_store import BaseObjectStore
 from .object_store import BaseObjectStore
 from .objects import (
 from .objects import (
@@ -62,7 +65,7 @@ class DumbHTTPObjectStore(BaseObjectStore):
         http_request_func: Callable[
         http_request_func: Callable[
             [str, dict[str, str]], tuple[Any, Callable[..., bytes]]
             [str, dict[str, str]], tuple[Any, Callable[..., bytes]]
         ],
         ],
-        object_format=None,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
     ) -> None:
         """Initialize a DumbHTTPObjectStore.
         """Initialize a DumbHTTPObjectStore.
 
 

+ 19 - 28
dulwich/object_store.py

@@ -70,6 +70,9 @@ from typing import (
     cast,
     cast,
 )
 )
 
 
+if TYPE_CHECKING:
+    from .object_format import ObjectFormat
+
 from .errors import NotTreeError
 from .errors import NotTreeError
 from .file import GitFile, _GitFile
 from .file import GitFile, _GitFile
 from .midx import MultiPackIndex, load_midx
 from .midx import MultiPackIndex, load_midx
@@ -319,7 +322,7 @@ class PackContainer(Protocol):
 class BaseObjectStore:
 class BaseObjectStore:
     """Object store interface."""
     """Object store interface."""
 
 
-    def __init__(self, *, object_format=None) -> None:
+    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
         """Initialize object store.
         """Initialize object store.
 
 
         Args:
         Args:
@@ -829,7 +832,7 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         pack_threads: int | None = None,
         pack_threads: int | None = None,
         pack_big_file_threshold: int | None = None,
         pack_big_file_threshold: int | None = None,
         *,
         *,
-        object_format=None,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
     ) -> None:
         """Initialize a PackBasedObjectStore.
         """Initialize a PackBasedObjectStore.
 
 
@@ -1399,7 +1402,7 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_write_bitmap_lookup_table: bool = True,
         pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         file_mode: int | None = None,
         dir_mode: int | None = None,
         dir_mode: int | None = None,
-        object_format=None,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
     ) -> None:
         """Open an object store.
         """Open an object store.
 
 
@@ -1575,10 +1578,10 @@ class DiskObjectStore(PackBasedObjectStore):
                 version = 0
                 version = 0
             if version == 1:
             if version == 1:
                 try:
                 try:
-                    object_format = config.get((b"extensions",), b"objectformat")
+                    object_format_name = config.get((b"extensions",), b"objectformat")
                 except KeyError:
                 except KeyError:
-                    object_format = b"sha1"
-                object_format = get_object_format(object_format.decode("ascii"))
+                    object_format_name = b"sha1"
+                object_format = get_object_format(object_format_name.decode("ascii"))
         except (KeyError, ValueError):
         except (KeyError, ValueError):
             pass
             pass
 
 
@@ -1599,18 +1602,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             file_mode=file_mode,
             dir_mode=dir_mode,
             dir_mode=dir_mode,
-            object_format=hash_algorithm,
-            loose_compression_level,
-            pack_compression_level,
-            pack_index_version,
-            pack_delta_window_size,
-            pack_window_memory,
-            pack_delta_cache_size,
-            pack_depth,
-            pack_threads,
-            pack_big_file_threshold,
-            fsync_object_files,
-            object_format,
+            object_format=object_format,
         )
         )
         instance._use_commit_graph = use_commit_graph
         instance._use_commit_graph = use_commit_graph
         instance._use_midx = use_midx
         instance._use_midx = use_midx
@@ -2046,8 +2038,7 @@ class DiskObjectStore(PackBasedObjectStore):
         *,
         *,
         file_mode: int | None = None,
         file_mode: int | None = None,
         dir_mode: int | None = None,
         dir_mode: int | None = None,
-        object_format=None,
-        cls, path: str | os.PathLike[str], object_format=None
+        object_format: "ObjectFormat | None" = None,
     ) -> "DiskObjectStore":
     ) -> "DiskObjectStore":
         """Initialize a new disk object store.
         """Initialize a new disk object store.
 
 
@@ -2058,7 +2049,6 @@ class DiskObjectStore(PackBasedObjectStore):
           file_mode: Optional file permission mask for shared repository
           file_mode: Optional file permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
           dir_mode: Optional directory permission mask for shared repository
           object_format: Hash algorithm to use (SHA1 or SHA256)
           object_format: Hash algorithm to use (SHA1 or SHA256)
-          object_format: Hash algorithm to use (SHA1 or SHA256)
 
 
         Returns:
         Returns:
           New DiskObjectStore instance
           New DiskObjectStore instance
@@ -2076,10 +2066,9 @@ class DiskObjectStore(PackBasedObjectStore):
         if dir_mode is not None:
         if dir_mode is not None:
             os.chmod(info_path, dir_mode)
             os.chmod(info_path, dir_mode)
             os.chmod(pack_path, dir_mode)
             os.chmod(pack_path, dir_mode)
-        return cls(path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format)
-        os.mkdir(os.path.join(path, "info"))
-        os.mkdir(os.path.join(path, PACKDIR))
-        return cls(path, object_format=object_format)
+        return cls(
+            path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format
+        )
 
 
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
     def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
         """Iterate over all object SHAs with the given prefix.
         """Iterate over all object SHAs with the given prefix.
@@ -2240,11 +2229,13 @@ class DiskObjectStore(PackBasedObjectStore):
             raise KeyError(name)
             raise KeyError(name)
 
 
         sha: RawObjectID
         sha: RawObjectID
-        if len(name) == 40:
+        if len(name) in (40, 64):
             # name is ObjectID (hex), convert to RawObjectID
             # name is ObjectID (hex), convert to RawObjectID
+            # Support both SHA1 (40) and SHA256 (64)
             sha = hex_to_sha(cast(ObjectID, name))
             sha = hex_to_sha(cast(ObjectID, name))
-        elif len(name) == 20:
+        elif len(name) in (20, 32):
             # name is already RawObjectID (binary)
             # name is already RawObjectID (binary)
+            # Support both SHA1 (20) and SHA256 (32)
             sha = RawObjectID(name)
             sha = RawObjectID(name)
         else:
         else:
             raise AssertionError(f"Invalid object name {name!r}")
             raise AssertionError(f"Invalid object name {name!r}")
@@ -2447,7 +2438,7 @@ class DiskObjectStore(PackBasedObjectStore):
 class MemoryObjectStore(PackCapableObjectStore):
 class MemoryObjectStore(PackCapableObjectStore):
     """Object store that keeps all objects in memory."""
     """Object store that keeps all objects in memory."""
 
 
-    def __init__(self, *, object_format=None) -> None:
+    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
         """Initialize a MemoryObjectStore.
         """Initialize a MemoryObjectStore.
 
 
         Creates an empty in-memory object store.
         Creates an empty in-memory object store.

+ 56 - 19
dulwich/objects.py

@@ -117,7 +117,6 @@ if TYPE_CHECKING:
 ZERO_SHA = b"0" * 40  # SHA1 - kept for backward compatibility
 ZERO_SHA = b"0" * 40  # SHA1 - kept for backward compatibility
 
 
 
 
->>>>>>> 063bbc0c (Update more hardcoded sha length)
 # Header fields for commits
 # Header fields for commits
 _TREE_HEADER = b"tree"
 _TREE_HEADER = b"tree"
 _PARENT_HEADER = b"parent"
 _PARENT_HEADER = b"parent"
@@ -459,10 +458,7 @@ class ShaFile:
     type_name: bytes
     type_name: bytes
     type_num: int
     type_num: int
     _chunked_text: list[bytes] | None
     _chunked_text: list[bytes] | None
-<<<<<<< HEAD
     _sha: "FixedSha | None | HASH"
     _sha: "FixedSha | None | HASH"
-=======
-    _sha: Union[FixedSha, None, "HASH"]
     object_format: ObjectFormat
     object_format: ObjectFormat
 
 
     def __init__(self) -> None:
     def __init__(self) -> None:
@@ -566,7 +562,11 @@ class ShaFile:
         self.set_raw_chunks([text], sha)
         self.set_raw_chunks([text], sha)
 
 
     def set_raw_chunks(
     def set_raw_chunks(
-        self, chunks: list[bytes], sha: ObjectID | RawObjectID | None = None, *, object_format=None
+        self,
+        chunks: list[bytes],
+        sha: ObjectID | RawObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
     ) -> None:
     ) -> None:
         """Set the contents of this object from a list of chunks."""
         """Set the contents of this object from a list of chunks."""
         self._chunked_text = chunks
         self._chunked_text = chunks
@@ -613,7 +613,10 @@ class ShaFile:
 
 
     @classmethod
     @classmethod
     def _parse_file(
     def _parse_file(
-        cls, f: BufferedIOBase | IO[bytes] | "_GitFile", *, object_format=None
+        cls,
+        f: BufferedIOBase | IO[bytes] | "_GitFile",
+        *,
+        object_format: ObjectFormat | None = None,
     ) -> "ShaFile":
     ) -> "ShaFile":
         map = f.read()
         map = f.read()
         if not map:
         if not map:
@@ -639,7 +642,11 @@ class ShaFile:
 
 
     @classmethod
     @classmethod
     def from_path(
     def from_path(
-        cls, path: str | bytes, sha: ObjectID | None = None, *, object_format=None
+        cls,
+        path: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
     ) -> "ShaFile":
     ) -> "ShaFile":
         """Open a SHA file from disk."""
         """Open a SHA file from disk."""
         with GitFile(path, "rb") as f:
         with GitFile(path, "rb") as f:
@@ -651,7 +658,7 @@ class ShaFile:
         f: BufferedIOBase | IO[bytes] | "_GitFile",
         f: BufferedIOBase | IO[bytes] | "_GitFile",
         sha: ObjectID | None = None,
         sha: ObjectID | None = None,
         *,
         *,
-        object_format=None,
+        object_format: ObjectFormat | None = None,
     ) -> "ShaFile":
     ) -> "ShaFile":
         """Get the contents of a SHA file on disk."""
         """Get the contents of a SHA file on disk."""
         try:
         try:
@@ -679,7 +686,7 @@ class ShaFile:
         string: bytes,
         string: bytes,
         sha: ObjectID | RawObjectID | None = None,
         sha: ObjectID | RawObjectID | None = None,
         *,
         *,
-        object_format=None,
+        object_format: ObjectFormat | None = None,
     ) -> "ShaFile":
     ) -> "ShaFile":
         """Creates an object of the indicated type from the raw string given.
         """Creates an object of the indicated type from the raw string given.
 
 
@@ -765,7 +772,7 @@ class ShaFile:
         """Returns the length of the raw string of this object."""
         """Returns the length of the raw string of this object."""
         return sum(map(len, self.as_raw_chunks()))
         return sum(map(len, self.as_raw_chunks()))
 
 
-    def sha(self, object_format=None) -> "FixedSha | HASH":
+    def sha(self, object_format: ObjectFormat | None = None) -> "FixedSha | HASH":
         """The SHA object that is the name of this object.
         """The SHA object that is the name of this object.
 
 
         Args:
         Args:
@@ -805,7 +812,7 @@ class ShaFile:
         """
         """
         return ObjectID(self.sha().hexdigest().encode("ascii"))
         return ObjectID(self.sha().hexdigest().encode("ascii"))
 
 
-    def get_id(self, object_format=None) -> bytes:
+    def get_id(self, object_format: ObjectFormat | None = None) -> bytes:
         """Get the hex SHA of this object using the specified hash algorithm.
         """Get the hex SHA of this object using the specified hash algorithm.
 
 
         Args:
         Args:
@@ -894,12 +901,19 @@ class Blob(ShaFile):
     )
     )
 
 
     @classmethod
     @classmethod
-    def from_path(cls, path: str | bytes, sha: ObjectID | None = None) -> "Blob":
+    def from_path(
+        cls,
+        path: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Blob":
         """Read a blob from a file on disk.
         """Read a blob from a file on disk.
 
 
         Args:
         Args:
           path: Path to the blob file
           path: Path to the blob file
           sha: Optional known SHA for the object
           sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
 
         Returns:
         Returns:
           A Blob object
           A Blob object
@@ -907,7 +921,7 @@ class Blob(ShaFile):
         Raises:
         Raises:
           NotBlobError: If the file is not a blob
           NotBlobError: If the file is not a blob
         """
         """
-        blob = ShaFile.from_path(path, sha)
+        blob = ShaFile.from_path(path, sha, object_format=object_format)
         if not isinstance(blob, cls):
         if not isinstance(blob, cls):
             raise NotBlobError(_path_to_bytes(path))
             raise NotBlobError(_path_to_bytes(path))
         return blob
         return blob
@@ -1056,12 +1070,19 @@ class Tag(ShaFile):
         self._signature: bytes | None = None
         self._signature: bytes | None = None
 
 
     @classmethod
     @classmethod
-    def from_path(cls, filename: str | bytes, sha: ObjectID | None = None) -> "Tag":
+    def from_path(
+        cls,
+        filename: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Tag":
         """Read a tag from a file on disk.
         """Read a tag from a file on disk.
 
 
         Args:
         Args:
           filename: Path to the tag file
           filename: Path to the tag file
           sha: Optional known SHA for the object
           sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
 
         Returns:
         Returns:
           A Tag object
           A Tag object
@@ -1069,7 +1090,7 @@ class Tag(ShaFile):
         Raises:
         Raises:
           NotTagError: If the file is not a tag
           NotTagError: If the file is not a tag
         """
         """
-        tag = ShaFile.from_path(filename, sha)
+        tag = ShaFile.from_path(filename, sha, object_format=object_format)
         if not isinstance(tag, cls):
         if not isinstance(tag, cls):
             raise NotTagError(_path_to_bytes(filename))
             raise NotTagError(_path_to_bytes(filename))
         return tag
         return tag
@@ -1365,6 +1386,8 @@ def parse_tree(
         name_end = text.index(b"\0", mode_end)
         name_end = text.index(b"\0", mode_end)
         name = text[mode_end + 1 : name_end]
         name = text[mode_end + 1 : name_end]
 
 
+        if sha_len is None:
+            raise ObjectFormatException("sha_len must be specified")
         count = name_end + 1 + sha_len
         count = name_end + 1 + sha_len
         if count > length:
         if count > length:
             raise ObjectFormatException(
             raise ObjectFormatException(
@@ -1487,12 +1510,19 @@ class Tree(ShaFile):
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
         self._entries: dict[bytes, tuple[int, ObjectID]] = {}
 
 
     @classmethod
     @classmethod
-    def from_path(cls, filename: str | bytes, sha: ObjectID | None = None) -> "Tree":
+    def from_path(
+        cls,
+        filename: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Tree":
         """Read a tree from a file on disk.
         """Read a tree from a file on disk.
 
 
         Args:
         Args:
           filename: Path to the tree file
           filename: Path to the tree file
           sha: Optional known SHA for the object
           sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
 
         Returns:
         Returns:
           A Tree object
           A Tree object
@@ -1500,7 +1530,7 @@ class Tree(ShaFile):
         Raises:
         Raises:
           NotTreeError: If the file is not a tree
           NotTreeError: If the file is not a tree
         """
         """
-        tree = ShaFile.from_path(filename, sha)
+        tree = ShaFile.from_path(filename, sha, object_format=object_format)
         if not isinstance(tree, cls):
         if not isinstance(tree, cls):
             raise NotTreeError(_path_to_bytes(filename))
             raise NotTreeError(_path_to_bytes(filename))
         return tree
         return tree
@@ -1876,12 +1906,19 @@ class Commit(ShaFile):
         self._commit_timezone_neg_utc: bool | None = False
         self._commit_timezone_neg_utc: bool | None = False
 
 
     @classmethod
     @classmethod
-    def from_path(cls, path: str | bytes, sha: ObjectID | None = None) -> "Commit":
+    def from_path(
+        cls,
+        path: str | bytes,
+        sha: ObjectID | None = None,
+        *,
+        object_format: ObjectFormat | None = None,
+    ) -> "Commit":
         """Read a commit from a file on disk.
         """Read a commit from a file on disk.
 
 
         Args:
         Args:
           path: Path to the commit file
           path: Path to the commit file
           sha: Optional known SHA for the object
           sha: Optional known SHA for the object
+          object_format: Optional object format to use
 
 
         Returns:
         Returns:
           A Commit object
           A Commit object
@@ -1889,7 +1926,7 @@ class Commit(ShaFile):
         Raises:
         Raises:
           NotCommitError: If the file is not a commit
           NotCommitError: If the file is not a commit
         """
         """
-        commit = ShaFile.from_path(path, sha)
+        commit = ShaFile.from_path(path, sha, object_format=object_format)
         if not isinstance(commit, cls):
         if not isinstance(commit, cls):
             raise NotCommitError(_path_to_bytes(path))
             raise NotCommitError(_path_to_bytes(path))
         return commit
         return commit

+ 19 - 12
dulwich/pack.py

@@ -127,7 +127,6 @@ from typing import (
     Optional,
     Optional,
     Protocol,
     Protocol,
     TypeVar,
     TypeVar,
-    cast,
 )
 )
 
 
 try:
 try:
@@ -143,6 +142,8 @@ if TYPE_CHECKING:
     from .bitmap import PackBitmap
     from .bitmap import PackBitmap
     from .commit_graph import CommitGraph
     from .commit_graph import CommitGraph
     from .object_format import ObjectFormat
     from .object_format import ObjectFormat
+    from .object_store import BaseObjectStore
+    from .ref import Ref
 
 
 # For some reason the above try, except fails to set has_mmap = False for plan9
 # For some reason the above try, except fails to set has_mmap = False for plan9
 if sys.platform == "Plan9":
 if sys.platform == "Plan9":
@@ -1085,7 +1086,7 @@ class PackIndex1(FilePackIndex):
 
 
     def _unpack_offset(self, i: int) -> int:
     def _unpack_offset(self, i: int) -> int:
         offset = (0x100 * 4) + (i * self._entry_size)
         offset = (0x100 * 4) + (i * self._entry_size)
-        return unpack_from(">L", self._contents, offset)[0]
+        return int(unpack_from(">L", self._contents, offset)[0])
 
 
     def _unpack_crc32_checksum(self, i: int) -> None:
     def _unpack_crc32_checksum(self, i: int) -> None:
         # Not stored in v1 index files
         # Not stored in v1 index files
@@ -1146,14 +1147,18 @@ class PackIndex2(FilePackIndex):
 
 
     def _unpack_offset(self, i: int) -> int:
     def _unpack_offset(self, i: int) -> int:
         offset = self._pack_offset_table_offset + i * 4
         offset = self._pack_offset_table_offset + i * 4
-        offset = unpack_from(">L", self._contents, offset)[0]
-        if offset & (2**31):
-            offset = self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
-            offset = unpack_from(">Q", self._contents, offset)[0]
-        return offset
+        offset_val = int(unpack_from(">L", self._contents, offset)[0])
+        if offset_val & (2**31):
+            offset = (
+                self._pack_offset_largetable_offset + (offset_val & (2**31 - 1)) * 8
+            )
+            offset_val = int(unpack_from(">Q", self._contents, offset)[0])
+        return offset_val
 
 
     def _unpack_crc32_checksum(self, i: int) -> int:
     def _unpack_crc32_checksum(self, i: int) -> int:
-        return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+        return int(
+            unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+        )
 
 
     def get_pack_checksum(self) -> bytes:
     def get_pack_checksum(self) -> bytes:
         """Return the checksum stored for the corresponding packfile.
         """Return the checksum stored for the corresponding packfile.
@@ -1652,7 +1657,7 @@ class PackData:
         depth: int | None = None,
         depth: int | None = None,
         threads: int | None = None,
         threads: int | None = None,
         big_file_threshold: int | None = None,
         big_file_threshold: int | None = None,
-        object_format=None,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
     ) -> None:
         """Create a PackData object representing the pack in the given filename.
         """Create a PackData object representing the pack in the given filename.
 
 
@@ -3408,11 +3413,11 @@ def _create_delta_py(base_buf: bytes, target_buf: bytes) -> Iterator[bytes]:
             o = j1
             o = j1
             while s > 127:
             while s > 127:
                 yield bytes([127])
                 yield bytes([127])
-                yield memoryview(target_buf)[o : o + 127]
+                yield bytes(memoryview(target_buf)[o : o + 127])
                 s -= 127
                 s -= 127
                 o += 127
                 o += 127
             yield bytes([s])
             yield bytes([s])
-            yield memoryview(target_buf)[o : o + s]
+            yield bytes(memoryview(target_buf)[o : o + s])
 
 
 
 
 # Default to pure Python implementation
 # Default to pure Python implementation
@@ -4088,8 +4093,10 @@ class Pack:
                     isinstance(basename, bytes)
                     isinstance(basename, bytes)
                     and len(basename) == self.object_format.oid_length
                     and len(basename) == self.object_format.oid_length
                 )
                 )
-                base_offset, base_type, base_obj = get_ref(basename)
+                base_offset_temp, base_type, base_obj = get_ref(basename)
                 assert isinstance(base_type, int)
                 assert isinstance(base_type, int)
+                # base_offset_temp can be None for thin packs (external references)
+                base_offset = base_offset_temp
                 if base_offset == prev_offset:  # object is based on itself
                 if base_offset == prev_offset:  # object is based on itself
                     raise UnresolvedDeltas([basename])
                     raise UnresolvedDeltas([basename])
             delta_stack.append((prev_offset, base_type, delta))
             delta_stack.append((prev_offset, base_type, delta))

+ 4 - 2
dulwich/porcelain/__init__.py

@@ -1300,6 +1300,7 @@ def init(
     *,
     *,
     bare: bool = False,
     bare: bool = False,
     symlinks: bool | None = None,
     symlinks: bool | None = None,
+    object_format: str | None = None,
 ) -> Repo:
 ) -> Repo:
     """Create a new git repository.
     """Create a new git repository.
 
 
@@ -1307,15 +1308,16 @@ def init(
       path: Path to repository.
       path: Path to repository.
       bare: Whether to create a bare repository.
       bare: Whether to create a bare repository.
       symlinks: Whether to create actual symlinks (defaults to autodetect)
       symlinks: Whether to create actual symlinks (defaults to autodetect)
+      object_format: Object format to use ("sha1" or "sha256", defaults to "sha1")
     Returns: A Repo instance
     Returns: A Repo instance
     """
     """
     if not os.path.exists(path):
     if not os.path.exists(path):
         os.mkdir(path)
         os.mkdir(path)
 
 
     if bare:
     if bare:
-        return Repo.init_bare(path)
+        return Repo.init_bare(path, object_format=object_format)
     else:
     else:
-        return Repo.init(path, symlinks=symlinks)
+        return Repo.init(path, symlinks=symlinks, object_format=object_format)
 
 
 
 
 def _filter_transport_kwargs(**kwargs: object) -> TransportKwargs:
 def _filter_transport_kwargs(**kwargs: object) -> TransportKwargs:

+ 7 - 10
dulwich/repo.py

@@ -83,6 +83,7 @@ if TYPE_CHECKING:
     from .filters import FilterBlobNormalizer, FilterContext
     from .filters import FilterBlobNormalizer, FilterContext
     from .index import Index
     from .index import Index
     from .notes import Notes
     from .notes import Notes
+    from .object_format import ObjectFormat
     from .object_store import BaseObjectStore, GraphWalker
     from .object_store import BaseObjectStore, GraphWalker
     from .pack import UnpackedObject
     from .pack import UnpackedObject
     from .rebase import RebaseStateManager
     from .rebase import RebaseStateManager
@@ -502,7 +503,7 @@ class BaseRepo:
         self,
         self,
         object_store: "PackCapableObjectStore",
         object_store: "PackCapableObjectStore",
         refs: RefsContainer,
         refs: RefsContainer,
-        object_format=None,
+        object_format: "ObjectFormat | None" = None,
     ) -> None:
     ) -> None:
         """Open a repository.
         """Open a repository.
 
 
@@ -512,14 +513,17 @@ class BaseRepo:
         Args:
           object_store: Object store to use
           refs: Refs container to use
-          object_format: Hash algorithm to use (if None, will be determined from config)
+          object_format: Hash algorithm to use (if None, will use object_store's format)
         """
         self.object_store = object_store
         self.refs = refs
 
         self._graftpoints: dict[ObjectID, list[ObjectID]] = {}
         self.hooks: dict[str, Hook] = {}
-        self.object_format = object_format  # Hash algorithm (SHA1 or SHA256)
+        if object_format is None:
+            self.object_format: ObjectFormat = object_store.object_format
+        else:
+            self.object_format = object_format
 
     def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
@@ -2115,7 +2119,6 @@ class Repo(BaseRepo):
         controldir: str | bytes | os.PathLike[str],
         bare: bool,
         object_store: PackBasedObjectStore | None = None,
-        object_store: "PackBasedObjectStore | None" = None,
         config: "StackedConfig | None" = None,
         default_branch: bytes | None = None,
         symlinks: bool | None = None,
@@ -2143,11 +2146,6 @@ class Repo(BaseRepo):
             if dir_mode is not None:
                 os.chmod(dir_path, dir_mode)
 
-        if object_store is None:
-            # Get hash algorithm for object store
-            from .hash import get_hash_algorithm
-            os.mkdir(os.path.join(controldir, *d))
-
         # Determine hash algorithm
         from .object_format import get_object_format
 
@@ -2159,7 +2157,6 @@ class Repo(BaseRepo):
                 file_mode=file_mode,
                 dir_mode=dir_mode,
                 object_format=hash_alg,
-                os.path.join(controldir, OBJECTDIR), object_format=hash_alg
             )
         ret = cls(path, bare=bare, object_store=object_store)
         if default_branch is None:

+ 2 - 0
dulwich/server.py

@@ -81,6 +81,7 @@ from typing import IO, TYPE_CHECKING
 from typing import Protocol as TypingProtocol
 
 if TYPE_CHECKING:
+    from .object_format import ObjectFormat
     from .object_store import BaseObjectStore
     from .repo import BaseRepo
 
@@ -174,6 +175,7 @@ class BackendRepo(TypingProtocol):
 
     object_store: PackBasedObjectStore
     refs: RefsContainer
+    object_format: "ObjectFormat"
 
     def get_refs(self) -> dict[bytes, bytes]:
         """Get all the refs in the repository.

+ 44 - 0
tests/test_cli.py

@@ -126,6 +126,50 @@ class InitCommandTest(DulwichCliTestCase):
         self.assertTrue(os.path.exists(os.path.join(bare_repo_path, "HEAD")))
         self.assertFalse(os.path.exists(os.path.join(bare_repo_path, ".git")))
 
+    def test_init_objectformat_sha256(self) -> None:
+        # Create a new directory for init with SHA-256
+        new_repo_path = os.path.join(self.test_dir, "sha256_repo")
+        _result, _stdout, _stderr = self._run_cli(
+            "init", "--objectformat=sha256", new_repo_path
+        )
+        self.assertTrue(os.path.exists(os.path.join(new_repo_path, ".git")))
+        # Verify the object format
+        repo = Repo(new_repo_path)
+        self.addCleanup(repo.close)
+        config = repo.get_config()
+        self.assertEqual(b"sha256", config.get((b"extensions",), b"objectformat"))
+
+    def test_init_objectformat_sha1(self) -> None:
+        # Create a new directory for init with SHA-1
+        new_repo_path = os.path.join(self.test_dir, "sha1_repo")
+        _result, _stdout, _stderr = self._run_cli(
+            "init", "--objectformat=sha1", new_repo_path
+        )
+        self.assertTrue(os.path.exists(os.path.join(new_repo_path, ".git")))
+        # SHA-1 is the default, so objectformat should not be set
+        repo = Repo(new_repo_path)
+        self.addCleanup(repo.close)
+        config = repo.get_config()
+        # The extensions section may not exist at all for SHA-1
+        if config.has_section((b"extensions",)):
+            object_format = config.get((b"extensions",), b"objectformat")
+            self.assertNotEqual(b"sha256", object_format)
+        # If the section doesn't exist, that's also fine (SHA-1 is default)
+
+    def test_init_bare_objectformat_sha256(self) -> None:
+        # Create a bare repo with SHA-256
+        bare_repo_path = os.path.join(self.test_dir, "bare_sha256_repo")
+        _result, _stdout, _stderr = self._run_cli(
+            "init", "--bare", "--objectformat=sha256", bare_repo_path
+        )
+        self.assertTrue(os.path.exists(os.path.join(bare_repo_path, "HEAD")))
+        self.assertFalse(os.path.exists(os.path.join(bare_repo_path, ".git")))
+        # Verify the object format
+        repo = Repo(bare_repo_path)
+        self.addCleanup(repo.close)
+        config = repo.get_config()
+        self.assertEqual(b"sha256", config.get((b"extensions",), b"objectformat"))
+
 
 class HelperFunctionsTest(TestCase):
     """Tests for CLI helper functions."""