Jelmer Vernooij 3 months ago
parent
commit
6fa6f4f773
56 changed files with 2908 additions and 1337 deletions
  1. dulwich/__init__.py  +2 -2
  2. dulwich/annotate.py  +4 -3
  3. dulwich/archive.py  +5 -0
  4. dulwich/bisect.py  +1 -1
  5. dulwich/bundle.py  +7 -6
  6. dulwich/cli.py  +349 -135
  7. dulwich/client.py  +366 -135
  8. dulwich/cloud/gcs.py  +1 -1
  9. dulwich/commit_graph.py  +1 -1
  10. dulwich/config.py  +19 -17
  11. dulwich/contrib/requests_vendor.py  +20 -10
  12. dulwich/contrib/swift.py  +55 -41
  13. dulwich/diff.py  +101 -6
  14. dulwich/diff_tree.py  +43 -15
  15. dulwich/dumb.py  +5 -3
  16. dulwich/fastexport.py  +12 -11
  17. dulwich/file.py  +22 -9
  18. dulwich/filter_branch.py  +17 -3
  19. dulwich/filters.py  +35 -13
  20. dulwich/gc.py  +4 -1
  21. dulwich/graph.py  +23 -11
  22. dulwich/greenthreads.py  +4 -4
  23. dulwich/ignore.py  +5 -3
  24. dulwich/index.py  +55 -18
  25. dulwich/lfs.py  +6 -6
  26. dulwich/lfs_server.py  +2 -1
  27. dulwich/line_ending.py  +1 -1
  28. dulwich/merge.py  +23 -10
  29. dulwich/merge_drivers.py  +6 -6
  30. dulwich/notes.py  +35 -4
  31. dulwich/object_store.py  +258 -132
  32. dulwich/objects.py  +16 -9
  33. dulwich/pack.py  +208 -134
  34. dulwich/patch.py  +4 -2
  35. dulwich/porcelain.py  +251 -115
  36. dulwich/rebase.py  +23 -13
  37. dulwich/refs.py  +17 -13
  38. dulwich/reftable.py  +102 -50
  39. dulwich/repo.py  +201 -123
  40. dulwich/server.py  +233 -112
  41. dulwich/sparse_patterns.py  +2 -6
  42. dulwich/stash.py  +42 -24
  43. dulwich/submodule.py  +5 -2
  44. dulwich/tests/test_object_store.py  +27 -14
  45. dulwich/tests/utils.py  +9 -9
  46. dulwich/walk.py  +20 -19
  47. dulwich/web.py  +93 -15
  48. dulwich/worktree.py  +26 -18
  49. pyproject.toml  +38 -0
  50. tests/compat/test_client.py  +50 -7
  51. tests/compat/test_dumb.py  +3 -3
  52. tests/test_client.py  +31 -27
  53. tests/test_dumb.py  +8 -4
  54. tests/test_index.py  +3 -4
  55. tests/test_object_store.py  +8 -4
  56. tests/test_porcelain_lfs.py  +1 -1

+ 2 - 2
dulwich/__init__.py

@@ -62,7 +62,7 @@ except ImportError:
             import functools
             import warnings
 
-            m = f"{func.__name__} is deprecated"  # type: ignore[attr-defined]
+            m = f"{func.__name__} is deprecated"
             since_str = str(since) if since is not None else None
             remove_in_str = str(remove_in) if remove_in is not None else None
 
@@ -82,7 +82,7 @@ except ImportError:
                     DeprecationWarning,
                     stacklevel=2,
                 )
-                return func(*args, **kwargs)  # type: ignore[operator]
+                return func(*args, **kwargs)
 
             return _wrapped_func  # type: ignore[return-value]
 

+ 4 - 3
dulwich/annotate.py

@@ -36,9 +36,9 @@ from dulwich.walk import (
 )
 
 if TYPE_CHECKING:
-    from dulwich.diff_tree import TreeChange, TreeEntry
+    from dulwich.diff_tree import TreeChange
     from dulwich.object_store import BaseObjectStore
-    from dulwich.objects import Commit
+    from dulwich.objects import Commit, TreeEntry
 
 # Walk over ancestry graph breadth-first
 # When checking each revision, find lines that according to difflib.Differ()
@@ -103,13 +103,14 @@ def annotate_lines(
                 changes = [tree_change]
             for change in changes:
                 if change.new is not None and change.new.path == path:
-                    if change.old is not None:
+                    if change.old is not None and change.old.path is not None:
                         path = change.old.path
                     revs.append((log_entry.commit, change.new))
                     break
 
     lines_annotated: list[tuple[tuple[Commit, TreeEntry], bytes]] = []
     for commit, entry in reversed(revs):
+        assert entry.sha is not None
         blob_obj = store[entry.sha]
         assert isinstance(blob_obj, Blob)
         lines_annotated = update_lines(lines_annotated, (commit, entry), blob_obj)

+ 5 - 0
dulwich/archive.py

@@ -135,6 +135,7 @@ def tar_stream(
             buf.seek(0, SEEK_END)
 
         for entry_abspath, entry in _walk_tree(store, tree, prefix):
+            assert entry.sha is not None
             try:
                 blob = store[entry.sha]
             except KeyError:
@@ -151,6 +152,7 @@ def tar_stream(
             # tarfile only works with ascii.
             info.name = entry_abspath.decode("utf-8", "surrogateescape")
             info.size = blob.raw_length()
+            assert entry.mode is not None
             info.mode = entry.mode
             info.mtime = mtime
 
@@ -166,8 +168,11 @@ def _walk_tree(
 ) -> Generator[tuple[bytes, "TreeEntry"], None, None]:
     """Recursively walk a dulwich Tree, yielding tuples of (absolute path, TreeEntry) along the way."""
     for entry in tree.iteritems():
+        assert entry.path is not None
         entry_abspath = posixpath.join(root, entry.path)
+        assert entry.mode is not None
         if stat.S_ISDIR(entry.mode):
+            assert entry.sha is not None
             subtree = store[entry.sha]
             if isinstance(subtree, Tree):
                 yield from _walk_tree(store, subtree, entry_abspath)

+ 1 - 1
dulwich/bisect.py

@@ -367,7 +367,7 @@ class BisectState:
         return next_commit
 
     def _find_bisect_candidates(
-        self, bad_sha: bytes, good_shas: list[bytes], skip_shas: set
+        self, bad_sha: bytes, good_shas: list[bytes], skip_shas: set[bytes]
     ) -> list[bytes]:
         """Find all commits between good and bad commits.
 

+ 7 - 6
dulwich/bundle.py

@@ -28,6 +28,7 @@ from typing import (
     Callable,
     Optional,
     Protocol,
+    cast,
     runtime_checkable,
 )
 
@@ -213,7 +214,7 @@ def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
     if bundle.pack_data is None:
         raise ValueError("bundle.pack_data is not loaded")
     write_pack_data(
-        f.write,
+        cast(Callable[[bytes], None], f.write),
         num_records=len(bundle.pack_data),
         records=bundle.pack_data.iter_unpacked(),
     )
@@ -251,7 +252,7 @@ def create_bundle_from_repo(
 
     # Build the references dictionary for the bundle
     bundle_refs = {}
-    want_objects = []
+    want_objects = set()
 
     for ref in refs:
         if ref in repo.refs:
@@ -265,11 +266,11 @@ def create_bundle_from_repo(
                     bundle_refs[ref] = ref_value
             except KeyError:
                 bundle_refs[ref] = ref_value
-            want_objects.append(bundle_refs[ref])
+            want_objects.add(bundle_refs[ref])
 
     # Convert prerequisites to proper format
     bundle_prerequisites = []
-    have_objects = []
+    have_objects = set()
     for prereq in prerequisites:
         if isinstance(prereq, str):
             prereq = prereq.encode("utf-8")
@@ -280,7 +281,7 @@ def create_bundle_from_repo(
                     bytes.fromhex(prereq.decode("utf-8"))
                     # Store hex in bundle and for pack generation
                     bundle_prerequisites.append((prereq, b""))
-                    have_objects.append(prereq)
+                    have_objects.add(prereq)
                 except ValueError:
                     # Not a valid hex string, invalid prerequisite
                     raise ValueError(f"Invalid prerequisite format: {prereq!r}")
@@ -288,7 +289,7 @@ def create_bundle_from_repo(
                 # Binary SHA, convert to hex for both bundle and pack generation
                 hex_prereq = prereq.hex().encode("ascii")
                 bundle_prerequisites.append((hex_prereq, b""))
-                have_objects.append(hex_prereq)
+                have_objects.add(hex_prereq)
             else:
                 # Invalid length
                 raise ValueError(f"Invalid prerequisite SHA length: {len(prereq)}")

File diff suppressed because it is too large
+ 349 - 135
dulwich/cli.py


File diff suppressed because it is too large
+ 366 - 135
dulwich/client.py


+ 1 - 1
dulwich/cloud/gcs.py

@@ -93,7 +93,7 @@ class GcsObjectStore(BucketBasedObjectStore):
             return load_pack_index_file(name + ".idx", f)
 
     def _get_pack(self, name: str) -> Pack:
-        return Pack.from_lazy_objects(  # type: ignore[no-untyped-call]
+        return Pack.from_lazy_objects(
             lambda: self._load_pack_data(name), lambda: self._load_pack_index(name)
         )
 

+ 1 - 1
dulwich/commit_graph.py

@@ -288,7 +288,7 @@ class CommitGraph:
         entry = self.get_entry_by_oid(oid)
         return entry.generation if entry else None
 
-    def get_parents(self, oid: ObjectID) -> Optional[list[ObjectID]]:
+    def get_parents(self, oid: ObjectID) -> Optional[list[bytes]]:
         """Get parent commit IDs for a commit."""
         entry = self.get_entry_by_oid(oid)
         return entry.parents if entry else None

+ 19 - 17
dulwich/config.py

@@ -57,7 +57,7 @@ ConfigValue = Union[str, bytes, bool, int]
 logger = logging.getLogger(__name__)
 
 # Type for file opener callback
-FileOpener = Callable[[Union[str, os.PathLike]], IO[bytes]]
+FileOpener = Callable[[Union[str, os.PathLike[str]]], IO[bytes]]
 
 # Type for includeIf condition matcher
 # Takes the condition value (e.g., "main" for onbranch:main) and returns bool
@@ -254,7 +254,7 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
             def __contains__(self, key: object) -> bool:
                 return key in self._keys
 
-            def __iter__(self):
+            def __iter__(self) -> Iterator[K]:
                 return iter(self._keys)
 
             def __len__(self) -> int:
@@ -726,16 +726,16 @@ _WHITESPACE_CHARS = [ord(b"\t"), ord(b" ")]
 
 
 def _parse_string(value: bytes) -> bytes:
-    value = bytearray(value.strip())
+    value_array = bytearray(value.strip())
     ret = bytearray()
     whitespace = bytearray()
     in_quotes = False
     i = 0
-    while i < len(value):
-        c = value[i]
+    while i < len(value_array):
+        c = value_array[i]
         if c == ord(b"\\"):
             i += 1
-            if i >= len(value):
+            if i >= len(value_array):
                 # Backslash at end of string - treat as literal backslash
                 if whitespace:
                     ret.extend(whitespace)
@@ -743,7 +743,7 @@ def _parse_string(value: bytes) -> bytes:
                 ret.append(ord(b"\\"))
             else:
                 try:
-                    v = _ESCAPE_TABLE[value[i]]
+                    v = _ESCAPE_TABLE[value_array[i]]
                     if whitespace:
                         ret.extend(whitespace)
                         whitespace = bytearray()
@@ -1102,7 +1102,7 @@ class ConfigFile(ConfigDict):
             opener: FileOpener
             if file_opener is None:
 
-                def opener(path: Union[str, os.PathLike]) -> IO[bytes]:
+                def opener(path: Union[str, os.PathLike[str]]) -> IO[bytes]:
                     return GitFile(path, "rb")
             else:
                 opener = file_opener
@@ -1242,7 +1242,7 @@ class ConfigFile(ConfigDict):
     @classmethod
     def from_path(
         cls,
-        path: Union[str, os.PathLike],
+        path: Union[str, os.PathLike[str]],
         *,
         max_include_depth: int = DEFAULT_MAX_INCLUDE_DEPTH,
         file_opener: Optional[FileOpener] = None,
@@ -1263,7 +1263,7 @@ class ConfigFile(ConfigDict):
         opener: FileOpener
         if file_opener is None:
 
-            def opener(p: Union[str, os.PathLike]) -> IO[bytes]:
+            def opener(p: Union[str, os.PathLike[str]]) -> IO[bytes]:
                 return GitFile(p, "rb")
         else:
             opener = file_opener
@@ -1279,12 +1279,14 @@ class ConfigFile(ConfigDict):
             ret.path = abs_path
             return ret
 
-    def write_to_path(self, path: Optional[Union[str, os.PathLike]] = None) -> None:
+    def write_to_path(
+        self, path: Optional[Union[str, os.PathLike[str]]] = None
+    ) -> None:
         """Write configuration to a file on disk."""
         if path is None:
             if self.path is None:
                 raise ValueError("No path specified and no default path available")
-            path_to_use: Union[str, os.PathLike] = self.path
+            path_to_use: Union[str, os.PathLike[str]] = self.path
         else:
             path_to_use = path
         with GitFile(path_to_use, "wb") as f:
@@ -1351,11 +1353,11 @@ def _find_git_in_win_reg() -> Iterator[str]:
     else:
         subkey = "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Git_is1"
 
-    for key in (winreg.HKEY_CURRENT_USER, winreg.HKEY_LOCAL_MACHINE):  # type: ignore
+    for key in (winreg.HKEY_CURRENT_USER, winreg.HKEY_LOCAL_MACHINE):  # type: ignore[attr-defined,unused-ignore]
         with suppress(OSError):
-            with winreg.OpenKey(key, subkey) as k:  # type: ignore
-                val, typ = winreg.QueryValueEx(k, "InstallLocation")  # type: ignore
-                if typ == winreg.REG_SZ:  # type: ignore
+            with winreg.OpenKey(key, subkey) as k:  # type: ignore[attr-defined,unused-ignore]
+                val, typ = winreg.QueryValueEx(k, "InstallLocation")  # type: ignore[attr-defined,unused-ignore]
+                if typ == winreg.REG_SZ:  # type: ignore[attr-defined,unused-ignore]
                     yield val
 
 
@@ -1502,7 +1504,7 @@ class StackedConfig(Config):
 
 
 def read_submodules(
-    path: Union[str, os.PathLike],
+    path: Union[str, os.PathLike[str]],
 ) -> Iterator[tuple[bytes, bytes, bytes]]:
     """Read a .gitmodules file."""
     cfg = ConfigFile.from_path(path)
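
The FileOpener callback type is tightened to Callable[[Union[str, os.PathLike[str]]], IO[bytes]] and threaded through ConfigFile.from_path. A hedged sketch of a caller-supplied opener matching that signature; the logging wrapper and the config path are placeholders, only from_path and its file_opener keyword come from the diff above:

import os
from typing import IO, Union

from dulwich.config import ConfigFile

def logging_opener(path: Union[str, os.PathLike[str]]) -> IO[bytes]:
    # Plain binary open, with a trace of which file is being read.
    print(f"reading config from {os.fspath(path)}")
    return open(path, "rb")

cfg = ConfigFile.from_path("example.gitconfig", file_opener=logging_opener)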

+ 20 - 10
dulwich/contrib/requests_vendor.py

@@ -31,8 +31,9 @@ the dulwich.client.HttpGitClient attribute:
 This implementation is experimental and does not have any tests.
 """
 
+from collections.abc import Iterator
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 if TYPE_CHECKING:
     from ..config import ConfigFile
@@ -58,7 +59,10 @@ class RequestsHttpGitClient(AbstractHttpGitClient):
         config: Optional["ConfigFile"] = None,
         username: Optional[str] = None,
         password: Optional[str] = None,
-        **kwargs: object,
+        thin_packs: bool = True,
+        report_activity: Optional[Callable[[int, str], None]] = None,
+        quiet: bool = False,
+        include_tags: bool = False,
     ) -> None:
         """Initialize RequestsHttpGitClient.
 
@@ -68,7 +72,10 @@ class RequestsHttpGitClient(AbstractHttpGitClient):
           config: Git configuration file
           username: Username for authentication
           password: Password for authentication
-          **kwargs: Additional keyword arguments
+          thin_packs: Whether to use thin packs
+          report_activity: Function to report activity
+          quiet: Whether to suppress output
+          include_tags: Whether to include tags
         """
         self._username = username
         self._password = password
@@ -79,24 +86,27 @@ class RequestsHttpGitClient(AbstractHttpGitClient):
             self.session.auth = (username, password)  # type: ignore[assignment]
 
         super().__init__(
-            base_url=base_url, dumb=bool(dumb) if dumb is not None else False, **kwargs
+            base_url=base_url,
+            dumb=bool(dumb) if dumb is not None else False,
+            thin_packs=thin_packs,
+            report_activity=report_activity,
+            quiet=quiet,
+            include_tags=include_tags,
         )
 
     def _http_request(
         self,
         url: str,
         headers: Optional[dict[str, str]] = None,
-        data: Optional[bytes] = None,
-        allow_compression: bool = False,
+        data: Optional[Union[bytes, Iterator[bytes]]] = None,
+        raise_for_status: bool = True,
     ) -> tuple[Any, Callable[[int], bytes]]:
         req_headers = self.session.headers.copy()  # type: ignore[attr-defined]
         if headers is not None:
             req_headers.update(headers)
 
-        if allow_compression:
-            req_headers["Accept-Encoding"] = "gzip"
-        else:
-            req_headers["Accept-Encoding"] = "identity"
+        # Accept compression by default
+        req_headers.setdefault("Accept-Encoding", "gzip")
 
         if data:
             resp = self.session.post(url, headers=req_headers, data=data)
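
With **kwargs replaced by explicit keyword parameters, the options RequestsHttpGitClient accepts are now visible in its signature. A hedged usage sketch (requires the requests package; the URL and credentials are placeholders):

from dulwich.contrib.requests_vendor import RequestsHttpGitClient

client = RequestsHttpGitClient(
    "https://example.com/repo.git",
    username="alice",
    password="secret",
    quiet=True,          # suppress progress output
    include_tags=False,  # keyword now spelled out instead of hidden in **kwargs
)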

+ 55 - 41
dulwich/contrib/swift.py

@@ -39,16 +39,17 @@ import zlib
 from collections.abc import Iterator
 from configparser import ConfigParser
 from io import BytesIO
-from typing import BinaryIO, Callable, Optional, Union, cast
+from typing import Any, BinaryIO, Callable, Optional, Union, cast
 
 from geventhttpclient import HTTPClient
 
 from ..file import _GitFile
 from ..greenthreads import GreenThreadsMissingObjectFinder
 from ..lru_cache import LRUSizeCache
-from ..object_store import INFODIR, PACKDIR, ObjectContainer, PackBasedObjectStore
+from ..object_store import INFODIR, PACKDIR, PackBasedObjectStore
 from ..objects import S_ISGITLINK, Blob, Commit, Tag, Tree
 from ..pack import (
+    ObjectContainer,
     Pack,
     PackData,
     PackIndex,
@@ -123,11 +124,14 @@ class PackInfoMissingObjectFinder(GreenThreadsMissingObjectFinder):
                             item.sha,
                             item.path
                             if isinstance(item.path, bytes)
-                            else item.path.encode("utf-8"),
+                            else item.path.encode("utf-8")
+                            if item.path is not None
+                            else b"",
                             None,
                             False,
                         )
                         for item in obj.items()
+                        if item.sha is not None
                     ]
                     self.add_todo(tree_items)
                 elif isinstance(obj, Tag):
@@ -137,7 +141,7 @@ class PackInfoMissingObjectFinder(GreenThreadsMissingObjectFinder):
             except KeyError:
                 pass
         self.sha_done.add(sha)
-        self.progress(f"counting objects: {len(self.sha_done)}\r")
+        self.progress(f"counting objects: {len(self.sha_done)}\r".encode())
         return (
             sha,
             0,
@@ -197,7 +201,7 @@ def pack_info_create(pack_data: "PackData", pack_index: "PackIndex") -> bytes:
       Compressed JSON bytes containing pack information
     """
     pack = Pack.from_objects(pack_data, pack_index)
-    info: dict = {}
+    info: dict[bytes, Any] = {}
     for obj in pack.iterobjects():
         # Commit
         if obj.type_num == Commit.type_num:
@@ -211,7 +215,7 @@ def pack_info_create(pack_data: "PackData", pack_index: "PackIndex") -> bytes:
             shas = [
                 (s, n, not stat.S_ISDIR(m))
                 for n, m, s in tree_obj.items()
-                if not S_ISGITLINK(m)
+                if m is not None and not S_ISGITLINK(m)
             ]
             info[obj.id] = (obj.type_num, shas)
         # Blob
@@ -229,7 +233,7 @@ def load_pack_info(
     filename: str,
     scon: Optional["SwiftConnector"] = None,
     file: Optional[BinaryIO] = None,
-) -> Optional[dict]:
+) -> Optional[dict[str, Any]]:
     """Load pack info from Swift or file.
 
     Args:
@@ -247,13 +251,13 @@ def load_pack_info(
         if obj is None:
             return None
         if isinstance(obj, bytes):
-            return json.loads(zlib.decompress(obj))
+            return cast(dict[str, Any], json.loads(zlib.decompress(obj)))
         else:
             f: BinaryIO = obj
     else:
         f = file
     try:
-        return json.loads(zlib.decompress(f.read()))
+        return cast(dict[str, Any], json.loads(zlib.decompress(f.read())))
     finally:
         if hasattr(f, "close"):
             f.close()
@@ -412,7 +416,7 @@ class SwiftConnector:
                     f"PUT request failed with error code {ret.status_code}"
                 )
 
-    def get_container_objects(self) -> Optional[list[dict]]:
+    def get_container_objects(self) -> Optional[list[dict[str, Any]]]:
         """Retrieve objects list in a container.
 
         Returns: A list of dict that describe objects
@@ -428,9 +432,9 @@ class SwiftConnector:
                 f"GET request failed with error code {ret.status_code}"
             )
         content = ret.read()
-        return json.loads(content)
+        return cast(list[dict[str, Any]], json.loads(content))
 
-    def get_object_stat(self, name: str) -> Optional[dict]:
+    def get_object_stat(self, name: str) -> Optional[dict[str, Any]]:
         """Retrieve object stat.
 
         Args:
@@ -504,7 +508,7 @@ class SwiftConnector:
             raise SwiftException(
                 f"GET request failed with error code {ret.status_code}"
             )
-        content = ret.read()
+        content = cast(bytes, ret.read())
 
         if range:
             return content
@@ -633,7 +637,9 @@ class SwiftPackData(PackData):
     using the Range header feature of Swift.
     """
 
-    def __init__(self, scon: SwiftConnector, filename: Union[str, os.PathLike]) -> None:
+    def __init__(
+        self, scon: SwiftConnector, filename: Union[str, os.PathLike[str]]
+    ) -> None:
         """Initialize a SwiftPackReader.
 
         Args:
@@ -703,17 +709,19 @@ class SwiftPack(Pack):
           *args: Arguments to pass to parent class
           **kwargs: Keyword arguments, must include 'scon' (SwiftConnector)
         """
-        self.scon = kwargs["scon"]
+        self.scon: SwiftConnector = kwargs["scon"]  # type: ignore
         del kwargs["scon"]
         super().__init__(*args, **kwargs)  # type: ignore
         self._pack_info_path = self._basename + ".info"
-        self._pack_info: Optional[dict] = None
-        self._pack_info_load = lambda: load_pack_info(self._pack_info_path, self.scon)  # type: ignore
-        self._idx_load = lambda: swift_load_pack_index(self.scon, self._idx_path)  # type: ignore
-        self._data_load = lambda: SwiftPackData(self.scon, self._data_path)  # type: ignore
+        self._pack_info: Optional[dict[str, Any]] = None
+        self._pack_info_load: Callable[[], Optional[dict[str, Any]]] = (
+            lambda: load_pack_info(self._pack_info_path, self.scon)
+        )
+        self._idx_load = lambda: swift_load_pack_index(self.scon, self._idx_path)
+        self._data_load = lambda: SwiftPackData(self.scon, self._data_path)
 
     @property
-    def pack_info(self) -> Optional[dict]:
+    def pack_info(self) -> Optional[dict[str, Any]]:
         """The pack data object being used."""
         if self._pack_info is None:
             self._pack_info = self._pack_info_load()
@@ -739,7 +747,7 @@ class SwiftObjectStore(PackBasedObjectStore):
         self.pack_dir = posixpath.join(OBJECTDIR, PACKDIR)
         self._alternates = None
 
-    def _update_pack_cache(self) -> list:
+    def _update_pack_cache(self) -> list[Any]:
         objects = self.scon.get_container_objects()
         if objects is None:
             return []
@@ -755,11 +763,11 @@ class SwiftObjectStore(PackBasedObjectStore):
             ret.append(pack)
         return ret
 
-    def _iter_loose_objects(self) -> Iterator:
+    def _iter_loose_objects(self) -> Iterator[Any]:
         """Loose objects are not supported by this repository."""
         return iter([])
 
-    def pack_info_get(self, sha: bytes) -> Optional[tuple]:
+    def pack_info_get(self, sha: bytes) -> Optional[tuple[Any, ...]]:
         """Get pack info for a specific SHA.
 
         Args:
@@ -773,23 +781,23 @@ class SwiftObjectStore(PackBasedObjectStore):
                 if hasattr(pack, "pack_info"):
                     pack_info = pack.pack_info
                     if pack_info is not None:
-                        return pack_info.get(sha)
+                        return cast(Optional[tuple[Any, ...]], pack_info.get(sha))
         return None
 
     def _collect_ancestors(
-        self, heads: list, common: Optional[set] = None
-    ) -> tuple[set, set]:
+        self, heads: list[Any], common: Optional[set[Any]] = None
+    ) -> tuple[set[Any], set[Any]]:
         if common is None:
             common = set()
 
-        def _find_parents(commit: bytes) -> list:
+        def _find_parents(commit: bytes) -> list[Any]:
             for pack in self.packs:
                 if commit in pack:
                     try:
                         if hasattr(pack, "pack_info"):
                             pack_info = pack.pack_info
                             if pack_info is not None:
-                                return pack_info[commit][1]
+                                return cast(list[Any], pack_info[commit][1])
                     except KeyError:
                         # Seems to have no parents
                         return []
@@ -809,7 +817,7 @@ class SwiftObjectStore(PackBasedObjectStore):
                 queue.extend(parents)
         return (commits, bases)
 
-    def add_pack(self) -> tuple[BytesIO, Callable, Callable]:
+    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
         """Add a new pack to this object store.
 
         Returns: Fileobject to write to and a commit function to
@@ -851,7 +859,11 @@ class SwiftObjectStore(PackBasedObjectStore):
         def abort() -> None:
             """Abort the pack operation (no-op)."""
 
-        return f, commit, abort
+        def commit_wrapper() -> None:
+            """Wrapper that discards the return value."""
+            commit()
+
+        return f, commit_wrapper, abort
 
     def add_object(self, obj: object) -> None:
         """Add a single object to the store.
@@ -871,7 +883,9 @@ class SwiftObjectStore(PackBasedObjectStore):
     def _get_loose_object(self, sha: bytes) -> None:
         return None
 
-    def add_thin_pack(self, read_all: Callable, read_some: Callable) -> "SwiftPack":
+    def add_thin_pack(
+        self, read_all: Callable[[int], bytes], read_some: Callable[[int], bytes]
+    ) -> "SwiftPack":
         """Read a thin pack.
 
         Read it from a stream and complete it in a temporary file.
@@ -882,7 +896,7 @@ class SwiftObjectStore(PackBasedObjectStore):
         try:
             pack_data = PackData(file=cast(_GitFile, f), filename=path)
             indexer = PackIndexer(cast(BinaryIO, pack_data._file), resolve_ext_ref=None)
-            copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
+            copier = PackStreamCopier(read_all, read_some, f, delta_iter=None)
             copier.verify()
             return self._complete_thin_pack(f, path, copier, indexer)
         finally:
@@ -974,7 +988,7 @@ class SwiftInfoRefsContainer(InfoRefsContainer):
 
     def _load_check_ref(
         self, name: bytes, old_ref: Optional[bytes]
-    ) -> Union[dict, bool]:
+    ) -> Union[dict[bytes, bytes], bool]:
         self._check_refname(name)
         obj = self.scon.get_object(self.filename)
         if not obj:
@@ -990,7 +1004,7 @@ class SwiftInfoRefsContainer(InfoRefsContainer):
                 return False
         return refs
 
-    def _write_refs(self, refs: dict) -> None:
+    def _write_refs(self, refs: dict[bytes, bytes]) -> None:
         f = BytesIO()
         f.writelines(write_info_refs(refs, cast("ObjectContainer", self.store)))
         self.scon.put_object(self.filename, f)
@@ -1006,7 +1020,7 @@ class SwiftInfoRefsContainer(InfoRefsContainer):
         message: Optional[bytes] = None,
     ) -> bool:
         """Set a refname to new_ref only if it currently equals old_ref."""
-        if name == "HEAD":
+        if name == b"HEAD":
             return True
         refs = self._load_check_ref(name, old_ref)
         if not isinstance(refs, dict):
@@ -1026,7 +1040,7 @@ class SwiftInfoRefsContainer(InfoRefsContainer):
         message: object = None,
     ) -> bool:
         """Remove a refname only if it currently equals old_ref."""
-        if name == "HEAD":
+        if name == b"HEAD":
             return True
         refs = self._load_check_ref(name, old_ref)
         if not isinstance(refs, dict):
@@ -1069,8 +1083,8 @@ class SwiftRepo(BaseRepo):
         objects = self.scon.get_container_objects()
         if not objects:
             raise Exception(f"There is not any GIT repo here : {self.root}")
-        objects = [o["name"].split("/")[0] for o in objects]
-        if OBJECTDIR not in objects:
+        object_names = [o["name"].split("/")[0] for o in objects]
+        if OBJECTDIR not in object_names:
             raise Exception(f"This repository ({self.root}) is not bare.")
         self.bare = True
         self._controldir = self.root
@@ -1143,7 +1157,7 @@ class SwiftSystemBackend(Backend):
         return cast("BackendRepo", SwiftRepo(path, self.conf))
 
 
-def cmd_daemon(args: list) -> None:
+def cmd_daemon(args: list[str]) -> None:
     """Start a TCP git server for Swift repositories.
 
     Args:
@@ -1199,7 +1213,7 @@ def cmd_daemon(args: list) -> None:
     server.serve_forever()
 
 
-def cmd_init(args: list) -> None:
+def cmd_init(args: list[str]) -> None:
     """Initialize a new Git repository in Swift.
 
     Args:
@@ -1225,7 +1239,7 @@ def cmd_init(args: list) -> None:
     SwiftRepo.init_bare(scon, conf)
 
 
-def main(argv: list = sys.argv) -> None:
+def main(argv: list[str] = sys.argv) -> None:
     """Main entry point for Swift Git command line interface.
 
     Args:

+ 101 - 6
dulwich/diff.py

@@ -44,10 +44,18 @@ Example usage:
     diff_index_to_tree(repo, sys.stdout.buffer, paths=[b'src/', b'README.md'])
 """
 
+import io
 import logging
 import os
 import stat
-from typing import BinaryIO, Optional
+import sys
+from collections.abc import Iterable
+from typing import BinaryIO, Optional, Union
+
+if sys.version_info >= (3, 12):
+    from collections.abc import Buffer
+else:
+    Buffer = Union[bytes, bytearray, memoryview]
 
 from .index import ConflictedIndexEntry, commit_index
 from .object_store import iter_tree_contents
@@ -138,7 +146,7 @@ def diff_working_tree_to_tree(
     assert isinstance(commit, Commit)
     tree = commit.tree
     normalizer = repo.get_blob_normalizer()
-    filter_callback = normalizer.checkin_normalize
+    filter_callback = normalizer.checkin_normalize if normalizer is not None else None
 
     # Get index for tracking new files
     index = repo.open_index()
@@ -147,6 +155,9 @@ def diff_working_tree_to_tree(
 
     # Process files from the committed tree lazily
     for entry in iter_tree_contents(repo.object_store, tree):
+        assert (
+            entry.path is not None and entry.mode is not None and entry.sha is not None
+        )
         path = entry.path
         if not should_include_path(path, paths):
             continue
@@ -377,7 +388,7 @@ def diff_working_tree_to_index(
     """
     index = repo.open_index()
     normalizer = repo.get_blob_normalizer()
-    filter_callback = normalizer.checkin_normalize
+    filter_callback = normalizer.checkin_normalize if normalizer is not None else None
 
     # Process each file in the index
     for tree_path, entry in index.iteritems():
@@ -521,7 +532,7 @@ def diff_working_tree_to_index(
             )
 
 
-class ColorizedDiffStream:
+class ColorizedDiffStream(BinaryIO):
     """Stream wrapper that colorizes diff output line by line using Rich.
 
     This class wraps a binary output stream and applies color formatting
@@ -561,13 +572,18 @@ class ColorizedDiffStream:
         self.console = Console(file=self.text_wrapper, force_terminal=True)
         self.buffer = b""
 
-    def write(self, data: bytes) -> None:
+    def write(self, data: Union[bytes, Buffer]) -> int:  # type: ignore[override,unused-ignore]
         """Write data to the stream, applying colorization.
 
         Args:
             data: Bytes to write
+
+        Returns:
+            Number of bytes written
         """
         # Add new data to buffer
+        if not isinstance(data, bytes):
+            data = bytes(data)
         self.buffer += data
 
         # Process complete lines
@@ -575,7 +591,9 @@ class ColorizedDiffStream:
             line, self.buffer = self.buffer.split(b"\n", 1)
             self._colorize_and_write_line(line + b"\n")
 
-    def writelines(self, lines: list[bytes]) -> None:
+        return len(data)
+
+    def writelines(self, lines: Iterable[Union[bytes, Buffer]]) -> None:  # type: ignore[override,unused-ignore]
         """Write a list of lines to the stream.
 
         Args:
@@ -619,3 +637,80 @@ class ColorizedDiffStream:
             self.text_wrapper.flush()
         if hasattr(self.output_stream, "flush"):
             self.output_stream.flush()
+
+    # BinaryIO interface methods
+    def close(self) -> None:
+        """Close the stream."""
+        self.flush()
+        if hasattr(self.output_stream, "close"):
+            self.output_stream.close()
+
+    @property
+    def closed(self) -> bool:
+        """Check if the stream is closed."""
+        return getattr(self.output_stream, "closed", False)
+
+    def fileno(self) -> int:
+        """Return the file descriptor."""
+        return self.output_stream.fileno()
+
+    def isatty(self) -> bool:
+        """Check if the stream is a TTY."""
+        return getattr(self.output_stream, "isatty", lambda: False)()
+
+    def read(self, n: int = -1) -> bytes:
+        """Read is not supported on this write-only stream."""
+        raise io.UnsupportedOperation("not readable")
+
+    def readable(self) -> bool:
+        """This stream is not readable."""
+        return False
+
+    def readline(self, limit: int = -1) -> bytes:
+        """Read is not supported on this write-only stream."""
+        raise io.UnsupportedOperation("not readable")
+
+    def readlines(self, hint: int = -1) -> list[bytes]:
+        """Read is not supported on this write-only stream."""
+        raise io.UnsupportedOperation("not readable")
+
+    def seek(self, offset: int, whence: int = 0) -> int:
+        """Seek is not supported on this stream."""
+        raise io.UnsupportedOperation("not seekable")
+
+    def seekable(self) -> bool:
+        """This stream is not seekable."""
+        return False
+
+    def tell(self) -> int:
+        """Tell is not supported on this stream."""
+        raise io.UnsupportedOperation("not seekable")
+
+    def truncate(self, size: Optional[int] = None) -> int:
+        """Truncate is not supported on this stream."""
+        raise io.UnsupportedOperation("not truncatable")
+
+    def writable(self) -> bool:
+        """This stream is writable."""
+        return True
+
+    def __enter__(self) -> "ColorizedDiffStream":
+        """Context manager entry."""
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[object],
+    ) -> None:
+        """Context manager exit."""
+        self.flush()
+
+    def __iter__(self) -> "ColorizedDiffStream":
+        """Iterator interface - not supported."""
+        raise io.UnsupportedOperation("not iterable")
+
+    def __next__(self) -> bytes:
+        """Iterator interface - not supported."""
+        raise io.UnsupportedOperation("not iterable")
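
Since ColorizedDiffStream now implements the BinaryIO interface, it can be passed anywhere a binary diff sink is expected, and write() returns the number of bytes consumed as BinaryIO requires. A hedged sketch, assuming the constructor wraps the underlying binary stream (the diff only shows self.output_stream) and that the rich package is installed:

import sys

from dulwich.diff import ColorizedDiffStream

stream = ColorizedDiffStream(sys.stdout.buffer)  # assumption: stream-wrapping constructor
n = stream.write(memoryview(b"+added line\n"))   # any bytes-like object; returns 12
stream.flush()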

+ 43 - 15
dulwich/diff_tree.py

@@ -115,10 +115,10 @@ def _merge_entries(
     while i1 < len1 and i2 < len2:
         entry1 = entries1[i1]
         entry2 = entries2[i2]
-        if entry1.path < entry2.path:
+        if entry1.path < entry2.path:  # type: ignore[operator]
             result.append((entry1, None))
             i1 += 1
-        elif entry1.path > entry2.path:
+        elif entry1.path > entry2.path:  # type: ignore[operator]
             result.append((None, entry2))
             i2 += 1
         else:
@@ -133,7 +133,7 @@ def _merge_entries(
 
 
 def _is_tree(entry: Optional[TreeEntry]) -> bool:
-    if entry is None:
+    if entry is None or entry.mode is None:
         return False
     return stat.S_ISDIR(entry.mode)
 
@@ -174,8 +174,8 @@ def walk_trees(
         if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
             continue
 
-        tree1 = (is_tree1 and entry1 and store[entry1.sha]) or None
-        tree2 = (is_tree2 and entry2 and store[entry2.sha]) or None
+        tree1 = (is_tree1 and entry1 and store[entry1.sha]) or None  # type: ignore[index]
+        tree2 = (is_tree2 and entry2 and store[entry2.sha]) or None  # type: ignore[index]
         path = (
             (entry1.path if entry1 else None)
             or (entry2.path if entry2 else None)
@@ -183,7 +183,7 @@ def walk_trees(
         )
 
         # If we have path filters, check if we should process this tree
-        if paths is not None and (is_tree1 or is_tree2):
+        if paths is not None and (is_tree1 or is_tree2) and path is not None:
             # Special case for root tree
             if path == b"":
                 should_recurse = True
@@ -219,6 +219,7 @@ def walk_trees(
                 tree1 = Tree()
             if tree2 is None:
                 tree2 = Tree()
+            assert path is not None
             todo.extend(reversed(_merge_entries(path, tree1, tree2)))
 
         # Only yield entries that match our path filters
@@ -231,18 +232,24 @@ def walk_trees(
                     # Exact match
                     yield entry1, entry2
                     break
-                elif path.startswith(filter_path + b"/"):
+                elif path is not None and path.startswith(filter_path + b"/"):
                     # This entry is under a filter directory
                     yield entry1, entry2
                     break
-                elif filter_path.startswith(path + b"/") and (is_tree1 or is_tree2):
+                elif (
+                    path is not None
+                    and filter_path.startswith(path + b"/")
+                    and (is_tree1 or is_tree2)
+                ):
                     # This is a parent directory of a filter path
                     yield entry1, entry2
                     break
 
 
 def _skip_tree(entry: Optional[TreeEntry], include_trees: bool) -> Optional[TreeEntry]:
-    if entry is None or (not include_trees and stat.S_ISDIR(entry.mode)):
+    if entry is None or entry.mode is None:
+        return None
+    if not include_trees and stat.S_ISDIR(entry.mode):
         return None
     return entry
 
@@ -297,7 +304,9 @@ def tree_changes(
 
         if entry1 is not None and entry2 is not None:
             if (
-                stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode)
+                entry1.mode is not None
+                and entry2.mode is not None
+                and stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode)
                 and not change_type_same
             ):
                 # File type changed: report as delete/add.
@@ -377,6 +386,7 @@ def tree_changes_for_merge(
             else:
                 assert change.new is not None
                 path = change.new.path
+            assert path is not None
             changes_by_path[path][i] = change
 
     def old_sha(c: TreeChange) -> Optional[ObjectID]:
@@ -503,7 +513,8 @@ def _tree_change_key(entry: TreeChange) -> tuple[bytes, bytes]:
         path1 = path2
     if path2 is None:
         path2 = path1
-    assert path1 is not None and path2 is not None
+    assert path1 is not None
+    assert path2 is not None
     return (path1, path2)
 
 
@@ -558,6 +569,8 @@ class RenameDetector:
         assert change.old is not None and change.new is not None
         if change.old.sha == change.new.sha:
             return False
+        assert change.old.sha is not None
+        assert change.new.sha is not None
         old_obj = self._store[change.old.sha]
         new_obj = self._store[change.new.sha]
         return _similarity_score(old_obj, new_obj) < self._rewrite_threshold
@@ -625,10 +638,14 @@ class RenameDetector:
         for sha, sha_deletes in delete_map.items():
             sha_adds = add_map[sha]
             for (old, is_delete), new in zip(sha_deletes, sha_adds):
+                assert old.mode is not None
+                assert new.mode is not None
                 if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                     continue
                 if is_delete:
+                    assert old.path is not None
                     delete_paths.add(old.path)
+                assert new.path is not None
                 add_paths.add(new.path)
                 new_type = (is_delete and CHANGE_RENAME) or CHANGE_COPY
                 self._changes.append(TreeChange(new_type, old, new))
@@ -638,6 +655,7 @@ class RenameDetector:
             old = sha_deletes[0][0]
             if num_extra_adds > 0:
                 for new in sha_adds[-num_extra_adds:]:
+                    assert new.path is not None
                     add_paths.add(new.path)
                     self._changes.append(TreeChange(CHANGE_COPY, old, new))
         self._prune(add_paths, delete_paths)
@@ -677,15 +695,19 @@ class RenameDetector:
         check_paths = self._rename_threshold is not None
         for delete in self._deletes:
             assert delete.old is not None
+            assert delete.old.mode is not None
             if S_ISGITLINK(delete.old.mode):
                 continue  # Git links don't exist in this repo.
+            assert delete.old.sha is not None
             old_sha = delete.old.sha
             old_obj = self._store[old_sha]
             block_cache[old_sha] = _count_blocks(old_obj)
             for add in self._adds:
                 assert add.new is not None
+                assert add.new.mode is not None
                 if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                     continue
+                assert add.new.sha is not None
                 new_obj = self._store[add.new.sha]
                 score = _similarity_score(old_obj, new_obj, block_cache=block_cache)
                 if score > self._rename_threshold:
@@ -703,9 +725,11 @@ class RenameDetector:
         for _, change in self._candidates:
             assert change.old is not None and change.new is not None
             new_path = change.new.path
+            assert new_path is not None
             if new_path in add_paths:
                 continue
             old_path = change.old.path
+            assert old_path is not None
             orig_type = change.type
             if old_path in delete_paths:
                 change = TreeChange(CHANGE_COPY, change.old, change.new)
@@ -731,10 +755,14 @@ class RenameDetector:
             assert add.new is not None
             path = add.new.path
             delete = delete_map.get(path)
-            if delete is not None:
-                assert delete.old is not None
-                if stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode):
-                    modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)
+            if (
+                delete is not None
+                and delete.old is not None
+                and delete.old.mode is not None
+                and add.new.mode is not None
+                and stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)
+            ):
+                modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)
 
         def check_add_mod(a: TreeChange) -> bool:
             assert a.new is not None

+ 5 - 3
dulwich/dumb.py

@@ -434,9 +434,10 @@ class DumbRemoteHTTPRepo:
 
     def fetch_pack_data(
         self,
+        determine_wants: Callable[[dict[Ref, ObjectID], Optional[int]], list[ObjectID]],
         graph_walker: object,
-        determine_wants: Callable[[dict[Ref, ObjectID]], list[ObjectID]],
         progress: Optional[Callable[[bytes], None]] = None,
+        *,
         get_tagged: Optional[bool] = None,
         depth: Optional[int] = None,
     ) -> Iterator[UnpackedObject]:
@@ -447,8 +448,8 @@ class DumbRemoteHTTPRepo:
         all objects reachable from the wanted refs.
 
         Args:
-          graph_walker: GraphWalker instance (not used for dumb HTTP)
           determine_wants: Function that returns list of wanted SHAs
+          graph_walker: GraphWalker instance (not used for dumb HTTP)
           progress: Optional progress callback
           get_tagged: Whether to get tagged objects
           depth: Depth for shallow clones (not supported for dumb HTTP)
@@ -457,7 +458,7 @@ class DumbRemoteHTTPRepo:
           Iterator of UnpackedObject instances
         """
         refs = self.get_refs()
-        wants = determine_wants(refs)
+        wants = determine_wants(refs, depth)
 
         if not wants:
             return
@@ -493,6 +494,7 @@ class DumbRemoteHTTPRepo:
                 to_fetch.add(obj.object[1])
             elif isinstance(obj, Tree):  # Tree
                 for _, _, item_sha in obj.items():
+                    assert item_sha is not None
                     to_fetch.add(item_sha)
 
             if progress:
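
Note the reordered signature: determine_wants is now the first positional argument and receives both the advertised refs and the requested depth. A minimal sketch of a compatible callback; the remote variable in the commented call is hypothetical:

from typing import Optional

def determine_wants(refs: dict, depth: Optional[int] = None) -> list:
    # Want every advertised ref tip; depth is ignored because dumb HTTP
    # cannot serve shallow clones.
    return list(set(refs.values()))

# With a DumbRemoteHTTPRepo instance `remote`:
# for unpacked in remote.fetch_pack_data(determine_wants, graph_walker=None):
#     ...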

+ 12 - 11
dulwich/fastexport.py

@@ -101,7 +101,7 @@ class GitFastExporter:
         """
         marker = self._allocate_marker()
         self.markers[marker] = blob.id
-        return (commands.BlobCommand(marker, blob.data), marker)
+        return (commands.BlobCommand(marker, blob.data), marker)  # type: ignore[no-untyped-call,unused-ignore]
 
     def emit_blob(self, blob: Blob) -> bytes:
         """Emit a blob to the output stream.
@@ -126,7 +126,7 @@ class GitFastExporter:
         ) in self.store.tree_changes(base_tree, new_tree):
             if new_path is None:
                 if old_path is not None:
-                    yield commands.FileDeleteCommand(old_path)
+                    yield commands.FileDeleteCommand(old_path)  # type: ignore[no-untyped-call,unused-ignore]
                 continue
             marker = b""
             if new_mode is not None and not stat.S_ISDIR(new_mode):
@@ -137,10 +137,10 @@ class GitFastExporter:
                     if isinstance(blob, Blob):
                         marker = self.emit_blob(blob)
             if old_path != new_path and old_path is not None:
-                yield commands.FileRenameCommand(old_path, new_path)
+                yield commands.FileRenameCommand(old_path, new_path)  # type: ignore[no-untyped-call,unused-ignore]
             if old_mode != new_mode or old_hexsha != new_hexsha:
                 prefixed_marker = b":" + marker
-                yield commands.FileModifyCommand(
+                yield commands.FileModifyCommand(  # type: ignore[no-untyped-call,unused-ignore]
                     new_path, new_mode, prefixed_marker, None
                 )
 
@@ -157,7 +157,7 @@ class GitFastExporter:
             merges = []
         author, author_email = split_email(commit.author)
         committer, committer_email = split_email(commit.committer)
-        cmd = commands.CommitCommand(
+        cmd = commands.CommitCommand(  # type: ignore[no-untyped-call,unused-ignore]
             ref,
             marker,
             (author, author_email, commit.author_time, commit.author_timezone),
@@ -192,7 +192,7 @@ class GitFastExporter:
         return marker
 
 
-class GitImportProcessor(processor.ImportProcessor):
+class GitImportProcessor(processor.ImportProcessor):  # type: ignore[misc,unused-ignore]
     """An import processor that imports into a Git repository using Dulwich."""
 
     # FIXME: Batch creation of objects?
@@ -212,7 +212,7 @@ class GitImportProcessor(processor.ImportProcessor):
           verbose: Whether to enable verbose output
           outf: Output file for verbose messages
         """
-        processor.ImportProcessor.__init__(self, params, verbose)
+        processor.ImportProcessor.__init__(self, params, verbose)  # type: ignore[no-untyped-call,unused-ignore]
         self.repo = repo
         self.last_commit = ZERO_SHA
         self.markers: dict[bytes, bytes] = {}
@@ -240,8 +240,8 @@ class GitImportProcessor(processor.ImportProcessor):
         Returns:
           Dictionary of markers to object IDs
         """
-        p = parser.ImportParser(stream)
-        self.process(p.iter_commands)
+        p = parser.ImportParser(stream)  # type: ignore[no-untyped-call,unused-ignore]
+        self.process(p.iter_commands)  # type: ignore[no-untyped-call,unused-ignore]
         return self.markers
 
     def blob_handler(self, cmd: commands.BlobCommand) -> None:
@@ -279,7 +279,7 @@ class GitImportProcessor(processor.ImportProcessor):
         if cmd.from_:
             cmd.from_ = self.lookup_object(cmd.from_)
             self._reset_base(cmd.from_)
-        for filecmd in cmd.iter_files():
+        for filecmd in cmd.iter_files():  # type: ignore[no-untyped-call,unused-ignore]
             if filecmd.name == b"filemodify":
                 if filecmd.data is not None:
                     blob = Blob.from_string(filecmd.data)
@@ -333,6 +333,7 @@ class GitImportProcessor(processor.ImportProcessor):
                 mode,
                 hexsha,
             ) in iter_tree_contents(self.repo.object_store, tree_id):
+                assert path is not None and mode is not None and hexsha is not None
                 self._contents[path] = (mode, hexsha)
 
     def reset_handler(self, cmd: commands.ResetCommand) -> None:
@@ -355,4 +356,4 @@ class GitImportProcessor(processor.ImportProcessor):
 
     def feature_handler(self, cmd: commands.FeatureCommand) -> None:
         """Process a FeatureCommand."""
-        raise fastimport_errors.UnknownFeature(cmd.feature_name)
+        raise fastimport_errors.UnknownFeature(cmd.feature_name)  # type: ignore[no-untyped-call,unused-ignore]
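
A hedged usage sketch for GitImportProcessor.import_stream shown above: feed a git fast-export stream into a dulwich repository and get back the marker-to-object-id mapping. It assumes the processor is constructed with the target repo (as the __init__ body above suggests), requires the fastimport package, and export.fi is a placeholder filename:

from dulwich.fastexport import GitImportProcessor
from dulwich.repo import Repo

repo = Repo(".")
proc = GitImportProcessor(repo)
with open("export.fi", "rb") as stream:
    markers = proc.import_stream(stream)
print(f"imported {len(markers)} marked objects")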

+ 22 - 9
dulwich/file.py

@@ -34,7 +34,9 @@ else:
     Buffer = Union[bytes, bytearray, memoryview]
 
 
-def ensure_dir_exists(dirname: Union[str, bytes, os.PathLike]) -> None:
+def ensure_dir_exists(
+    dirname: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
+) -> None:
     """Ensure a directory exists, creating if necessary."""
     try:
         os.makedirs(dirname)
@@ -66,7 +68,7 @@ def _fancy_rename(oldname: Union[str, bytes], newname: Union[str, bytes]) -> Non
 
 @overload
 def GitFile(
-    filename: Union[str, bytes, os.PathLike],
+    filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
     mode: Literal["wb"],
     bufsize: int = -1,
     mask: int = 0o644,
@@ -75,7 +77,7 @@ def GitFile(
 
 @overload
 def GitFile(
-    filename: Union[str, bytes, os.PathLike],
+    filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
     mode: Literal["rb"] = "rb",
     bufsize: int = -1,
     mask: int = 0o644,
@@ -84,7 +86,7 @@ def GitFile(
 
 @overload
 def GitFile(
-    filename: Union[str, bytes, os.PathLike],
+    filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
     mode: str = "rb",
     bufsize: int = -1,
     mask: int = 0o644,
@@ -92,7 +94,7 @@ def GitFile(
 
 
 def GitFile(
-    filename: Union[str, bytes, os.PathLike],
+    filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
     mode: str = "rb",
     bufsize: int = -1,
     mask: int = 0o644,
@@ -128,7 +130,9 @@ class FileLocked(Exception):
     """File is already locked."""
 
     def __init__(
-        self, filename: Union[str, bytes, os.PathLike], lockfilename: Union[str, bytes]
+        self,
+        filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
+        lockfilename: Union[str, bytes],
     ) -> None:
         """Initialize FileLocked.
 
@@ -152,6 +156,11 @@ class _GitFile(IO[bytes]):
         released. Typically this will happen in a finally block.
     """
 
+    _file: IO[bytes]
+    _filename: Union[str, bytes]
+    _lockfilename: Union[str, bytes]
+    _closed: bool
+
     PROXY_PROPERTIES: ClassVar[set[str]] = {
         "encoding",
         "errors",
@@ -181,7 +190,7 @@ class _GitFile(IO[bytes]):
 
     def __init__(
         self,
-        filename: Union[str, bytes, os.PathLike],
+        filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
         mode: str,
         bufsize: int,
         mask: int,
@@ -272,6 +281,10 @@ class _GitFile(IO[bytes]):
         else:
             self.close()
 
+    def __fspath__(self) -> Union[str, bytes]:
+        """Return the file path for os.fspath() compatibility."""
+        return self._filename
+
     @property
     def closed(self) -> bool:
         """Return whether the file is closed."""
@@ -289,7 +302,7 @@ class _GitFile(IO[bytes]):
 
     # TODO: Remove type: ignore when Python 3.10 support is dropped (Oct 2026)
     # Python 3.9/3.10 have issues with IO[bytes] overload signatures
-    def write(self, data: Buffer, /) -> int:  # type: ignore[override]
+    def write(self, data: Buffer, /) -> int:  # type: ignore[override,unused-ignore]
         return self._file.write(data)
 
     def readline(self, size: int = -1) -> bytes:
@@ -300,7 +313,7 @@ class _GitFile(IO[bytes]):
 
     # TODO: Remove type: ignore when Python 3.10 support is dropped (Oct 2026)
     # Python 3.9/3.10 have issues with IO[bytes] overload signatures
-    def writelines(self, lines: Iterable[Buffer], /) -> None:  # type: ignore[override]
+    def writelines(self, lines: Iterable[Buffer], /) -> None:  # type: ignore[override,unused-ignore]
         return self._file.writelines(lines)
 
     def seek(self, offset: int, whence: int = 0) -> int:
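
The new __fspath__ hook means a locked file returned by GitFile() can be handed to os.fspath()-aware APIs and reports the target filename rather than the ".lock" file. A hedged sketch (the path is a placeholder):

import os

from dulwich.file import GitFile

with GitFile("/tmp/example-ref", "wb") as f:
    f.write(b"0" * 40 + b"\n")
    print(os.fspath(f))  # -> /tmp/example-ref, not /tmp/example-ref.lock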

+ 17 - 3
dulwich/filter_branch.py

@@ -24,7 +24,7 @@
 import os
 import tempfile
 import warnings
-from typing import Callable, Optional
+from typing import Callable, Optional, TypedDict
 
 from .index import Index, build_index_from_tree
 from .object_store import BaseObjectStore
@@ -32,6 +32,19 @@ from .objects import Commit, Tag, Tree
 from .refs import RefsContainer
 
 
+class CommitData(TypedDict, total=False):
+    """TypedDict for commit data fields."""
+
+    author: bytes
+    author_time: int
+    author_timezone: int
+    committer: bytes
+    commit_time: int
+    commit_timezone: int
+    message: bytes
+    encoding: bytes
+
+
 class CommitFilter:
     """Filter for rewriting commits during filter-branch operations."""
 
@@ -39,7 +52,7 @@ class CommitFilter:
         self,
         object_store: BaseObjectStore,
         *,
-        filter_fn: Optional[Callable[[Commit], Optional[dict[str, bytes]]]] = None,
+        filter_fn: Optional[Callable[[Commit], Optional[CommitData]]] = None,
         filter_author: Optional[Callable[[bytes], Optional[bytes]]] = None,
         filter_committer: Optional[Callable[[bytes], Optional[bytes]]] = None,
         filter_message: Optional[Callable[[bytes], Optional[bytes]]] = None,
@@ -122,6 +135,7 @@ class CommitFilter:
             for entry in current_tree.items():
                 if entry.path == part:
                     try:
+                        assert entry.sha is not None
                         obj = self.object_store[entry.sha]
                         if isinstance(obj, Tree):
                             current_tree = obj
@@ -267,7 +281,7 @@ class CommitFilter:
                 return new_parents[0]
 
         # Apply filters
-        new_data = {}
+        new_data: CommitData = {}
 
         # Custom filter function takes precedence
         if self.filter_fn:
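
The new CommitData TypedDict (total=False) documents exactly which commit fields a filter_fn may override. A hedged sketch of a filter that rewrites authorship; the repo variable in the commented call is hypothetical, while CommitFilter's keyword arguments come from the signature above:

from dulwich.filter_branch import CommitData, CommitFilter
from dulwich.objects import Commit

def anonymize(commit: Commit) -> CommitData:
    # Return only the fields to override; omitted fields keep their old values.
    return CommitData(
        author=b"Anon <anon@example.com>",
        committer=b"Anon <anon@example.com>",
    )

# With a repo object `r`:
# commit_filter = CommitFilter(r.object_store, filter_fn=anonymize)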

+ 35 - 13
dulwich/filters.py

@@ -134,12 +134,12 @@ class ProcessFilterDriver:
         self.required = required
         self.cwd = cwd
         self.process_cmd = process_cmd
-        self._process: Optional[subprocess.Popen] = None
+        self._process: Optional[subprocess.Popen[bytes]] = None
         self._protocol: Optional[Protocol] = None
         self._capabilities: set[bytes] = set()
         self._process_lock = threading.Lock()
 
-    def _get_or_start_process(self):
+    def _get_or_start_process(self) -> Optional["Protocol"]:
         """Get or start the long-running process filter."""
         if self._process is None and self.process_cmd:
             from .errors import HangupException
@@ -164,12 +164,16 @@ class ProcessFilterDriver:
                     )
 
                 # Create protocol wrapper
-                def write_func(data):
+                def write_func(data: bytes) -> int:
+                    assert self._process is not None
+                    assert self._process.stdin is not None
                     n = self._process.stdin.write(data)
                     self._process.stdin.flush()
                     return n
 
-                def read_func(size):
+                def read_func(size: int) -> bytes:
+                    assert self._process is not None
+                    assert self._process.stdout is not None
                     return self._process.stdout.read(size)
 
                 self._protocol = Protocol(read_func, write_func)
@@ -186,9 +190,9 @@ class ProcessFilterDriver:
 
                 # Verify handshake (be liberal - accept with or without newlines)
                 if welcome and welcome.rstrip(b"\n\r") != b"git-filter-server":
-                    raise FilterError(f"Invalid welcome message: {welcome}")
+                    raise FilterError(f"Invalid welcome message: {welcome!r}")
                 if version and version.rstrip(b"\n\r") != b"version=2":
-                    raise FilterError(f"Invalid version: {version}")
+                    raise FilterError(f"Invalid version: {version!r}")
                 if flush is not None:
                     raise FilterError("Expected flush packet after handshake")
 
@@ -215,7 +219,7 @@ class ProcessFilterDriver:
             except (OSError, subprocess.SubprocessError, HangupException) as e:
                 self.cleanup()
                 raise FilterError(f"Failed to start process filter: {e}")
-        return self._process
+        return self._protocol
 
     def _use_process_filter(self, data: bytes, operation: str, path: str = "") -> bytes:
         """Use the long-running process filter for the operation."""
@@ -347,7 +351,7 @@ class ProcessFilterDriver:
             logging.warning(f"Optional smudge filter failed: {e}")
             return data
 
-    def cleanup(self):
+    def cleanup(self) -> None:
         """Clean up the process filter."""
         if self._process:
             # Close stdin first to signal the process to quit cleanly
@@ -394,7 +398,7 @@ class ProcessFilterDriver:
                             try:
                                 import signal
 
-                                os.kill(self._process.pid, signal.SIGKILL)
+                                os.kill(self._process.pid, signal.SIGKILL)  # type: ignore[attr-defined,unused-ignore]
                                 self._process.wait(timeout=1)
                             except (ProcessLookupError, subprocess.TimeoutExpired):
                                 pass
@@ -413,23 +417,41 @@ class ProcessFilterDriver:
 
         # Check if the filter commands in config match our current commands
         try:
-            clean_cmd = config.get(("filter", filter_name), "clean")
+            clean_cmd_raw = config.get(("filter", filter_name), "clean")
         except KeyError:
             clean_cmd = None
+        else:
+            clean_cmd = (
+                clean_cmd_raw.decode("utf-8")
+                if isinstance(clean_cmd_raw, bytes)
+                else clean_cmd_raw
+            )
         if clean_cmd != self.clean_cmd:
             return False
 
         try:
-            smudge_cmd = config.get(("filter", filter_name), "smudge")
+            smudge_cmd_raw = config.get(("filter", filter_name), "smudge")
         except KeyError:
             smudge_cmd = None
+        else:
+            smudge_cmd = (
+                smudge_cmd_raw.decode("utf-8")
+                if isinstance(smudge_cmd_raw, bytes)
+                else smudge_cmd_raw
+            )
         if smudge_cmd != self.smudge_cmd:
             return False
 
         try:
-            process_cmd = config.get(("filter", filter_name), "process")
+            process_cmd_raw = config.get(("filter", filter_name), "process")
         except KeyError:
             process_cmd = None
+        else:
+            process_cmd = (
+                process_cmd_raw.decode("utf-8")
+                if isinstance(process_cmd_raw, bytes)
+                else process_cmd_raw
+            )
         if process_cmd != self.process_cmd:
             return False
 
@@ -439,7 +461,7 @@ class ProcessFilterDriver:
 
         return True
 
-    def __del__(self):
+    def __del__(self) -> None:
         """Clean up the process filter on destruction."""
         self.cleanup()
 

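The reuse() hunks above normalize config values before comparing them against the cached commands; extracted as a standalone sketch, with a hypothetical filter name and key:

    def get_filter_cmd(config, filter_name: str, key: str):
        # dulwich Config.get() may return bytes; decode so the comparison is str vs str.
        try:
            raw = config.get(("filter", filter_name), key)
        except KeyError:
            return None
        return raw.decode("utf-8") if isinstance(raw, bytes) else raw
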
+ 4 - 1
dulwich/gc.py

@@ -96,6 +96,7 @@ def find_reachable_objects(
         elif isinstance(obj, Tree):
             # Tree entries
             for entry in obj.items():
+                assert entry.sha is not None
                 if entry.sha not in reachable:
                     pending.append(entry.sha)
                     reachable.add(entry.sha)
@@ -369,7 +370,9 @@ def should_run_gc(repo: "BaseRepo", config: Optional["Config"] = None) -> bool:
 
 
 def maybe_auto_gc(
-    repo: "Repo", config: Optional["Config"] = None, progress: Optional[Callable] = None
+    repo: "Repo",
+    config: Optional["Config"] = None,
+    progress: Optional[Callable[[str], None]] = None,
 ) -> bool:
     """Run automatic garbage collection if needed.
 

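As a usage sketch of the narrowed progress type above, maybe_auto_gc() can be handed any callable taking a single str; the repository path is hypothetical:

    from dulwich.gc import maybe_auto_gc
    from dulwich.repo import Repo

    with Repo("/path/to/repo") as repo:
        ran = maybe_auto_gc(repo, progress=lambda msg: print(msg, end=""))
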
+ 23 - 11
dulwich/graph.py

@@ -22,13 +22,13 @@
 
 from collections.abc import Iterator
 from heapq import heappop, heappush
-from typing import TYPE_CHECKING, Any, Callable, Generic, Optional, TypeVar
+from typing import TYPE_CHECKING, Callable, Generic, Optional, TypeVar
 
 if TYPE_CHECKING:
     from .repo import BaseRepo
 
 from .lru_cache import LRUCache
-from .objects import ObjectID
+from .objects import Commit, ObjectID
 
 T = TypeVar("T")
 
@@ -212,13 +212,17 @@ def find_merge_base(repo: "BaseRepo", commit_ids: list[ObjectID]) -> list[Object
     Returns:
       list of lowest common ancestor commit_ids
     """
-    cmtcache: LRUCache[ObjectID, Any] = LRUCache(max_cache=128)
+    cmtcache: LRUCache[ObjectID, Commit] = LRUCache(max_cache=128)
     parents_provider = repo.parents_provider()
 
     def lookup_stamp(cmtid: ObjectID) -> int:
         if cmtid not in cmtcache:
-            cmtcache[cmtid] = repo.object_store[cmtid]
-        return cmtcache[cmtid].commit_time
+            obj = repo.object_store[cmtid]
+            assert isinstance(obj, Commit)
+            cmtcache[cmtid] = obj
+        commit_time = cmtcache[cmtid].commit_time
+        assert isinstance(commit_time, int)
+        return commit_time
 
     def lookup_parents(cmtid: ObjectID) -> list[ObjectID]:
         commit = None
@@ -250,13 +254,17 @@ def find_octopus_base(repo: "BaseRepo", commit_ids: list[ObjectID]) -> list[Obje
     Returns:
       list of lowest common ancestor commit_ids
     """
-    cmtcache: LRUCache[ObjectID, Any] = LRUCache(max_cache=128)
+    cmtcache: LRUCache[ObjectID, Commit] = LRUCache(max_cache=128)
     parents_provider = repo.parents_provider()
 
     def lookup_stamp(cmtid: ObjectID) -> int:
         if cmtid not in cmtcache:
-            cmtcache[cmtid] = repo.object_store[cmtid]
-        return cmtcache[cmtid].commit_time
+            obj = repo.object_store[cmtid]
+            assert isinstance(obj, Commit)
+            cmtcache[cmtid] = obj
+        commit_time = cmtcache[cmtid].commit_time
+        assert isinstance(commit_time, int)
+        return commit_time
 
     def lookup_parents(cmtid: ObjectID) -> list[ObjectID]:
         commit = None
@@ -294,13 +302,17 @@ def can_fast_forward(repo: "BaseRepo", c1: bytes, c2: bytes) -> bool:
       c1: Commit id for first commit
       c2: Commit id for second commit
     """
-    cmtcache: LRUCache[ObjectID, Any] = LRUCache(max_cache=128)
+    cmtcache: LRUCache[ObjectID, Commit] = LRUCache(max_cache=128)
     parents_provider = repo.parents_provider()
 
     def lookup_stamp(cmtid: ObjectID) -> int:
         if cmtid not in cmtcache:
-            cmtcache[cmtid] = repo.object_store[cmtid]
-        return cmtcache[cmtid].commit_time
+            obj = repo.object_store[cmtid]
+            assert isinstance(obj, Commit)
+            cmtcache[cmtid] = obj
+        commit_time = cmtcache[cmtid].commit_time
+        assert isinstance(commit_time, int)
+        return commit_time
 
     def lookup_parents(cmtid: ObjectID) -> list[ObjectID]:
         commit = None

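The three hunks above apply the same typed caching pattern; pulled out as a standalone sketch (object_store is any dulwich object store):

    from dulwich.lru_cache import LRUCache
    from dulwich.objects import Commit

    def make_lookup_stamp(object_store):
        cache: LRUCache[bytes, Commit] = LRUCache(max_cache=128)

        def lookup_stamp(commit_id: bytes) -> int:
            if commit_id not in cache:
                obj = object_store[commit_id]
                # Narrow to Commit so the commit_time access is well-typed.
                assert isinstance(obj, Commit)
                cache[commit_id] = obj
            return cache[commit_id].commit_time

        return lookup_stamp
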
+ 4 - 4
dulwich/greenthreads.py

@@ -84,7 +84,7 @@ class GreenThreadsMissingObjectFinder(MissingObjectFinder):
         object_store: BaseObjectStore,
         haves: list[ObjectID],
         wants: list[ObjectID],
-        progress: Optional[Callable[[str], None]] = None,
+        progress: Optional[Callable[[bytes], None]] = None,
         get_tagged: Optional[Callable[[], dict[ObjectID, ObjectID]]] = None,
         concurrency: int = 1,
         get_parents: Optional[Callable[[ObjectID], list[ObjectID]]] = None,
@@ -129,12 +129,12 @@ class GreenThreadsMissingObjectFinder(MissingObjectFinder):
         for t in have_tags:
             self.sha_done.add(t)
         missing_tags = want_tags.difference(have_tags)
-        wants = missing_commits.union(missing_tags)
+        all_wants = missing_commits.union(missing_tags)
         self.objects_to_send: set[
             tuple[ObjectID, Optional[bytes], Optional[int], bool]
-        ] = {(w, None, 0, False) for w in wants}
+        ] = {(w, None, 0, False) for w in all_wants}
         if progress is None:
-            self.progress = lambda x: None
+            self.progress: Callable[[bytes], None] = lambda x: None
         else:
             self.progress = progress
         self._tagged = (get_tagged and get_tagged()) or {}

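Note that the finder's progress callback now receives bytes rather than str, so a caller writing to a text stream decodes explicitly, e.g.:

    import sys

    def progress(msg: bytes) -> None:
        sys.stderr.write(msg.decode("utf-8", errors="replace"))
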
+ 5 - 3
dulwich/ignore.py

@@ -47,7 +47,7 @@ def _pattern_to_str(pattern: Union["Pattern", bytes, str]) -> str:
     return pattern_data.decode() if isinstance(pattern_data, bytes) else pattern_data
 
 
-def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
+def _check_parent_exclusion(path: str, matching_patterns: list["Pattern"]) -> bool:
     """Check if a parent directory exclusion prevents negation patterns from taking effect.
 
     Args:
@@ -484,7 +484,7 @@ class IgnoreFilter:
 
     @classmethod
     def from_path(
-        cls, path: Union[str, os.PathLike], ignorecase: bool = False
+        cls, path: Union[str, os.PathLike[str]], ignorecase: bool = False
     ) -> "IgnoreFilter":
         """Create an IgnoreFilter from a file path.
 
@@ -665,7 +665,9 @@ class IgnoreFilterManager:
 
         return result
 
-    def _apply_directory_traversal_rule(self, path: str, matches: list) -> bool:
+    def _apply_directory_traversal_rule(
+        self, path: str, matches: list["Pattern"]
+    ) -> bool:
         """Apply directory traversal rule for issue #1203.
 
         If a directory would be ignored by a ** pattern, but there are negation

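A small usage sketch for the from_path() signature above; the .gitignore path is hypothetical:

    from dulwich.ignore import IgnoreFilter

    ignore_filter = IgnoreFilter.from_path("/path/to/.gitignore")
    if ignore_filter.is_ignored("build/output.o"):
        print("ignored")
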
+ 55 - 18
dulwich/index.py

@@ -45,7 +45,7 @@ if TYPE_CHECKING:
     from .config import Config
     from .diff_tree import TreeChange
     from .file import _GitFile
-    from .line_ending import BlobNormalizer
+    from .filters import FilterBlobNormalizer
     from .object_store import BaseObjectStore
     from .repo import Repo
 
@@ -63,6 +63,12 @@ from .objects import (
 )
 from .pack import ObjectContainer, SHA1Reader, SHA1Writer
 
+# Type alias for recursive tree structure used in commit_tree
+if sys.version_info >= (3, 10):
+    TreeDict = dict[bytes, Union["TreeDict", tuple[int, bytes]]]
+else:
+    TreeDict = dict[bytes, Any]
+
 # 2-bit stage (during merge)
 FLAG_STAGEMASK = 0x3000
 FLAG_STAGESHIFT = 12
@@ -971,7 +977,7 @@ class Index:
 
     def __init__(
         self,
-        filename: Union[bytes, str, os.PathLike],
+        filename: Union[bytes, str, os.PathLike[str]],
         read: bool = True,
         skip_hash: bool = False,
         version: Optional[int] = None,
@@ -1225,15 +1231,15 @@ def commit_tree(
     Returns:
       SHA1 of the created tree.
     """
-    trees: dict[bytes, Any] = {b"": {}}
+    trees: dict[bytes, TreeDict] = {b"": {}}
 
-    def add_tree(path: bytes) -> dict[bytes, Any]:
+    def add_tree(path: bytes) -> TreeDict:
         if path in trees:
             return trees[path]
         dirname, basename = pathsplit(path)
         t = add_tree(dirname)
         assert isinstance(basename, bytes)
-        newtree: dict[bytes, Any] = {}
+        newtree: TreeDict = {}
         t[basename] = newtree
         trees[path] = newtree
         return newtree
@@ -1299,6 +1305,7 @@ def changes_from_tree(
 
     if tree is not None:
         for name, mode, sha in iter_tree_contents(object_store, tree):
+            assert name is not None and mode is not None and sha is not None
             try:
                 (other_sha, other_mode) = lookup_entry(name)
             except KeyError:
@@ -1409,7 +1416,10 @@ def build_file_from_blob(
     honor_filemode: bool = True,
     tree_encoding: str = "utf-8",
     symlink_fn: Optional[
-        Callable[[Union[str, bytes, os.PathLike], Union[str, bytes, os.PathLike]], None]
+        Callable[
+            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
+            None,
+        ]
     ] = None,
 ) -> os.stat_result:
     """Build a file or symlink on disk based on a Git object.
@@ -1596,9 +1606,12 @@ def build_index_from_tree(
     honor_filemode: bool = True,
     validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
     symlink_fn: Optional[
-        Callable[[Union[str, bytes, os.PathLike], Union[str, bytes, os.PathLike]], None]
+        Callable[
+            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
+            None,
+        ]
     ] = None,
-    blob_normalizer: Optional["BlobNormalizer"] = None,
+    blob_normalizer: Optional["FilterBlobNormalizer"] = None,
     tree_encoding: str = "utf-8",
 ) -> None:
     """Generate and materialize index from a tree.
@@ -1625,6 +1638,9 @@ def build_index_from_tree(
         root_path = os.fsencode(root_path)
 
     for entry in iter_tree_contents(object_store, tree_id):
+        assert (
+            entry.path is not None and entry.mode is not None and entry.sha is not None
+        )
         if not validate_path(entry.path, validate_path_element):
             continue
         full_path = _tree_to_fs_path(root_path, entry.path, tree_encoding)
@@ -1866,7 +1882,7 @@ def _check_file_matches(
     entry_mode: int,
     current_stat: os.stat_result,
     honor_filemode: bool,
-    blob_normalizer: Optional["BlobNormalizer"] = None,
+    blob_normalizer: Optional["FilterBlobNormalizer"] = None,
     tree_path: Optional[bytes] = None,
 ) -> bool:
     """Check if a file on disk matches the expected git object.
@@ -1946,6 +1962,7 @@ def _transition_to_submodule(
         ensure_submodule_placeholder(repo, path)
 
     st = os.lstat(full_path)
+    assert entry.sha is not None
     index[path] = index_entry_from_stat(st, entry.sha)
 
 
@@ -1958,12 +1975,16 @@ def _transition_to_file(
     index: Index,
     honor_filemode: bool,
     symlink_fn: Optional[
-        Callable[[Union[str, bytes, os.PathLike], Union[str, bytes, os.PathLike]], None]
+        Callable[
+            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
+            None,
+        ]
     ],
-    blob_normalizer: Optional["BlobNormalizer"],
+    blob_normalizer: Optional["FilterBlobNormalizer"],
     tree_encoding: str = "utf-8",
 ) -> None:
     """Transition any type to regular file or symlink."""
+    assert entry.sha is not None and entry.mode is not None
     # Check if we need to update
     if (
         current_stat is not None
@@ -2123,6 +2144,7 @@ def detect_case_only_renames(
     # Pre-normalize all paths once to avoid repeated normalization
     for change in changes:
         if change.type == CHANGE_DELETE and change.old:
+            assert change.old.path is not None
             try:
                 normalized = normalize_path(change.old.path)
             except UnicodeDecodeError:
@@ -2136,6 +2158,7 @@ def detect_case_only_renames(
                 old_paths_normalized[normalized] = change.old.path
                 old_changes[change.old.path] = change
         elif change.type == CHANGE_RENAME and change.old:
+            assert change.old.path is not None
             # Treat RENAME as DELETE + ADD for case-only detection
             try:
                 normalized = normalize_path(change.old.path)
@@ -2154,6 +2177,7 @@ def detect_case_only_renames(
             change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY)
             and change.new
         ):
+            assert change.new.path is not None
             try:
                 normalized = normalize_path(change.new.path)
             except UnicodeDecodeError:
@@ -2212,10 +2236,13 @@ def update_working_tree(
     honor_filemode: bool = True,
     validate_path_element: Optional[Callable[[bytes], bool]] = None,
     symlink_fn: Optional[
-        Callable[[Union[str, bytes, os.PathLike], Union[str, bytes, os.PathLike]], None]
+        Callable[
+            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
+            None,
+        ]
     ] = None,
     force_remove_untracked: bool = False,
-    blob_normalizer: Optional["BlobNormalizer"] = None,
+    blob_normalizer: Optional["FilterBlobNormalizer"] = None,
     tree_encoding: str = "utf-8",
     allow_overwrite_modified: bool = False,
 ) -> None:
@@ -2278,6 +2305,7 @@ def update_working_tree(
         if change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY):
             assert change.new is not None
             path = change.new.path
+            assert path is not None
             if b"/" in path:  # This is a file inside a directory
                 # Check if any parent path exists as a file in the old tree or changes
                 parts = path.split(b"/")
@@ -2319,6 +2347,9 @@ def update_working_tree(
             if old_change:
                 # Check if file has been modified
                 assert old_change.old is not None
+                assert (
+                    old_change.old.sha is not None and old_change.old.mode is not None
+                )
                 file_matches = _check_file_matches(
                     repo.object_store,
                     full_path,
@@ -2340,6 +2371,7 @@ def update_working_tree(
             # Only check files that are being modified or deleted
             if change.type in (CHANGE_MODIFY, CHANGE_DELETE) and change.old:
                 path = change.old.path
+                assert path is not None
                 if path.startswith(b".git") or not validate_path(
                     path, validate_path_element
                 ):
@@ -2357,6 +2389,7 @@ def update_working_tree(
 
                 if stat.S_ISREG(current_stat.st_mode):
                     # Check if working tree file differs from old tree
+                    assert change.old.sha is not None and change.old.mode is not None
                     file_matches = _check_file_matches(
                         repo.object_store,
                         full_path,
@@ -2380,7 +2413,7 @@ def update_working_tree(
     for change in changes:
         if change.type in (CHANGE_DELETE, CHANGE_RENAME):
             # Remove file/directory
-            assert change.old is not None
+            assert change.old is not None and change.old.path is not None
             path = change.old.path
             if path.startswith(b".git") or not validate_path(
                 path, validate_path_element
@@ -2407,7 +2440,11 @@ def update_working_tree(
             CHANGE_RENAME,
         ):
             # Add or modify file
-            assert change.new is not None
+            assert (
+                change.new is not None
+                and change.new.path is not None
+                and change.new.mode is not None
+            )
             path = change.new.path
             if path.startswith(b".git") or not validate_path(
                 path, validate_path_element
@@ -2449,7 +2486,7 @@ def _check_entry_for_changes(
     tree_path: bytes,
     entry: Union[IndexEntry, ConflictedIndexEntry],
     root_path: bytes,
-    filter_blob_callback: Optional[Callable] = None,
+    filter_blob_callback: Optional[Callable[[bytes, bytes], bytes]] = None,
 ) -> Optional[bytes]:
     """Check a single index entry for changes.
 
@@ -2478,7 +2515,7 @@ def _check_entry_for_changes(
         blob = blob_from_path_and_stat(full_path, st)
 
         if filter_blob_callback is not None:
-            blob = filter_blob_callback(blob, tree_path)
+            blob.data = filter_blob_callback(blob.data, tree_path)
     except FileNotFoundError:
         # The file was removed, so we assume that counts as
         # different from whatever file used to exist.
@@ -2492,7 +2529,7 @@ def _check_entry_for_changes(
 def get_unstaged_changes(
     index: Index,
     root_path: Union[str, bytes],
-    filter_blob_callback: Optional[Callable] = None,
+    filter_blob_callback: Optional[Callable[..., Any]] = None,
     preload_index: bool = False,
 ) -> Generator[bytes, None, None]:
     """Walk through an index and check for differences against working tree.

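For illustration, the recursive shape the new TreeDict alias describes for commit_tree(): interior nodes are nested dicts keyed by path component, leaves are (mode, sha) tuples (the shas below are placeholders):

    trees = {
        b"": {
            b"docs": {
                b"README.md": (0o100644, b"<blob sha>"),
            },
            b"setup.py": (0o100644, b"<blob sha>"),
        },
    }
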
+ 6 - 6
dulwich/lfs.py

@@ -39,7 +39,7 @@ import os
 import tempfile
 from collections.abc import Iterable
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, BinaryIO, Optional, Union
+from typing import TYPE_CHECKING, Any, BinaryIO, Optional, Union
 from urllib.parse import urljoin, urlparse
 from urllib.request import Request, urlopen
 
@@ -328,7 +328,7 @@ class LFSFilterDriver:
         """Clean up any resources held by this filter driver."""
         # LFSFilterDriver doesn't hold any resources that need cleanup
 
-    def reuse(self, config, filter_name: str) -> bool:
+    def reuse(self, config: Optional["Config"], filter_name: str) -> bool:
         """Check if this filter driver should be reused with the given configuration."""
         # LFSFilterDriver is stateless and lightweight, no need to cache
         return False
@@ -415,7 +415,7 @@ class LFSClient:
         if self._pool_manager is None:
             from dulwich.client import default_urllib3_manager
 
-            self._pool_manager = default_urllib3_manager(self.config)  # type: ignore[assignment]
+            self._pool_manager = default_urllib3_manager(self.config)
         return self._pool_manager
 
     def _make_request(
@@ -442,7 +442,7 @@ class LFSClient:
             raise ValueError(
                 f"HTTP {response.status}: {response.data.decode('utf-8', errors='ignore')}"
             )
-        return response.data  # type: ignore[return-value]
+        return response.data
 
     def batch(
         self,
@@ -478,7 +478,7 @@ class LFSClient:
         response_data = json.loads(response)
         return self._parse_batch_response(response_data)
 
-    def _parse_batch_response(self, data: dict) -> LFSBatchResponse:
+    def _parse_batch_response(self, data: dict[str, Any]) -> LFSBatchResponse:
         """Parse JSON response into LFSBatchResponse dataclass."""
         objects = []
         for obj_data in data.get("objects", []):
@@ -558,7 +558,7 @@ class LFSClient:
         if actual_oid != oid:
             raise LFSError(f"Downloaded OID {actual_oid} != expected {oid}")
 
-        return content  # type: ignore[return-value]
+        return content
 
     def upload(
         self, oid: str, size: int, content: bytes, ref: Optional[str] = None

+ 2 - 1
dulwich/lfs_server.py

@@ -24,6 +24,7 @@
 import hashlib
 import json
 import tempfile
+import typing
 from http.server import BaseHTTPRequestHandler, HTTPServer
 from typing import Optional
 
@@ -35,7 +36,7 @@ class LFSRequestHandler(BaseHTTPRequestHandler):
 
     server: "LFSServer"  # Type annotation for the server attribute
 
-    def send_json_response(self, status_code: int, data: dict) -> None:
+    def send_json_response(self, status_code: int, data: dict[str, typing.Any]) -> None:
         """Send a JSON response."""
         response = json.dumps(data).encode("utf-8")
         self.send_response(status_code)

+ 1 - 1
dulwich/line_ending.py

@@ -287,7 +287,7 @@ class LineEndingFilter(FilterDriver):
         """Clean up any resources held by this filter driver."""
         # LineEndingFilter doesn't hold any resources that need cleanup
 
-    def reuse(self, config, filter_name: str) -> bool:
+    def reuse(self, config: "StackedConfig", filter_name: str) -> bool:
         """Check if this filter driver should be reused with the given configuration."""
         # LineEndingFilter is lightweight and should always be recreated
         # to ensure it uses the latest configuration

+ 23 - 10
dulwich/merge.py

@@ -1,11 +1,15 @@
 """Git merge implementation."""
 
-from typing import Optional
+from difflib import SequenceMatcher
+from typing import TYPE_CHECKING, Optional
 
-try:
+if TYPE_CHECKING:
     import merge3
-except ImportError:
-    merge3 = None  # type: ignore
+else:
+    try:
+        import merge3
+    except ImportError:
+        merge3 = None  # type: ignore[assignment]
 
 from dulwich.attrs import GitAttributes
 from dulwich.config import Config
@@ -14,14 +18,20 @@ from dulwich.object_store import BaseObjectStore
 from dulwich.objects import S_ISGITLINK, Blob, Commit, Tree, is_blob, is_tree
 
 
-def make_merge3(*args, **kwargs) -> "merge3.Merge3":
+def make_merge3(
+    base: list[bytes],
+    a: list[bytes],
+    b: list[bytes],
+    is_cherrypick: bool = False,
+    sequence_matcher: Optional[type[SequenceMatcher[bytes]]] = None,
+) -> "merge3.Merge3":
     """Return a Merge3 object, or raise ImportError if merge3 is not installed."""
     if merge3 is None:
         raise ImportError(
             "merge3 module is required for three-way merging. "
             "Install it with: pip install merge3"
         )
-    return merge3.Merge3(*args, **kwargs)
+    return merge3.Merge3(base, a, b, is_cherrypick, sequence_matcher)
 
 
 class MergeConflict(Exception):
@@ -55,7 +65,7 @@ def _can_merge_lines(
 
 if merge3 is not None:
 
-    def _merge3_to_bytes(m: merge3.Merge3) -> bytes:
+    def _merge3_to_bytes(m: "merge3.Merge3") -> bytes:
         """Convert merge3 result to bytes with conflict markers.
 
         Args:
@@ -65,7 +75,7 @@ if merge3 is not None:
             Merged content as bytes
         """
         result = []
-        for group in m.merge_groups():
+        for group in m.merge_groups():  # type: ignore[no-untyped-call,unused-ignore]
             if group[0] == "unchanged":
                 result.extend(group[1])
             elif group[0] == "a":
@@ -291,20 +301,23 @@ class Merger:
         Returns:
             tuple of (merged_tree, list_of_conflicted_paths)
         """
-        conflicts = []
-        merged_entries = {}
+        conflicts: list[bytes] = []
+        merged_entries: dict[bytes, tuple[Optional[int], Optional[bytes]]] = {}
 
         # Get all paths from all trees
         all_paths = set()
 
         if base_tree:
             for entry in base_tree.items():
+                assert entry.path is not None
                 all_paths.add(entry.path)
 
         for entry in ours_tree.items():
+            assert entry.path is not None
             all_paths.add(entry.path)
 
         for entry in theirs_tree.items():
+            assert entry.path is not None
             all_paths.add(entry.path)
 
         # Process each path

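With the explicit signature above, a call to make_merge3() looks like the following sketch (the merge3 package must be installed; inputs are lists of byte lines):

    from dulwich.merge import make_merge3

    base = [b"line 1\n", b"line 2\n"]
    ours = [b"line 1 changed\n", b"line 2\n"]
    theirs = [b"line 1\n", b"line 2 changed\n"]
    m = make_merge3(base, ours, theirs)
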
+ 6 - 6
dulwich/merge_drivers.py

@@ -23,7 +23,7 @@
 import os
 import subprocess
 import tempfile
-from typing import Any, Callable, Optional, Protocol
+from typing import Callable, Optional, Protocol
 
 from .config import Config
 
@@ -143,7 +143,7 @@ class MergeDriverRegistry:
             config: Git configuration object
         """
         self._drivers: dict[str, MergeDriver] = {}
-        self._factories: dict[str, Any] = {}
+        self._factories: dict[str, Callable[[], MergeDriver]] = {}
         self._config = config
 
         # Register built-in drivers
@@ -193,10 +193,10 @@ class MergeDriverRegistry:
 
         # Finally check configuration
         if self._config:
-            driver = self._create_from_config(name)
-            if driver:
-                self._drivers[name] = driver
-                return driver
+            config_driver = self._create_from_config(name)
+            if config_driver is not None:
+                self._drivers[name] = config_driver
+                return config_driver
 
         return None
 

+ 35 - 4
dulwich/notes.py

@@ -63,9 +63,11 @@ def get_note_fanout_level(tree: Tree, object_store: "BaseObjectStore") -> int:
         """
         count = 0
         for name, mode, sha in tree.items():
+            assert mode is not None
             if stat.S_ISREG(mode):
                 count += 1
             elif stat.S_ISDIR(mode) and level < 2:  # Only recurse 2 levels deep
+                assert sha is not None
                 try:
                     subtree = object_store[sha]
                     assert isinstance(subtree, Tree)
@@ -149,6 +151,8 @@ class NotesTree:
         dir_names = []
 
         for name, mode, sha in self._tree.items():
+            assert name is not None
+            assert mode is not None
             if stat.S_ISDIR(mode):
                 has_dirs = True
                 dir_names.append(name)
@@ -159,6 +163,8 @@ class NotesTree:
         if has_files and not has_dirs:
             # Check if any file names are full 40-char hex strings
             for name, mode, sha in self._tree.items():
+                assert name is not None
+                assert mode is not None
                 if stat.S_ISREG(mode) and len(name) == 40:
                     try:
                         int(name, 16)  # Verify it's a valid hex string
@@ -184,6 +190,8 @@ class NotesTree:
                     # Check if this subtree also has 2-char hex directories
                     sub_has_dirs = False
                     for sub_name, sub_mode, sub_sha in sample_tree.items():
+                        assert sub_name is not None
+                        assert sub_mode is not None
                         if stat.S_ISDIR(sub_mode) and len(sub_name) == 2:
                             try:
                                 int(sub_name, 16)
@@ -231,7 +239,9 @@ class NotesTree:
             components = path.split(b"/")
 
             # Build new tree structure
-            def update_tree(tree: Tree, components: list, blob_sha: bytes) -> Tree:
+            def update_tree(
+                tree: Tree, components: list[bytes], blob_sha: bytes
+            ) -> Tree:
                 """Update tree with new note entry.
 
                 Args:
@@ -247,6 +257,9 @@ class NotesTree:
                     new_tree = Tree()
                     for name, mode, sha in tree.items():
                         if name != components[0]:
+                            assert name is not None
+                            assert mode is not None
+                            assert sha is not None
                             new_tree.add(name, mode, sha)
                     new_tree.add(components[0], stat.S_IFREG | 0o644, blob_sha)
                     return new_tree
@@ -257,6 +270,7 @@ class NotesTree:
                     for name, mode, sha in tree.items():
                         if name == components[0]:
                             # Update this subtree
+                            assert mode is not None and sha is not None
                             if stat.S_ISDIR(mode):
                                 subtree = self._object_store[sha]
                                 assert isinstance(subtree, Tree)
@@ -268,6 +282,11 @@ class NotesTree:
                             new_tree.add(name, stat.S_IFDIR, new_subtree.id)
                             found = True
                         else:
+                            assert (
+                                name is not None
+                                and mode is not None
+                                and sha is not None
+                            )
                             new_tree.add(name, mode, sha)
 
                     if not found:
@@ -299,6 +318,11 @@ class NotesTree:
         new_tree = Tree()
         for existing_name, existing_mode, existing_sha in tree.items():
             if existing_name != name:
+                assert (
+                    existing_name is not None
+                    and existing_mode is not None
+                    and existing_sha is not None
+                )
                 new_tree.add(existing_name, existing_mode, existing_sha)
         new_tree.add(name, mode, sha)
         self._object_store.add_object(new_tree)
@@ -358,7 +382,8 @@ class NotesTree:
             note_obj = self._object_store[note_sha]
             if not isinstance(note_obj, Blob):
                 return None
-            return note_obj.data
+            data: bytes = note_obj.data
+            return data
         except KeyError:
             return None
 
@@ -386,7 +411,7 @@ class NotesTree:
         components = path.split(b"/")
 
         # Build new tree structure
-        def update_tree(tree: Tree, components: list, blob_sha: bytes) -> Tree:
+        def update_tree(tree: Tree, components: list[bytes], blob_sha: bytes) -> Tree:
             """Update tree with new note entry.
 
             Args:
@@ -402,6 +427,7 @@ class NotesTree:
                 new_tree = Tree()
                 for name, mode, sha in tree.items():
                     if name != components[0]:
+                        assert name is not None and mode is not None and sha is not None
                         new_tree.add(name, mode, sha)
                 new_tree.add(components[0], stat.S_IFREG | 0o644, blob_sha)
                 return new_tree
@@ -412,6 +438,7 @@ class NotesTree:
                 for name, mode, sha in tree.items():
                     if name == components[0]:
                         # Update this subtree
+                        assert mode is not None and sha is not None
                         if stat.S_ISDIR(mode):
                             subtree = self._object_store[sha]
                             assert isinstance(subtree, Tree)
@@ -423,6 +450,7 @@ class NotesTree:
                         new_tree.add(name, stat.S_IFDIR, new_subtree.id)
                         found = True
                     else:
+                        assert name is not None and mode is not None and sha is not None
                         new_tree.add(name, mode, sha)
 
                 if not found:
@@ -457,7 +485,7 @@ class NotesTree:
         components = path.split(b"/")
 
         # Build new tree structure without the note
-        def remove_from_tree(tree: Tree, components: list) -> Optional[Tree]:
+        def remove_from_tree(tree: Tree, components: list[bytes]) -> Optional[Tree]:
             """Remove note entry from tree.
 
             Args:
@@ -473,6 +501,7 @@ class NotesTree:
                 found = False
                 for name, mode, sha in tree.items():
                     if name != components[0]:
+                        assert name is not None and mode is not None and sha is not None
                         new_tree.add(name, mode, sha)
                     else:
                         found = True
@@ -487,6 +516,7 @@ class NotesTree:
                 new_tree = Tree()
                 modified = False
                 for name, mode, sha in tree.items():
+                    assert name is not None and mode is not None and sha is not None
                     if name == components[0] and stat.S_ISDIR(mode):
                         # Update this subtree
                         subtree = self._object_store[sha]
@@ -532,6 +562,7 @@ class NotesTree:
                 Tuples of (object_sha, note_sha)
             """
             for name, mode, sha in tree.items():
+                assert name is not None and mode is not None and sha is not None
                 if stat.S_ISDIR(mode):  # Directory
                     subtree = self._object_store[sha]
                     assert isinstance(subtree, Tree)

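For reference, an illustration (not dulwich API) of the components list the typed update_tree()/remove_from_tree() helpers receive under a level-1 fanout layout:

    object_sha = b"ab12" + b"0" * 36               # annotated object id (40 hex chars)
    path = object_sha[:2] + b"/" + object_sha[2:]  # b"ab/12000...0"
    components = path.split(b"/")                  # [b"ab", b"12000...0"]
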
+ 258 - 132
dulwich/object_store.py

File diff suppressed because it is too large


+ 16 - 9
dulwich/objects.py

@@ -36,6 +36,7 @@ from typing import (
     TYPE_CHECKING,
     NamedTuple,
     Optional,
+    TypeVar,
     Union,
 )
 
@@ -153,9 +154,10 @@ def valid_hexsha(hex: Union[bytes, str]) -> bool:
         return True
 
 
-def hex_to_filename(
-    path: Union[str, bytes], hex: Union[str, bytes]
-) -> Union[str, bytes]:
+PathT = TypeVar("PathT", str, bytes)
+
+
+def hex_to_filename(path: PathT, hex: Union[str, bytes]) -> PathT:
     """Takes a hex sha and returns its filename relative to the given path."""
     # os.path.join accepts bytes or unicode, but all args must be of the same
     # type. Make sure that hex which is expected to be bytes, is the same type
@@ -1112,9 +1114,9 @@ class Tag(ShaFile):
 class TreeEntry(NamedTuple):
     """Named tuple encapsulating a single tree entry."""
 
-    path: bytes
-    mode: int
-    sha: bytes
+    path: Optional[bytes]
+    mode: Optional[int]
+    sha: Optional[bytes]
 
     def in_path(self, path: bytes) -> "TreeEntry":
         """Return a copy of this entry with the given path prepended."""
@@ -1390,7 +1392,7 @@ class Tree(ShaFile):
             last = entry
 
     def _serialize(self) -> list[bytes]:
-        return list(serialize_tree(self.iteritems()))
+        return list(serialize_tree(self.iteritems()))  # type: ignore[arg-type]
 
     def as_pretty_string(self) -> str:
         """Return a human-readable string representation of this tree.
@@ -1399,8 +1401,13 @@ class Tree(ShaFile):
           Pretty-printed tree entries
         """
         text: list[str] = []
-        for name, mode, hexsha in self.iteritems():
-            text.append(pretty_format_tree_entry(name, mode, hexsha))
+        for entry in self.iteritems():
+            if (
+                entry.path is not None
+                and entry.mode is not None
+                and entry.sha is not None
+            ):
+                text.append(pretty_format_tree_entry(entry.path, entry.mode, entry.sha))
         return "".join(text)
 
     def lookup_path(

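A quick sketch of what the PathT TypeVar above buys: the return type of hex_to_filename() now follows the type of the path argument:

    from dulwich.objects import hex_to_filename

    hex_to_filename("objects", "0123456789" * 4)    # str in, str out: "objects/01/2345..."
    hex_to_filename(b"objects", b"0123456789" * 4)  # bytes in, bytes out
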
+ 208 - 134
dulwich/pack.py

@@ -65,7 +65,6 @@ from typing import (
     Protocol,
     TypeVar,
     Union,
-    cast,
 )
 
 try:
@@ -75,6 +74,11 @@ except ImportError:
 else:
     has_mmap = True
 
+if sys.version_info >= (3, 12):
+    from collections.abc import Buffer
+else:
+    Buffer = Union[bytes, bytearray, memoryview]
+
 if TYPE_CHECKING:
     from _hashlib import HASH as HashObject
 
@@ -132,7 +136,7 @@ class ObjectContainer(Protocol):
     def add_objects(
         self,
         objects: Sequence[tuple[ShaFile, Optional[str]]],
-        progress: Optional[Callable[[str], None]] = None,
+        progress: Optional[Callable[..., None]] = None,
     ) -> Optional["Pack"]:
         """Add a set of objects to this object store.
 
@@ -190,7 +194,8 @@ class PackedObjectContainer(ObjectContainer):
 
     def iter_unpacked_subset(
         self,
-        shas: set[bytes],
+        shas: Iterable[bytes],
+        *,
         include_comp: bool = False,
         allow_missing: bool = False,
         convert_ofs_delta: bool = True,
@@ -458,7 +463,7 @@ def iter_sha1(iter: Iterable[bytes]) -> bytes:
     return sha.hexdigest().encode("ascii")
 
 
-def load_pack_index(path: Union[str, os.PathLike]) -> "PackIndex":
+def load_pack_index(path: Union[str, os.PathLike[str]]) -> "PackIndex":
     """Load an index file by path.
 
     Args:
@@ -501,7 +506,7 @@ def _load_file_contents(
 
 
 def load_pack_index_file(
-    path: Union[str, os.PathLike], f: Union[IO[bytes], _GitFile]
+    path: Union[str, os.PathLike[str]], f: Union[IO[bytes], _GitFile]
 ) -> "PackIndex":
     """Load an index file from a file-like object.
 
@@ -751,7 +756,7 @@ class FilePackIndex(PackIndex):
 
     def __init__(
         self,
-        filename: Union[str, os.PathLike],
+        filename: Union[str, os.PathLike[str]],
         file: Optional[Union[IO[bytes], _GitFile]] = None,
         contents: Optional[Union[bytes, "mmap.mmap"]] = None,
         size: Optional[int] = None,
@@ -944,7 +949,7 @@ class PackIndex1(FilePackIndex):
 
     def __init__(
         self,
-        filename: Union[str, os.PathLike],
+        filename: Union[str, os.PathLike[str]],
         file: Optional[Union[IO[bytes], _GitFile]] = None,
         contents: Optional[bytes] = None,
         size: Optional[int] = None,
@@ -971,7 +976,9 @@ class PackIndex1(FilePackIndex):
 
     def _unpack_offset(self, i: int) -> int:
         offset = (0x100 * 4) + (i * 24)
-        return unpack_from(">L", self._contents, offset)[0]
+        result = unpack_from(">L", self._contents, offset)[0]
+        assert isinstance(result, int)
+        return result
 
     def _unpack_crc32_checksum(self, i: int) -> None:
         # Not stored in v1 index files
@@ -983,7 +990,7 @@ class PackIndex2(FilePackIndex):
 
     def __init__(
         self,
-        filename: Union[str, os.PathLike],
+        filename: Union[str, os.PathLike[str]],
         file: Optional[Union[IO[bytes], _GitFile]] = None,
         contents: Optional[bytes] = None,
         size: Optional[int] = None,
@@ -1022,15 +1029,21 @@ class PackIndex2(FilePackIndex):
         return self._contents[offset : offset + 20]
 
     def _unpack_offset(self, i: int) -> int:
-        offset = self._pack_offset_table_offset + i * 4
-        offset = unpack_from(">L", self._contents, offset)[0]
+        offset_pos = self._pack_offset_table_offset + i * 4
+        offset = unpack_from(">L", self._contents, offset_pos)[0]
+        assert isinstance(offset, int)
         if offset & (2**31):
-            offset = self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
-            offset = unpack_from(">Q", self._contents, offset)[0]
+            large_offset_pos = (
+                self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
+            )
+            offset = unpack_from(">Q", self._contents, large_offset_pos)[0]
+            assert isinstance(offset, int)
         return offset
 
     def _unpack_crc32_checksum(self, i: int) -> int:
-        return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+        result = unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+        assert isinstance(result, int)
+        return result
 
 
 class PackIndex3(FilePackIndex):
@@ -1041,7 +1054,7 @@ class PackIndex3(FilePackIndex):
 
     def __init__(
         self,
-        filename: Union[str, os.PathLike],
+        filename: Union[str, os.PathLike[str]],
         file: Optional[Union[IO[bytes], _GitFile]] = None,
         contents: Optional[bytes] = None,
         size: Optional[int] = None,
@@ -1096,15 +1109,21 @@ class PackIndex3(FilePackIndex):
         return self._contents[offset : offset + self.hash_size]
 
     def _unpack_offset(self, i: int) -> int:
-        offset = self._pack_offset_table_offset + i * 4
-        offset = unpack_from(">L", self._contents, offset)[0]
+        offset_pos = self._pack_offset_table_offset + i * 4
+        offset = unpack_from(">L", self._contents, offset_pos)[0]
+        assert isinstance(offset, int)
         if offset & (2**31):
-            offset = self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
-            offset = unpack_from(">Q", self._contents, offset)[0]
+            large_offset_pos = (
+                self._pack_offset_largetable_offset + (offset & (2**31 - 1)) * 8
+            )
+            offset = unpack_from(">Q", self._contents, large_offset_pos)[0]
+            assert isinstance(offset, int)
         return offset
 
     def _unpack_crc32_checksum(self, i: int) -> int:
-        return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+        result = unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
+        assert isinstance(result, int)
+        return result
 
 
 def read_pack_header(read: Callable[[int], bytes]) -> tuple[int, int]:
@@ -1375,9 +1394,9 @@ class PackStreamReader:
             # read buffer and (20 - N) come from the wire.
             self.read(20)
 
-        pack_sha = bytearray(self._trailer)  # type: ignore
+        pack_sha = bytearray(self._trailer)
         if pack_sha != self.sha.digest():
-            raise ChecksumMismatch(sha_to_hex(pack_sha), self.sha.hexdigest())
+            raise ChecksumMismatch(sha_to_hex(bytes(pack_sha)), self.sha.hexdigest())
 
 
 class PackStreamCopier(PackStreamReader):
@@ -1389,10 +1408,10 @@ class PackStreamCopier(PackStreamReader):
 
     def __init__(
         self,
-        read_all: Callable,
-        read_some: Callable,
+        read_all: Callable[[int], bytes],
+        read_some: Optional[Callable[[int], bytes]],
         outfile: IO[bytes],
-        delta_iter: Optional["DeltaChainIterator"] = None,
+        delta_iter: Optional["DeltaChainIterator[UnpackedObject]"] = None,
     ) -> None:
         """Initialize the copier.
 
@@ -1409,13 +1428,13 @@ class PackStreamCopier(PackStreamReader):
         self.outfile = outfile
         self._delta_iter = delta_iter
 
-    def _read(self, read: Callable, size: int) -> bytes:
+    def _read(self, read: Callable[[int], bytes], size: int) -> bytes:
         """Read data from the read callback and write it to the file."""
         data = super()._read(read, size)
         self.outfile.write(data)
         return data
 
-    def verify(self, progress: Optional[Callable] = None) -> None:
+    def verify(self, progress: Optional[Callable[..., None]] = None) -> None:
         """Verify a pack stream and write it to the output file.
 
         See PackStreamReader.iterobjects for a list of exceptions this may
@@ -1501,7 +1520,7 @@ class PackData:
 
     def __init__(
         self,
-        filename: Union[str, os.PathLike],
+        filename: Union[str, os.PathLike[str]],
         file: Optional[IO[bytes]] = None,
         size: Optional[int] = None,
         *,
@@ -1553,7 +1572,7 @@ class PackData:
         return os.path.basename(self._filename)
 
     @property
-    def path(self) -> Union[str, os.PathLike]:
+    def path(self) -> Union[str, os.PathLike[str]]:
         """Get the full path of the pack file.
 
         Returns:
@@ -1575,7 +1594,7 @@ class PackData:
         return cls(str(file), file=file, size=size)
 
     @classmethod
-    def from_path(cls, path: Union[str, os.PathLike]) -> "PackData":
+    def from_path(cls, path: Union[str, os.PathLike[str]]) -> "PackData":
         """Create a PackData object from a file path.
 
         Args:
@@ -1627,7 +1646,7 @@ class PackData:
 
         Returns: 20-byte binary SHA1 digest
         """
-        return compute_file_sha(cast(IO[bytes], self._file), end_ofs=-20).digest()
+        return compute_file_sha(self._file, end_ofs=-20).digest()
 
     def iter_unpacked(self, *, include_comp: bool = False) -> Iterator[UnpackedObject]:
         """Iterate over unpacked objects in the pack."""
@@ -1647,8 +1666,10 @@ class PackData:
             self._file.seek(-len(unused), SEEK_CUR)
 
     def iterentries(
-        self, progress=None, resolve_ext_ref: Optional[ResolveExtRefFn] = None
-    ):
+        self,
+        progress: Optional[Callable[[int, int], None]] = None,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
+    ) -> Iterator[tuple[bytes, int, Optional[int]]]:
         """Yield entries summarizing the contents of this pack.
 
         Args:
@@ -1678,14 +1699,14 @@ class PackData:
         Returns: Iterator of tuples with (sha, offset, crc32)
         """
         return sorted(
-            self.iterentries(progress=progress, resolve_ext_ref=resolve_ext_ref)
+            self.iterentries(progress=progress, resolve_ext_ref=resolve_ext_ref)  # type: ignore
         )
 
     def create_index_v1(
         self,
         filename: str,
-        progress: Optional[Callable] = None,
-        resolve_ext_ref: Optional[Callable] = None,
+        progress: Optional[Callable[..., None]] = None,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
     ) -> bytes:
         """Create a version 1 file for this data file.
 
@@ -1701,8 +1722,8 @@ class PackData:
         checksum = self.calculate_checksum()
         with GitFile(filename, "wb") as f:
             write_pack_index_v1(
-                cast(BinaryIO, f),
-                cast(list[tuple[bytes, int, Optional[int]]], entries),
+                f,
+                entries,
                 checksum,
             )
         return checksum
@@ -1710,8 +1731,8 @@ class PackData:
     def create_index_v2(
         self,
         filename: str,
-        progress: Optional[Callable] = None,
-        resolve_ext_ref: Optional[Callable] = None,
+        progress: Optional[Callable[..., None]] = None,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
     ) -> bytes:
         """Create a version 2 index file for this data file.
 
@@ -1730,8 +1751,8 @@ class PackData:
     def create_index_v3(
         self,
         filename: str,
-        progress: Optional[Callable] = None,
-        resolve_ext_ref: Optional[Callable] = None,
+        progress: Optional[Callable[..., None]] = None,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
         hash_algorithm: int = 1,
     ) -> bytes:
         """Create a version 3 index file for this data file.
@@ -1754,9 +1775,9 @@ class PackData:
     def create_index(
         self,
         filename: str,
-        progress: Optional[Callable] = None,
+        progress: Optional[Callable[..., None]] = None,
         version: int = 2,
-        resolve_ext_ref: Optional[Callable] = None,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
         hash_algorithm: int = 1,
     ) -> bytes:
         """Create an  index file for this data file.
@@ -1853,9 +1874,9 @@ class DeltaChainIterator(Generic[T]):
 
     def __init__(
         self,
-        file_obj: Optional[BinaryIO],
+        file_obj: Optional[IO[bytes]],
         *,
-        resolve_ext_ref: Optional[Callable] = None,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
     ) -> None:
         """Initialize DeltaChainIterator.
 
@@ -1872,8 +1893,8 @@ class DeltaChainIterator(Generic[T]):
 
     @classmethod
     def for_pack_data(
-        cls, pack_data: PackData, resolve_ext_ref: Optional[Callable] = None
-    ) -> "DeltaChainIterator":
+        cls, pack_data: PackData, resolve_ext_ref: Optional[ResolveExtRefFn] = None
+    ) -> "DeltaChainIterator[T]":
         """Create a DeltaChainIterator from pack data.
 
         Args:
@@ -1896,8 +1917,8 @@ class DeltaChainIterator(Generic[T]):
         shas: Iterable[bytes],
         *,
         allow_missing: bool = False,
-        resolve_ext_ref: Optional[Callable] = None,
-    ) -> "DeltaChainIterator":
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
+    ) -> "DeltaChainIterator[T]":
         """Create a DeltaChainIterator for a subset of objects.
 
         Args:
@@ -1967,7 +1988,7 @@ class DeltaChainIterator(Generic[T]):
         Args:
           pack_data: PackData object to use
         """
-        self._file = cast(BinaryIO, pack_data._file)
+        self._file = pack_data._file
 
     def _walk_all_chains(self) -> Iterator[T]:
         for offset, type_num in self._full_ofs:
@@ -1997,7 +2018,7 @@ class DeltaChainIterator(Generic[T]):
             self._ext_refs.append(base_sha)
             self._pending_ref.pop(base_sha)
             for new_offset in pending:
-                yield from self._follow_chain(new_offset, type_num, chunks)
+                yield from self._follow_chain(new_offset, type_num, chunks)  # type: ignore[arg-type]
 
         self._ensure_no_pending()
 
@@ -2073,7 +2094,7 @@ class PackIndexer(DeltaChainIterator[PackIndexEntry]):
 
     _compute_crc32 = True
 
-    def _result(self, unpacked: UnpackedObject) -> tuple:
+    def _result(self, unpacked: UnpackedObject) -> tuple[bytes, int, Optional[int]]:
         """Convert unpacked object to pack index entry.
 
         Args:
@@ -2082,6 +2103,7 @@ class PackIndexer(DeltaChainIterator[PackIndexEntry]):
         Returns:
             Tuple of (sha, offset, crc32) for index entry
         """
+        assert unpacked.offset is not None
         return unpacked.sha(), unpacked.offset, unpacked.crc32
 
 
@@ -2261,7 +2283,7 @@ class SHA1Reader(BinaryIO):
 class SHA1Writer(BinaryIO):
     """Wrapper for file-like object that remembers the SHA1 of its data."""
 
-    def __init__(self, f) -> None:
+    def __init__(self, f: Union[BinaryIO, IO[bytes]]) -> None:
         """Initialize SHA1Writer.
 
         Args:
@@ -2272,7 +2294,7 @@ class SHA1Writer(BinaryIO):
         self.sha1 = sha1(b"")
         self.digest: Optional[bytes] = None
 
-    def write(self, data) -> int:
+    def write(self, data: Union[bytes, bytearray, memoryview], /) -> int:  # type: ignore[override]
         """Write data and update SHA1.
 
         Args:
@@ -2282,9 +2304,9 @@ class SHA1Writer(BinaryIO):
             Number of bytes written
         """
         self.sha1.update(data)
-        self.f.write(data)
-        self.length += len(data)
-        return len(data)
+        written = self.f.write(data)
+        self.length += written
+        return written
 
     def write_sha(self) -> bytes:
         """Write the SHA1 digest to the file.
@@ -2457,9 +2479,7 @@ def pack_object_header(
 
 def pack_object_chunks(
     type: int,
-    object: Union[
-        ShaFile, bytes, list[bytes], tuple[Union[bytes, int], Union[bytes, list[bytes]]]
-    ],
+    object: Union[list[bytes], tuple[Union[bytes, int], list[bytes]]],
     compression_level: int = -1,
 ) -> Iterator[bytes]:
     """Generate chunks for a pack object.
@@ -2499,7 +2519,7 @@ def pack_object_chunks(
 def write_pack_object(
     write: Callable[[bytes], int],
     type: int,
-    object: ShaFile,
+    object: Union[list[bytes], tuple[Union[bytes, int], list[bytes]]],
     sha: Optional["HashObject"] = None,
     compression_level: int = -1,
 ) -> int:
@@ -2523,13 +2543,13 @@ def write_pack_object(
 
 
 def write_pack(
-    filename,
+    filename: str,
     objects: Union[Sequence[ShaFile], Sequence[tuple[ShaFile, Optional[bytes]]]],
     *,
     deltify: Optional[bool] = None,
     delta_window_size: Optional[int] = None,
     compression_level: int = -1,
-):
+) -> tuple[bytes, bytes]:
     """Write a new pack data file.
 
     Args:
@@ -2542,15 +2562,16 @@ def write_pack(
     """
     with GitFile(filename + ".pack", "wb") as f:
         entries, data_sum = write_pack_objects(
-            f.write,
+            f,
             objects,
             delta_window_size=delta_window_size,
             deltify=deltify,
             compression_level=compression_level,
         )
-    entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
+    entries_list = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
     with GitFile(filename + ".idx", "wb") as f:
-        return data_sum, write_pack_index(f, entries, data_sum)
+        idx_sha = write_pack_index(f, entries_list, data_sum)
+    return data_sum, idx_sha
 
 
 def pack_header_chunks(num_objects: int) -> Iterator[bytes]:
@@ -2560,17 +2581,22 @@ def pack_header_chunks(num_objects: int) -> Iterator[bytes]:
     yield struct.pack(b">L", num_objects)  # Number of objects in pack
 
 
-def write_pack_header(write, num_objects) -> None:
+def write_pack_header(
+    write: Union[Callable[[bytes], int], IO[bytes]], num_objects: int
+) -> None:
     """Write a pack header for the given number of objects."""
+    write_fn: Callable[[bytes], int]
     if hasattr(write, "write"):
-        write = write.write
+        write_fn = write.write
         warnings.warn(
             "write_pack_header() now takes a write rather than file argument",
             DeprecationWarning,
             stacklevel=2,
         )
+    else:
+        write_fn = write
     for chunk in pack_header_chunks(num_objects):
-        write(chunk)
+        write_fn(chunk)
 
 
 def find_reusable_deltas(
@@ -2578,7 +2604,7 @@ def find_reusable_deltas(
     object_ids: set[bytes],
     *,
     other_haves: Optional[set[bytes]] = None,
-    progress=None,
+    progress: Optional[Callable[..., None]] = None,
 ) -> Iterator[UnpackedObject]:
     """Find deltas in a pack that can be reused.
 
@@ -2614,7 +2640,7 @@ def deltify_pack_objects(
     objects: Union[Iterator[ShaFile], Iterator[tuple[ShaFile, Optional[bytes]]]],
     *,
     window_size: Optional[int] = None,
-    progress=None,
+    progress: Optional[Callable[..., None]] = None,
 ) -> Iterator[UnpackedObject]:
     """Generate deltas for pack objects.
 
@@ -2633,8 +2659,9 @@ def deltify_pack_objects(
             else:
                 yield (e[0], (e[0].type_num, e[1]))
 
+    sorted_objs = sort_objects_for_delta(objects_with_hints())
     yield from deltas_from_sorted_objects(
-        sort_objects_for_delta(objects_with_hints()),
+        sorted_objs,
         window_size=window_size,
         progress=progress,
     )
@@ -2642,14 +2669,14 @@ def deltify_pack_objects(
 
 def sort_objects_for_delta(
     objects: Union[Iterator[ShaFile], Iterator[tuple[ShaFile, Optional[PackHint]]]],
-) -> Iterator[ShaFile]:
+) -> Iterator[tuple[ShaFile, Optional[bytes]]]:
     """Sort objects for optimal delta compression.
 
     Args:
       objects: Iterator of objects or (object, hint) tuples
 
     Returns:
-      Iterator of sorted ShaFile objects
+      Iterator of sorted (ShaFile, path) tuples
     """
     magic = []
     for entry in objects:
@@ -2662,16 +2689,20 @@ def sort_objects_for_delta(
                 (type_num, path) = hint
         else:
             obj = entry
+            type_num = None
+            path = None
         magic.append((type_num, path, -obj.raw_length(), obj))
     # Build a list of objects ordered by the magic Linus heuristic
     # This helps us find good objects to diff against us
     magic.sort()
-    return (x[3] for x in magic)
+    return ((x[3], x[1]) for x in magic)
 
 
 def deltas_from_sorted_objects(
-    objects, window_size: Optional[int] = None, progress=None
-):
+    objects: Iterator[tuple[ShaFile, Optional[bytes]]],
+    window_size: Optional[int] = None,
+    progress: Optional[Callable[..., None]] = None,
+) -> Iterator[UnpackedObject]:
     """Create deltas from sorted objects.
 
     Args:
@@ -2687,7 +2718,7 @@ def deltas_from_sorted_objects(
         window_size = DEFAULT_PACK_DELTA_WINDOW_SIZE
 
     possible_bases: deque[tuple[bytes, int, list[bytes]]] = deque()
-    for i, o in enumerate(objects):
+    for i, (o, path) in enumerate(objects):
         if progress is not None and i % 1000 == 0:
             progress((f"generating deltas: {i}\r").encode())
         raw = o.as_raw_chunks()
@@ -2721,12 +2752,16 @@ def deltas_from_sorted_objects(
 
 
 def pack_objects_to_data(
-    objects: Union[Sequence[ShaFile], Sequence[tuple[ShaFile, Optional[bytes]]]],
+    objects: Union[
+        Sequence[ShaFile],
+        Sequence[tuple[ShaFile, Optional[bytes]]],
+        Sequence[tuple[ShaFile, Optional[PackHint]]],
+    ],
     *,
     deltify: Optional[bool] = None,
     delta_window_size: Optional[int] = None,
     ofs_delta: bool = True,
-    progress=None,
+    progress: Optional[Callable[..., None]] = None,
 ) -> tuple[int, Iterator[UnpackedObject]]:
     """Create pack data from objects.
 
@@ -2773,7 +2808,7 @@ def generate_unpacked_objects(
     reuse_deltas: bool = True,
     ofs_delta: bool = True,
     other_haves: Optional[set[bytes]] = None,
-    progress=None,
+    progress: Optional[Callable[..., None]] = None,
 ) -> Iterator[UnpackedObject]:
     """Create pack data from objects.
 
@@ -2794,8 +2829,9 @@ def generate_unpacked_objects(
         objects_to_delta = container.iterobjects_subset(
             todo.keys(), allow_missing=False
         )
+        sorted_objs = sort_objects_for_delta((o, todo[o.id]) for o in objects_to_delta)
         yield from deltas_from_sorted_objects(
-            sort_objects_for_delta((o, todo[o.id]) for o in objects_to_delta),
+            sorted_objs,
             window_size=delta_window_size,
             progress=progress,
         )
@@ -2823,7 +2859,11 @@ def full_unpacked_object(o: ShaFile) -> UnpackedObject:
 
 
 def write_pack_from_container(
-    write,
+    write: Union[
+        Callable[[bytes], None],
+        Callable[[Union[bytes, bytearray, memoryview]], int],
+        IO[bytes],
+    ],
     container: PackedObjectContainer,
     object_ids: Sequence[tuple[ObjectID, Optional[PackHint]]],
     delta_window_size: Optional[int] = None,
@@ -2831,7 +2871,7 @@ def write_pack_from_container(
     reuse_deltas: bool = True,
     compression_level: int = -1,
     other_haves: Optional[set[bytes]] = None,
-):
+) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
     Args:
@@ -2865,13 +2905,13 @@ def write_pack_from_container(
 
 
 def write_pack_objects(
-    write,
+    write: Union[Callable[[bytes], None], IO[bytes]],
     objects: Union[Sequence[ShaFile], Sequence[tuple[ShaFile, Optional[bytes]]]],
     *,
     delta_window_size: Optional[int] = None,
     deltify: Optional[bool] = None,
     compression_level: int = -1,
-):
+) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
     Args:
@@ -2898,11 +2938,11 @@ class PackChunkGenerator:
 
     def __init__(
         self,
-        num_records=None,
-        records=None,
-        progress=None,
-        compression_level=-1,
-        reuse_compressed=True,
+        num_records: Optional[int] = None,
+        records: Optional[Iterator[UnpackedObject]] = None,
+        progress: Optional[Callable[..., None]] = None,
+        compression_level: int = -1,
+        reuse_compressed: bool = True,
     ) -> None:
         """Initialize PackChunkGenerator.
 
@@ -2914,10 +2954,12 @@ class PackChunkGenerator:
             reuse_compressed: Whether to reuse compressed chunks
         """
         self.cs = sha1(b"")
-        self.entries: dict[Union[int, bytes], tuple[int, int]] = {}
+        self.entries: dict[bytes, tuple[int, int]] = {}
+        if records is None:
+            records = iter([])  # Empty iterator if None
         self._it = self._pack_data_chunks(
-            num_records=num_records,
             records=records,
+            num_records=num_records,
             progress=progress,
             compression_level=compression_level,
             reuse_compressed=reuse_compressed,
@@ -2935,8 +2977,8 @@ class PackChunkGenerator:
         self,
         records: Iterator[UnpackedObject],
         *,
-        num_records=None,
-        progress=None,
+        num_records: Optional[int] = None,
+        progress: Optional[Callable[..., None]] = None,
         compression_level: int = -1,
         reuse_compressed: bool = True,
     ) -> Iterator[bytes]:
@@ -2965,6 +3007,9 @@ class PackChunkGenerator:
                 progress((f"writing pack data: {i}/{num_records}\r").encode("ascii"))
             raw: Union[list[bytes], tuple[int, list[bytes]], tuple[bytes, list[bytes]]]
             if unpacked.delta_base is not None:
+                assert isinstance(unpacked.delta_base, bytes), (
+                    f"Expected bytes, got {type(unpacked.delta_base)}"
+                )
                 try:
                     base_offset, _base_crc32 = self.entries[unpacked.delta_base]
                 except KeyError:
@@ -3002,13 +3047,17 @@ class PackChunkGenerator:
 
 
 def write_pack_data(
-    write,
+    write: Union[
+        Callable[[bytes], None],
+        Callable[[Union[bytes, bytearray, memoryview]], int],
+        IO[bytes],
+    ],
     records: Iterator[UnpackedObject],
     *,
-    num_records=None,
-    progress=None,
-    compression_level=-1,
-):
+    num_records: Optional[int] = None,
+    progress: Optional[Callable[..., None]] = None,
+    compression_level: int = -1,
+) -> tuple[dict[bytes, tuple[int, int]], bytes]:
     """Write a new pack data file.
 
     Args:
@@ -3026,12 +3075,17 @@ def write_pack_data(
         compression_level=compression_level,
     )
     for chunk in chunk_generator:
-        write(chunk)
+        if callable(write):
+            write(chunk)
+        else:
+            write.write(chunk)
     return chunk_generator.entries, chunk_generator.sha1digest()
 
 
 def write_pack_index_v1(
-    f: BinaryIO, entries: list[tuple[bytes, int, Optional[int]]], pack_checksum: bytes
+    f: IO[bytes],
+    entries: Iterable[tuple[bytes, int, Union[int, None]]],
+    pack_checksum: bytes,
 ) -> bytes:
     """Write a new pack index file.
 
@@ -3059,7 +3113,7 @@ def write_pack_index_v1(
     return f.write_sha()
 
 
-def _delta_encode_size(size) -> bytes:
+def _delta_encode_size(size: int) -> bytes:
     ret = bytearray()
     c = size & 0x7F
     size >>= 7
@@ -3129,11 +3183,11 @@ def create_delta(base_buf: bytes, target_buf: bytes) -> Iterator[bytes]:
             o = j1
             while s > 127:
                 yield bytes([127])
-                yield memoryview(target_buf)[o : o + 127]
+                yield bytes(memoryview(target_buf)[o : o + 127])
                 s -= 127
                 o += 127
             yield bytes([s])
-            yield memoryview(target_buf)[o : o + s]
+            yield bytes(memoryview(target_buf)[o : o + s])
 
 
 def apply_delta(
@@ -3213,7 +3267,9 @@ def apply_delta(
 
 
 def write_pack_index_v2(
-    f, entries: Iterable[PackIndexEntry], pack_checksum: bytes
+    f: IO[bytes],
+    entries: Iterable[tuple[bytes, int, Union[int, None]]],
+    pack_checksum: bytes,
 ) -> bytes:
     """Write a new pack index file.
 
@@ -3253,7 +3309,10 @@ def write_pack_index_v2(
 
 
 def write_pack_index_v3(
-    f, entries: Iterable[PackIndexEntry], pack_checksum: bytes, hash_algorithm: int = 1
+    f: IO[bytes],
+    entries: Iterable[tuple[bytes, int, Union[int, None]]],
+    pack_checksum: bytes,
+    hash_algorithm: int = 1,
 ) -> bytes:
     """Write a new pack index file in v3 format.
 
@@ -3330,12 +3389,16 @@ def write_pack_index_v3(
 
 
 def write_pack_index(
-    index_filename, entries, pack_checksum, progress=None, version=None
-):
+    f: IO[bytes],
+    entries: Iterable[tuple[bytes, int, Union[int, None]]],
+    pack_checksum: bytes,
+    progress: Optional[Callable[..., None]] = None,
+    version: Optional[int] = None,
+) -> bytes:
     """Write a pack index file.
 
     Args:
-      index_filename: Index filename.
+      f: File-like object to write to.
       entries: List of (checksum, offset, crc32) tuples
       pack_checksum: Checksum of the pack file.
       progress: Progress function (not currently used)
@@ -3348,11 +3411,11 @@ def write_pack_index(
         version = DEFAULT_PACK_INDEX_VERSION
 
     if version == 1:
-        return write_pack_index_v1(index_filename, entries, pack_checksum)
+        return write_pack_index_v1(f, entries, pack_checksum)
     elif version == 2:
-        return write_pack_index_v2(index_filename, entries, pack_checksum)
+        return write_pack_index_v2(f, entries, pack_checksum)
     elif version == 3:
-        return write_pack_index_v3(index_filename, entries, pack_checksum)
+        return write_pack_index_v3(f, entries, pack_checksum)
     else:
         raise ValueError(f"Unsupported pack index version: {version}")
 
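As the dispatch above shows, write_pack_index() now takes an open binary file object instead of an index filename and forwards it to the version-specific writers. A minimal, hypothetical sketch of the new calling convention; the entry tuple and checksum below are placeholders, not real pack contents:

import hashlib

from dulwich.pack import write_pack_index

# Hypothetical (sha, pack offset, crc32) entries for illustration only.
entries = [(hashlib.sha1(b"example object").digest(), 12, 0)]
pack_checksum = hashlib.sha1(b"example pack data").digest()

with open("example.idx", "wb") as f:
    # version=2 dispatches to write_pack_index_v2(); version=None falls back
    # to DEFAULT_PACK_INDEX_VERSION.
    idx_sha = write_pack_index(f, entries, pack_checksum, version=2)
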
@@ -3368,15 +3431,15 @@ class Pack:
 
     def __init__(
         self,
-        basename,
+        basename: str,
         resolve_ext_ref: Optional[ResolveExtRefFn] = None,
         *,
-        delta_window_size=None,
-        window_memory=None,
-        delta_cache_size=None,
-        depth=None,
-        threads=None,
-        big_file_threshold=None,
+        delta_window_size: Optional[int] = None,
+        window_memory: Optional[int] = None,
+        delta_cache_size: Optional[int] = None,
+        depth: Optional[int] = None,
+        threads: Optional[int] = None,
+        big_file_threshold: Optional[int] = None,
     ) -> None:
         """Initialize a Pack object.
 
@@ -3414,7 +3477,9 @@ class Pack:
         self.resolve_ext_ref = resolve_ext_ref
 
     @classmethod
-    def from_lazy_objects(cls, data_fn: Callable, idx_fn: Callable) -> "Pack":
+    def from_lazy_objects(
+        cls, data_fn: Callable[[], PackData], idx_fn: Callable[[], PackIndex]
+    ) -> "Pack":
         """Create a new pack object from callables to load pack data and index objects."""
         ret = cls("")
         ret._data_load = data_fn
@@ -3543,7 +3608,7 @@ class Pack:
         offset = self.index.object_offset(sha1)
         obj_type, obj = self.data.get_object_at(offset)
         type_num, chunks = self.resolve_object(offset, obj_type, obj)
-        return type_num, b"".join(chunks)
+        return type_num, b"".join(chunks)  # type: ignore[arg-type]
 
     def __getitem__(self, sha1: bytes) -> ShaFile:
         """Retrieve the specified SHA1."""
@@ -3654,8 +3719,14 @@ class Pack:
         return offset, type, obj
 
     def resolve_object(
-        self, offset: int, type: int, obj, get_ref=None
-    ) -> tuple[int, Iterable[bytes]]:
+        self,
+        offset: int,
+        type: int,
+        obj: OldUnpackedObject,
+        get_ref: Optional[
+            Callable[[bytes], tuple[Optional[int], int, OldUnpackedObject]]
+        ] = None,
+    ) -> tuple[int, OldUnpackedObject]:
         """Resolve an object, possibly resolving deltas when necessary.
 
         Returns: Tuple with object type and contents.
@@ -3673,13 +3744,16 @@ class Pack:
             if base_type == OFS_DELTA:
                 (delta_offset, delta) = base_obj
                 # TODO: clean up asserts and replace with nicer error messages
+                assert isinstance(delta_offset, int), (
+                    f"Expected int, got {delta_offset.__class__}"
+                )
                 base_offset = base_offset - delta_offset
                 base_type, base_obj = self.data.get_object_at(base_offset)
                 assert isinstance(base_type, int)
             elif base_type == REF_DELTA:
                 (basename, delta) = base_obj
                 assert isinstance(basename, bytes) and len(basename) == 20
-                base_offset, base_type, base_obj = get_ref(basename)
+                base_offset, base_type, base_obj = get_ref(basename)  # type: ignore[assignment]
                 assert isinstance(base_type, int)
                 if base_offset == prev_offset:  # object is based on itself
                     raise UnresolvedDeltas([basename])
@@ -3710,7 +3784,7 @@ class Pack:
         return base_type, chunks
 
     def entries(
-        self, progress: Optional[ProgressFn] = None
+        self, progress: Optional[Callable[[int, int], None]] = None
     ) -> Iterator[PackIndexEntry]:
         """Yield entries summarizing the contents of this pack.
 
@@ -3761,11 +3835,11 @@ class Pack:
 def extend_pack(
     f: BinaryIO,
     object_ids: set[ObjectID],
-    get_raw,
+    get_raw: Callable[[ObjectID], tuple[int, bytes]],
     *,
-    compression_level=-1,
-    progress=None,
-) -> tuple[bytes, list]:
+    compression_level: int = -1,
+    progress: Optional[Callable[[bytes], None]] = None,
+) -> tuple[bytes, list[tuple[bytes, int, int]]]:
     """Extend a pack file with more objects.
 
     The caller should make sure that object_ids does not contain any objects
@@ -3802,7 +3876,7 @@ def extend_pack(
         crc32 = write_pack_object(
             f.write,
             type_num,
-            data,
+            [data],  # Convert bytes to list[bytes]
             sha=new_sha,
             compression_level=compression_level,
         )
@@ -3814,8 +3888,8 @@ def extend_pack(
 
 try:
     from dulwich._pack import (  # type: ignore
-        apply_delta,  # type: ignore
-        bisect_find_sha,  # type: ignore
+        apply_delta,
+        bisect_find_sha,
     )
 except ImportError:
     pass

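Rounding off the pack.py hunks, here is a small hypothetical sketch of the annotated write_pack() contract: it writes <filename>.pack and <filename>.idx and, per the new return annotation, yields both the pack checksum and the index checksum. The blob and base filename are illustrative only:

from dulwich.objects import Blob
from dulwich.pack import write_pack

blob = Blob.from_string(b"hello, pack\n")

# Writes example.pack and example.idx in the current directory.
data_sum, idx_sha = write_pack("example", [blob])
print(data_sum.hex(), idx_sha.hex())
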
+ 4 - 2
dulwich/patch.py

@@ -215,7 +215,9 @@ def unified_diff(
                     yield b"+" + line
 
 
-def _get_sequence_matcher(algorithm: str, a: list[bytes], b: list[bytes]):
+def _get_sequence_matcher(
+    algorithm: str, a: list[bytes], b: list[bytes]
+) -> SequenceMatcher[bytes]:
     """Get appropriate sequence matcher for the given algorithm.
 
     Args:
@@ -233,7 +235,7 @@ def _get_sequence_matcher(algorithm: str, a: list[bytes], b: list[bytes]):
         try:
             from patiencediff import PatienceSequenceMatcher
 
-            return PatienceSequenceMatcher(None, a, b)
+            return PatienceSequenceMatcher(None, a, b)  # type: ignore[no-any-return,unused-ignore]
         except ImportError:
             raise DiffAlgorithmNotAvailable(
                 "patience", "Install with: pip install 'dulwich[patiencediff]'"

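The helper above is internal to dulwich.patch, but its new SequenceMatcher[bytes] return type is easy to picture with plain difflib, which is what the default algorithm uses; patiencediff.PatienceSequenceMatcher (an optional dependency) exposes the same opcode interface. A self-contained illustration:

from difflib import SequenceMatcher

a = [b"one\n", b"two\n", b"three\n"]
b = [b"one\n", b"two!\n", b"three\n"]

# The matcher operates on lists of byte lines, mirroring what
# _get_sequence_matcher() receives from the unified diff code.
matcher = SequenceMatcher(None, a, b)
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    print(tag, a[i1:i2], b[j1:j2])
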
+ 251 - 115
dulwich/porcelain.py

The diff for this file is too large to display and has been collapsed.

+ 23 - 13
dulwich/rebase.py

@@ -26,7 +26,7 @@ import shutil
 import subprocess
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Protocol
+from typing import Callable, Optional, Protocol, TypedDict
 
 from dulwich.graph import find_merge_base
 from dulwich.merge import three_way_merge
@@ -526,6 +526,16 @@ class DiskRebaseStateManager:
         return None
 
 
+class RebaseState(TypedDict):
+    """Type definition for rebase state."""
+
+    original_head: Optional[bytes]
+    rebasing_branch: Optional[bytes]
+    onto: Optional[bytes]
+    todo: list[Commit]
+    done: list[Commit]
+
+
 class MemoryRebaseStateManager:
     """Manages rebase state in memory for MemoryRepo."""
 
@@ -536,7 +546,7 @@ class MemoryRebaseStateManager:
           repo: Repository instance
         """
         self.repo = repo
-        self._state: Optional[dict] = None
+        self._state: Optional[RebaseState] = None
         self._todo: Optional[RebaseTodo] = None
 
     def save(
@@ -548,13 +558,13 @@ class MemoryRebaseStateManager:
         done: list[Commit],
     ) -> None:
         """Save rebase state in memory."""
-        self._state = {
-            "original_head": original_head,
-            "rebasing_branch": rebasing_branch,
-            "onto": onto,
-            "todo": todo[:],  # Copy the lists
-            "done": done[:],
-        }
+        self._state = RebaseState(
+            original_head=original_head,
+            rebasing_branch=rebasing_branch,
+            onto=onto,
+            todo=todo[:],  # Copy the lists
+            done=done[:],
+        )
 
     def load(
         self,
@@ -940,7 +950,7 @@ def start_interactive(
     upstream: bytes,
     onto: Optional[bytes] = None,
     branch: Optional[bytes] = None,
-    editor_callback=None,
+    editor_callback: Optional[Callable[[bytes], bytes]] = None,
 ) -> RebaseTodo:
     """Start an interactive rebase.
 
@@ -1000,7 +1010,7 @@ def start_interactive(
     return todo
 
 
-def edit_todo(repo: Repo, editor_callback) -> RebaseTodo:
+def edit_todo(repo: Repo, editor_callback: Callable[[bytes], bytes]) -> RebaseTodo:
     """Edit the todo list of an in-progress interactive rebase.
 
     Args:
@@ -1041,7 +1051,7 @@ def edit_todo(repo: Repo, editor_callback) -> RebaseTodo:
 def process_interactive_rebase(
     repo: Repo,
     todo: Optional[RebaseTodo] = None,
-    editor_callback=None,
+    editor_callback: Optional[Callable[[bytes], bytes]] = None,
 ) -> tuple[bool, Optional[str]]:
     """Process an interactive rebase.
 
@@ -1189,7 +1199,7 @@ def _squash_commits(
     rebaser: Rebaser,
     entry: RebaseTodoEntry,
     keep_message: bool,
-    editor_callback=None,
+    editor_callback: Optional[Callable[[bytes], bytes]] = None,
 ) -> Optional[str]:
     """Helper to squash/fixup commits.
 

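A brief sketch of the new RebaseState TypedDict introduced above, which replaces the untyped dict previously held by MemoryRebaseStateManager; the ref names and SHAs are placeholder values, and the import only exists once this change is applied:

from dulwich.rebase import RebaseState

state = RebaseState(
    original_head=b"a" * 40,          # placeholder commit SHA
    rebasing_branch=b"refs/heads/feature",
    onto=b"b" * 40,                   # placeholder commit SHA
    todo=[],                          # commits still to replay
    done=[],                          # commits already replayed
)
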
+ 17 - 13
dulwich/refs.py

@@ -36,7 +36,6 @@ from typing import (
     Optional,
     TypeVar,
     Union,
-    cast,
 )
 
 if TYPE_CHECKING:
@@ -153,12 +152,12 @@ class RefsContainer:
             Callable[
                 [
                     bytes,
-                    Optional[bytes],
-                    Optional[bytes],
+                    bytes,
+                    bytes,
                     Optional[bytes],
                     Optional[int],
                     Optional[int],
-                    Optional[bytes],
+                    bytes,
                 ],
                 None,
             ]
@@ -181,6 +180,11 @@ class RefsContainer:
             return
         if message is None:
             return
+        # Use ZERO_SHA for None values, matching git behavior
+        if old_sha is None:
+            old_sha = ZERO_SHA
+        if new_sha is None:
+            new_sha = ZERO_SHA
         self._logger(ref, old_sha, new_sha, committer, timestamp, timezone, message)
 
     def set_symbolic_ref(
@@ -282,7 +286,7 @@ class RefsContainer:
         """Iterate over all reference keys."""
         return iter(self.allkeys())
 
-    def keys(self, base=None):
+    def keys(self, base: Optional[bytes] = None) -> set[bytes]:
         """Refs present in this container.
 
         Args:
@@ -799,18 +803,18 @@ class DiskRefsContainer(RefsContainer):
 
     def __init__(
         self,
-        path: Union[str, bytes, os.PathLike],
-        worktree_path: Optional[Union[str, bytes, os.PathLike]] = None,
+        path: Union[str, bytes, os.PathLike[str]],
+        worktree_path: Optional[Union[str, bytes, os.PathLike[str]]] = None,
         logger: Optional[
             Callable[
                 [
                     bytes,
-                    Optional[bytes],
-                    Optional[bytes],
+                    bytes,
+                    bytes,
                     Optional[bytes],
                     Optional[int],
                     Optional[int],
-                    Optional[bytes],
+                    bytes,
                 ],
                 None,
             ]
@@ -1378,7 +1382,7 @@ def read_packed_refs_with_peeled(
     """
     last = None
     for line in f:
-        if line[0] == b"#":
+        if line.startswith(b"#"):
             continue
         line = line.rstrip(b"\r\n")
         if line.startswith(b"^"):
@@ -1504,7 +1508,7 @@ def _set_default_branch(
     refs: RefsContainer,
     origin: bytes,
     origin_head: Optional[bytes],
-    branch: bytes,
+    branch: Optional[bytes],
     ref_message: Optional[bytes],
 ) -> bytes:
     """Set the default branch."""
@@ -1764,7 +1768,7 @@ def filter_ref_prefix(refs: T, prefixes: Iterable[bytes]) -> T:
       prefixes: The prefixes to filter by.
     """
     filtered = {k: v for k, v in refs.items() if any(k.startswith(p) for p in prefixes)}
-    return cast(T, filtered)
+    return filtered
 
 
 def is_per_worktree_ref(ref: bytes) -> bool:

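With the tightened logger type above, _log() now substitutes ZERO_SHA for missing old/new values before invoking the callback, so a logger can rely on receiving bytes for the ref, both SHAs, and the message. A hypothetical callback matching the new shape:

from typing import Optional


def reflog_printer(
    ref: bytes,
    old_sha: bytes,
    new_sha: bytes,
    committer: Optional[bytes],
    timestamp: Optional[int],
    timezone: Optional[int],
    message: bytes,
) -> None:
    # A real logger would append a formatted reflog line; this just prints.
    print(ref.decode(), old_sha.decode(), new_sha.decode(), message.decode())
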
+ 102 - 50
dulwich/reftable.py

@@ -15,7 +15,8 @@ import time
 import zlib
 from dataclasses import dataclass
 from io import BytesIO
-from typing import BinaryIO, Optional, Union
+from types import TracebackType
+from typing import BinaryIO, Callable, Optional, Union
 
 from dulwich.objects import ObjectID
 from dulwich.refs import (
@@ -337,13 +338,13 @@ class LogBlock:
 class RefBlock:
     """A block containing reference records."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         """Initialize RefBlock."""
-        self.refs = []
+        self.refs: list[RefRecord] = []
 
     def add_ref(
         self, refname: bytes, value_type: int, value: bytes, update_index: int = 1
-    ):
+    ) -> None:
         """Add a reference to the block."""
         self.refs.append(RefRecord(refname, value_type, value, update_index))
 
@@ -512,7 +513,7 @@ class ReftableWriter:
             is_batch_operation  # Track if this is a batch operation
         )
 
-    def add_ref(self, refname: bytes, sha: bytes):
+    def add_ref(self, refname: bytes, sha: bytes) -> None:
         """Add a direct reference."""
         self.refs[refname] = (REF_VALUE_REF, sha)
         if refname not in self.refs_order:
@@ -520,7 +521,7 @@ class ReftableWriter:
 
         self._maybe_auto_create_head()
 
-    def add_symbolic_ref(self, refname: bytes, target: bytes):
+    def add_symbolic_ref(self, refname: bytes, target: bytes) -> None:
         """Add a symbolic reference."""
         self.refs[refname] = (REF_VALUE_SYMREF, target)
         if refname not in self.refs_order:
@@ -533,20 +534,20 @@ class ReftableWriter:
             # Update existing ref (e.g., if HEAD was auto-created and now explicitly set)
             pass
 
-    def delete_ref(self, refname: bytes):
+    def delete_ref(self, refname: bytes) -> None:
         """Mark a reference as deleted."""
         self.refs[refname] = (REF_VALUE_DELETE, b"")
         if refname not in self.refs_order:
             self.refs_order.append(refname)
 
-    def _maybe_auto_create_head(self):
+    def _maybe_auto_create_head(self) -> None:
         """Auto-create HEAD -> refs/heads/master if needed (Git compatibility)."""
         if self.auto_create_head and b"HEAD" not in self.refs:
             # Git always creates HEAD -> refs/heads/master by default
             self.refs[b"HEAD"] = (REF_VALUE_SYMREF, b"refs/heads/master")
             self.refs_order.insert(0, b"HEAD")
 
-    def write(self):
+    def write(self) -> None:
         """Write the reftable to the file."""
         # Skip recalculation if max_update_index was already set higher than default
         # This preserves Git's behavior for symbolic-ref operations
@@ -569,7 +570,7 @@ class ReftableWriter:
         # so we only need to add final padding and CRC
         self._write_final_padding()
 
-    def _write_header(self):
+    def _write_header(self) -> None:
         """Write the reftable header."""
         # Magic bytes
         header_data = REFTABLE_MAGIC
@@ -591,7 +592,7 @@ class ReftableWriter:
         self.f.write(header_data)
         self._written_data.append(header_data)
 
-    def _get_ref_update_indices(self):
+    def _get_ref_update_indices(self) -> dict[bytes, int]:
         """Get update indices for all refs based on operation type.
 
         In batch operations, all refs get the same update index (timestamp).
@@ -606,7 +607,7 @@ class ReftableWriter:
             return {name: self.min_update_index for name in self.refs_order}
         elif hasattr(self, "_ref_update_indices"):
             # Use provided indices
-            return self._ref_update_indices
+            return self._ref_update_indices  # type: ignore[no-any-return]
         elif len(self.refs_order) == 1 and self.refs_order[0] == b"HEAD":
             # Special case for single HEAD symbolic ref
             value_type, _ = self.refs[b"HEAD"]
@@ -621,7 +622,7 @@ class ReftableWriter:
                 indices[name] = self.min_update_index + i
             return indices
 
-    def _write_ref_blocks(self):
+    def _write_ref_blocks(self) -> None:
         """Write reference blocks."""
         # Only write block if we have refs
         if not self.refs:
@@ -658,7 +659,7 @@ class ReftableWriter:
         self.f.write(block_data)
         self._written_data.append(block_data)
 
-    def _write_final_padding(self):
+    def _write_final_padding(self) -> None:
         """Write final padding and CRC for Git compatibility."""
         # Git writes exactly 40 bytes after the ref block (which includes embedded footer)
         # This is 36 bytes of zeros followed by 4-byte CRC
@@ -695,7 +696,7 @@ class ReftableReader:
         self.refs: dict[bytes, tuple[int, bytes]] = {}
         self._read_blocks()
 
-    def _read_header(self):
+    def _read_header(self) -> None:
         """Read and validate the reftable header."""
         # Read magic bytes
         magic = self.f.read(4)
@@ -717,7 +718,7 @@ class ReftableReader:
         self.min_update_index = struct.unpack(">Q", self.f.read(8))[0]
         self.max_update_index = struct.unpack(">Q", self.f.read(8))[0]
 
-    def _read_blocks(self):
+    def _read_blocks(self) -> None:
         """Read all blocks from the reftable."""
         while True:
             # Read block type
@@ -745,7 +746,7 @@ class ReftableReader:
             ):  # Likely parsing footer as block
                 break
 
-    def _process_ref_block(self, data: bytes):
+    def _process_ref_block(self, data: bytes) -> None:
         """Process a reference block."""
         block = RefBlock.decode(data, min_update_index=self.min_update_index)
         for ref in block.refs:
@@ -764,14 +765,19 @@ class ReftableReader:
 class _ReftableBatchContext:
     """Context manager for batching reftable updates."""
 
-    def __init__(self, refs_container):
+    def __init__(self, refs_container: "ReftableRefsContainer") -> None:
         self.refs_container = refs_container
 
-    def __enter__(self):
+    def __enter__(self) -> "_ReftableBatchContext":
         self.refs_container._batch_mode = True
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
         self.refs_container._batch_mode = False
         if exc_type is None:  # Only flush if no exception occurred
             self.refs_container._flush_pending_updates()
@@ -780,7 +786,24 @@ class _ReftableBatchContext:
 class ReftableRefsContainer(RefsContainer):
     """A refs container backed by the reftable format."""
 
-    def __init__(self, path: Union[str, bytes], logger=None):
+    def __init__(
+        self,
+        path: Union[str, bytes],
+        logger: Optional[
+            Callable[
+                [
+                    bytes,
+                    bytes,
+                    bytes,
+                    Optional[bytes],
+                    Optional[int],
+                    Optional[int],
+                    bytes,
+                ],
+                None,
+            ]
+        ] = None,
+    ) -> None:
         """Initialize a reftable refs container.
 
         Args:
@@ -800,11 +823,12 @@ class ReftableRefsContainer(RefsContainer):
         self._ref_update_indices: dict[
             bytes, int
         ] = {}  # Track chronological update index for each ref
+        self._batch_mode = False  # Track whether we're in batch mode
 
         # Create refs/heads marker file for Git compatibility
         self._ensure_refs_heads_marker()
 
-    def _ensure_refs_heads_marker(self):
+    def _ensure_refs_heads_marker(self) -> None:
         """Ensure refs/heads marker file exists for Git compatibility.
 
         Git expects a refs/heads file (not directory) to exist when using
@@ -826,11 +850,11 @@ class ReftableRefsContainer(RefsContainer):
             with open(refs_heads_path, "wb") as f:
                 f.write(b"this repository uses the reftable format\n")
 
-    def _read_table_file(self, table_file: str):
+    def _read_table_file(self, table_file: str) -> BinaryIO:
         """Context manager helper to open and read a reftable file."""
         return open(table_file, "rb")
 
-    def _load_ref_update_indices(self):
+    def _load_ref_update_indices(self) -> None:
         """Load the update indices for all refs from existing reftable files."""
         for table_file in self._get_table_files():
             with self._read_table_file(table_file) as f:
@@ -921,7 +945,7 @@ class ReftableRefsContainer(RefsContainer):
                     all_refs[refname] = (value_type, value)
         return all_refs
 
-    def allkeys(self):
+    def allkeys(self) -> set[bytes]:
         """Return set of all ref names."""
         refs = self._read_all_tables()
         result = set(refs.keys())
@@ -1058,14 +1082,14 @@ class ReftableRefsContainer(RefsContainer):
         table_name = f"0x{min_idx:016x}-0x{max_idx:016x}-{hash_part:08x}.ref"
         return os.path.join(self.reftable_dir, table_name)
 
-    def add_packed_refs(self, new_refs: dict[bytes, Optional[bytes]]):
+    def add_packed_refs(self, new_refs: dict[bytes, Optional[bytes]]) -> None:
         """Add packed refs. Creates a new reftable file with all refs consolidated."""
         if not new_refs:
             return
 
         self._write_batch_updates(new_refs)
 
-    def _write_batch_updates(self, updates: dict[bytes, Optional[bytes]]):
+    def _write_batch_updates(self, updates: dict[bytes, Optional[bytes]]) -> None:
         """Write multiple ref updates to a single reftable file."""
         if not updates:
             return
@@ -1085,13 +1109,13 @@ class ReftableRefsContainer(RefsContainer):
 
     def set_if_equals(
         self,
-        name,
-        old_ref,
-        new_ref,
-        committer=None,
-        timestamp=None,
-        timezone=None,
-        message=None,
+        name: bytes,
+        old_ref: Optional[bytes],
+        new_ref: Optional[bytes],
+        committer: Optional[bytes] = None,
+        timestamp: Optional[int] = None,
+        timezone: Optional[int] = None,
+        message: Optional[bytes] = None,
     ) -> bool:
         """Atomically set a ref if it currently equals old_ref."""
         # For now, implement a simple non-atomic version
@@ -1114,7 +1138,13 @@ class ReftableRefsContainer(RefsContainer):
         return True
 
     def add_if_new(
-        self, name, ref, committer=None, timestamp=None, timezone=None, message=None
+        self,
+        name: bytes,
+        ref: bytes,
+        committer: Optional[bytes] = None,
+        timestamp: Optional[int] = None,
+        timezone: Optional[int] = None,
+        message: Optional[bytes] = None,
     ) -> bool:
         """Add a ref only if it doesn't exist."""
         try:
@@ -1126,7 +1156,13 @@ class ReftableRefsContainer(RefsContainer):
         return True
 
     def remove_if_equals(
-        self, name, old_ref, committer=None, timestamp=None, timezone=None, message=None
+        self,
+        name: bytes,
+        old_ref: Optional[bytes],
+        committer: Optional[bytes] = None,
+        timestamp: Optional[int] = None,
+        timezone: Optional[int] = None,
+        message: Optional[bytes] = None,
     ) -> bool:
         """Remove a ref if it equals old_ref."""
         return self.set_if_equals(
@@ -1140,12 +1176,18 @@ class ReftableRefsContainer(RefsContainer):
         )
 
     def set_symbolic_ref(
-        self, name, other, committer=None, timestamp=None, timezone=None, message=None
-    ):
+        self,
+        name: bytes,
+        other: bytes,
+        committer: Optional[bytes] = None,
+        timestamp: Optional[int] = None,
+        timezone: Optional[int] = None,
+        message: Optional[bytes] = None,
+    ) -> None:
         """Set a symbolic reference."""
         self._write_ref_update(name, REF_VALUE_SYMREF, other)
 
-    def _write_ref_update(self, name: bytes, value_type: int, value: bytes):
+    def _write_ref_update(self, name: bytes, value_type: int, value: bytes) -> None:
         """Write a single ref update immediately to its own reftable file."""
         # Check if we're in batch mode - if so, buffer for later
         if getattr(self, "_batch_mode", False):
@@ -1156,7 +1198,9 @@ class ReftableRefsContainer(RefsContainer):
         # Write immediately like Git does - one file per update
         self._write_single_ref_update(name, value_type, value)
 
-    def _write_single_ref_update(self, name: bytes, value_type: int, value: bytes):
+    def _write_single_ref_update(
+        self, name: bytes, value_type: int, value: bytes
+    ) -> None:
         """Write a single ref update to its own reftable file like Git does."""
         table_path = self._generate_table_path()
         next_update_index = self._get_next_update_index()
@@ -1181,7 +1225,7 @@ class ReftableRefsContainer(RefsContainer):
 
         self._update_tables_list()
 
-    def _flush_pending_updates(self):
+    def _flush_pending_updates(self) -> None:
         """Flush pending ref updates like Git does - consolidate all refs."""
         if not self._pending_updates:
             return
@@ -1226,7 +1270,9 @@ class ReftableRefsContainer(RefsContainer):
 
         self._pending_updates.clear()
 
-    def _process_pending_updates(self):
+    def _process_pending_updates(
+        self,
+    ) -> tuple[Optional[tuple[bytes, int, bytes]], list[tuple[bytes, int, bytes]]]:
         """Process pending updates and return (head_update, other_updates)."""
         head_update = None
         other_updates = []
@@ -1284,8 +1330,12 @@ class ReftableRefsContainer(RefsContainer):
         return current
 
     def _apply_batch_updates(
-        self, all_refs, other_updates, head_update, batch_update_index
-    ):
+        self,
+        all_refs: dict[bytes, tuple[int, bytes]],
+        other_updates: list[tuple[bytes, int, bytes]],
+        head_update: Optional[tuple[bytes, int, bytes]],
+        batch_update_index: int,
+    ) -> None:
         """Apply batch updates to the refs dict and update indices."""
         # Process all updates and assign the SAME update index to all refs in batch
         for name, value_type, value in other_updates:
@@ -1308,7 +1358,9 @@ class ReftableRefsContainer(RefsContainer):
                 all_refs[name] = (value_type, value)
                 self._ref_update_indices[name] = batch_update_index
 
-    def _write_batch_file(self, all_refs, batch_update_index):
+    def _write_batch_file(
+        self, all_refs: dict[bytes, tuple[int, bytes]], batch_update_index: int
+    ) -> list[str]:
         """Write all refs to a single batch file and return created filenames."""
         # All refs in batch have same update index
         table_path = self._generate_table_path(batch_update_index, batch_update_index)
@@ -1329,22 +1381,22 @@ class ReftableRefsContainer(RefsContainer):
                 writer.refs[refname] = (value_type, value)
 
             # Pass the update indices to the writer
-            writer._ref_update_indices = {
+            writer._ref_update_indices = {  # type: ignore[attr-defined]
                 name: batch_update_index for name in all_refs.keys()
             }
             writer.write()
 
         return [os.path.basename(table_path)]
 
-    def batch_update(self):
+    def batch_update(self) -> "_ReftableBatchContext":
         """Context manager for batching multiple ref updates into a single reftable."""
         return _ReftableBatchContext(self)
 
-    def remove_packed_ref(self, name: bytes):
+    def remove_packed_ref(self, name: bytes) -> None:
         """Remove a packed ref. Creates a deletion record."""
         self._write_ref_update(name, REF_VALUE_DELETE, b"")
 
-    def _compact_tables_list(self, new_table_name: str):
+    def _compact_tables_list(self, new_table_name: str) -> None:
         """Compact tables list to single file like Git does."""
         tables_list_path = os.path.join(self.reftable_dir, "tables.list")
 
@@ -1357,7 +1409,7 @@ class ReftableRefsContainer(RefsContainer):
         with open(tables_list_path, "wb") as f:
             f.write((new_table_name + "\n").encode())
 
-    def _update_tables_list(self):
+    def _update_tables_list(self) -> None:
         """Update the tables.list file with current table files."""
         tables_list_path = os.path.join(self.reftable_dir, "tables.list")
 

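A hypothetical sketch of the batching flow that the now-typed _ReftableBatchContext supports: while the context is active, ref writes are buffered in _pending_updates, and a single consolidated reftable is flushed when the block exits without an exception. The path and SHA are placeholders only:

from dulwich.reftable import ReftableRefsContainer

refs = ReftableRefsContainer("/path/to/repo/.git")  # placeholder path

with refs.batch_update():
    refs.set_if_equals(b"refs/heads/main", None, b"c" * 40)
    refs.set_symbolic_ref(b"HEAD", b"refs/heads/main")
# Both updates land in one new *.ref table on exit.
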
+ 201 - 123
dulwich/repo.py

@@ -34,8 +34,9 @@ import stat
 import sys
 import time
 import warnings
-from collections.abc import Iterable, Iterator
+from collections.abc import Generator, Iterable, Iterator
 from io import BytesIO
+from types import TracebackType
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -52,15 +53,17 @@ if TYPE_CHECKING:
     # these imports.
     from .attrs import GitAttributes
     from .config import ConditionMatcher, ConfigFile, StackedConfig
+    from .diff_tree import RenameDetector
+    from .filters import FilterBlobNormalizer, FilterContext
     from .index import Index
-    from .line_ending import BlobNormalizer
     from .notes import Notes
-    from .object_store import BaseObjectStore, GraphWalker, UnpackedObject
+    from .object_store import BaseObjectStore, GraphWalker
+    from .pack import UnpackedObject
     from .rebase import RebaseStateManager
     from .walk import Walker
     from .worktree import WorkTree
 
-from . import replace_me
+from . import reflog, replace_me
 from .errors import (
     NoIndexPresent,
     NotBlobError,
@@ -121,6 +124,7 @@ from .refs import (
 
 CONTROLDIR = ".git"
 OBJECTDIR = "objects"
+DEFAULT_OFS_DELTA = True
 
 T = TypeVar("T", bound="ShaFile")
 REFSDIR = "refs"
@@ -172,7 +176,7 @@ def _get_default_identity() -> tuple[str, str]:
         fullname = None
     else:
         try:
-            entry = pwd.getpwuid(os.getuid())  # type: ignore
+            entry = pwd.getpwuid(os.getuid())  # type: ignore[attr-defined,unused-ignore]
         except KeyError:
             fullname = None
         else:
@@ -346,7 +350,7 @@ class ParentsProvider:
     def __init__(
         self,
         store: "BaseObjectStore",
-        grafts: dict = {},
+        grafts: dict[bytes, list[bytes]] = {},
         shallows: Iterable[bytes] = [],
     ) -> None:
         """Initialize ParentsProvider.
@@ -385,7 +389,9 @@ class ParentsProvider:
             obj = self.store[commit_id]
             assert isinstance(obj, Commit)
             commit = obj
-        return commit.parents
+        parents = commit.parents
+        assert isinstance(parents, list)
+        return parents
 
 
 class BaseRepo:
@@ -501,10 +507,12 @@ class BaseRepo:
     def fetch(
         self,
         target: "BaseRepo",
-        determine_wants: Optional[Callable] = None,
-        progress: Optional[Callable] = None,
+        determine_wants: Optional[
+            Callable[[dict[bytes, bytes], Optional[int]], list[bytes]]
+        ] = None,
+        progress: Optional[Callable[..., None]] = None,
         depth: Optional[int] = None,
-    ) -> dict:
+    ) -> dict[bytes, bytes]:
         """Fetch objects into another repository.
 
         Args:
@@ -528,13 +536,13 @@ class BaseRepo:
 
     def fetch_pack_data(
         self,
-        determine_wants: Callable,
+        determine_wants: Callable[[dict[bytes, bytes], Optional[int]], list[bytes]],
         graph_walker: "GraphWalker",
-        progress: Optional[Callable],
+        progress: Optional[Callable[[bytes], None]],
         *,
-        get_tagged: Optional[Callable] = None,
+        get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
         depth: Optional[int] = None,
-    ) -> tuple:
+    ) -> tuple[int, Iterator["UnpackedObject"]]:
         """Fetch the pack data required for a set of revisions.
 
         Args:
@@ -563,11 +571,11 @@ class BaseRepo:
 
     def find_missing_objects(
         self,
-        determine_wants: Callable,
+        determine_wants: Callable[[dict[bytes, bytes], Optional[int]], list[bytes]],
         graph_walker: "GraphWalker",
-        progress: Optional[Callable],
+        progress: Optional[Callable[[bytes], None]],
         *,
-        get_tagged: Optional[Callable] = None,
+        get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
         depth: Optional[int] = None,
     ) -> Optional[MissingObjectFinder]:
         """Fetch the missing objects required for a set of revisions.
@@ -587,7 +595,7 @@ class BaseRepo:
         """
         refs = serialize_refs(self.object_store, self.get_refs())
 
-        wants = determine_wants(refs)
+        wants = determine_wants(refs, depth)
         if not isinstance(wants, list):
             raise TypeError("determine_wants() did not return a list")
 
@@ -670,8 +678,8 @@ class BaseRepo:
 
     def generate_pack_data(
         self,
-        have: Iterable[ObjectID],
-        want: Iterable[ObjectID],
+        have: set[ObjectID],
+        want: set[ObjectID],
         progress: Optional[Callable[[str], None]] = None,
         ofs_delta: Optional[bool] = None,
     ) -> tuple[int, Iterator["UnpackedObject"]]:
@@ -688,7 +696,7 @@ class BaseRepo:
             want,
             shallow=self.get_shallow(),
             progress=progress,
-            ofs_delta=ofs_delta,
+            ofs_delta=ofs_delta if ofs_delta is not None else DEFAULT_OFS_DELTA,
         )
 
     def get_graph_walker(
@@ -792,10 +800,10 @@ class BaseRepo:
         """Retrieve the worktree config object."""
         raise NotImplementedError(self.get_worktree_config)
 
-    def get_description(self) -> Optional[str]:
+    def get_description(self) -> Optional[bytes]:
         """Retrieve the description for this repository.
 
-        Returns: String with the description of the repository
+        Returns: Bytes with the description of the repository
             as set by the user.
         """
         raise NotImplementedError(self.get_description)
@@ -815,7 +823,7 @@ class BaseRepo:
         """
         raise NotImplementedError(self.get_rebase_state_manager)
 
-    def get_blob_normalizer(self) -> "BlobNormalizer":
+    def get_blob_normalizer(self) -> "FilterBlobNormalizer":
         """Return a BlobNormalizer object for checkin/checkout operations.
 
         Returns: BlobNormalizer instance
@@ -907,42 +915,67 @@ class BaseRepo:
 
         return Notes(self.object_store, self.refs)
 
-    def get_walker(self, include: Optional[list[bytes]] = None, **kwargs) -> "Walker":
+    def get_walker(
+        self,
+        include: Optional[list[bytes]] = None,
+        exclude: Optional[list[bytes]] = None,
+        order: str = "date",
+        reverse: bool = False,
+        max_entries: Optional[int] = None,
+        paths: Optional[list[bytes]] = None,
+        rename_detector: Optional["RenameDetector"] = None,
+        follow: bool = False,
+        since: Optional[int] = None,
+        until: Optional[int] = None,
+        queue_cls: Optional[type] = None,
+    ) -> "Walker":
         """Obtain a walker for this repository.
 
         Args:
           include: Iterable of SHAs of commits to include along with their
             ancestors. Defaults to [HEAD]
-          **kwargs: Additional keyword arguments including:
-
-            * exclude: Iterable of SHAs of commits to exclude along with their
-              ancestors, overriding includes.
-            * order: ORDER_* constant specifying the order of results.
-              Anything other than ORDER_DATE may result in O(n) memory usage.
-            * reverse: If True, reverse the order of output, requiring O(n)
-              memory.
-            * max_entries: The maximum number of entries to yield, or None for
-              no limit.
-            * paths: Iterable of file or subtree paths to show entries for.
-            * rename_detector: diff.RenameDetector object for detecting
-              renames.
-            * follow: If True, follow path across renames/copies. Forces a
-              default rename_detector.
-            * since: Timestamp to list commits after.
-            * until: Timestamp to list commits before.
-            * queue_cls: A class to use for a queue of commits, supporting the
-              iterator protocol. The constructor takes a single argument, the Walker.
+          exclude: Iterable of SHAs of commits to exclude along with their
+            ancestors, overriding includes.
+          order: ORDER_* constant specifying the order of results.
+            Anything other than ORDER_DATE may result in O(n) memory usage.
+          reverse: If True, reverse the order of output, requiring O(n)
+            memory.
+          max_entries: The maximum number of entries to yield, or None for
+            no limit.
+          paths: Iterable of file or subtree paths to show entries for.
+          rename_detector: diff.RenameDetector object for detecting
+            renames.
+          follow: If True, follow path across renames/copies. Forces a
+            default rename_detector.
+          since: Timestamp to list commits after.
+          until: Timestamp to list commits before.
+          queue_cls: A class to use for a queue of commits, supporting the
+            iterator protocol. The constructor takes a single argument, the Walker.
 
         Returns: A `Walker` object
         """
-        from .walk import Walker
+        from .walk import Walker, _CommitTimeQueue
 
         if include is None:
             include = [self.head()]
 
-        kwargs["get_parents"] = lambda commit: self.get_parents(commit.id, commit)
-
-        return Walker(self.object_store, include, **kwargs)
+        # Pass all arguments to Walker explicitly to avoid type issues with **kwargs
+        return Walker(
+            self.object_store,
+            include,
+            exclude=exclude,
+            order=order,
+            reverse=reverse,
+            max_entries=max_entries,
+            paths=paths,
+            rename_detector=rename_detector,
+            follow=follow,
+            since=since,
+            until=until,
+            get_parents=lambda commit: self.get_parents(commit.id, commit),
+            queue_cls=queue_cls if queue_cls is not None else _CommitTimeQueue,
+        )
 
     def __getitem__(self, name: Union[ObjectID, Ref]) -> "ShaFile":
         """Retrieve a Git object by SHA1 or ref.
@@ -1139,7 +1172,7 @@ def read_gitfile(f: BinaryIO) -> str:
 class UnsupportedVersion(Exception):
     """Unsupported repository version."""
 
-    def __init__(self, version) -> None:
+    def __init__(self, version: int) -> None:
         """Initialize UnsupportedVersion exception.
 
         Args:
@@ -1151,7 +1184,7 @@ class UnsupportedVersion(Exception):
 class UnsupportedExtension(Exception):
     """Unsupported repository extension."""
 
-    def __init__(self, extension) -> None:
+    def __init__(self, extension: str) -> None:
         """Initialize UnsupportedExtension exception.
 
         Args:
@@ -1181,10 +1214,11 @@ class Repo(BaseRepo):
     path: str
     bare: bool
     object_store: DiskObjectStore
+    filter_context: Optional["FilterContext"]
 
     def __init__(
         self,
-        root: Union[str, bytes, os.PathLike],
+        root: Union[str, bytes, os.PathLike[str]],
         object_store: Optional[PackBasedObjectStore] = None,
         bare: Optional[bool] = None,
     ) -> None:
@@ -1268,7 +1302,7 @@ class Repo(BaseRepo):
                 else:
                     raise UnsupportedExtension(f"refStorage = {value.decode()}")
             elif extension.lower() not in (b"worktreeconfig",):
-                raise UnsupportedExtension(extension)
+                raise UnsupportedExtension(extension.decode("utf-8"))
 
         if object_store is None:
             object_store = DiskObjectStore.from_config(
@@ -1315,7 +1349,14 @@ class Repo(BaseRepo):
         return WorkTree(self, self.path)
 
     def _write_reflog(
-        self, ref, old_sha, new_sha, committer, timestamp, timezone, message
+        self,
+        ref: bytes,
+        old_sha: bytes,
+        new_sha: bytes,
+        committer: Optional[bytes],
+        timestamp: Optional[int],
+        timezone: Optional[int],
+        message: bytes,
     ) -> None:
         from .reflog import format_reflog_line
 
@@ -1347,7 +1388,7 @@ class Repo(BaseRepo):
         base = self.controldir() if is_per_worktree_ref(ref) else self.commondir()
         return os.path.join(base, "logs", os.fsdecode(ref))
 
-    def read_reflog(self, ref):
+    def read_reflog(self, ref: bytes) -> Generator[reflog.Entry, None, None]:
         """Read reflog entries for a reference.
 
         Args:
@@ -1366,7 +1407,7 @@ class Repo(BaseRepo):
             return
 
     @classmethod
-    def discover(cls, start="."):
+    def discover(cls, start: Union[str, bytes, os.PathLike[str]] = ".") -> "Repo":
         """Iterate parent directories to discover a repository.
 
         Return a Repo object for the first parent directory that looks like a
@@ -1375,16 +1416,19 @@ class Repo(BaseRepo):
         Args:
           start: The directory to start discovery from (defaults to '.')
         """
-        remaining = True
         path = os.path.abspath(start)
-        while remaining:
+        while True:
             try:
                 return cls(path)
             except NotGitRepository:
-                path, remaining = os.path.split(path)
-        raise NotGitRepository(
-            "No git repository was found at {path}".format(**dict(path=start))
-        )
+                new_path, _tail = os.path.split(path)
+                if new_path == path:  # Root reached
+                    break
+                path = new_path
+        start_str = os.fspath(start)
+        if isinstance(start_str, bytes):
+            start_str = start_str.decode("utf-8")
+        raise NotGitRepository(f"No git repository was found at {start_str}")
 
     def controldir(self) -> str:
         """Return the path of the control directory."""
@@ -1448,7 +1492,11 @@ class Repo(BaseRepo):
         except FileNotFoundError:
             return
 
-    def get_named_file(self, path, basedir=None):
+    def get_named_file(
+        self,
+        path: Union[str, bytes],
+        basedir: Optional[str] = None,
+    ) -> Optional[BinaryIO]:
         """Get a file from the control dir with a specific name.
 
         Although the filename should be interpreted as a filename relative to
@@ -1465,13 +1513,15 @@ class Repo(BaseRepo):
         # the dumb web serving code.
         if basedir is None:
             basedir = self.controldir()
+        if isinstance(path, bytes):
+            path = path.decode("utf-8")
         path = path.lstrip(os.path.sep)
         try:
             return open(os.path.join(basedir, path), "rb")
         except FileNotFoundError:
             return None
 
-    def index_path(self):
+    def index_path(self) -> str:
         """Return path to the index file."""
         return os.path.join(self.controldir(), INDEX_FILENAME)
 
@@ -1522,7 +1572,7 @@ class Repo(BaseRepo):
     def stage(
         self,
         fs_paths: Union[
-            str, bytes, os.PathLike, Iterable[Union[str, bytes, os.PathLike]]
+            str, bytes, os.PathLike[str], Iterable[Union[str, bytes, os.PathLike[str]]]
         ],
     ) -> None:
         """Stage a set of paths.
@@ -1544,16 +1594,16 @@ class Repo(BaseRepo):
 
     def clone(
         self,
-        target_path,
+        target_path: Union[str, bytes, os.PathLike[str]],
         *,
-        mkdir=True,
-        bare=False,
-        origin=b"origin",
-        checkout=None,
-        branch=None,
-        progress=None,
+        mkdir: bool = True,
+        bare: bool = False,
+        origin: bytes = b"origin",
+        checkout: Optional[bool] = None,
+        branch: Optional[bytes] = None,
+        progress: Optional[Callable[[str], None]] = None,
         depth: Optional[int] = None,
-        symlinks=None,
+        symlinks: Optional[bool] = None,
     ) -> "Repo":
         """Clone this repository.
 
@@ -1638,7 +1688,7 @@ class Repo(BaseRepo):
         return target
 
     @replace_me(remove_in="0.26.0")
-    def reset_index(self, tree: Optional[bytes] = None):
+    def reset_index(self, tree: Optional[bytes] = None) -> None:
         """Reset the index back to a specific tree.
 
         Args:
@@ -1767,7 +1817,7 @@ class Repo(BaseRepo):
             ret.path = path
             return ret
 
-    def get_rebase_state_manager(self):
+    def get_rebase_state_manager(self) -> "RebaseStateManager":
         """Get the appropriate rebase state manager for this repository.
 
         Returns: DiskRebaseStateManager instance
@@ -1779,10 +1829,10 @@ class Repo(BaseRepo):
         path = os.path.join(self.controldir(), "rebase-merge")
         return DiskRebaseStateManager(path)
 
-    def get_description(self):
+    def get_description(self) -> Optional[bytes]:
         """Retrieve the description of this repository.
 
-        Returns: A string describing the repository or None.
+        Returns: Description as bytes or None.
         """
         path = os.path.join(self._controldir, "description")
         try:
@@ -1795,7 +1845,7 @@ class Repo(BaseRepo):
         """Return string representation of this repository."""
         return f"<Repo at {self.path!r}>"
 
-    def set_description(self, description) -> None:
+    def set_description(self, description: bytes) -> None:
         """Set the description for this repository.
 
         Args:
@@ -1806,15 +1856,15 @@ class Repo(BaseRepo):
     @classmethod
     def _init_maybe_bare(
         cls,
-        path: Union[str, bytes, os.PathLike],
-        controldir: Union[str, bytes, os.PathLike],
-        bare,
-        object_store=None,
-        config=None,
-        default_branch=None,
+        path: Union[str, bytes, os.PathLike[str]],
+        controldir: Union[str, bytes, os.PathLike[str]],
+        bare: bool,
+        object_store: Optional[PackBasedObjectStore] = None,
+        config: Optional["StackedConfig"] = None,
+        default_branch: Optional[bytes] = None,
         symlinks: Optional[bool] = None,
         format: Optional[int] = None,
-    ):
+    ) -> "Repo":
         path = os.fspath(path)
         if isinstance(path, bytes):
             path = os.fsdecode(path)
@@ -1842,11 +1892,11 @@ class Repo(BaseRepo):
     @classmethod
     def init(
         cls,
-        path: Union[str, bytes, os.PathLike],
+        path: Union[str, bytes, os.PathLike[str]],
         *,
         mkdir: bool = False,
-        config=None,
-        default_branch=None,
+        config: Optional["StackedConfig"] = None,
+        default_branch: Optional[bytes] = None,
         symlinks: Optional[bool] = None,
         format: Optional[int] = None,
     ) -> "Repo":
@@ -1882,11 +1932,11 @@ class Repo(BaseRepo):
     @classmethod
     def _init_new_working_directory(
         cls,
-        path: Union[str, bytes, os.PathLike],
-        main_repo,
-        identifier=None,
-        mkdir=False,
-    ):
+        path: Union[str, bytes, os.PathLike[str]],
+        main_repo: "Repo",
+        identifier: Optional[str] = None,
+        mkdir: bool = False,
+    ) -> "Repo":
         """Create a new working directory linked to a repository.
 
         Args:
@@ -1931,14 +1981,14 @@ class Repo(BaseRepo):
     @classmethod
     def init_bare(
         cls,
-        path: Union[str, bytes, os.PathLike],
+        path: Union[str, bytes, os.PathLike[str]],
         *,
-        mkdir=False,
-        object_store=None,
-        config=None,
-        default_branch=None,
+        mkdir: bool = False,
+        object_store: Optional[PackBasedObjectStore] = None,
+        config: Optional["StackedConfig"] = None,
+        default_branch: Optional[bytes] = None,
         format: Optional[int] = None,
-    ):
+    ) -> "Repo":
         """Create a new bare repository.
 
         ``path`` should already exist and be an empty directory.
@@ -1977,11 +2027,16 @@ class Repo(BaseRepo):
             self.filter_context.close()
             self.filter_context = None
 
-    def __enter__(self):
+    def __enter__(self) -> "Repo":
         """Enter context manager."""
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
         """Exit context manager and close repository."""
         self.close()
 
@@ -2024,7 +2079,7 @@ class Repo(BaseRepo):
 
         return gitattributes
 
-    def get_blob_normalizer(self):
+    def get_blob_normalizer(self) -> "FilterBlobNormalizer":
         """Return a BlobNormalizer object."""
         from .filters import FilterBlobNormalizer, FilterContext, FilterRegistry
 
@@ -2165,6 +2220,8 @@ class MemoryRepo(BaseRepo):
     those have a stronger dependency on the filesystem.
     """
 
+    filter_context: Optional["FilterContext"]
+
     def __init__(self) -> None:
         """Create a new repository in memory."""
         from .config import ConfigFile
@@ -2175,13 +2232,24 @@ class MemoryRepo(BaseRepo):
         self._named_files: dict[str, bytes] = {}
         self.bare = True
         self._config = ConfigFile()
-        self._description = None
+        self._description: Optional[bytes] = None
         self.filter_context = None
 
-    def _append_reflog(self, *args) -> None:
-        self._reflog.append(args)
+    def _append_reflog(
+        self,
+        ref: bytes,
+        old_sha: Optional[bytes],
+        new_sha: Optional[bytes],
+        committer: Optional[bytes],
+        timestamp: Optional[int],
+        timezone: Optional[int],
+        message: Optional[bytes],
+    ) -> None:
+        self._reflog.append(
+            (ref, old_sha, new_sha, committer, timestamp, timezone, message)
+        )
 
-    def set_description(self, description) -> None:
+    def set_description(self, description: bytes) -> None:
         """Set the description for this repository.
 
         Args:
@@ -2189,7 +2257,7 @@ class MemoryRepo(BaseRepo):
         """
         self._description = description
 
-    def get_description(self):
+    def get_description(self) -> Optional[bytes]:
         """Get the description of this repository.
 
         Returns:
@@ -2197,21 +2265,21 @@ class MemoryRepo(BaseRepo):
         """
         return self._description
 
-    def _determine_file_mode(self):
+    def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
 
         Returns: True if permissions can be trusted, False otherwise.
         """
         return sys.platform != "win32"
 
-    def _determine_symlinks(self):
+    def _determine_symlinks(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
 
         Returns: True if permissions can be trusted, False otherwise.
         """
         return sys.platform != "win32"
 
-    def _put_named_file(self, path, contents) -> None:
+    def _put_named_file(self, path: str, contents: bytes) -> None:
         """Write a file to the control dir with the given name and contents.
 
         Args:
@@ -2220,13 +2288,17 @@ class MemoryRepo(BaseRepo):
         """
         self._named_files[path] = contents
 
-    def _del_named_file(self, path) -> None:
+    def _del_named_file(self, path: str) -> None:
         try:
             del self._named_files[path]
         except KeyError:
             pass
 
-    def get_named_file(self, path, basedir=None):
+    def get_named_file(
+        self,
+        path: Union[str, bytes],
+        basedir: Optional[str] = None,
+    ) -> Optional[BytesIO]:
         """Get a file from the control dir with a specific name.
 
         Although the filename should be interpreted as a filename relative to
@@ -2238,7 +2310,8 @@ class MemoryRepo(BaseRepo):
           basedir: Optional base directory for the path
         Returns: An open file object, or None if the file does not exist.
         """
-        contents = self._named_files.get(path, None)
+        path_str = path.decode() if isinstance(path, bytes) else path
+        contents = self._named_files.get(path_str, None)
         if contents is None:
             return None
         return BytesIO(contents)
@@ -2251,14 +2324,14 @@ class MemoryRepo(BaseRepo):
         """
         raise NoIndexPresent
 
-    def get_config(self):
+    def get_config(self) -> "ConfigFile":
         """Retrieve the config object.
 
         Returns: `ConfigFile` object.
         """
         return self._config
 
-    def get_rebase_state_manager(self):
+    def get_rebase_state_manager(self) -> "RebaseStateManager":
         """Get the appropriate rebase state manager for this repository.
 
         Returns: MemoryRebaseStateManager instance
@@ -2267,7 +2340,7 @@ class MemoryRepo(BaseRepo):
 
         return MemoryRebaseStateManager(self)
 
-    def get_blob_normalizer(self):
+    def get_blob_normalizer(self) -> "FilterBlobNormalizer":
         """Return a BlobNormalizer object for checkin/checkout operations."""
         from .filters import FilterBlobNormalizer, FilterContext, FilterRegistry
 
@@ -2308,17 +2381,17 @@ class MemoryRepo(BaseRepo):
         message: Optional[bytes] = None,
         committer: Optional[bytes] = None,
         author: Optional[bytes] = None,
-        commit_timestamp=None,
-        commit_timezone=None,
-        author_timestamp=None,
-        author_timezone=None,
+        commit_timestamp: Optional[float] = None,
+        commit_timezone: Optional[int] = None,
+        author_timestamp: Optional[float] = None,
+        author_timezone: Optional[int] = None,
         tree: Optional[ObjectID] = None,
         encoding: Optional[bytes] = None,
         ref: Optional[Ref] = b"HEAD",
         merge_heads: Optional[list[ObjectID]] = None,
         no_verify: bool = False,
         sign: bool = False,
-    ):
+    ) -> bytes:
         """Create a new commit.
 
         This is a simplified implementation for in-memory repositories that
@@ -2412,7 +2485,7 @@ class MemoryRepo(BaseRepo):
                     c.id,
                     message=b"commit: " + message,
                     committer=committer,
-                    timestamp=commit_timestamp,
+                    timestamp=int(commit_timestamp),
                     timezone=commit_timezone,
                 )
             except KeyError:
@@ -2423,7 +2496,7 @@ class MemoryRepo(BaseRepo):
                     c.id,
                     message=b"commit: " + message,
                     committer=committer,
-                    timestamp=commit_timestamp,
+                    timestamp=int(commit_timestamp),
                     timezone=commit_timezone,
                 )
             if not ok:
@@ -2434,7 +2507,12 @@ class MemoryRepo(BaseRepo):
         return c.id
 
     @classmethod
-    def init_bare(cls, objects, refs, format: Optional[int] = None):
+    def init_bare(
+        cls,
+        objects: Iterable[ShaFile],
+        refs: dict[bytes, bytes],
+        format: Optional[int] = None,
+    ) -> "MemoryRepo":
         """Create a new bare repository in memory.
 
         Args:
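
The repo.py hunk above mostly tightens type annotations rather than changing behaviour. A minimal usage sketch of the annotated surface follows; the temporary directory and branch name are illustrative, not taken from the commit.

    import tempfile

    from dulwich.repo import MemoryRepo, Repo

    # Repo.init() now has typed keyword arguments, and the instance is a
    # typed context manager (__enter__ returns "Repo").
    path = tempfile.mkdtemp()
    with Repo.init(path, default_branch=b"main") as repo:
        print(repo.get_description())  # bytes or None, per the new annotation

    # MemoryRepo.init_bare() now spells out its parameters: an iterable of
    # objects and a dict of refs.
    mem = MemoryRepo.init_bare([], {})
    mem.set_description(b"scratch repository")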

+ 233 - 112
dulwich/server.py

@@ -52,11 +52,19 @@ import time
 import zlib
 from collections.abc import Iterable, Iterator
 from functools import partial
-from typing import TYPE_CHECKING, Optional
+from typing import IO, TYPE_CHECKING, Callable, Optional, Union
 from typing import Protocol as TypingProtocol
 
+if sys.version_info >= (3, 12):
+    from collections.abc import Buffer
+else:
+    from typing import Union
+
+    Buffer = Union[bytes, bytearray, memoryview]
+
 if TYPE_CHECKING:
     from .object_store import BaseObjectStore
+    from .repo import BaseRepo
 
 from dulwich import log_utils
 
@@ -70,9 +78,9 @@ from .errors import (
     ObjectFormatException,
     UnexpectedCommandError,
 )
-from .object_store import find_shallow
+from .object_store import MissingObjectFinder, PackBasedObjectStore, find_shallow
 from .objects import Commit, ObjectID, Tree, valid_hexsha
-from .pack import ObjectContainer, PackedObjectContainer, write_pack_from_container
+from .pack import ObjectContainer, write_pack_from_container
 from .protocol import (
     CAPABILITIES_REF,
     CAPABILITY_AGENT,
@@ -125,7 +133,7 @@ logger = log_utils.getLogger(__name__)
 class Backend:
     """A backend for the Git smart server implementation."""
 
-    def open_repository(self, path) -> "BackendRepo":
+    def open_repository(self, path: str) -> "BackendRepo":
         """Open the repository at a path.
 
         Args:
@@ -144,7 +152,7 @@ class BackendRepo(TypingProtocol):
     dulwich.repo.Repo.
     """
 
-    object_store: PackedObjectContainer
+    object_store: PackBasedObjectStore
     refs: RefsContainer
 
     def get_refs(self) -> dict[bytes, bytes]:
@@ -167,8 +175,14 @@ class BackendRepo(TypingProtocol):
         return None
 
     def find_missing_objects(
-        self, determine_wants, graph_walker, progress, get_tagged=None
-    ) -> Iterator[ObjectID]:
+        self,
+        determine_wants: Callable[[dict[bytes, bytes], Optional[int]], list[bytes]],
+        graph_walker: "_ProtocolGraphWalker",
+        progress: Optional[Callable[[bytes], None]],
+        *,
+        get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
+        depth: Optional[int] = None,
+    ) -> Optional["MissingObjectFinder"]:
         """Yield the objects required for a list of commits.
 
         Args:
@@ -177,6 +191,7 @@ class BackendRepo(TypingProtocol):
           progress: is a callback to send progress messages to the client
           get_tagged: Function that returns a dict of pointed-to sha ->
             tag sha for including tags.
+          depth: Maximum depth of commits to fetch for shallow clones
         """
         raise NotImplementedError
 
@@ -184,7 +199,9 @@ class BackendRepo(TypingProtocol):
 class DictBackend(Backend):
     """Trivial backend that looks up Git repositories in a dictionary."""
 
-    def __init__(self, repos) -> None:
+    def __init__(
+        self, repos: Union[dict[bytes, "BackendRepo"], dict[str, "BackendRepo"]]
+    ) -> None:
         """Initialize a DictBackend.
 
         Args:
@@ -205,18 +222,32 @@ class DictBackend(Backend):
           NotGitRepository: If no repository found at path
         """
         logger.debug("Opening repository at %s", path)
-        try:
-            return self.repos[path]
-        except KeyError as exc:
-            raise NotGitRepository(
-                "No git repository was found at {path}".format(**dict(path=path))
-            ) from exc
+        # Handle both str and bytes keys for backward compatibility
+        if path in self.repos:
+            return self.repos[path]  # type: ignore
+
+        # Try converting between str and bytes
+        if isinstance(path, bytes):
+            try:
+                alt_path = path.decode("utf-8")
+                if alt_path in self.repos:
+                    return self.repos[alt_path]
+            except UnicodeDecodeError:
+                pass
+        else:
+            alt_path_bytes = path.encode("utf-8")
+            if alt_path_bytes in self.repos:
+                return self.repos[alt_path_bytes]  # type: ignore
+
+        raise NotGitRepository(
+            "No git repository was found at {path}".format(**dict(path=path))
+        )
 
 
 class FileSystemBackend(Backend):
     """Simple backend looking up Git repositories in the local file system."""
 
-    def __init__(self, root=os.sep) -> None:
+    def __init__(self, root: str = os.sep) -> None:
         """Initialize a FileSystemBackend.
 
         Args:
@@ -225,7 +256,7 @@ class FileSystemBackend(Backend):
         super().__init__()
         self.root = (os.path.abspath(root) + os.sep).replace(os.sep * 2, os.sep)
 
-    def open_repository(self, path):
+    def open_repository(self, path: str) -> BackendRepo:
         """Open a repository from the filesystem.
 
         Args:
@@ -244,13 +275,15 @@ class FileSystemBackend(Backend):
         normcase_root = os.path.normcase(self.root)
         if not normcase_abspath.startswith(normcase_root):
             raise NotGitRepository(f"Path {path!r} not inside root {self.root!r}")
-        return Repo(abspath)
+        return Repo(abspath)  # type: ignore[return-value]
 
 
 class Handler:
     """Smart protocol command handler base class."""
 
-    def __init__(self, backend, proto, stateless_rpc=False) -> None:
+    def __init__(
+        self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
+    ) -> None:
         """Initialize a Handler.
 
         Args:
@@ -270,7 +303,9 @@ class Handler:
 class PackHandler(Handler):
     """Protocol handler for packs."""
 
-    def __init__(self, backend, proto, stateless_rpc=False) -> None:
+    def __init__(
+        self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
+    ) -> None:
         """Initialize a PackHandler.
 
         Args:
@@ -282,6 +317,7 @@ class PackHandler(Handler):
         self._client_capabilities: Optional[set[bytes]] = None
         # Flags needed for the no-done capability
         self._done_received = False
+        self.advertise_refs = False
 
     @classmethod
     def capabilities(cls) -> Iterable[bytes]:
@@ -357,7 +393,12 @@ class UploadPackHandler(PackHandler):
     """Protocol handler for uploading a pack to the client."""
 
     def __init__(
-        self, backend, args, proto, stateless_rpc=False, advertise_refs=False
+        self,
+        backend: Backend,
+        args: list[str],
+        proto: Protocol,
+        stateless_rpc: bool = False,
+        advertise_refs: bool = False,
     ) -> None:
         """Initialize an UploadPackHandler.
 
@@ -378,7 +419,7 @@ class UploadPackHandler(PackHandler):
         self._processing_have_lines = False
 
     @classmethod
-    def capabilities(cls):
+    def capabilities(cls) -> list[bytes]:
         """Return the list of capabilities supported by upload-pack."""
         return [
             CAPABILITY_MULTI_ACK_DETAILED,
@@ -393,7 +434,7 @@ class UploadPackHandler(PackHandler):
         ]
 
     @classmethod
-    def required_capabilities(cls):
+    def required_capabilities(cls) -> tuple[bytes, ...]:
         """Return the list of capabilities required for upload-pack."""
         return (
             CAPABILITY_SIDE_BAND_64K,
@@ -418,13 +459,22 @@ class UploadPackHandler(PackHandler):
                     self.proto.write_sideband, SIDE_BAND_CHANNEL_PROGRESS
                 )
 
-            self.write_pack_data = partial(
+            self.write_pack_data: Callable[[bytes], None] = partial(
                 self.proto.write_sideband, SIDE_BAND_CHANNEL_DATA
             )
         else:
-            self.write_pack_data = self.proto.write
-
-    def get_tagged(self, refs=None, repo=None) -> dict[ObjectID, ObjectID]:
+            # proto.write returns Optional[int], but we need to treat it as returning None
+            # for compatibility with write_pack_from_container
+            def write_data(data: bytes) -> None:
+                self.proto.write(data)
+
+            self.write_pack_data = write_data
+
+    def get_tagged(
+        self,
+        refs: Optional[dict[bytes, bytes]] = None,
+        repo: Optional[BackendRepo] = None,
+    ) -> dict[ObjectID, ObjectID]:
         """Get a dict of peeled values of tags to their original tag shas.
 
         Args:
@@ -452,7 +502,7 @@ class UploadPackHandler(PackHandler):
         tagged = {}
         for name, sha in refs.items():
             peeled_sha = repo.get_peeled(name)
-            if peeled_sha != sha:
+            if peeled_sha is not None and peeled_sha != sha:
                 tagged[peeled_sha] = sha
         return tagged
 
@@ -475,8 +525,10 @@ class UploadPackHandler(PackHandler):
         )
         wants = []
 
-        def wants_wrapper(refs, **kwargs):
-            wants.extend(graph_walker.determine_wants(refs, **kwargs))
+        def wants_wrapper(
+            refs: dict[bytes, bytes], depth: Optional[int] = None
+        ) -> list[bytes]:
+            wants.extend(graph_walker.determine_wants(refs, depth))
             return wants
 
         missing_objects = self.repo.find_missing_objects(
@@ -510,13 +562,17 @@ class UploadPackHandler(PackHandler):
         self.progress((f"counting objects: {len(object_ids)}, done.\n").encode("ascii"))
 
         write_pack_from_container(
-            self.write_pack_data, self.repo.object_store, object_ids
+            self.write_pack_data,
+            self.repo.object_store,
+            object_ids,
         )
         # we are done
         self.proto.write_pkt_line(None)
 
 
-def _split_proto_line(line, allowed):
+def _split_proto_line(
+    line: Optional[bytes], allowed: Optional[Iterable[Optional[bytes]]]
+) -> tuple[Optional[bytes], Optional[Union[bytes, int]]]:
     """Split a line read from the wire.
 
     Args:
@@ -536,12 +592,12 @@ def _split_proto_line(line, allowed):
         allowed return values.
     """
     if not line:
-        fields = [None]
+        fields: list[Optional[bytes]] = [None]
     else:
-        fields = line.rstrip(b"\n").split(b" ", 1)
+        fields = list(line.rstrip(b"\n").split(b" ", 1))
     command = fields[0]
     if allowed is not None and command not in allowed:
-        raise UnexpectedCommandError(command)
+        raise UnexpectedCommandError(command.decode("utf-8") if command else None)
     if len(fields) == 1 and command in (COMMAND_DONE, None):
         return (command, None)
     elif len(fields) == 2:
@@ -554,14 +610,16 @@ def _split_proto_line(line, allowed):
             assert fields[1] is not None
             if not valid_hexsha(fields[1]):
                 raise GitProtocolError("Invalid sha")
-            return tuple(fields)
+            return (command, fields[1])
         elif command == COMMAND_DEEPEN:
             assert fields[1] is not None
             return command, int(fields[1])
     raise GitProtocolError(f"Received invalid line from client: {line!r}")
 
 
-def _want_satisfied(store: ObjectContainer, haves, want, earliest) -> bool:
+def _want_satisfied(
+    store: ObjectContainer, haves: set[bytes], want: bytes, earliest: int
+) -> bool:
     """Check if a specific want is satisfied by a set of haves.
 
     Args:
@@ -593,7 +651,9 @@ def _want_satisfied(store: ObjectContainer, haves, want, earliest) -> bool:
     return False
 
 
-def _all_wants_satisfied(store: ObjectContainer, haves, wants) -> bool:
+def _all_wants_satisfied(
+    store: ObjectContainer, haves: set[bytes], wants: set[bytes]
+) -> bool:
     """Check whether all the current wants are satisfied by a set of haves.
 
     Args:
@@ -619,7 +679,7 @@ def _all_wants_satisfied(store: ObjectContainer, haves, wants) -> bool:
 class AckGraphWalkerImpl:
     """Base class for acknowledgment graph walker implementations."""
 
-    def __init__(self, graph_walker):
+    def __init__(self, graph_walker: "_ProtocolGraphWalker") -> None:
         """Initialize acknowledgment graph walker.
 
         Args:
@@ -635,7 +695,7 @@ class AckGraphWalkerImpl:
         """
         raise NotImplementedError
 
-    def handle_done(self, done_required, done_received):
+    def handle_done(self, done_required: bool, done_received: bool) -> bool:
         """Handle 'done' packet from client."""
         raise NotImplementedError
 
@@ -655,7 +715,11 @@ class _ProtocolGraphWalker:
     """
 
     def __init__(
-        self, handler, object_store: ObjectContainer, get_peeled, get_symrefs
+        self,
+        handler: PackHandler,
+        object_store: ObjectContainer,
+        get_peeled: Callable[[bytes], Optional[bytes]],
+        get_symrefs: Callable[[], dict[bytes, bytes]],
     ) -> None:
         """Initialize a ProtocolGraphWalker.
 
@@ -681,7 +745,9 @@ class _ProtocolGraphWalker:
         self._cache_index = 0
         self._impl: Optional[AckGraphWalkerImpl] = None
 
-    def determine_wants(self, heads, depth=None):
+    def determine_wants(
+        self, heads: dict[bytes, bytes], depth: Optional[int] = None
+    ) -> list[bytes]:
         """Determine the wants for a set of heads.
 
         The given heads are advertised to the client, who then specifies which
@@ -717,13 +783,13 @@ class _ProtocolGraphWalker:
                     line = format_ref_line(
                         ref,
                         sha,
-                        self.handler.capabilities()
+                        list(self.handler.capabilities())
                         + symref_capabilities(symrefs.items()),
                     )
                 else:
                     line = format_ref_line(ref, sha)
                 self.proto.write_pkt_line(line)
-                if peeled_sha != sha:
+                if peeled_sha is not None and peeled_sha != sha:
                     self.proto.write_pkt_line(
                         format_ref_line(ref + PEELED_TAG_SUFFIX, peeled_sha)
                     )
@@ -742,18 +808,20 @@ class _ProtocolGraphWalker:
         self.handler.set_client_capabilities(caps)
         self.set_ack_type(ack_type(caps))
         allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, None)
-        command, sha = _split_proto_line(line, allowed)
+        command, sha_result = _split_proto_line(line, allowed)
 
         want_revs = []
         while command == COMMAND_WANT:
-            if sha not in values:
-                raise GitProtocolError(f"Client wants invalid object {sha}")
-            want_revs.append(sha)
-            command, sha = self.read_proto_line(allowed)
+            assert isinstance(sha_result, bytes)
+            if sha_result not in values:
+                raise GitProtocolError(f"Client wants invalid object {sha_result!r}")
+            want_revs.append(sha_result)
+            command, sha_result = self.read_proto_line(allowed)
 
         self.set_wants(want_revs)
         if command in (COMMAND_SHALLOW, COMMAND_DEEPEN):
-            self.unread_proto_line(command, sha)
+            assert sha_result is not None
+            self.unread_proto_line(command, sha_result)
             self._handle_shallow_request(want_revs)
 
         if self.stateless_rpc and self.proto.eof():
@@ -765,7 +833,7 @@ class _ProtocolGraphWalker:
 
         return want_revs
 
-    def unread_proto_line(self, command, value) -> None:
+    def unread_proto_line(self, command: bytes, value: Union[bytes, int]) -> None:
         """Push a command back to be read again.
 
         Args:
@@ -779,7 +847,7 @@ class _ProtocolGraphWalker:
     def nak(self) -> None:
         """Send a NAK response."""
 
-    def ack(self, have_ref):
+    def ack(self, have_ref: bytes) -> None:
         """Acknowledge a have reference.
 
         Args:
@@ -798,7 +866,7 @@ class _ProtocolGraphWalker:
         self._cached = True
         self._cache_index = 0
 
-    def next(self):
+    def next(self) -> Optional[bytes]:
         """Get the next SHA from the graph walker.
 
         Returns: Next SHA or None if done
@@ -807,7 +875,7 @@ class _ProtocolGraphWalker:
             if not self._impl and self.stateless_rpc:
                 return None
             assert self._impl is not None
-            return next(self._impl)  # type: ignore[call-overload]
+            return next(self._impl)  # type: ignore[call-overload, no-any-return]
         self._cache_index += 1
         if self._cache_index > len(self._cache):
             return None
@@ -815,7 +883,9 @@ class _ProtocolGraphWalker:
 
     __next__ = next
 
-    def read_proto_line(self, allowed):
+    def read_proto_line(
+        self, allowed: Optional[Iterable[Optional[bytes]]]
+    ) -> tuple[Optional[bytes], Optional[Union[bytes, int]]]:
         """Read a line from the wire.
 
         Args:
@@ -827,7 +897,7 @@ class _ProtocolGraphWalker:
         """
         return _split_proto_line(self.proto.read_pkt_line(), allowed)
 
-    def _handle_shallow_request(self, wants) -> None:
+    def _handle_shallow_request(self, wants: list[bytes]) -> None:
         """Handle shallow clone requests from the client.
 
         Args:
@@ -836,8 +906,10 @@ class _ProtocolGraphWalker:
         while True:
             command, val = self.read_proto_line((COMMAND_DEEPEN, COMMAND_SHALLOW))
             if command == COMMAND_DEEPEN:
+                assert isinstance(val, int)
                 depth = val
                 break
+            assert isinstance(val, bytes)
             self.client_shallow.add(val)
         self.read_proto_line((None,))  # consume client's flush-pkt
 
@@ -851,7 +923,7 @@ class _ProtocolGraphWalker:
 
         self.update_shallow(new_shallow, unshallow)
 
-    def update_shallow(self, new_shallow, unshallow):
+    def update_shallow(self, new_shallow: set[bytes], unshallow: set[bytes]) -> None:
         """Update shallow/unshallow information to the client.
 
         Args:
@@ -870,7 +942,7 @@ class _ProtocolGraphWalker:
         # relay the message down to the handler.
         self.handler.notify_done()
 
-    def send_ack(self, sha, ack_type=b"") -> None:
+    def send_ack(self, sha: bytes, ack_type: bytes = b"") -> None:
         """Send an ACK to the client.
 
         Args:
@@ -883,7 +955,7 @@ class _ProtocolGraphWalker:
         """Send a NAK to the client."""
         self.proto.write_pkt_line(NAK_LINE)
 
-    def handle_done(self, done_required, done_received):
+    def handle_done(self, done_required: bool, done_received: bool) -> bool:
         """Handle the 'done' command.
 
         Args:
@@ -895,7 +967,7 @@ class _ProtocolGraphWalker:
         assert self._impl is not None
         return self._impl.handle_done(done_required, done_received)
 
-    def set_wants(self, wants) -> None:
+    def set_wants(self, wants: list[bytes]) -> None:
         """Set the list of wanted objects.
 
         Args:
@@ -903,7 +975,7 @@ class _ProtocolGraphWalker:
         """
         self._wants = wants
 
-    def all_wants_satisfied(self, haves):
+    def all_wants_satisfied(self, haves: set[bytes]) -> bool:
         """Check whether all the current wants are satisfied by a set of haves.
 
         Args:
@@ -911,9 +983,9 @@ class _ProtocolGraphWalker:
         Note: Wants are specified with set_wants rather than passed in since
             in the current interface they are determined outside this class.
         """
-        return _all_wants_satisfied(self.store, haves, self._wants)
+        return _all_wants_satisfied(self.store, haves, set(self._wants))
 
-    def set_ack_type(self, ack_type) -> None:
+    def set_ack_type(self, ack_type: int) -> None:
         """Set the acknowledgment type for the graph walker.
 
         Args:
@@ -933,7 +1005,7 @@ _GRAPH_WALKER_COMMANDS = (COMMAND_HAVE, COMMAND_DONE, None)
 class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
     """Graph walker implementation that speaks the single-ack protocol."""
 
-    def __init__(self, walker) -> None:
+    def __init__(self, walker: "_ProtocolGraphWalker") -> None:
         """Initialize a SingleAckGraphWalkerImpl.
 
         Args:
@@ -942,7 +1014,7 @@ class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
         self.walker = walker
         self._common: list[bytes] = []
 
-    def ack(self, have_ref) -> None:
+    def ack(self, have_ref: bytes) -> None:
         """Acknowledge a have reference.
 
         Args:
@@ -952,7 +1024,7 @@ class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
             self.walker.send_ack(have_ref)
             self._common.append(have_ref)
 
-    def next(self):
+    def next(self) -> Optional[bytes]:
         """Get next SHA from graph walker.
 
         Returns:
@@ -964,11 +1036,13 @@ class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
             self.walker.notify_done()
             return None
         elif command == COMMAND_HAVE:
+            assert isinstance(sha, bytes)
             return sha
+        return None
 
     __next__ = next
 
-    def handle_done(self, done_required, done_received) -> bool:
+    def handle_done(self, done_required: bool, done_received: bool) -> bool:
         """Handle done command.
 
         Args:
@@ -1001,7 +1075,7 @@ class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
 class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
     """Graph walker implementation that speaks the multi-ack protocol."""
 
-    def __init__(self, walker) -> None:
+    def __init__(self, walker: "_ProtocolGraphWalker") -> None:
         """Initialize multi-ack graph walker.
 
         Args:
@@ -1011,7 +1085,7 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
         self._found_base = False
         self._common: list[bytes] = []
 
-    def ack(self, have_ref) -> None:
+    def ack(self, have_ref: bytes) -> None:
         """Acknowledge a have reference.
 
         Args:
@@ -1020,11 +1094,11 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
         self._common.append(have_ref)
         if not self._found_base:
             self.walker.send_ack(have_ref, b"continue")
-            if self.walker.all_wants_satisfied(self._common):
+            if self.walker.all_wants_satisfied(set(self._common)):
                 self._found_base = True
         # else we blind ack within next
 
-    def next(self):
+    def next(self) -> Optional[bytes]:
         """Get next SHA from graph walker.
 
         Returns:
@@ -1041,6 +1115,7 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
                 self.walker.notify_done()
                 return None
             elif command == COMMAND_HAVE:
+                assert isinstance(sha, bytes)
                 if self._found_base:
                     # blind ack
                     self.walker.send_ack(sha, b"continue")
@@ -1048,7 +1123,7 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
 
     __next__ = next
 
-    def handle_done(self, done_required, done_received) -> bool:
+    def handle_done(self, done_required: bool, done_received: bool) -> bool:
         """Handle done command.
 
         Args:
@@ -1084,7 +1159,7 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
 class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
     """Graph walker implementation speaking the multi-ack-detailed protocol."""
 
-    def __init__(self, walker) -> None:
+    def __init__(self, walker: "_ProtocolGraphWalker") -> None:
         """Initialize multi-ack-detailed graph walker.
 
         Args:
@@ -1093,7 +1168,7 @@ class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
         self.walker = walker
         self._common: list[bytes] = []
 
-    def ack(self, have_ref) -> None:
+    def ack(self, have_ref: bytes) -> None:
         """Acknowledge a have reference.
 
         Args:
@@ -1103,7 +1178,7 @@ class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
         self._common.append(have_ref)
         self.walker.send_ack(have_ref, b"common")
 
-    def next(self):
+    def next(self) -> Optional[bytes]:
         """Get next SHA from graph walker.
 
         Returns:
@@ -1112,7 +1187,7 @@ class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
         while True:
             command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
             if command is None:
-                if self.walker.all_wants_satisfied(self._common):
+                if self.walker.all_wants_satisfied(set(self._common)):
                     self.walker.send_ack(self._common[-1], b"ready")
                 self.walker.send_nak()
                 if self.walker.stateless_rpc:
@@ -1131,13 +1206,15 @@ class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
             elif command == COMMAND_HAVE:
                 # return the sha and let the caller ACK it with the
                 # above ack method.
+                assert isinstance(sha, bytes)
                 return sha
         # don't nak unless no common commits were found, even if not
         # everything is satisfied
+        return None
 
     __next__ = next
 
-    def handle_done(self, done_required, done_received) -> bool:
+    def handle_done(self, done_required: bool, done_received: bool) -> bool:
         """Handle done command.
 
         Args:
@@ -1174,7 +1251,12 @@ class ReceivePackHandler(PackHandler):
     """Protocol handler for downloading a pack from the client."""
 
     def __init__(
-        self, backend, args, proto, stateless_rpc=False, advertise_refs=False
+        self,
+        backend: Backend,
+        args: list[str],
+        proto: Protocol,
+        stateless_rpc: bool = False,
+        advertise_refs: bool = False,
     ) -> None:
         """Initialize receive-pack handler.
 
@@ -1237,7 +1319,7 @@ class ReceivePackHandler(PackHandler):
             # string
             try:
                 recv = getattr(self.proto, "recv", None)
-                self.repo.object_store.add_thin_pack(self.proto.read, recv)
+                self.repo.object_store.add_thin_pack(self.proto.read, recv)  # type: ignore[attr-defined]
                 yield (b"unpack", b"ok")
             except all_exceptions as e:
                 yield (b"unpack", str(e).replace("\n", "").encode("utf-8"))
@@ -1286,7 +1368,7 @@ class ReceivePackHandler(PackHandler):
                 self.proto.write_pkt_line(None)
 
         else:
-            write = self.proto.write_pkt_line
+            write = self.proto.write_pkt_line  # type: ignore[assignment]
 
             def flush() -> None:
                 pass
@@ -1301,13 +1383,13 @@ class ReceivePackHandler(PackHandler):
         write(None)  # type: ignore
         flush()
 
-    def _on_post_receive(self, client_refs) -> None:
+    def _on_post_receive(self, client_refs: dict[bytes, tuple[bytes, bytes]]) -> None:
         """Run post-receive hook.
 
         Args:
             client_refs: Dictionary of ref changes from client
         """
-        hook = self.repo.hooks.get("post-receive", None)
+        hook = self.repo.hooks.get("post-receive", None)  # type: ignore[attr-defined]
         if not hook:
             return
         try:
@@ -1342,25 +1424,25 @@ class ReceivePackHandler(PackHandler):
                 return
 
         client_refs = []
-        ref = self.proto.read_pkt_line()
+        ref_line = self.proto.read_pkt_line()
 
         # if ref is none then client doesn't want to send us anything..
-        if ref is None:
+        if ref_line is None:
             return
 
-        ref, caps = extract_capabilities(ref)
+        ref_line, caps = extract_capabilities(ref_line)
         self.set_client_capabilities(caps)
 
         # client will now send us a list of (oldsha, newsha, ref)
-        while ref:
-            (oldsha, newsha, ref) = ref.split()
-            client_refs.append((oldsha, newsha, ref))
-            ref = self.proto.read_pkt_line()
+        while ref_line:
+            (oldsha, newsha, ref_name) = ref_line.split()
+            client_refs.append((oldsha, newsha, ref_name))
+            ref_line = self.proto.read_pkt_line()
 
         # backend can now deal with this refs and read a pack using self.read
         status = list(self._apply_pack(client_refs))
 
-        self._on_post_receive(client_refs)
+        self._on_post_receive(client_refs)  # type: ignore[arg-type]
 
         # when we have read all the pack from the client, send a status report
         # if the client asked for it
@@ -1371,7 +1453,13 @@ class ReceivePackHandler(PackHandler):
 class UploadArchiveHandler(Handler):
     """Handler for git-upload-archive requests."""
 
-    def __init__(self, backend, args, proto, stateless_rpc=False) -> None:
+    def __init__(
+        self,
+        backend: Backend,
+        args: list[str],
+        proto: Protocol,
+        stateless_rpc: bool = False,
+    ) -> None:
         """Initialize upload-archive handler.
 
         Args:
@@ -1386,14 +1474,14 @@ class UploadArchiveHandler(Handler):
     def handle(self) -> None:
         """Handle upload-archive request."""
 
-        def write(x):
-            return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
+        def write(x: bytes) -> None:
+            self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
 
         arguments = []
         for pkt in self.proto.read_pkt_seq():
             (key, value) = pkt.split(b" ", 1)
             if key != b"argument":
-                raise GitProtocolError(f"unknown command {key}")
+                raise GitProtocolError(f"unknown command {key!r}")
             arguments.append(value.rstrip(b"\n"))
         prefix = b""
         format = "tar"
@@ -1422,7 +1510,7 @@ class UploadArchiveHandler(Handler):
             tree,
             mtime=int(time.time()),
             prefix=prefix,
-            format=format,  # type: ignore
+            format=format,
         ):
             write(chunk)
         self.proto.write_pkt_line(None)
@@ -1439,16 +1527,25 @@ DEFAULT_HANDLERS = {
 class TCPGitRequestHandler(socketserver.StreamRequestHandler):
     """TCP request handler for git protocol."""
 
-    def __init__(self, handlers, *args, **kwargs) -> None:
+    def __init__(
+        self,
+        handlers: dict[bytes, type[Handler]],
+        request: socket.socket,
+        client_address: tuple[str, int],
+        server: socketserver.TCPServer,
+    ) -> None:
         """Initialize TCP request handler.
 
         Args:
             handlers: Dictionary mapping commands to handler classes
-            *args: Additional arguments for StreamRequestHandler
-            **kwargs: Additional keyword arguments for StreamRequestHandler
+            request: Request socket
+            client_address: Client address tuple
+            server: Server instance
         """
         self.handlers = handlers
-        socketserver.StreamRequestHandler.__init__(self, *args, **kwargs)
+        socketserver.StreamRequestHandler.__init__(
+            self, request, client_address, server
+        )
 
     def handle(self) -> None:
         """Handle TCP git request."""
@@ -1469,19 +1566,31 @@ class TCPGitServer(socketserver.TCPServer):
     allow_reuse_address = True
     serve = socketserver.TCPServer.serve_forever
 
-    def _make_handler(self, *args, **kwargs):
+    def _make_handler(
+        self,
+        request: socket.socket,
+        client_address: tuple[str, int],
+        server: socketserver.TCPServer,
+    ) -> TCPGitRequestHandler:
         """Create request handler instance.
 
         Args:
-            *args: Handler arguments
-            **kwargs: Handler keyword arguments
+            request: Request socket
+            client_address: Client address tuple
+            server: Server instance
 
         Returns:
             TCPGitRequestHandler instance
         """
-        return TCPGitRequestHandler(self.handlers, *args, **kwargs)
+        return TCPGitRequestHandler(self.handlers, request, client_address, server)
 
-    def __init__(self, backend, listen_addr, port=TCP_GIT_PORT, handlers=None) -> None:
+    def __init__(
+        self,
+        backend: Backend,
+        listen_addr: str,
+        port: int = TCP_GIT_PORT,
+        handlers: Optional[dict[bytes, type[Handler]]] = None,
+    ) -> None:
         """Initialize TCP git server.
 
         Args:
@@ -1497,7 +1606,11 @@ class TCPGitServer(socketserver.TCPServer):
         logger.info("Listening for TCP connections on %s:%d", listen_addr, port)
         socketserver.TCPServer.__init__(self, (listen_addr, port), self._make_handler)
 
-    def verify_request(self, request, client_address) -> bool:
+    def verify_request(
+        self,
+        request: Union[socket.socket, tuple[bytes, socket.socket]],
+        client_address: Union[tuple[str, int], socket.socket],
+    ) -> bool:
         """Verify incoming request.
 
         Args:
@@ -1510,7 +1623,11 @@ class TCPGitServer(socketserver.TCPServer):
         logger.info("Handling request from %s", client_address)
         return True
 
-    def handle_error(self, request, client_address) -> None:
+    def handle_error(
+        self,
+        request: Union[socket.socket, tuple[bytes, socket.socket]],
+        client_address: Union[tuple[str, int], socket.socket],
+    ) -> None:
         """Handle request processing errors.
 
         Args:
@@ -1523,7 +1640,7 @@ class TCPGitServer(socketserver.TCPServer):
         )
 
 
-def main(argv=sys.argv) -> None:
+def main(argv: list[str] = sys.argv) -> None:
     """Entry point for starting a TCP git server."""
     import optparse
 
@@ -1557,7 +1674,11 @@ def main(argv=sys.argv) -> None:
 
 
 def serve_command(
-    handler_cls, argv=sys.argv, backend=None, inf=sys.stdin, outf=sys.stdout
+    handler_cls: type[Handler],
+    argv: list[str] = sys.argv,
+    backend: Optional[Backend] = None,
+    inf: IO[bytes] = sys.stdin.buffer,
+    outf: IO[bytes] = sys.stdout.buffer,
 ) -> int:
     """Serve a single command.
 
@@ -1575,30 +1696,30 @@ def serve_command(
     if backend is None:
         backend = FileSystemBackend()
 
-    def send_fn(data) -> None:
+    def send_fn(data: bytes) -> None:
         outf.write(data)
         outf.flush()
 
     proto = Protocol(inf.read, send_fn)
-    handler = handler_cls(backend, argv[1:], proto)
+    handler = handler_cls(backend, argv[1:], proto)  # type: ignore[arg-type]
     # FIXME: Catch exceptions and write a single-line summary to outf.
     handler.handle()
     return 0
 
 
-def generate_info_refs(repo):
+def generate_info_refs(repo: "BaseRepo") -> Iterator[bytes]:
     """Generate an info refs file."""
     refs = repo.get_refs()
     return write_info_refs(refs, repo.object_store)
 
 
-def generate_objects_info_packs(repo):
+def generate_objects_info_packs(repo: "BaseRepo") -> Iterator[bytes]:
     """Generate an index for for packs."""
     for pack in repo.object_store.packs:
         yield (b"P " + os.fsencode(pack.data.filename) + b"\n")
 
 
-def update_server_info(repo) -> None:
+def update_server_info(repo: "BaseRepo") -> None:
     """Generate server info for dumb file access.
 
     This generates info/refs and objects/info/packs,
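
The server.py hunk similarly pins down signatures for Backend, DictBackend and TCPGitServer. A hedged wiring sketch, assuming only the constructor signatures shown above; the address, port and repository mapping are illustrative.

    from dulwich.repo import MemoryRepo
    from dulwich.server import DictBackend, TCPGitServer

    # DictBackend now accepts either str or bytes keys for repository paths.
    repo = MemoryRepo.init_bare([], {})
    backend = DictBackend({b"/": repo})

    # TCPGitServer(backend, listen_addr, port=..., handlers=...)
    server = TCPGitServer(backend, "127.0.0.1", port=9418)
    # server.serve()  # blocks; answers upload-pack/receive-pack over git://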

+ 2 - 6
dulwich/sparse_patterns.py

@@ -164,12 +164,8 @@ def apply_included_paths(
             blob_obj = repo.object_store[index_entry.sha]
         except KeyError:
             return True
-        # Create a temporary blob for normalization
-        temp_blob = Blob()
-        temp_blob.data = disk_data
-        norm_blob = normalizer.checkin_normalize(
-            temp_blob, os.path.relpath(full_path, repo.path).encode()
-        )
+        disk_blob = Blob.from_string(disk_data)
+        norm_blob = normalizer.checkin_normalize(disk_blob, full_path.encode("utf-8"))
         norm_data = norm_blob.data
         if not isinstance(blob_obj, Blob):
             return True
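
The sparse_patterns.py change replaces a hand-built temporary Blob with Blob.from_string(). The two spellings produce identical objects, which a short check (with arbitrary contents) makes explicit.

    from dulwich.objects import Blob

    manual = Blob()
    manual.data = b"hello\n"
    direct = Blob.from_string(b"hello\n")
    assert manual.id == direct.id  # same SHA; from_string() is just shorter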

+ 42 - 24
dulwich/stash.py

@@ -34,7 +34,6 @@ from .index import (
     commit_tree,
     index_entry_from_stat,
     iter_fresh_objects,
-    iter_tree_contents,
     symlink,
     update_working_tree,
     validate_path,
@@ -42,7 +41,8 @@ from .index import (
     validate_path_element_hfs,
     validate_path_element_ntfs,
 )
-from .objects import S_IFGITLINK, Blob, Commit, ObjectID
+from .object_store import iter_tree_contents
+from .objects import S_IFGITLINK, Blob, Commit, ObjectID, TreeEntry
 from .reflog import drop_reflog_entry, read_reflog
 from .refs import Ref
 
@@ -162,9 +162,9 @@ class Stash:
             symlink_fn = symlink
         else:
 
-            def symlink_fn(
-                src: Union[str, bytes, os.PathLike],
-                dst: Union[str, bytes, os.PathLike],
+            def symlink_fn(  # type: ignore[misc,unused-ignore]
+                src: Union[str, bytes],
+                dst: Union[str, bytes],
                 target_is_directory: bool = False,
                 *,
                 dir_fd: Optional[int] = None,
@@ -191,26 +191,40 @@ class Stash:
             index_tree_id = index_commit.tree
 
             # Update index entries from the stashed index tree
-            for entry in iter_tree_contents(self._repo.object_store, index_tree_id):
-                if not validate_path(entry.path, validate_path_element):
+            tree_entry: TreeEntry
+            for tree_entry in iter_tree_contents(
+                self._repo.object_store, index_tree_id
+            ):
+                assert (
+                    tree_entry.path is not None
+                    and tree_entry.mode is not None
+                    and tree_entry.sha is not None
+                )
+                if not validate_path(tree_entry.path, validate_path_element):
                     continue
 
                 # Add to index with stage 0 (normal)
                 # Get file stats for the entry
-                full_path = _tree_to_fs_path(repo_path, entry.path)
+                full_path = _tree_to_fs_path(repo_path, tree_entry.path)
                 try:
                     st = os.lstat(full_path)
                 except FileNotFoundError:
                     # File doesn't exist yet, use dummy stats
-                    st = os.stat_result((entry.mode, 0, 0, 0, 0, 0, 0, 0, 0, 0))
-                repo_index[entry.path] = index_entry_from_stat(st, entry.sha)
+                    st = os.stat_result((tree_entry.mode, 0, 0, 0, 0, 0, 0, 0, 0, 0))
+                repo_index[tree_entry.path] = index_entry_from_stat(st, tree_entry.sha)
 
         # Apply working tree changes from the stash
-        for entry in iter_tree_contents(self._repo.object_store, stash_tree_id):
-            if not validate_path(entry.path, validate_path_element):
+        tree_entry2: TreeEntry
+        for tree_entry2 in iter_tree_contents(self._repo.object_store, stash_tree_id):
+            assert (
+                tree_entry2.path is not None
+                and tree_entry2.mode is not None
+                and tree_entry2.sha is not None
+            )
+            if not validate_path(tree_entry2.path, validate_path_element):
                 continue
 
-            full_path = _tree_to_fs_path(repo_path, entry.path)
+            full_path = _tree_to_fs_path(repo_path, tree_entry2.path)
 
             # Create parent directories if needed
             parent_dir = os.path.dirname(full_path)
@@ -218,39 +232,43 @@ class Stash:
                 os.makedirs(parent_dir)
 
             # Write the file
-            if entry.mode == S_IFGITLINK:
+            if tree_entry2.mode == S_IFGITLINK:
                 # Submodule - just create directory
                 if not os.path.isdir(full_path):
                     os.mkdir(full_path)
                 st = os.lstat(full_path)
             else:
-                obj = self._repo.object_store[entry.sha]
+                obj = self._repo.object_store[tree_entry2.sha]
                 assert isinstance(obj, Blob)
                 # Apply blob normalization for checkout if normalizer is provided
                 if blob_normalizer is not None:
-                    obj = blob_normalizer.checkout_normalize(obj, entry.path)
+                    obj = blob_normalizer.checkout_normalize(obj, tree_entry2.path)
                 st = build_file_from_blob(
                     obj,
-                    entry.mode,
+                    tree_entry2.mode,
                     full_path,
                     honor_filemode=honor_filemode,
-                    symlink_fn=symlink_fn,  # type: ignore[arg-type]
+                    symlink_fn=symlink_fn,  # type: ignore[arg-type,unused-ignore]
                 )
 
             # Update index if the file wasn't already staged
-            if entry.path not in repo_index:
+            if tree_entry2.path not in repo_index:
                 # Update with file stats from disk
-                repo_index[entry.path] = index_entry_from_stat(st, entry.sha)
+                repo_index[tree_entry2.path] = index_entry_from_stat(
+                    st, tree_entry2.sha
+                )
             else:
-                existing_entry = repo_index[entry.path]
+                existing_entry = repo_index[tree_entry2.path]
 
                 if (
                     isinstance(existing_entry, IndexEntry)
-                    and existing_entry.mode == entry.mode
-                    and existing_entry.sha == entry.sha
+                    and existing_entry.mode == tree_entry2.mode
+                    and existing_entry.sha == tree_entry2.sha
                 ):
                     # Update with file stats from disk
-                    repo_index[entry.path] = index_entry_from_stat(st, entry.sha)
+                    repo_index[tree_entry2.path] = index_entry_from_stat(
+                        st, tree_entry2.sha
+                    )
 
         # Write the updated index
         repo_index.write()
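
The stash.py hunk is the restore path exercised when a stash entry is applied. A usage sketch, assuming an existing working-tree repository at an illustrative path and that Stash.push()/pop() keep their current signatures.

    from dulwich.repo import Repo
    from dulwich.stash import Stash

    with Repo("path/to/repo") as repo:     # hypothetical path
        stash = Stash.from_repo(repo)
        stash.push(message=b"WIP")         # record index and working tree
        print(len(stash))                  # number of stash entries
        stash.pop(0)                       # runs the restore logic shown above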

+ 5 - 2
dulwich/submodule.py

@@ -29,13 +29,13 @@ from .object_store import iter_tree_contents
 from .objects import S_ISGITLINK
 
 if TYPE_CHECKING:
-    from .object_store import ObjectContainer
+    from .pack import ObjectContainer
     from .repo import Repo
 
 
 def iter_cached_submodules(
     store: "ObjectContainer", root_tree_id: bytes
-) -> Iterator[tuple[str, bytes]]:
+) -> Iterator[tuple[bytes, bytes]]:
     """Iterate over cached submodules.
 
     Args:
@@ -46,7 +46,10 @@ def iter_cached_submodules(
       Iterator over (path, sha) tuples
     """
     for entry in iter_tree_contents(store, root_tree_id):
+        assert entry.mode is not None
         if S_ISGITLINK(entry.mode):
+            assert entry.path is not None
+            assert entry.sha is not None
             yield entry.path, entry.sha
 
 

+ 27 - 14
dulwich/tests/test_object_store.py

@@ -21,6 +21,7 @@
 
 """Tests for the object store interface."""
 
+from collections.abc import Iterator, Sequence
 from typing import TYPE_CHECKING, Any, Callable
 from unittest import TestCase
 from unittest.mock import patch
@@ -36,6 +37,9 @@ from dulwich.object_store import (
 )
 from dulwich.objects import (
     Blob,
+    Commit,
+    ShaFile,
+    Tag,
     Tree,
     TreeEntry,
 )
@@ -244,7 +248,7 @@ class ObjectStoreTests:
         actual = iter_tree_contents(self.store, tree_id, include_trees=True)
         self.assertEqual(expected, list(actual))
 
-    def make_tag(self, name, obj):
+    def make_tag(self, name: bytes, obj: ShaFile) -> Tag:
         """Helper to create and add a tag object."""
         tag = make_tag(obj, name=name)
         self.store.add_object(tag)
@@ -431,12 +435,12 @@ class PackBasedObjectStoreTests(ObjectStoreTests):
 class CommitTestHelper:
     """Helper for tests which iterate over commits."""
 
-    def setUp(self):
+    def setUp(self) -> None:
         """Set up test fixture."""
-        super().setUp()
+        super().setUp()  # type: ignore[misc]
         self._store = MemoryObjectStore()
 
-    def make_commit(self, **attrs):
+    def make_commit(self, **attrs: Any) -> Commit:  # noqa: ANN401
         """Helper to create and store a commit."""
         commit = make_commit(**attrs)
         self._store.add_object(commit)
@@ -446,7 +450,7 @@ class CommitTestHelper:
 class IterCommitContentsTests(CommitTestHelper, TestCase):
     """Tests for iter_commit_contents."""
 
-    def make_commits_with_contents(self):
+    def make_commits_with_contents(self) -> Commit:
         """Helper to prepare test commits."""
         files = [
             # (path, contents)
@@ -469,9 +473,16 @@ class IterCommitContentsTests(CommitTestHelper, TestCase):
 
         return commit
 
-    def assertCommitEntries(self, results, expected):
+    def assertCommitEntries(
+        self, results: Iterator[TreeEntry], expected: list[tuple[bytes, bytes]]
+    ) -> None:
         """Assert that iter_commit_contents results are equal to expected."""
-        actual = [(entry.path, self._store[entry.sha].data) for entry in results]
+        actual = []
+        for entry in results:
+            assert entry.sha is not None
+            obj = self._store[entry.sha]
+            assert isinstance(obj, Blob)
+            actual.append((entry.path, obj.data))
         self.assertEqual(actual, expected)
 
     def test_iter_commit_contents_no_includes(self) -> None:
@@ -585,7 +596,7 @@ class IterCommitContentsTests(CommitTestHelper, TestCase):
 class FindShallowTests(CommitTestHelper, TestCase):
     """Tests for finding shallow commits."""
 
-    def make_linear_commits(self, n, message=b""):
+    def make_linear_commits(self, n: int, message: bytes = b"") -> list[Commit]:
         """Create a linear chain of commits."""
         commits = []
         parents: list[bytes] = []
@@ -594,11 +605,13 @@ class FindShallowTests(CommitTestHelper, TestCase):
             parents = [commits[-1].id]
         return commits
 
-    def assertSameElements(self, expected, actual):
+    def assertSameElements(
+        self, expected: Sequence[object], actual: Sequence[object]
+    ) -> None:
         """Assert that two sequences contain the same elements."""
         self.assertEqual(set(expected), set(actual))
 
-    def test_linear(self):
+    def test_linear(self) -> None:
         """Test finding shallow commits in a linear history."""
         c1, c2, c3 = self.make_linear_commits(3)
 
@@ -616,7 +629,7 @@ class FindShallowTests(CommitTestHelper, TestCase):
             find_shallow(self._store, [c3.id], 4),
         )
 
-    def test_multiple_independent(self):
+    def test_multiple_independent(self) -> None:
         """Test finding shallow commits with multiple independent branches."""
         a = self.make_linear_commits(2, message=b"a")
         b = self.make_linear_commits(2, message=b"b")
@@ -628,7 +641,7 @@ class FindShallowTests(CommitTestHelper, TestCase):
             find_shallow(self._store, heads, 2),
         )
 
-    def test_multiple_overlapping(self):
+    def test_multiple_overlapping(self) -> None:
         """Test finding shallow commits with overlapping branches."""
         # Create the following commit tree:
         # 1--2
@@ -644,7 +657,7 @@ class FindShallowTests(CommitTestHelper, TestCase):
             find_shallow(self._store, [c2.id, c4.id], 3),
         )
 
-    def test_merge(self):
+    def test_merge(self) -> None:
         """Test finding shallow commits with merge commits."""
         c1 = self.make_commit()
         c2 = self.make_commit()
@@ -655,7 +668,7 @@ class FindShallowTests(CommitTestHelper, TestCase):
             find_shallow(self._store, [c3.id], 2),
         )
 
-    def test_tag(self):
+    def test_tag(self) -> None:
         """Test finding shallow commits with tags."""
         c1, c2 = self.make_linear_commits(2)
         tag = make_tag(c2, name=b"tag")
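
These annotations do not change what find_shallow() returns: a (shallow, not_shallow) pair of SHA sets. A standalone sketch mirroring the linear-history case above, using the same test helpers.

    from dulwich.object_store import MemoryObjectStore, find_shallow
    from dulwich.tests.utils import make_commit

    store = MemoryObjectStore()
    commits, parents = [], []
    for _ in range(3):
        commit = make_commit(parents=parents)   # linear chain c1 <- c2 <- c3
        store.add_object(commit)
        commits.append(commit)
        parents = [commit.id]

    c1, c2, c3 = commits
    shallow, not_shallow = find_shallow(store, [c3.id], 2)
    assert shallow == {c2.id}
    assert not_shallow == {c3.id}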

+ 9 - 9
dulwich/tests/utils.py

@@ -108,7 +108,7 @@ def make_object(cls: type[T], **attrs: Any) -> T:
 
     TestObject.__name__ = "TestObject_" + cls.__name__
 
-    obj = TestObject()  # type: ignore[abstract]
+    obj = TestObject()
     for name, value in attrs.items():
         if name == "id":
             # id property is read-only, so we overwrite sha instead.
@@ -233,7 +233,7 @@ def build_pack(
     """
     sf = SHA1Writer(f)
     num_objects = len(objects_spec)
-    write_pack_header(sf.write, num_objects)
+    write_pack_header(sf, num_objects)
 
     full_objects: dict[int, tuple[int, bytes, bytes]] = {}
     offsets: dict[int, int] = {}
@@ -242,7 +242,7 @@ def build_pack(
     while len(full_objects) < num_objects:
         for i, (type_num, data) in enumerate(objects_spec):
             if type_num not in DELTA_TYPES:
-                full_objects[i] = (type_num, data, obj_sha(type_num, [data]))  # type: ignore[no-untyped-call]
+                full_objects[i] = (type_num, data, obj_sha(type_num, [data]))
                 continue
             base, data = data
             if isinstance(base, int):
@@ -255,7 +255,7 @@ def build_pack(
             full_objects[i] = (
                 base_type_num,
                 data,
-                obj_sha(base_type_num, [data]),  # type: ignore[no-untyped-call]
+                obj_sha(base_type_num, [data]),
             )
 
     for i, (type_num, obj) in enumerate(objects_spec):
@@ -264,7 +264,7 @@ def build_pack(
             base_index, data = obj
             base = offset - offsets[base_index]
             _, base_data, _ = full_objects[base_index]
-            obj = (base, list(create_delta(base_data, data)))  # type: ignore[no-untyped-call]
+            obj = (base, list(create_delta(base_data, data)))
         elif type_num == REF_DELTA:
             base_ref, data = obj
             if isinstance(base_ref, int):
@@ -272,10 +272,10 @@ def build_pack(
             else:
                 assert store is not None
                 base_type_num, base_data = store.get_raw(base_ref)
-                base = obj_sha(base_type_num, base_data)  # type: ignore[no-untyped-call]
-            obj = (base, list(create_delta(base_data, data)))  # type: ignore[no-untyped-call]
+                base = obj_sha(base_type_num, base_data)
+            obj = (base, list(create_delta(base_data, data)))
 
-        crc32 = write_pack_object(sf.write, type_num, obj)  # type: ignore[no-untyped-call]
+        crc32 = write_pack_object(sf.write, type_num, obj)
         offsets[i] = offset
         crc32s[i] = crc32
 
@@ -285,7 +285,7 @@ def build_pack(
         assert len(sha) == 20
         expected.append((offsets[i], type_num, data, sha, crc32s[i]))
 
-    sf.write_sha()  # type: ignore[no-untyped-call]
+    sf.write_sha()
     f.seek(0)
     return expected
 

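A note on the dulwich/tests/utils.py hunks above: they only drop stale type-ignore comments and pass the SHA1Writer itself to write_pack_header; build_pack keeps its behaviour and return shape. A minimal sketch of how the helper is normally driven, with illustrative object specs (not taken from this diff):

    from io import BytesIO

    from dulwich.objects import Blob
    from dulwich.pack import OFS_DELTA
    from dulwich.tests.utils import build_pack

    f = BytesIO()
    # Each spec entry is (type_num, data); delta entries point at an earlier index.
    entries = build_pack(
        f,
        [
            (Blob.type_num, b"hello world"),
            (OFS_DELTA, (0, b"hello there")),  # delta against entry 0
        ],
    )
    # build_pack returns one (offset, type_num, data, sha, crc32) tuple per object
    # and leaves the finished pack in f, seeked back to the start.
    for offset, type_num, data, sha, crc32 in entries:
        print(offset, type_num, sha.hex())
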
+ 20 - 19
dulwich/walk.py

@@ -364,7 +364,8 @@ class Walker:
         new_path = change.new.path if change.new is not None else None
         if self._path_matches(new_path):
             if self.follow and change.type in RENAME_CHANGE_TYPES:
-                assert old_path is not None and new_path is not None
+                assert old_path is not None
+                assert new_path is not None
                 self.paths.add(old_path)
                 self.paths.remove(new_path)
             return True
@@ -399,33 +400,33 @@ class Walker:
                 # For merge commits, only include changes with conflicts for
                 # this path. Since a rename conflict may include different
                 # old.paths, we have to check all of them.
-                if isinstance(path_changes, list):
-                    for change in path_changes:
-                        if change is not None and self._change_matches(change):
-                            return True
-                elif path_changes is not None:
-                    if self._change_matches(path_changes):
+                assert isinstance(path_changes, list)
+                for change in path_changes:
+                    from .diff_tree import TreeChange
+
+                    assert isinstance(change, TreeChange)
+                    if self._change_matches(change):
                         return True
         else:
             changes = entry.changes()
+            from .diff_tree import TreeChange
+
             # Handle both list[TreeChange] and list[list[TreeChange]]
             if changes and isinstance(changes[0], list):
                 # It's list[list[TreeChange]], flatten it
                 for change_list in changes:
-                    if isinstance(change_list, list):
-                        for item in change_list:
-                            if item is not None and self._change_matches(item):
-                                return True
+                    assert isinstance(change_list, list)
+                    for change in change_list:
+                        assert isinstance(change, TreeChange)
+                        if self._change_matches(change):
+                            return True
             else:
                 # It's list[TreeChange]
-                from .diff_tree import TreeChange
-
-                if isinstance(changes, list):
-                    for entry_item in changes:
-                        if isinstance(entry_item, TreeChange) and self._change_matches(
-                            entry_item
-                        ):
-                            return True
+                assert isinstance(changes, list)
+                for item in changes:
+                    assert isinstance(item, TreeChange)
+                    if self._change_matches(item):
+                        return True
         return None
 
     def _next(self) -> Optional[WalkEntry]:

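The walk.py hunk narrows the change-matching code with asserts so mypy sees plain TreeChange values both in the flat list and in the nested per-merge lists. A short sketch of the caller-facing behaviour this guards, assuming a local repository whose history touches a docs/ path (repository location and path are placeholders):

    from dulwich.repo import Repo
    from dulwich.walk import Walker

    with Repo("/path/to/repo") as repo:  # hypothetical path
        walker = Walker(
            repo.object_store,
            [repo.head()],
            paths=[b"docs"],  # only yield commits touching docs/
            follow=True,      # keep following the path across renames
        )
        for entry in walker:
            # entry.changes() is list[TreeChange], or list[list[TreeChange]]
            # for merge commits; _change_matches now asserts both shapes.
            print(entry.commit.id, len(entry.changes()))
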
+ 93 - 15
dulwich/web.py

@@ -22,6 +22,31 @@
 
 """HTTP server for dulwich that implements the git smart HTTP protocol."""
 
+__all__ = [
+    "HTTP_FORBIDDEN",
+    "HTTP_NOT_FOUND",
+    "HTTP_OK",
+    "GunzipFilter",
+    "HTTPGitApplication",
+    "HTTPGitRequest",
+    "LimitedInputFilter",
+    "WSGIRequestHandlerLogger",
+    "WSGIServerLogger",
+    "date_time_string",
+    "generate_info_refs",
+    "generate_objects_info_packs",
+    "get_info_packs",
+    "get_info_refs",
+    "get_loose_object",
+    "get_pack_file",
+    "get_text_file",
+    "handle_service_request",
+    "main",
+    "make_server",
+    "make_wsgi_chain",
+    "send_file",
+]
+
 import os
 import re
 import sys
@@ -29,7 +54,16 @@ import time
 from collections.abc import Iterable, Iterator
 from io import BytesIO
 from types import TracebackType
-from typing import Any, BinaryIO, Callable, ClassVar, Optional, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    BinaryIO,
+    Callable,
+    ClassVar,
+    Optional,
+    Union,
+    cast,
+)
 from urllib.parse import parse_qs
 from wsgiref.simple_server import (
     ServerHandler,
@@ -43,17 +77,15 @@ if sys.version_info >= (3, 11):
     from wsgiref.types import StartResponse, WSGIApplication, WSGIEnvironment
 else:
     # Fallback type definitions for Python < 3.11
-    from typing import TYPE_CHECKING
-
     if TYPE_CHECKING:
         # For type checking, use the _typeshed types if available
         try:
             from _typeshed.wsgi import StartResponse, WSGIApplication, WSGIEnvironment
         except ImportError:
             # Define our own protocol types for type checking
-            from typing import Protocol
+            from typing import Protocol as TypingProtocol
 
-            class StartResponse(Protocol):  # type: ignore[no-redef]
+            class StartResponse(TypingProtocol):  # type: ignore[no-redef]
                 """WSGI start_response callable protocol."""
 
                 def __call__(
@@ -79,8 +111,9 @@ else:
 
 from dulwich import log_utils
 
+from .errors import NotGitRepository
 from .protocol import ReceivableProtocol
-from .repo import BaseRepo, NotGitRepository, Repo
+from .repo import BaseRepo, Repo
 from .server import (
     DEFAULT_HANDLERS,
     Backend,
@@ -90,6 +123,37 @@ from .server import (
     generate_objects_info_packs,
 )
 
+if TYPE_CHECKING:
+    from typing import Protocol as TypingProtocol
+
+    from .protocol import Protocol
+
+    class HandlerConstructor(TypingProtocol):
+        """Protocol for handler constructors."""
+
+        def __call__(
+            self,
+            backend: Backend,
+            args: list[bytes],
+            proto: Protocol,
+            stateless_rpc: bool = False,
+            advertise_refs: bool = False,
+        ) -> Handler:
+            """Create a handler instance.
+
+            Args:
+                backend: The backend to use for the handler
+                args: Arguments for the handler
+                proto: Protocol object for communication
+                stateless_rpc: Whether to use stateless RPC mode
+                advertise_refs: Whether to advertise references
+
+            Returns:
+                A Handler instance
+            """
+            ...
+
+
 logger = log_utils.getLogger(__name__)
 
 
@@ -348,13 +412,16 @@ def get_info_refs(
             return len(data) if result is not None else None
 
         proto = ReceivableProtocol(BytesIO().read, write_fn)
+        from typing import Any, cast
+
         handler = handler_cls(
             backend,
-            [url_prefix(mat)],
+            cast(Any, [url_prefix(mat)]),  # handler_cls could expect bytes or str
             proto,
             stateless_rpc=True,
             advertise_refs=True,
         )
+        assert handler is not None
         handler.proto.write_pkt_line(b"# service=" + service.encode("ascii") + b"\n")
         handler.proto.write_pkt_line(None)
         handler.handle()
@@ -500,7 +567,10 @@ def handle_service_request(
     proto = ReceivableProtocol(read, write_fn)
     # TODO(jelmer): Find a way to pass in repo, rather than having handler_cls
     # reopen.
-    handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
+    handler = handler_cls(
+        backend, [url_prefix(mat).encode("utf-8")], proto, stateless_rpc=True
+    )
+    assert handler is not None
     handler.handle()
 
 
@@ -516,7 +586,9 @@ class HTTPGitRequest:
         environ: WSGIEnvironment,
         start_response: StartResponse,
         dumb: bool = False,
-        handlers: Optional[dict[bytes, Callable]] = None,
+        handlers: Optional[
+            dict[bytes, Union["HandlerConstructor", Callable[..., Any]]]
+        ] = None,
     ) -> None:
         """Initialize HTTPGitRequest.
 
@@ -591,8 +663,8 @@ class HTTPGitApplication:
 
     services: ClassVar[
         dict[
-            tuple[str, re.Pattern],
-            Callable[[HTTPGitRequest, Backend, re.Match], Iterator[bytes]],
+            tuple[str, re.Pattern[str]],
+            Callable[[HTTPGitRequest, Backend, re.Match[str]], Iterator[bytes]],
         ]
     ] = {
         ("GET", re.compile("/HEAD$")): get_text_file,
@@ -620,7 +692,9 @@ class HTTPGitApplication:
         self,
         backend: Backend,
         dumb: bool = False,
-        handlers: Optional[dict[bytes, Callable]] = None,
+        handlers: Optional[
+            dict[bytes, Union["HandlerConstructor", Callable[..., Any]]]
+        ] = None,
         fallback_app: Optional[WSGIApplication] = None,
     ) -> None:
         """Initialize HTTPGitApplication.
@@ -633,8 +707,8 @@ class HTTPGitApplication:
         """
         self.backend = backend
         self.dumb = dumb
-        self.handlers: dict[bytes, Union[type[Handler], Callable[..., Any]]] = dict(
-            DEFAULT_HANDLERS
+        self.handlers: dict[bytes, Union[HandlerConstructor, Callable[..., Any]]] = (
+            dict(DEFAULT_HANDLERS)
         )
         self.fallback_app = fallback_app
         if handlers is not None:
@@ -843,7 +917,11 @@ def main(argv: list[str] = sys.argv) -> None:
         gitdir = os.getcwd()
 
     log_utils.default_logging_config()
-    backend = DictBackend({"/": Repo(gitdir)})
+    from typing import cast
+
+    from dulwich.server import BackendRepo
+
+    backend = DictBackend({"/": cast(BackendRepo, Repo(gitdir))})
     app = make_wsgi_chain(backend)
     server = make_server(
         options.listen_address,

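Besides the typing work (the explicit __all__, the HandlerConstructor protocol, re.Pattern[str]/re.Match[str]), web.py keeps the same WSGI entry points that main() wires up above. A minimal sketch of doing the same by hand (repository path and port are placeholders; mypy may want the cast(BackendRepo, ...) shown in main()):

    from wsgiref.simple_server import make_server

    from dulwich.repo import Repo
    from dulwich.server import DictBackend
    from dulwich.web import make_wsgi_chain

    backend = DictBackend({"/": Repo("/path/to/repo")})  # hypothetical path
    app = make_wsgi_chain(backend)  # HTTPGitApplication wrapped in gunzip/limited-input filters
    server = make_server("localhost", 8000, app)
    server.serve_forever()
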
+ 26 - 18
dulwich/worktree.py

@@ -153,7 +153,7 @@ class WorkTreeContainer:
 
     def add(
         self,
-        path: str | bytes | os.PathLike,
+        path: str | bytes | os.PathLike[str],
         branch: str | bytes | None = None,
         commit: ObjectID | None = None,
         force: bool = False,
@@ -183,7 +183,7 @@ class WorkTreeContainer:
             exist_ok=exist_ok,
         )
 
-    def remove(self, path: str | bytes | os.PathLike, force: bool = False) -> None:
+    def remove(self, path: str | bytes | os.PathLike[str], force: bool = False) -> None:
         """Remove a worktree.
 
         Args:
@@ -207,7 +207,9 @@ class WorkTreeContainer:
         return prune_worktrees(self._repo, expire=expire, dry_run=dry_run)
 
     def move(
-        self, old_path: str | bytes | os.PathLike, new_path: str | bytes | os.PathLike
+        self,
+        old_path: str | bytes | os.PathLike[str],
+        new_path: str | bytes | os.PathLike[str],
     ) -> None:
         """Move a worktree to a new location.
 
@@ -217,7 +219,9 @@ class WorkTreeContainer:
         """
         move_worktree(self._repo, old_path, new_path)
 
-    def lock(self, path: str | bytes | os.PathLike, reason: str | None = None) -> None:
+    def lock(
+        self, path: str | bytes | os.PathLike[str], reason: str | None = None
+    ) -> None:
         """Lock a worktree to prevent it from being pruned.
 
         Args:
@@ -226,7 +230,7 @@ class WorkTreeContainer:
         """
         lock_worktree(self._repo, path, reason=reason)
 
-    def unlock(self, path: str | bytes | os.PathLike) -> None:
+    def unlock(self, path: str | bytes | os.PathLike[str]) -> None:
         """Unlock a worktree.
 
         Args:
@@ -246,7 +250,7 @@ class WorkTree:
     such as staging files, committing changes, and resetting the index.
     """
 
-    def __init__(self, repo: Repo, path: str | bytes | os.PathLike) -> None:
+    def __init__(self, repo: Repo, path: str | bytes | os.PathLike[str]) -> None:
         """Initialize a WorkTree for the given repository.
 
         Args:
@@ -263,7 +267,10 @@ class WorkTree:
 
     def stage(
         self,
-        fs_paths: str | bytes | os.PathLike | Iterable[str | bytes | os.PathLike],
+        fs_paths: str
+        | bytes
+        | os.PathLike[str]
+        | Iterable[str | bytes | os.PathLike[str]],
     ) -> None:
         """Stage a set of paths.
 
@@ -637,9 +644,9 @@ class WorkTree:
             symlink_fn = symlink
         else:
 
-            def symlink_fn(
-                src: Union[str, bytes, os.PathLike],
-                dst: Union[str, bytes, os.PathLike],
+            def symlink_fn(  # type: ignore[misc,unused-ignore]
+                src: Union[str, bytes],
+                dst: Union[str, bytes],
                 target_is_directory: bool = False,
                 *,
                 dir_fd: int | None = None,
@@ -655,7 +662,7 @@ class WorkTree:
             tree,
             honor_filemode=honor_filemode,
             validate_path_element=validate_path_element,
-            symlink_fn=symlink_fn,  # type: ignore[arg-type]
+            symlink_fn=symlink_fn,  # type: ignore[arg-type,unused-ignore]
             blob_normalizer=blob_normalizer,
         )
 
@@ -852,7 +859,7 @@ def list_worktrees(repo: Repo) -> list[WorkTreeInfo]:
 
 def add_worktree(
     repo: Repo,
-    path: str | bytes | os.PathLike,
+    path: str | bytes | os.PathLike[str],
     branch: str | bytes | None = None,
     commit: ObjectID | None = None,
     force: bool = False,
@@ -937,6 +944,7 @@ def add_worktree(
             f.write(checkout_ref + b"\n")
     else:
         # Point to branch
+        assert branch is not None  # Should be guaranteed by logic above
         wt_repo.refs.set_symbolic_ref(b"HEAD", branch)
 
     # Reset index to match HEAD
@@ -946,7 +954,7 @@ def add_worktree(
 
 
 def remove_worktree(
-    repo: Repo, path: str | bytes | os.PathLike, force: bool = False
+    repo: Repo, path: str | bytes | os.PathLike[str], force: bool = False
 ) -> None:
     """Remove a worktree.
 
@@ -1078,7 +1086,7 @@ def prune_worktrees(
 
 
 def lock_worktree(
-    repo: Repo, path: str | bytes | os.PathLike, reason: str | None = None
+    repo: Repo, path: str | bytes | os.PathLike[str], reason: str | None = None
 ) -> None:
     """Lock a worktree to prevent it from being pruned.
 
@@ -1096,7 +1104,7 @@ def lock_worktree(
             f.write(reason)
 
 
-def unlock_worktree(repo: Repo, path: str | bytes | os.PathLike) -> None:
+def unlock_worktree(repo: Repo, path: str | bytes | os.PathLike[str]) -> None:
     """Unlock a worktree.
 
     Args:
@@ -1111,7 +1119,7 @@ def unlock_worktree(repo: Repo, path: str | bytes | os.PathLike) -> None:
         os.remove(lock_path)
 
 
-def _find_worktree_id(repo: Repo, path: str | bytes | os.PathLike) -> str:
+def _find_worktree_id(repo: Repo, path: str | bytes | os.PathLike[str]) -> str:
     """Find the worktree identifier for the given path.
 
     Args:
@@ -1153,8 +1161,8 @@ def _find_worktree_id(repo: Repo, path: str | bytes | os.PathLike) -> str:
 
 def move_worktree(
     repo: Repo,
-    old_path: str | bytes | os.PathLike,
-    new_path: str | bytes | os.PathLike,
+    old_path: str | bytes | os.PathLike[str],
+    new_path: str | bytes | os.PathLike[str],
 ) -> None:
     """Move a worktree to a new location.
 

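The worktree.py changes only parameterise os.PathLike as os.PathLike[str]; the module-level helpers keep the signatures shown. A small usage sketch based on those signatures (paths are placeholders and refs/heads/feature is assumed to already exist):

    from dulwich.repo import Repo
    from dulwich.worktree import add_worktree, list_worktrees, lock_worktree

    with Repo("/path/to/main/checkout") as repo:  # hypothetical path
        add_worktree(repo, "/path/to/feature-wt", branch=b"refs/heads/feature")
        lock_worktree(repo, "/path/to/feature-wt", reason="kept on removable media")
        for info in list_worktrees(repo):
            print(info)
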
+ 38 - 0
pyproject.toml

@@ -56,12 +56,50 @@ patiencediff = ["patiencediff"]
 dulwich = "dulwich.cli:main"
 
 [tool.mypy]
+strict = true
+disallow_untyped_defs = true
+disallow_untyped_calls = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_return_any = true
+strict_equality = true
 ignore_missing_imports = true
 
 [[tool.mypy.overrides]]
 module = "atheris"
 ignore_missing_imports = true
 
+[[tool.mypy.overrides]]
+module = "fastimport"
+ignore_missing_imports = true
+no_implicit_reexport = false
+disallow_untyped_calls = false
+warn_return_any = false
+
+[[tool.mypy.overrides]]
+module = "fastimport.*"
+ignore_missing_imports = true
+no_implicit_reexport = false
+disallow_untyped_calls = false
+warn_return_any = false
+
+[[tool.mypy.overrides]]
+module = "patiencediff"
+ignore_missing_imports = true
+no_implicit_reexport = false
+disallow_untyped_calls = false
+warn_return_any = false
+
+[[tool.mypy.overrides]]
+module = "merge3"
+ignore_missing_imports = true
+disallow_untyped_calls = false
+warn_return_any = false
+
 [tool.setuptools]
 packages = [
     "dulwich",

+ 50 - 7
tests/compat/test_client.py

@@ -32,6 +32,7 @@ import sys
 import tarfile
 import tempfile
 import threading
+from collections.abc import Iterator
 from contextlib import suppress
 from io import BytesIO
 from typing import NoReturn
@@ -88,10 +89,19 @@ class DulwichClientTestBase:
         with repo.Repo(srcpath) as src:
             sendrefs = dict(src.get_refs())
             del sendrefs[b"HEAD"]
+
+            # Wrap generate_pack_data to match expected signature
+            def generate_pack_data_wrapper(
+                have: set[bytes], want: set[bytes], ofs_delta: bool = False
+            ) -> tuple[int, Iterator]:
+                return src.generate_pack_data(
+                    have, want, progress=None, ofs_delta=ofs_delta
+                )
+
             c.send_pack(
                 self._build_path("/dest"),
                 lambda _: sendrefs,
-                src.generate_pack_data,
+                generate_pack_data_wrapper,
             )
 
     def test_send_pack(self) -> None:
@@ -137,7 +147,16 @@ class DulwichClientTestBase:
                 )
             sendrefs = dict(local.get_refs())
             del sendrefs[b"HEAD"]
-            c.send_pack(remote_path, lambda _: sendrefs, local.generate_pack_data)
+
+            # Wrap generate_pack_data to match expected signature
+            def generate_pack_data_wrapper(
+                have: set[bytes], want: set[bytes], ofs_delta: bool = False
+            ) -> tuple[int, Iterator]:
+                return local.generate_pack_data(
+                    have, want, progress=None, ofs_delta=ofs_delta
+                )
+
+            c.send_pack(remote_path, lambda _: sendrefs, generate_pack_data_wrapper)
         with repo.Repo(server_new_path) as remote:
             self.assertEqual(remote.head(), commit_id)
 
@@ -148,10 +167,19 @@ class DulwichClientTestBase:
         with repo.Repo(srcpath) as src:
             sendrefs = dict(src.get_refs())
             del sendrefs[b"HEAD"]
+
+            # Wrap generate_pack_data to match expected signature
+            def generate_pack_data_wrapper(
+                have: set[bytes], want: set[bytes], ofs_delta: bool = False
+            ) -> tuple[int, Iterator]:
+                return src.generate_pack_data(
+                    have, want, progress=None, ofs_delta=ofs_delta
+                )
+
             c.send_pack(
                 self._build_path("/dest"),
                 lambda _: sendrefs,
-                src.generate_pack_data,
+                generate_pack_data_wrapper,
             )
             self.assertDestEqualsSrc()
 
@@ -180,7 +208,16 @@ class DulwichClientTestBase:
     def compute_send(self, src):
         sendrefs = dict(src.get_refs())
         del sendrefs[b"HEAD"]
-        return sendrefs, src.generate_pack_data
+
+        # Wrap generate_pack_data to match expected signature
+        def generate_pack_data_wrapper(
+            have: set[bytes], want: set[bytes], ofs_delta: bool = False
+        ) -> tuple[int, Iterator]:
+            return src.generate_pack_data(
+                have, want, progress=None, ofs_delta=ofs_delta
+            )
+
+        return sendrefs, generate_pack_data_wrapper
 
     def test_send_pack_one_error(self) -> None:
         dest, dummy_commit = self.disable_ff_and_make_dummy_commit()
@@ -494,9 +531,15 @@ class TestSSHVendor:
         key_filename=None,
         protocol_version=None,
     ):
-        cmd, path = command.split(" ")
-        cmd = cmd.split("-", 1)
-        path = path.replace("'", "")
+        # Handle both bytes and string commands
+        if isinstance(command, bytes):
+            cmd, path = command.split(b" ")
+            cmd = cmd.decode("utf-8").split("-", 1)
+            path = path.decode("utf-8").replace("'", "")
+        else:
+            cmd, path = command.split(" ")
+            cmd = cmd.split("-", 1)
+            path = path.replace("'", "")
         env = dict(os.environ)
         if protocol_version is None:
             protocol_version = protocol.DEFAULT_GIT_PROTOCOL_VERSION_FETCH

+ 3 - 3
tests/compat/test_dumb.py

@@ -184,7 +184,7 @@ class DumbHTTPClientNoPackTests(CompatTestCase):
 
         try:
             # Fetch from dumb HTTP
-            def determine_wants(refs):
+            def determine_wants(refs, depth=None):
                 return [
                     sha for ref, sha in refs.items() if ref.startswith(b"refs/heads/")
                 ]
@@ -238,7 +238,7 @@ class DumbHTTPClientNoPackTests(CompatTestCase):
         try:
             old_refs = dest_repo.get_refs()
 
-            def determine_wants(refs):
+            def determine_wants(refs, depth=None):
                 wants = []
                 for ref, sha in refs.items():
                     if ref.startswith(b"refs/heads/") and sha != old_refs.get(ref):
@@ -285,7 +285,7 @@ class DumbHTTPClientNoPackTests(CompatTestCase):
         try:
             client = HttpGitClient(self.server.url)
 
-            def determine_wants(refs):
+            def determine_wants(refs, depth=None):
                 return [
                     sha
                     for ref, sha in refs.items()

+ 31 - 27
tests/test_client.py

@@ -178,7 +178,7 @@ class GitClientTests(TestCase):
         )
         self.rin.seek(0)
         ret = self.client.fetch_pack(
-            b"bla", lambda heads, **kwargs: [], None, None, None
+            b"bla", lambda heads, depth=None: [], None, None, None
         )
         self.assertEqual(
             {b"HEAD": b"55dcc6bf963f922e1ed5c4bbaaefcfacef57b1d7"}, ret.refs
@@ -552,7 +552,7 @@ class TestGetTransportAndPath(TestCase):
             "ssh://git@github.com/user/repo.git", config=config
         )
         self.assertIsInstance(c, SSHGitClient)
-        self.assertIsNone(c.ssh_command)
+        self.assertEqual(c.ssh_command, "ssh")  # Now defaults to "ssh"
 
         config.set((b"core",), b"sshCommand", b"custom-ssh -o CustomOption=yes")
 
@@ -612,8 +612,9 @@ class TestGetTransportAndPath(TestCase):
 
         self.assertIsInstance(c, HttpGitClient)
         self.assertEqual("/jelmer/dulwich", path)
-        self.assertEqual("user", c._username)
-        self.assertEqual("passwd", c._password)
+        # Explicitly provided credentials should override URL credentials
+        self.assertEqual("user2", c._username)
+        self.assertEqual("blah", c._password)
 
     def test_http_no_auth(self) -> None:
         url = "https://github.com/jelmer/dulwich"
@@ -841,10 +842,10 @@ class SSHGitClientTests(TestCase):
         client._connect(b"command", b"/path/to/repo")
         self.assertEqual(b"username", server.username)
         self.assertEqual(1337, server.port)
-        self.assertEqual("git-command '/path/to/repo'", server.command)
+        self.assertEqual(b"git-command '/path/to/repo'", server.command)
 
         client._connect(b"relative-command", b"/~/path/to/repo")
-        self.assertEqual("git-relative-command '~/path/to/repo'", server.command)
+        self.assertEqual(b"git-relative-command '~/path/to/repo'", server.command)
 
     def test_ssh_command_precedence(self) -> None:
         self.overrideEnv("GIT_SSH", "/path/to/ssh")
@@ -862,11 +863,11 @@ class SSHGitClientTests(TestCase):
         # Test core.sshCommand config setting
         from dulwich.config import ConfigDict
 
-        # No config, no environment - should be None
+        # No config, no environment - should default to "ssh"
         self.overrideEnv("GIT_SSH", None)
         self.overrideEnv("GIT_SSH_COMMAND", None)
         test_client = SSHGitClient("git.samba.org")
-        self.assertIsNone(test_client.ssh_command)
+        self.assertEqual(test_client.ssh_command, "ssh")
 
         # Config with core.sshCommand
         config = ConfigDict()
@@ -947,7 +948,10 @@ class LocalGitClientTests(TestCase):
         out = BytesIO()
         walker = {}
         ret = c.fetch_pack(
-            s.path, lambda heads, **kwargs: [], graph_walker=walker, pack_data=out.write
+            s.path,
+            lambda heads, depth=None: [],
+            graph_walker=walker,
+            pack_data=out.write,
         )
         self.assertEqual(
             {
@@ -973,7 +977,7 @@ class LocalGitClientTests(TestCase):
         walker = MemoryRepo().get_graph_walker()
         ret = c.fetch_pack(
             s.path,
-            lambda heads, **kwargs: [b"a90fa2d900a17e99b433217e988c4eb4a2e9a097"],
+            lambda heads, depth=None: [b"a90fa2d900a17e99b433217e988c4eb4a2e9a097"],
             graph_walker=walker,
             pack_data=out.write,
         )
@@ -2255,38 +2259,38 @@ class GitCredentialStoreTests(TestCase):
         os.unlink(cls.fname)
 
     def test_nonmatching_scheme(self) -> None:
-        self.assertEqual(
-            get_credentials_from_store(b"http", b"example.org", fnames=[self.fname]),
-            None,
+        result = list(
+            get_credentials_from_store("http", "example.org", fnames=[self.fname])
         )
+        self.assertEqual(result, [])
 
     def test_nonmatching_hostname(self) -> None:
-        self.assertEqual(
-            get_credentials_from_store(b"https", b"noentry.org", fnames=[self.fname]),
-            None,
+        result = list(
+            get_credentials_from_store("https", "noentry.org", fnames=[self.fname])
         )
+        self.assertEqual(result, [])
 
     def test_match_without_username(self) -> None:
-        self.assertEqual(
-            get_credentials_from_store(b"https", b"example.org", fnames=[self.fname]),
-            (b"user", b"pass"),
+        result = list(
+            get_credentials_from_store("https", "example.org", fnames=[self.fname])
         )
+        self.assertEqual(result, [("user", "pass")])
 
     def test_match_with_matching_username(self) -> None:
-        self.assertEqual(
+        result = list(
             get_credentials_from_store(
-                b"https", b"example.org", b"user", fnames=[self.fname]
-            ),
-            (b"user", b"pass"),
+                "https", "example.org", "user", fnames=[self.fname]
+            )
         )
+        self.assertEqual(result, [("user", "pass")])
 
     def test_no_match_with_nonmatching_username(self) -> None:
-        self.assertEqual(
+        result = list(
             get_credentials_from_store(
-                b"https", b"example.org", b"otheruser", fnames=[self.fname]
-            ),
-            None,
+                "https", "example.org", "otheruser", fnames=[self.fname]
+            )
         )
+        self.assertEqual(result, [])
 
 
 class RemoteErrorFromStderrTests(TestCase):

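The GitCredentialStoreTests rewrite reflects an API change: get_credentials_from_store now takes str arguments and yields (username, password) pairs instead of returning a single bytes tuple or None. A minimal sketch of the new calling convention, assuming the function is imported from dulwich.client as in this test module and that the store file uses the usual https://user:pass@host lines:

    from dulwich.client import get_credentials_from_store

    # /path/to/credstore is a placeholder for a git-credential-store style file.
    creds = list(
        get_credentials_from_store("https", "example.org", fnames=["/path/to/credstore"])
    )
    if creds:
        username, password = creds[0]
    else:
        username = password = None  # no matching entry
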
+ 8 - 4
tests/test_dumb.py

@@ -263,10 +263,12 @@ fedcba9876543210fedcba9876543210fedcba98\trefs/tags/v1.0
 
         graph_walker = Mock()
 
-        def determine_wants(refs: dict[bytes, bytes]) -> list[bytes]:
+        def determine_wants(
+            refs: dict[bytes, bytes], depth: Optional[int] = None
+        ) -> list[bytes]:
             return []
 
-        result = list(self.repo.fetch_pack_data(graph_walker, determine_wants))
+        result = list(self.repo.fetch_pack_data(determine_wants, graph_walker))
         self.assertEqual([], result)
 
     def test_fetch_pack_data_with_blob(self) -> None:
@@ -288,14 +290,16 @@ fedcba9876543210fedcba9876543210fedcba98\trefs/tags/v1.0
         graph_walker = Mock()
         graph_walker.ack.return_value = []  # No existing objects
 
-        def determine_wants(refs: dict[bytes, bytes]) -> list[bytes]:
+        def determine_wants(
+            refs: dict[bytes, bytes], depth: Optional[int] = None
+        ) -> list[bytes]:
             return [blob_sha]
 
         def progress(msg: bytes) -> None:
             assert isinstance(msg, bytes)
 
         result = list(
-            self.repo.fetch_pack_data(graph_walker, determine_wants, progress)
+            self.repo.fetch_pack_data(determine_wants, graph_walker, progress)
         )
         self.assertEqual(1, len(result))
         self.assertEqual(Blob.type_num, result[0].pack_type_num)

+ 3 - 4
tests/test_index.py

@@ -1434,10 +1434,9 @@ class TestIndexEntryFromPath(TestCase):
         self.assertEqual(sorted(changes), [b"conflict", b"file1", b"file3", b"file4"])
 
         # Create a custom blob filter function
-        def filter_blob_callback(blob, path):
-            # Modify blob to make it look changed
-            blob.data = b"modified " + blob.data
-            return blob
+        def filter_blob_callback(data, path):
+            # Modify blob data to make it look changed
+            return b"modified " + data
 
         # Get unstaged changes with blob filter
         changes = list(get_unstaged_changes(index, repo_dir, filter_blob_callback))

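The tests/test_index.py change shows the new filter_blob_callback contract: the callback receives the raw file bytes plus the path and returns (possibly transformed) bytes, rather than mutating a Blob. A short sketch with a hypothetical repository path and a simple CRLF-normalising filter:

    from dulwich.index import Index, get_unstaged_changes

    def normalize_crlf(data: bytes, path: bytes) -> bytes:
        # Compare working-tree contents after CRLF -> LF conversion.
        return data.replace(b"\r\n", b"\n")

    index = Index("/path/to/repo/.git/index")  # hypothetical path
    unstaged = list(get_unstaged_changes(index, "/path/to/repo", normalize_crlf))
    print(unstaged)  # paths whose filtered contents differ from the index
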
+ 8 - 4
tests/test_object_store.py

@@ -934,16 +934,18 @@ class CommitTreeChangesTests(TestCase):
         self.tree_id = commit_tree(self.store, blobs)
 
     def test_no_changes(self) -> None:
+        # When no changes, should return the same tree SHA
         self.assertEqual(
-            self.store[self.tree_id],
+            self.tree_id,
             commit_tree_changes(self.store, self.store[self.tree_id], []),
         )
 
     def test_add_blob(self) -> None:
         blob_d = make_object(Blob, data=b"d")
-        new_tree = commit_tree_changes(
+        new_tree_id = commit_tree_changes(
             self.store, self.store[self.tree_id], [(b"d", 0o100644, blob_d.id)]
         )
+        new_tree = self.store[new_tree_id]
         self.assertEqual(
             new_tree[b"d"],
             (33188, b"c59d9b6344f1af00e504ba698129f07a34bbed8d"),
@@ -951,11 +953,12 @@ class CommitTreeChangesTests(TestCase):
 
     def test_add_blob_in_dir(self) -> None:
         blob_d = make_object(Blob, data=b"d")
-        new_tree = commit_tree_changes(
+        new_tree_id = commit_tree_changes(
             self.store,
             self.store[self.tree_id],
             [(b"e/f/d", 0o100644, blob_d.id)],
         )
+        new_tree = self.store[new_tree_id]
         self.assertEqual(
             new_tree.items(),
             [
@@ -991,9 +994,10 @@ class CommitTreeChangesTests(TestCase):
         )
 
     def test_delete_blob(self) -> None:
-        new_tree = commit_tree_changes(
+        new_tree_id = commit_tree_changes(
             self.store, self.store[self.tree_id], [(b"ad/bd/c", None, None)]
         )
+        new_tree = self.store[new_tree_id]
         self.assertEqual(set(new_tree), {b"a", b"ad", b"c"})
         ad_tree = self.store[new_tree[b"ad"][1]]
         self.assertEqual(set(ad_tree), {b"b", b"c"})

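The CommitTreeChangesTests updates capture another API shift: commit_tree_changes now returns the new tree's SHA, so callers look the Tree object back up in the store. A minimal sketch of that flow, assuming commit_tree still lives in dulwich.index and commit_tree_changes in dulwich.object_store:

    from dulwich.index import commit_tree
    from dulwich.object_store import MemoryObjectStore, commit_tree_changes
    from dulwich.objects import Blob

    store = MemoryObjectStore()
    blob_a = Blob.from_string(b"a")
    store.add_object(blob_a)
    tree_id = commit_tree(store, [(b"a", blob_a.id, 0o100644)])

    blob_d = Blob.from_string(b"d")
    store.add_object(blob_d)
    new_tree_id = commit_tree_changes(
        store, store[tree_id], [(b"d", 0o100644, blob_d.id)]
    )
    new_tree = store[new_tree_id]  # the return value is a SHA, not a Tree
    print(sorted(new_tree))        # iterating a Tree yields its entry names
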
+ 1 - 1
tests/test_porcelain_lfs.py

@@ -174,7 +174,7 @@ class LFSPorcelainTestCase(TestCase):
 
         self.assertEqual(len(lfs_files), 1)
         path, _oid, size = lfs_files[0]
-        self.assertEqual(path, "large.bin")
+        self.assertEqual(path, b"large.bin")
         self.assertEqual(size, len(test_content))
 
     def test_lfs_migrate(self):

Too many files changed in this diff, so some files are not shown.