Selaa lähdekoodia

Add more docstrings

Jelmer Vernooij 5 kuukautta sitten
vanhempi
commit
d6ac2f18c6

+ 130 - 0
dulwich/config.py

@@ -149,6 +149,17 @@ def match_glob_pattern(value: str, pattern: str) -> bool:
 
 
 def lower_key(key: ConfigKey) -> ConfigKey:
+    """Convert a config key to lowercase, preserving subsection case.
+
+    Args:
+      key: Configuration key (str, bytes, or tuple)
+
+    Returns:
+      Key with section names lowercased, subsection names preserved
+
+    Raises:
+      TypeError: If key is not str, bytes, or tuple
+    """
     if isinstance(key, (bytes, str)):
         return key.lower()
 
@@ -170,7 +181,18 @@ _T = TypeVar("_T")  # For get() default parameter
 
 
 class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
+    """A case-insensitive ordered dictionary that can store multiple values per key.
+
+    This class maintains the order of insertions and allows multiple values
+    for the same key. Keys are compared case-insensitively.
+    """
+
     def __init__(self, default_factory: Optional[Callable[[], V]] = None) -> None:
+        """Initialize a CaseInsensitiveOrderedMultiDict.
+
+        Args:
+          default_factory: Optional factory function for default values
+        """
         self._real: list[tuple[K, V]] = []
         self._keyed: dict[Any, V] = {}
         self._default_factory = default_factory
@@ -183,6 +205,18 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
         ] = None,
         default_factory: Optional[Callable[[], V]] = None,
     ) -> "CaseInsensitiveOrderedMultiDict[K, V]":
+        """Create a CaseInsensitiveOrderedMultiDict from an existing mapping.
+
+        Args:
+          dict_in: Optional mapping to initialize from
+          default_factory: Optional factory function for default values
+
+        Returns:
+          New CaseInsensitiveOrderedMultiDict instance
+
+        Raises:
+          TypeError: If dict_in is not a mapping or None
+        """
         if isinstance(dict_in, cls):
             return dict_in
 
@@ -200,14 +234,20 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
         return out
 
     def __len__(self) -> int:
+        """Return the number of unique keys in the dictionary."""
         return len(self._keyed)
 
     def keys(self) -> KeysView[K]:
+        """Return a view of the dictionary's keys."""
         return self._keyed.keys()  # type: ignore[return-value]
 
     def items(self) -> ItemsView[K, V]:
+        """Return a view of the dictionary's (key, value) pairs in insertion order."""
+
         # Return a view that iterates over the real list to preserve order
         class OrderedItemsView(ItemsView[K, V]):
+            """Items view that preserves insertion order."""
+
             def __init__(self, mapping: CaseInsensitiveOrderedMultiDict[K, V]):
                 self._mapping = mapping
 
@@ -226,16 +266,25 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
         return OrderedItemsView(self)
 
     def __iter__(self) -> Iterator[K]:
+        """Iterate over the dictionary's keys."""
         return iter(self._keyed)
 
     def values(self) -> ValuesView[V]:
+        """Return a view of the values (one per key: the last value stored for each key)."""
         return self._keyed.values()
 
     def __setitem__(self, key: K, value: V) -> None:
+        """Set a value for a key, appending to existing values."""
         self._real.append((key, value))
         self._keyed[lower_key(key)] = value
 
     def set(self, key: K, value: V) -> None:
+        """Set a value for a key, replacing all existing values.
+
+        Args:
+          key: The key to set
+          value: The value to set
+        """
         # This method replaces all existing values for the key
         lower = lower_key(key)
         self._real = [(k, v) for k, v in self._real if lower_key(k) != lower]
@@ -243,6 +292,11 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
         self._keyed[lower] = value
 
     def __delitem__(self, key: K) -> None:
+        """Delete all values for a key.
+
+        Raises:
+          KeyError: If the key is not found
+        """
         lower_k = lower_key(key)
         del self._keyed[lower_k]
         for i, (actual, unused_value) in reversed(list(enumerate(self._real))):
@@ -250,9 +304,23 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
                 del self._real[i]
 
     def __getitem__(self, item: K) -> V:
+        """Get the last value for a key.
+
+        Raises:
+          KeyError: If the key is not found
+        """
         return self._keyed[lower_key(item)]
 
     def get(self, key: K, /, default: Union[V, _T, None] = None) -> Union[V, _T, None]:  # type: ignore[override]
+        """Get the last value for a key, or a default if not found.
+
+        Args:
+          key: The key to look up
+          default: Default value to return if key not found
+
+        Returns:
+          The value for the key, or default/default_factory result if not found
+        """
         try:
             return self[key]
         except KeyError:
@@ -264,12 +332,32 @@ class CaseInsensitiveOrderedMultiDict(MutableMapping[K, V], Generic[K, V]):
                 return None
 
     def get_all(self, key: K) -> Iterator[V]:
+        """Get all values for a key in insertion order.
+
+        Args:
+          key: The key to look up
+
+        Returns:
+          Iterator of all values for the key
+        """
         lowered_key = lower_key(key)
         for actual, value in self._real:
             if lower_key(actual) == lowered_key:
                 yield value
 
     def setdefault(self, key: K, default: Optional[V] = None) -> V:
+        """Get value for key, setting it to default if not present.
+
+        Args:
+          key: The key to look up
+          default: Default value to set if key not found
+
+        Returns:
+          The existing value or the newly set default
+
+        Raises:
+          KeyError: If key not found and no default or default_factory
+        """
         try:
             return self[key]
         except KeyError:
@@ -414,29 +502,45 @@ class ConfigDict(Config):
         )
 
     def __repr__(self) -> str:
+        """Return string representation of ConfigDict."""
         return f"{self.__class__.__name__}({self._values!r})"
 
     def __eq__(self, other: object) -> bool:
+        """Check equality with another ConfigDict."""
         return isinstance(other, self.__class__) and other._values == self._values
 
     def __getitem__(self, key: Section) -> CaseInsensitiveOrderedMultiDict[Name, Value]:
+        """Get configuration values for a section.
+
+        Raises:
+          KeyError: If section not found
+        """
         return self._values.__getitem__(key)
 
     def __setitem__(
         self, key: Section, value: CaseInsensitiveOrderedMultiDict[Name, Value]
     ) -> None:
+        """Set configuration values for a section."""
         return self._values.__setitem__(key, value)
 
     def __delitem__(self, key: Section) -> None:
+        """Delete a configuration section.
+
+        Raises:
+          KeyError: If section not found
+        """
         return self._values.__delitem__(key)
 
     def __iter__(self) -> Iterator[Section]:
+        """Iterate over configuration sections."""
         return self._values.__iter__()
 
     def __len__(self) -> int:
+        """Return the number of sections."""
         return self._values.__len__()
 
     def keys(self) -> KeysView[Section]:
+        """Return a view of section names."""
         return self._values.keys()
 
     @classmethod
@@ -750,6 +854,12 @@ class ConfigFile(ConfigDict):
         ] = None,
         encoding: Union[str, None] = None,
     ) -> None:
+        """Initialize a ConfigFile.
+
+        Args:
+          values: Optional mapping of configuration values
+          encoding: Optional encoding for the file (defaults to system encoding)
+        """
         super().__init__(values=values, encoding=encoding)
         self.path: Optional[str] = None
         self._included_paths: set[str] = set()  # Track included files to prevent cycles
@@ -1140,6 +1250,14 @@ class ConfigFile(ConfigDict):
 
 
 def get_xdg_config_home_path(*path_segments: str) -> str:
+    """Get a path in the XDG config home directory.
+
+    Args:
+      *path_segments: Path segments to join to the XDG config home
+
+    Returns:
+      Full path in XDG config home directory
+    """
     xdg_config_home = os.environ.get(
         "XDG_CONFIG_HOME",
         os.path.expanduser("~/.config/"),
@@ -1227,14 +1345,26 @@ class StackedConfig(Config):
     def __init__(
         self, backends: list[ConfigFile], writable: Optional[ConfigFile] = None
     ) -> None:
+        """Initialize a StackedConfig.
+
+        Args:
+          backends: List of config files to read from (in order of precedence)
+          writable: Optional config file to write changes to
+        """
         self.backends = backends
         self.writable = writable
 
     def __repr__(self) -> str:
+        """Return string representation of StackedConfig."""
         return f"<{self.__class__.__name__} for {self.backends!r}>"
 
     @classmethod
     def default(cls) -> "StackedConfig":
+        """Create a StackedConfig with default system/user config files.
+
+        Returns:
+          StackedConfig with default configuration files loaded
+        """
         return cls(cls.default_backends())
 
     @classmethod

+ 55 - 0
dulwich/contrib/paramiko_vendor.py

@@ -41,7 +41,15 @@ import paramiko.config
 
 
 class _ParamikoWrapper:
+    """Wrapper for paramiko SSH channel to provide a file-like interface."""
+
     def __init__(self, client: paramiko.SSHClient, channel: paramiko.Channel) -> None:
+        """Initialize the paramiko wrapper.
+
+        Args:
+            client: The SSH client instance
+            channel: The SSH channel for communication
+        """
         self.client = client
         self.channel = channel
 
@@ -50,15 +58,38 @@ class _ParamikoWrapper:
 
     @property
     def stderr(self) -> BinaryIO:
+        """Get stderr stream from the channel.
+
+        Returns:
+            Binary IO stream for stderr
+        """
         return cast(BinaryIO, self.channel.makefile_stderr("rb"))
 
     def can_read(self) -> bool:
+        """Check if data is available to read.
+
+        Returns:
+            True if data is available
+        """
         return self.channel.recv_ready()
 
     def write(self, data: bytes) -> None:
+        """Write data to the channel.
+
+        Args:
+            data: Bytes to write
+        """
         return self.channel.sendall(data)
 
     def read(self, n: Optional[int] = None) -> bytes:
+        """Read data from the channel.
+
+        Args:
+            n: Number of bytes to read (default: 4096)
+
+        Returns:
+            Bytes read from the channel
+        """
         data = self.channel.recv(n or 4096)
         data_len = len(data)
 
@@ -73,13 +104,21 @@ class _ParamikoWrapper:
         return data
 
     def close(self) -> None:
+        """Close the SSH channel."""
         self.channel.close()
 
 
 class ParamikoSSHVendor:
+    """SSH vendor implementation using paramiko."""
+
     # http://docs.paramiko.org/en/2.4/api/client.html
 
     def __init__(self, **kwargs: object) -> None:
+        """Initialize the paramiko SSH vendor.
+
+        Args:
+            **kwargs: Additional keyword arguments passed to SSHClient
+        """
         self.kwargs = kwargs
         self.ssh_config = self._load_ssh_config()
 
@@ -110,6 +149,22 @@ class ParamikoSSHVendor:
         protocol_version: Optional[int] = None,
         **kwargs: object,
     ) -> _ParamikoWrapper:
+        """Run a command on a remote host via SSH.
+
+        Args:
+            host: Hostname to connect to
+            command: Command to execute
+            username: SSH username (optional)
+            port: SSH port (optional)
+            password: SSH password (optional)
+            pkey: Private key for authentication (optional)
+            key_filename: Path to private key file (optional)
+            protocol_version: SSH protocol version (optional)
+            **kwargs: Additional keyword arguments
+
+        Returns:
+            _ParamikoWrapper instance for the SSH channel
+        """
         client = paramiko.SSHClient()
 
         # Get SSH config for this host

+ 108 - 4
dulwich/contrib/swift.py

@@ -97,7 +97,14 @@ cache_length = 20
 
 
 class PackInfoMissingObjectFinder(GreenThreadsMissingObjectFinder):
+    """Find missing objects required for pack generation."""
+
     def next(self) -> Optional[tuple[bytes, int, Union[bytes, None]]]:
+        """Get the next missing object.
+
+        Returns:
+          Tuple of (sha, pack_type_num, name) or None if no more objects
+        """
         while True:
             if not self.objects_to_send:
                 return None
@@ -179,6 +186,15 @@ def swift_load_pack_index(scon: "SwiftConnector", filename: str) -> "PackIndex":
 
 
 def pack_info_create(pack_data: "PackData", pack_index: "PackIndex") -> bytes:
+    """Create pack info file contents.
+
+    Args:
+      pack_data: The pack data object
+      pack_index: The pack index object
+
+    Returns:
+      Compressed JSON bytes containing pack information
+    """
     pack = Pack.from_objects(pack_data, pack_index)
     info: dict = {}
     for obj in pack.iterobjects():
@@ -213,6 +229,16 @@ def load_pack_info(
     scon: Optional["SwiftConnector"] = None,
     file: Optional[BinaryIO] = None,
 ) -> Optional[dict]:
+    """Load pack info from Swift or file.
+
+    Args:
+      filename: The pack info filename
+      scon: Optional Swift connector to use for loading
+      file: Optional file object to read from instead
+
+    Returns:
+      Dictionary containing pack information or None if not found
+    """
     if not file:
         if scon is None:
             return None
@@ -233,7 +259,7 @@ def load_pack_info(
 
 
 class SwiftException(Exception):
-    pass
+    """Exception raised for Swift-related errors."""
 
 
 class SwiftConnector:
@@ -281,6 +307,14 @@ class SwiftConnector:
         )
 
     def swift_auth_v1(self) -> tuple[str, str]:
+        """Authenticate with Swift using v1 authentication.
+
+        Returns:
+          Tuple of (storage_url, auth_token)
+
+        Raises:
+          SwiftException: If authentication fails
+        """
         self.user = self.user.replace(";", ":")
         auth_httpclient = HTTPClient.from_url(
             self.auth_url,
@@ -304,6 +338,14 @@ class SwiftConnector:
         return storage_url, token
 
     def swift_auth_v2(self) -> tuple[str, str]:
+        """Authenticate with Swift using v2 authentication.
+
+        Returns:
+          Tuple of (storage_url, auth_token)
+
+        Raises:
+          SwiftException: If authentication fails
+        """
         self.tenant, self.user = self.user.split(";")
         auth_dict = {}
         auth_dict["auth"] = {
@@ -615,6 +657,14 @@ class SwiftPackData(PackData):
     def get_object_at(
         self, offset: int
     ) -> tuple[int, Union[tuple[Union[bytes, int], list[bytes]], list[bytes]]]:
+        """Get the object at a specific offset in the pack.
+
+        Args:
+          offset: The offset in the pack file
+
+        Returns:
+          Tuple of (pack_type_num, object_data)
+        """
         if offset in self._offset_cache:
             return self._offset_cache[offset]
         assert offset >= self._header_size
@@ -625,11 +675,16 @@ class SwiftPackData(PackData):
         return (unpacked.pack_type_num, obj_data)
 
     def get_stored_checksum(self) -> bytes:
+        """Get the stored checksum for this pack.
+
+        Returns:
+          The pack checksum as bytes
+        """
         pack_reader = SwiftPackReader(self.scon, str(self._filename), self.pack_length)
         return pack_reader.read_checksum()
 
     def close(self) -> None:
-        pass
+        """Close the pack data (no-op for Swift)."""
 
 
 class SwiftPack(Pack):
@@ -698,6 +753,14 @@ class SwiftObjectStore(PackBasedObjectStore):
         return iter([])
 
     def pack_info_get(self, sha: bytes) -> Optional[tuple]:
+        """Get pack info for a specific SHA.
+
+        Args:
+          sha: The SHA to look up
+
+        Returns:
+          Pack info tuple or None if not found
+        """
         for pack in self.packs:
             if sha in pack:
                 if hasattr(pack, "pack_info"):
@@ -748,6 +811,11 @@ class SwiftObjectStore(PackBasedObjectStore):
         f = BytesIO()
 
         def commit() -> Optional["SwiftPack"]:
+            """Commit the pack to Swift storage.
+
+            Returns:
+              The created SwiftPack or None if empty
+            """
             f.seek(0)
             pack = PackData(file=f, filename="")
             entries = pack.sorted_entries()
@@ -770,11 +838,16 @@ class SwiftObjectStore(PackBasedObjectStore):
                 return None
 
         def abort() -> None:
-            pass
+            """Abort the pack operation (no-op)."""
 
         return f, commit, abort
 
     def add_object(self, obj: object) -> None:
+        """Add a single object to the store.
+
+        Args:
+          obj: The object to add
+        """
         self.add_objects(
             [
                 (obj, None),  # type: ignore
@@ -946,6 +1019,11 @@ class SwiftInfoRefsContainer(InfoRefsContainer):
         return True
 
     def allkeys(self) -> Iterator[bytes]:
+        """Get all reference names.
+
+        Returns:
+          Iterator of reference names as bytes
+        """
         try:
             self._refs[b"HEAD"] = self._refs[b"refs/heads/master"]
         except KeyError:
@@ -954,6 +1032,8 @@ class SwiftInfoRefsContainer(InfoRefsContainer):
 
 
 class SwiftRepo(BaseRepo):
+    """A Git repository backed by Swift object storage."""
+
     def __init__(self, root: str, conf: ConfigParser) -> None:
         """Init a Git bare Repository on top of a Swift container.
 
@@ -1020,17 +1100,31 @@ class SwiftRepo(BaseRepo):
 
 
 class SwiftSystemBackend(Backend):
+    """Backend for serving Git repositories from Swift."""
+
     def __init__(self, logger: "logging.Logger", conf: ConfigParser) -> None:
         self.conf = conf
         self.logger = logger
 
     def open_repository(self, path: str) -> "BackendRepo":
+        """Open a repository at the given path.
+
+        Args:
+          path: Path to the repository in Swift
+
+        Returns:
+          SwiftRepo instance
+        """
         self.logger.info("opening repository at %s", path)
         return cast("BackendRepo", SwiftRepo(path, self.conf))
 
 
 def cmd_daemon(args: list) -> None:
-    """Entry point for starting a TCP git server."""
+    """Start a TCP git server for Swift repositories.
+
+    Args:
+      args: Command line arguments
+    """
     import optparse
 
     parser = optparse.OptionParser()
@@ -1082,6 +1176,11 @@ def cmd_daemon(args: list) -> None:
 
 
 def cmd_init(args: list) -> None:
+    """Initialize a new Git repository in Swift.
+
+    Args:
+      args: Command line arguments
+    """
     import optparse
 
     parser = optparse.OptionParser()
@@ -1103,6 +1202,11 @@ def cmd_init(args: list) -> None:
 
 
 def main(argv: list = sys.argv) -> None:
+    """Main entry point for Swift Git command line interface.
+
+    Args:
+      argv: Command line arguments
+    """
     commands = {
         "init": cmd_init,
         "daemon": cmd_daemon,

+ 40 - 0
dulwich/fastexport.py

@@ -40,6 +40,14 @@ if TYPE_CHECKING:
 
 
 def split_email(text: bytes) -> tuple[bytes, bytes]:
+    """Split email address from name.
+
+    Args:
+        text: Full name and email (e.g. b"John Doe <john@example.com>")
+
+    Returns:
+        Tuple of (name, email)
+    """
     # TODO(jelmer): Dedupe this and the same functionality in
     # format_annotate_line.
     (name, email) = text.rsplit(b" <", 1)
@@ -50,12 +58,23 @@ class GitFastExporter:
     """Generate a fast-export output stream for Git objects."""
 
     def __init__(self, outf: BinaryIO, store: "BaseObjectStore") -> None:
+        """Initialize the fast exporter.
+
+        Args:
+            outf: Output file to write to
+            store: Object store to export from
+        """
         self.outf = outf
         self.store = store
         self.markers: dict[bytes, bytes] = {}
         self._marker_idx = 0
 
     def print_cmd(self, cmd: object) -> None:
+        """Print a command to the output stream.
+
+        Args:
+            cmd: Command object to print
+        """
         if hasattr(cmd, "__bytes__"):
             output = cmd.__bytes__()
         else:
@@ -63,15 +82,36 @@ class GitFastExporter:
         self.outf.write(output + b"\n")
 
     def _allocate_marker(self) -> bytes:
+        """Allocate a new marker.
+
+        Returns:
+            New marker as bytes
+        """
         self._marker_idx += 1
         return str(self._marker_idx).encode("ascii")
 
     def _export_blob(self, blob: Blob) -> tuple[Any, bytes]:
+        """Export a blob object.
+
+        Args:
+            blob: Blob object to export
+
+        Returns:
+            Tuple of (BlobCommand, marker)
+        """
         marker = self._allocate_marker()
         self.markers[marker] = blob.id
         return (commands.BlobCommand(marker, blob.data), marker)
 
     def emit_blob(self, blob: Blob) -> bytes:
+        """Emit a blob to the output stream.
+
+        Args:
+            blob: Blob object to emit
+
+        Returns:
+            Marker for the blob
+        """
         (cmd, marker) = self._export_blob(blob)
         self.print_cmd(cmd)
         return marker

+ 40 - 0
dulwich/graph.py

@@ -37,14 +37,27 @@ T = TypeVar("T")
 # why they do not have a builtin maxheap is simply ridiculous but
 # liveable with integer time stamps using negation
 class WorkList(Generic[T]):
+    """Priority queue for commit processing using a min-heap."""
+
     def __init__(self) -> None:
+        """Initialize an empty work list."""
         self.pq: list[tuple[int, T]] = []
 
     def add(self, item: tuple[int, T]) -> None:
+        """Add an item to the work list.
+
+        Args:
+            item: Tuple of (timestamp, commit)
+        """
         dt, cmt = item
         heappush(self.pq, (-dt, cmt))
 
     def get(self) -> Optional[tuple[int, T]]:
+        """Get the highest priority item from the work list.
+
+        Returns:
+            Tuple of (timestamp, commit)
+
+        Raises:
+            IndexError: If the work list is empty
+        """
         item = heappop(self.pq)
         if item:
             pr, cmt = item
@@ -52,6 +65,11 @@ class WorkList(Generic[T]):
         return None
 
     def iter(self) -> Iterator[tuple[int, T]]:
+        """Iterate over items in the work list.
+
+        Yields:
+            Tuples of (timestamp, commit)
+        """
         for pr, cmt in self.pq:
             yield (-pr, cmt)
 
@@ -64,6 +82,19 @@ def _find_lcas(
     min_stamp: int = 0,
     shallows: Optional[set[ObjectID]] = None,
 ) -> list[ObjectID]:
+    """Find lowest common ancestors between commits.
+
+    Args:
+        lookup_parents: Function to get parent commits
+        c1: First commit
+        c2s: List of second commits
+        lookup_stamp: Function to get commit timestamp
+        min_stamp: Minimum timestamp to consider
+        shallows: Set of shallow commits
+
+    Returns:
+        List of lowest common ancestor commit IDs
+    """
     cands = []
     cstates = {}
 
@@ -74,6 +105,15 @@ def _find_lcas(
     _LCA = 8  # potential LCA (Lowest Common Ancestor)
 
     def _has_candidates(wlst: WorkList[ObjectID], cstates: dict[ObjectID, int]) -> bool:
+        """Check if there are any candidate commits in the work list.
+
+        Args:
+            wlst: Work list of commits
+            cstates: Dictionary of commit states
+
+        Returns:
+            True if there are candidates to process
+        """
         for dt, cmt in wlst.iter():
             if cmt in cstates:
                 if not ((cstates[cmt] & _DNC) == _DNC):

+ 149 - 0
dulwich/index.py

@@ -259,6 +259,8 @@ def _decompress_path_from_stream(
 
 
 class Stage(Enum):
+    """Represents the stage of an index entry during merge conflicts."""
+
     NORMAL = 0
     MERGE_CONFLICT_ANCESTOR = 1
     MERGE_CONFLICT_THIS = 2
@@ -267,6 +269,12 @@ class Stage(Enum):
 
 @dataclass
 class SerializedIndexEntry:
+    """Represents a serialized index entry as stored in the index file.
+
+    This dataclass holds the raw data for an index entry before it's
+    parsed into the more user-friendly IndexEntry format.
+    """
+
     name: bytes
     ctime: Union[int, float, tuple[int, int]]
     mtime: Union[int, float, tuple[int, int]]
@@ -281,6 +289,11 @@ class SerializedIndexEntry:
     extended_flags: int
 
     def stage(self) -> Stage:
+        """Extract the stage from the flags field.
+
+        Returns:
+          Stage enum value indicating merge conflict state
+        """
         return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
 
 
@@ -325,10 +338,23 @@ class TreeExtension(IndexExtension):
 
     @classmethod
     def from_bytes(cls, data: bytes) -> "TreeExtension":
+        """Parse TreeExtension from bytes.
+
+        Args:
+          data: Raw bytes to parse
+
+        Returns:
+          TreeExtension instance
+        """
         # TODO: Implement tree cache parsing
         return cls([])
 
     def to_bytes(self) -> bytes:
+        """Serialize TreeExtension to bytes.
+
+        Returns:
+          Serialized extension data
+        """
         # TODO: Implement tree cache serialization
         return b""
 
@@ -342,10 +368,23 @@ class ResolveUndoExtension(IndexExtension):
 
     @classmethod
     def from_bytes(cls, data: bytes) -> "ResolveUndoExtension":
+        """Parse ResolveUndoExtension from bytes.
+
+        Args:
+          data: Raw bytes to parse
+
+        Returns:
+          ResolveUndoExtension instance
+        """
         # TODO: Implement resolve undo parsing
         return cls([])
 
     def to_bytes(self) -> bytes:
+        """Serialize ResolveUndoExtension to bytes.
+
+        Returns:
+          Serialized extension data
+        """
         # TODO: Implement resolve undo serialization
         return b""
 
@@ -358,11 +397,25 @@ class UntrackedExtension(IndexExtension):
 
     @classmethod
     def from_bytes(cls, data: bytes) -> "UntrackedExtension":
+        """Parse UntrackedExtension from bytes.
+
+        Args:
+          data: Raw bytes to parse
+
+        Returns:
+          UntrackedExtension instance
+        """
         return cls(data)
 
 
 @dataclass
 class IndexEntry:
+    """Represents an entry in the Git index.
+
+    This is a higher-level representation of an index entry that includes
+    parsed data and convenience methods.
+    """
+
     ctime: Union[int, float, tuple[int, int]]
     mtime: Union[int, float, tuple[int, int]]
     dev: int
@@ -377,6 +430,14 @@ class IndexEntry:
 
     @classmethod
     def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
+        """Create an IndexEntry from a SerializedIndexEntry.
+
+        Args:
+          serialized: SerializedIndexEntry to convert
+
+        Returns:
+          New IndexEntry instance
+        """
         return cls(
             ctime=serialized.ctime,
             mtime=serialized.mtime,
@@ -392,6 +453,15 @@ class IndexEntry:
         )
 
     def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
+        """Serialize this entry with a given name and stage.
+
+        Args:
+          name: Path name for the entry
+          stage: Merge conflict stage
+
+        Returns:
+          SerializedIndexEntry ready for writing to disk
+        """
         # Clear out any existing stage bits, then set them from the Stage.
         new_flags = self.flags & ~FLAG_STAGEMASK
         new_flags |= stage.value << FLAG_STAGESHIFT
@@ -411,6 +481,11 @@ class IndexEntry:
         )
 
     def stage(self) -> Stage:
+        """Get the merge conflict stage of this entry.
+
+        Returns:
+          Stage enum value
+        """
         return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
 
     @property
@@ -889,6 +964,11 @@ class Index:
 
     @property
     def path(self) -> Union[bytes, str]:
+        """Get the path to the index file.
+
+        Returns:
+          Path to the index file
+        """
         return self._filename
 
     def __repr__(self) -> str:
@@ -992,6 +1072,11 @@ class Index:
             yield path, entry.sha, cleanup_mode(entry.mode)
 
     def has_conflicts(self) -> bool:
+        """Check if the index contains any conflicted entries.
+
+        Returns:
+          True if any entries are conflicted, False otherwise
+        """
         for value in self._byname.values():
             if isinstance(value, ConflictedIndexEntry):
                 return True
@@ -1013,18 +1098,38 @@ class Index:
     def iteritems(
         self,
     ) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
+        """Iterate over (path, entry) pairs in the index.
+
+        Returns:
+          Iterator of (path, entry) tuples
+        """
         return iter(self._byname.items())
 
     def items(self) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
+        """Get an iterator over (path, entry) pairs.
+
+        Returns:
+          Iterator of (path, entry) tuples
+        """
         return iter(self._byname.items())
 
     def update(
         self, entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]
     ) -> None:
+        """Update the index with multiple entries.
+
+        Args:
+          entries: Dictionary mapping paths to index entries
+        """
         for key, value in entries.items():
             self[key] = value
 
     def paths(self) -> Generator[bytes, None, None]:
+        """Generate all paths in the index.
+
+        Yields:
+          Path names as bytes
+        """
         yield from self._byname.keys()
 
     def changes_from_tree(
@@ -1221,6 +1326,12 @@ if sys.platform == "win32":
     # https://github.com/jelmer/dulwich/issues/1005
 
     class WindowsSymlinkPermissionError(PermissionError):
+        """Windows-specific error for symlink creation failures.
+
+        This error is raised when symlink creation fails on Windows,
+        typically due to lack of developer mode or administrator privileges.
+        """
+
         def __init__(self, errno: int, msg: str, filename: Optional[str]) -> None:
             super(PermissionError, self).__init__(
                 errno,
@@ -1235,6 +1346,17 @@ if sys.platform == "win32":
         *,
         dir_fd: Optional[int] = None,
     ) -> None:
+        """Create a symbolic link on Windows with better error handling.
+
+        Args:
+          src: Source path for the symlink
+          dst: Destination path where symlink will be created
+          target_is_directory: Whether the target is a directory
+          dir_fd: Optional directory file descriptor
+
+        Raises:
+          WindowsSymlinkPermissionError: If symlink creation fails due to permissions
+        """
         try:
             return os.symlink(
                 src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
@@ -1346,10 +1468,26 @@ def get_path_element_normalizer(config: "Config") -> Callable[[bytes], bytes]:
 
 
 def validate_path_element_default(element: bytes) -> bool:
+    """Validate a path element using default rules.
+
+    Args:
+      element: Path element to validate
+
+    Returns:
+      True if path element is valid, False otherwise
+    """
     return _normalize_path_element_default(element) not in INVALID_DOTNAMES
 
 
 def validate_path_element_ntfs(element: bytes) -> bool:
+    """Validate a path element using NTFS filesystem rules.
+
+    Args:
+      element: Path element to validate
+
+    Returns:
+      True if path element is valid for NTFS, False otherwise
+    """
     normalized = _normalize_path_element_ntfs(element)
     if normalized in INVALID_DOTNAMES:
         return False
@@ -2368,6 +2506,17 @@ def _fs_to_tree_path(fs_path: Union[str, bytes], tree_encoding: str = "utf-8") -
 
 
 def index_entry_from_directory(st: os.stat_result, path: bytes) -> Optional[IndexEntry]:
+    """Create an index entry for a directory.
+
+    This is only used for submodules (directories containing .git).
+
+    Args:
+      st: Stat result for the directory
+      path: Path to the directory
+
+    Returns:
+      IndexEntry for a submodule, or None if not a submodule
+    """
     if os.path.exists(os.path.join(path, b".git")):
         head = read_submodule_head(path)
         if head is None:

+ 47 - 0
dulwich/notes.py

@@ -52,6 +52,15 @@ def get_note_fanout_level(tree: Tree, object_store: "BaseObjectStore") -> int:
 
     # Count the total number of notes in the tree recursively
     def count_notes(tree: Tree, level: int = 0) -> int:
+        """Count notes in a tree recursively.
+
+        Args:
+            tree: Tree to count notes in
+            level: Current recursion level
+
+        Returns:
+            Total number of notes
+        """
         count = 0
         for name, mode, sha in tree.items():
             if stat.S_ISREG(mode):
@@ -223,6 +232,16 @@ class NotesTree:
 
             # Build new tree structure
             def update_tree(tree: Tree, components: list, blob_sha: bytes) -> Tree:
+                """Update tree with new note entry.
+
+                Args:
+                    tree: Tree to update
+                    components: Path components
+                    blob_sha: SHA of the note blob
+
+                Returns:
+                    Updated tree
+                """
                 if len(components) == 1:
                     # Leaf level - add the note blob
                     new_tree = Tree()
@@ -368,6 +387,16 @@ class NotesTree:
 
         # Build new tree structure
         def update_tree(tree: Tree, components: list, blob_sha: bytes) -> Tree:
+            """Update tree with new note entry.
+
+            Args:
+                tree: Tree to update
+                components: Path components
+                blob_sha: SHA of the note blob
+
+            Returns:
+                Updated tree
+            """
             if len(components) == 1:
                 # Leaf level - add the note blob
                 new_tree = Tree()
@@ -429,6 +458,15 @@ class NotesTree:
 
         # Build new tree structure without the note
         def remove_from_tree(tree: Tree, components: list) -> Optional[Tree]:
+            """Remove note entry from tree.
+
+            Args:
+                tree: Tree to remove from
+                components: Path components
+
+            Returns:
+                Updated tree or None if empty
+            """
             if len(components) == 1:
                 # Leaf level - remove the note
                 new_tree = Tree()
@@ -484,6 +522,15 @@ class NotesTree:
         """
 
         def walk_tree(tree: Tree, prefix: bytes = b"") -> Iterator[tuple[bytes, bytes]]:
+            """Walk the notes tree recursively.
+
+            Args:
+                tree: Tree to walk
+                prefix: Path prefix for current level
+
+            Yields:
+                Tuples of (object_sha, note_sha)
+            """
             for name, mode, sha in tree.items():
                 if stat.S_ISDIR(mode):  # Directory
                     subtree = self._object_store[sha]

+ 83 - 0
dulwich/objects.py

@@ -135,6 +135,14 @@ def hex_to_sha(hex: Union[bytes, str]) -> bytes:
 
 
 def valid_hexsha(hex: Union[bytes, str]) -> bool:
+    """Check if a string is a valid hex SHA.
+
+    Args:
+      hex: Hex string to check
+
+    Returns:
+      True if valid hex SHA, False otherwise
+    """
     if len(hex) != 40:
         return False
     try:
@@ -185,10 +193,24 @@ def serializable_property(name: str, docstring: Optional[str] = None) -> propert
     """A property that helps tracking whether serialization is necessary."""
 
     def set(obj: "ShaFile", value: object) -> None:
+        """Set the property value and mark the object as needing serialization.
+
+        Args:
+          obj: The ShaFile object
+          value: The value to set
+        """
         setattr(obj, "_" + name, value)
         obj._needs_serialization = True
 
     def get(obj: "ShaFile") -> object:
+        """Get the property value.
+
+        Args:
+          obj: The ShaFile object
+
+        Returns:
+          The property value
+        """
         return getattr(obj, "_" + name)
 
     return property(get, set, doc=docstring)
@@ -691,6 +713,17 @@ class Blob(ShaFile):
 
     @classmethod
     def from_path(cls, path: Union[str, bytes]) -> "Blob":
+        """Read a blob from a file on disk.
+
+        Args:
+          path: Path to the blob file
+
+        Returns:
+          A Blob object
+
+        Raises:
+          NotBlobError: If the file is not a blob
+        """
         blob = ShaFile.from_path(path)
         if not isinstance(blob, cls):
             raise NotBlobError(_path_to_bytes(path))
@@ -839,6 +872,17 @@ class Tag(ShaFile):
 
     @classmethod
     def from_path(cls, filename: Union[str, bytes]) -> "Tag":
+        """Read a tag from a file on disk.
+
+        Args:
+          filename: Path to the tag file
+
+        Returns:
+          A Tag object
+
+        Raises:
+          NotTagError: If the file is not a tag
+        """
         tag = ShaFile.from_path(filename)
         if not isinstance(tag, cls):
             raise NotTagError(_path_to_bytes(filename))
@@ -991,6 +1035,12 @@ class Tag(ShaFile):
     signature = serializable_property("signature", "Optional detached GPG signature")
 
     def sign(self, keyid: Optional[str] = None) -> None:
+        """Sign this tag with a GPG key.
+
+        Args:
+          keyid: Optional GPG key ID to use for signing. If not specified,
+                 the default GPG key will be used.
+        """
         import gpg
 
         with gpg.Context(armor=True) as c:
@@ -1191,6 +1241,17 @@ class Tree(ShaFile):
 
     @classmethod
     def from_path(cls, filename: Union[str, bytes]) -> "Tree":
+        """Read a tree from a file on disk.
+
+        Args:
+          filename: Path to the tree file
+
+        Returns:
+          A Tree object
+
+        Raises:
+          NotTreeError: If the file is not a tree
+        """
         tree = ShaFile.from_path(filename)
         if not isinstance(tree, cls):
             raise NotTreeError(_path_to_bytes(filename))
@@ -1305,6 +1366,11 @@ class Tree(ShaFile):
         return list(serialize_tree(self.iteritems()))
 
     def as_pretty_string(self) -> str:
+        """Return a human-readable string representation of this tree.
+
+        Returns:
+          Pretty-printed tree entries
+        """
         text: list[str] = []
         for name, mode, hexsha in self.iteritems():
             text.append(pretty_format_tree_entry(name, mode, hexsha))
@@ -1541,6 +1607,17 @@ class Commit(ShaFile):
 
     @classmethod
     def from_path(cls, path: Union[str, bytes]) -> "Commit":
+        """Read a commit from a file on disk.
+
+        Args:
+          path: Path to the commit file
+
+        Returns:
+          A Commit object
+
+        Raises:
+          NotCommitError: If the file is not a commit
+        """
         commit = ShaFile.from_path(path)
         if not isinstance(commit, cls):
             raise NotCommitError(_path_to_bytes(path))
@@ -1653,6 +1730,12 @@ class Commit(ShaFile):
         # TODO: optionally check for duplicate parents
 
     def sign(self, keyid: Optional[str] = None) -> None:
+        """Sign this commit with a GPG key.
+
+        Args:
+          keyid: Optional GPG key ID to use for signing. If not specified,
+                 the default GPG key will be used.
+        """
         import gpg
 
         with gpg.Context(armor=True) as c:

+ 259 - 0
dulwich/pack.py

@@ -163,6 +163,17 @@ class PackedObjectContainer(ObjectContainer):
         allow_missing: bool = False,
         convert_ofs_delta: bool = True,
     ) -> Iterator["UnpackedObject"]:
+        """Iterate over unpacked objects from a subset of SHAs.
+
+        Args:
+          shas: Set of object SHAs to retrieve
+          include_comp: Include compressed data if True
+          allow_missing: If True, skip missing objects
+          convert_ofs_delta: If True, convert offset deltas to ref deltas
+
+        Returns:
+          Iterator of UnpackedObject instances
+        """
         raise NotImplementedError(self.iter_unpacked_subset)
 
 
@@ -1426,18 +1437,45 @@ class PackData:
 
     @property
     def filename(self):
+        """Get the filename of the pack file.
+
+        Returns:
+          Base filename without directory path
+        """
         return os.path.basename(self._filename)
 
     @property
     def path(self):
+        """Get the full path of the pack file.
+
+        Returns:
+          Full path to the pack file
+        """
         return self._filename
 
     @classmethod
     def from_file(cls, file, size=None):
+        """Create a PackData object from an open file.
+
+        Args:
+          file: Open file object
+          size: Optional file size
+
+        Returns:
+          PackData instance
+        """
         return cls(str(file), file=file, size=size)
 
     @classmethod
     def from_path(cls, path: Union[str, os.PathLike]):
+        """Create a PackData object from a file path.
+
+        Args:
+          path: Path to the pack file
+
+        Returns:
+          PackData instance
+        """
         return cls(filename=path)
 
     def close(self) -> None:
@@ -1689,6 +1727,15 @@ class DeltaChainIterator(Generic[T]):
 
     @classmethod
     def for_pack_data(cls, pack_data: PackData, resolve_ext_ref=None):
+        """Create a DeltaChainIterator from pack data.
+
+        Args:
+          pack_data: PackData object to iterate
+          resolve_ext_ref: Optional function to resolve external refs
+
+        Returns:
+          DeltaChainIterator instance
+        """
         walker = cls(None, resolve_ext_ref=resolve_ext_ref)
         walker.set_pack_data(pack_data)
         for unpacked in pack_data.iter_unpacked(include_comp=False):
@@ -1704,6 +1751,17 @@ class DeltaChainIterator(Generic[T]):
         allow_missing: bool = False,
         resolve_ext_ref=None,
     ):
+        """Create a DeltaChainIterator for a subset of objects.
+
+        Args:
+          pack: Pack object containing the data
+          shas: Iterable of object SHAs to include
+          allow_missing: If True, skip missing objects
+          resolve_ext_ref: Optional function to resolve external refs
+
+        Returns:
+          DeltaChainIterator instance
+        """
         walker = cls(None, resolve_ext_ref=resolve_ext_ref)
         walker.set_pack_data(pack.data)
         todo = set()
@@ -1737,6 +1795,11 @@ class DeltaChainIterator(Generic[T]):
         return walker
 
     def record(self, unpacked: UnpackedObject) -> None:
+        """Record an unpacked object for later processing.
+
+        Args:
+          unpacked: UnpackedObject to record
+        """
         type_num = unpacked.pack_type_num
         offset = unpacked.offset
         assert offset is not None
@@ -1752,6 +1815,11 @@ class DeltaChainIterator(Generic[T]):
             self._full_ofs.append((offset, type_num))
 
     def set_pack_data(self, pack_data: PackData) -> None:
+        """Set the pack data for iteration.
+
+        Args:
+          pack_data: PackData object to use
+        """
         self._file = pack_data._file
 
     def _walk_all_chains(self):
@@ -1837,6 +1905,14 @@ class UnpackedObjectIterator(DeltaChainIterator[UnpackedObject]):
     """Delta chain iterator that yields unpacked objects."""
 
     def _result(self, unpacked):
+        """Return the unpacked object.
+
+        Args:
+            unpacked: The unpacked object
+
+        Returns:
+            The unpacked object unchanged
+        """
         return unpacked
 
 
@@ -1846,6 +1922,14 @@ class PackIndexer(DeltaChainIterator[PackIndexEntry]):
     _compute_crc32 = True
 
     def _result(self, unpacked):
+        """Convert unpacked object to pack index entry.
+
+        Args:
+            unpacked: The unpacked object
+
+        Returns:
+            Tuple of (sha, offset, crc32) for index entry
+        """
         return unpacked.sha(), unpacked.offset, unpacked.crc32
 
 
@@ -1853,6 +1937,14 @@ class PackInflater(DeltaChainIterator[ShaFile]):
     """Delta chain iterator that yields ShaFile objects."""
 
     def _result(self, unpacked):
+        """Convert unpacked object to ShaFile.
+
+        Args:
+            unpacked: The unpacked object
+
+        Returns:
+            ShaFile object from the unpacked data
+        """
         return unpacked.sha_file()
 
 
@@ -1860,15 +1952,36 @@ class SHA1Reader(BinaryIO):
     """Wrapper for file-like object that remembers the SHA1 of its data."""
 
     def __init__(self, f) -> None:
+        """Initialize SHA1Reader.
+
+        Args:
+            f: File-like object to wrap
+        """
         self.f = f
         self.sha1 = sha1(b"")
 
     def read(self, size: int = -1) -> bytes:
+        """Read bytes and update SHA1.
+
+        Args:
+            size: Number of bytes to read, -1 for all
+
+        Returns:
+            Bytes read from file
+        """
         data = self.f.read(size)
         self.sha1.update(data)
         return data
 
     def check_sha(self, allow_empty: bool = False) -> None:
+        """Check if the SHA1 matches the expected value.
+
+        Args:
+            allow_empty: Allow empty SHA1 hash
+
+        Raises:
+            ChecksumMismatch: If SHA1 doesn't match
+        """
         stored = self.f.read(20)
         # If git option index.skipHash is set the index will be empty
         if stored != self.sha1.digest() and (
@@ -1878,62 +1991,121 @@ class SHA1Reader(BinaryIO):
             raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(stored))
 
     def close(self):
+        """Close the underlying file."""
         return self.f.close()
 
     def tell(self) -> int:
+        """Return current file position."""
         return self.f.tell()
 
     # BinaryIO abstract methods
     def readable(self) -> bool:
+        """Check if file is readable."""
         return True
 
     def writable(self) -> bool:
+        """Check if file is writable."""
         return False
 
     def seekable(self) -> bool:
+        """Check if file is seekable."""
         return getattr(self.f, "seekable", lambda: False)()
 
     def seek(self, offset: int, whence: int = 0) -> int:
+        """Seek to position in file.
+
+        Args:
+            offset: Position offset
+            whence: Reference point (0=start, 1=current, 2=end)
+
+        Returns:
+            New file position
+        """
         return self.f.seek(offset, whence)
 
     def flush(self) -> None:
+        """Flush the file buffer."""
         if hasattr(self.f, "flush"):
             self.f.flush()
 
     def readline(self, size: int = -1) -> bytes:
+        """Read a line from the file.
+
+        Args:
+            size: Maximum bytes to read
+
+        Returns:
+            Line read from file
+        """
         return self.f.readline(size)
 
     def readlines(self, hint: int = -1) -> list[bytes]:
+        """Read all lines from the file.
+
+        Args:
+            hint: Approximate number of bytes to read
+
+        Returns:
+            List of lines
+        """
         return self.f.readlines(hint)
 
     def writelines(self, lines) -> None:
+        """Not supported for read-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("writelines")
 
     def write(self, data) -> int:
+        """Not supported for read-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("write")
 
     def __enter__(self):
+        """Enter context manager."""
         return self
 
     def __exit__(self, type, value, traceback):
+        """Exit context manager and close file."""
         self.close()
 
     def __iter__(self):
+        """Return iterator over lines."""
         return self
 
     def __next__(self) -> bytes:
+        """Get next line from file.
+
+        Returns:
+            Next line
+
+        Raises:
+            StopIteration: When no more lines
+        """
         line = self.readline()
         if not line:
             raise StopIteration
         return line
 
     def fileno(self) -> int:
+        """Return file descriptor number."""
         return self.f.fileno()
 
     def isatty(self) -> bool:
+        """Check if file is a terminal."""
         return getattr(self.f, "isatty", lambda: False)()
 
     def truncate(self, size: Optional[int] = None) -> int:
+        """Not supported for read-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("truncate")
 
 
@@ -1941,17 +2113,35 @@ class SHA1Writer(BinaryIO):
     """Wrapper for file-like object that remembers the SHA1 of its data."""
 
     def __init__(self, f) -> None:
+        """Initialize SHA1Writer.
+
+        Args:
+            f: File-like object to wrap
+        """
         self.f = f
         self.length = 0
         self.sha1 = sha1(b"")
 
     def write(self, data) -> int:
+        """Write data and update SHA1.
+
+        Args:
+            data: Data to write
+
+        Returns:
+            Number of bytes written
+        """
         self.sha1.update(data)
         self.f.write(data)
         self.length += len(data)
         return len(data)
 
     def write_sha(self):
+        """Write the SHA1 digest to the file.
+
+        Returns:
+            The SHA1 digest bytes
+        """
         sha = self.sha1.digest()
         assert len(sha) == 20
         self.f.write(sha)
@@ -1959,65 +2149,124 @@ class SHA1Writer(BinaryIO):
         return sha
 
     def close(self):
+        """Close the file after writing SHA1.
+
+        Returns:
+            The SHA1 digest bytes
+        """
         sha = self.write_sha()
         self.f.close()
         return sha
 
     def offset(self):
+        """Get the total number of bytes written.
+
+        Returns:
+            Total bytes written
+        """
         return self.length
 
     def tell(self) -> int:
+        """Return current file position."""
         return self.f.tell()
 
     # BinaryIO abstract methods
     def readable(self) -> bool:
+        """Check if file is readable."""
         return False
 
     def writable(self) -> bool:
+        """Check if file is writable."""
         return True
 
     def seekable(self) -> bool:
+        """Check if file is seekable."""
         return getattr(self.f, "seekable", lambda: False)()
 
     def seek(self, offset: int, whence: int = 0) -> int:
+        """Seek to position in file.
+
+        Args:
+            offset: Position offset
+            whence: Reference point (0=start, 1=current, 2=end)
+
+        Returns:
+            New file position
+        """
         return self.f.seek(offset, whence)
 
     def flush(self) -> None:
+        """Flush the file buffer."""
         if hasattr(self.f, "flush"):
             self.f.flush()
 
     def readline(self, size: int = -1) -> bytes:
+        """Not supported for write-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("readline")
 
     def readlines(self, hint: int = -1) -> list[bytes]:
+        """Not supported for write-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("readlines")
 
     def writelines(self, lines) -> None:
+        """Write multiple lines to the file.
+
+        Args:
+            lines: Iterable of lines to write
+        """
         for line in lines:
             self.write(line)
 
     def read(self, size: int = -1) -> bytes:
+        """Not supported for write-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("read")
 
     def __enter__(self):
+        """Enter context manager."""
         return self
 
     def __exit__(self, type, value, traceback):
+        """Exit context manager and close file."""
         self.close()
 
     def __iter__(self):
+        """Return iterator."""
         return self
 
     def __next__(self) -> bytes:
+        """Not supported for write-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("__next__")
 
     def fileno(self) -> int:
+        """Return file descriptor number."""
         return self.f.fileno()
 
     def isatty(self) -> bool:
+        """Check if file is a terminal."""
         return getattr(self.f, "isatty", lambda: False)()
 
     def truncate(self, size: Optional[int] = None) -> int:
+        """Not supported for write-only file.
+
+        Raises:
+            UnsupportedOperation: Always raised
+        """
         raise UnsupportedOperation("truncate")
 
 
@@ -2341,6 +2590,14 @@ def generate_unpacked_objects(
 
 
 def full_unpacked_object(o: ShaFile) -> UnpackedObject:
+    """Create an UnpackedObject from a ShaFile.
+
+    Args:
+      o: ShaFile object to convert
+
+    Returns:
+      UnpackedObject with full object data
+    """
     return UnpackedObject(
         o.type_num,
         delta_base=None,
@@ -2419,6 +2676,8 @@ def write_pack_objects(
 
 
 class PackChunkGenerator:
+    """Generator for pack data chunks."""
+
     def __init__(
         self,
         num_records=None,

+ 42 - 0
dulwich/patch.py

@@ -197,6 +197,14 @@ def is_binary(content: bytes) -> bool:
 
 
 def shortid(hexsha: Optional[bytes]) -> bytes:
+    """Get short object ID.
+
+    Args:
+        hexsha: Full hex SHA or None
+
+    Returns:
+        7-character short ID
+    """
     if hexsha is None:
         return b"0" * 7
     else:
@@ -204,6 +212,15 @@ def shortid(hexsha: Optional[bytes]) -> bytes:
 
 
 def patch_filename(p: Optional[bytes], root: bytes) -> bytes:
+    """Generate patch filename.
+
+    Args:
+        p: Path or None
+        root: Root directory
+
+    Returns:
+        Full patch filename
+    """
     if p is None:
         return b"/dev/null"
     else:
@@ -235,6 +252,15 @@ def write_object_diff(
     patched_new_path = patch_filename(new_path, b"b")
 
     def content(mode: Optional[int], hexsha: Optional[bytes]) -> Blob:
+        """Get blob content for a file.
+
+        Args:
+            mode: File mode
+            hexsha: Object SHA
+
+        Returns:
+            Blob object
+        """
         from typing import cast
 
         if hexsha is None:
@@ -250,6 +276,14 @@ def write_object_diff(
                 return cast(Blob, Blob.from_string(obj.as_raw_string()))
 
     def lines(content: "Blob") -> list[bytes]:
+        """Split blob content into lines.
+
+        Args:
+            content: Blob content
+
+        Returns:
+            List of lines
+        """
         if not content:
             return []
         else:
@@ -338,6 +372,14 @@ def write_blob_diff(
     patched_new_path = patch_filename(new_path, b"b")
 
     def lines(blob: Optional["Blob"]) -> list[bytes]:
+        """Split blob content into lines.
+
+        Args:
+            blob: Blob object or None
+
+        Returns:
+            List of lines
+        """
         if blob is not None:
             return blob.splitlines()
         else:

+ 12 - 0
dulwich/porcelain.py

@@ -1247,10 +1247,22 @@ def show_commit(repo: RepoPath, commit, decode, outstream=sys.stdout) -> None:
 
     # Create a wrapper for ColorizedDiffStream to handle string/bytes conversion
     class _StreamWrapper:
+        """Wrapper for ColorizedDiffStream to handle string/bytes conversion."""
+
         def __init__(self, stream):
+            """Initialize a _StreamWrapper.
+
+            Args:
+              stream: The underlying stream to wrap
+            """
             self.stream = stream
 
         def write(self, data):
+            """Write data to the stream, converting strings to bytes if needed.
+
+            Args:
+              data: Data to write (str or bytes)
+            """
             if isinstance(data, str):
                 # Convert string to bytes for ColorizedDiffStream
                 self.stream.write(data.encode("utf-8"))

+ 120 - 0
dulwich/protocol.py

@@ -131,22 +131,57 @@ NAK_LINE = b"NAK\n"
 
 
 def agent_string() -> bytes:
+    """Generate the agent string for dulwich.
+
+    Returns:
+      Agent string as bytes
+    """
     return ("dulwich/" + ".".join(map(str, dulwich.__version__))).encode("ascii")
 
 
 def capability_agent() -> bytes:
+    """Generate the agent capability string.
+
+    Returns:
+      Agent capability with dulwich version
+    """
     return CAPABILITY_AGENT + b"=" + agent_string()
 
 
 def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
+    """Generate a symref capability string.
+
+    Args:
+      from_ref: Source reference name
+      to_ref: Target reference name
+
+    Returns:
+      Symref capability string
+    """
     return CAPABILITY_SYMREF + b"=" + from_ref + b":" + to_ref
 
 
 def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
+    """Extract capability names from a list of capabilities.
+
+    Args:
+      capabilities: List of capability strings
+
+    Returns:
+      Set of capability names
+    """
     return {parse_capability(c)[0] for c in capabilities}
 
 
 def parse_capability(capability: bytes) -> tuple[bytes, Optional[bytes]]:
+    """Parse a capability string into name and value.
+
+    Args:
+      capability: Capability string
+
+    Returns:
+      Tuple of (capability_name, capability_value)
+    """
     parts = capability.split(b"=", 1)
     if len(parts) == 1:
         return (parts[0], None)
@@ -154,6 +189,14 @@ def parse_capability(capability: bytes) -> tuple[bytes, Optional[bytes]]:
 
 
 def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
+    """Generate symref capability strings from symref pairs.
+
+    Args:
+      symrefs: Iterable of (from_ref, to_ref) tuples
+
+    Returns:
+      List of symref capability strings
+    """
     return [capability_symref(*k) for k in symrefs]
 
 
@@ -166,10 +209,27 @@ COMMAND_HAVE = b"have"
 
 
 def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
+    """Format a command packet.
+
+    Args:
+      cmd: Command name
+      *args: Command arguments
+
+    Returns:
+      Formatted command packet
+    """
     return cmd + b" " + b"".join([(a + b"\0") for a in args])
 
 
 def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
+    """Parse a command packet.
+
+    Args:
+      line: Command line to parse
+
+    Returns:
+      Tuple of (command, [arguments])
+    """
     splice_at = line.find(b" ")
     cmd, args = line[:splice_at], line[splice_at + 1 :]
     assert args[-1:] == b"\x00"
@@ -236,6 +296,7 @@ class Protocol:
         self._readahead: Optional[BytesIO] = None
 
     def close(self) -> None:
+        """Close the underlying transport if a close function was provided."""
         if self._close:
             self._close()
 
@@ -411,6 +472,14 @@ class ReceivableProtocol(Protocol):
         self._rbufsize = rbufsize
 
     def read(self, size: int) -> bytes:
+        """Read bytes from the socket.
+
+        Args:
+          size: Number of bytes to read
+
+        Returns:
+          Bytes read from socket
+        """
         # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
         # with the following modifications:
         #  - omit the size <= 0 branch
@@ -472,6 +541,14 @@ class ReceivableProtocol(Protocol):
         return buf.read()
 
     def recv(self, size: int) -> bytes:
+        """Receive bytes from the socket with buffering.
+
+        Args:
+          size: Maximum number of bytes to receive
+
+        Returns:
+          Bytes received from socket
+        """
         assert size > 0
 
         buf = self._rbuf
@@ -613,12 +690,30 @@ class PktLineParser:
 
 
 def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
+    """Format a capabilities list for the wire protocol.
+
+    Args:
+      capabilities: List of capability strings
+
+    Returns:
+      Capabilities concatenated as bytes, each prefixed with a single space
+    """
     return b"".join([b" " + c for c in capabilities])
 
 
 def format_ref_line(
     ref: bytes, sha: bytes, capabilities: Optional[list[bytes]] = None
 ) -> bytes:
+    """Format a ref advertisement line.
+
+    Args:
+      ref: Reference name
+      sha: SHA hash
+      capabilities: Optional list of capabilities
+
+    Returns:
+      Formatted ref line
+    """
     if capabilities is None:
         return sha + b" " + ref + b"\n"
     else:
@@ -626,14 +721,39 @@ def format_ref_line(
 
 
 def format_shallow_line(sha: bytes) -> bytes:
+    """Format a shallow line.
+
+    Args:
+      sha: SHA to mark as shallow
+
+    Returns:
+      Formatted shallow line
+    """
     return COMMAND_SHALLOW + b" " + sha
 
 
 def format_unshallow_line(sha: bytes) -> bytes:
+    """Format an unshallow line.
+
+    Args:
+      sha: SHA to unshallow
+
+    Returns:
+      Formatted unshallow line
+    """
     return COMMAND_UNSHALLOW + b" " + sha
 
 
 def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
+    """Format an ACK line.
+
+    Args:
+      sha: SHA to acknowledge
+      ack_type: Optional ACK type (e.g. b"continue")
+
+    Returns:
+      Formatted ACK line
+    """
     if ack_type:
         ack_type = b" " + ack_type
     return b"ACK " + sha + ack_type + b"\n"

+ 209 - 0
dulwich/refs.py

@@ -56,6 +56,12 @@ class SymrefLoop(Exception):
     """There is a loop between one or more symrefs."""
 
     def __init__(self, ref, depth) -> None:
+        """Initialize a SymrefLoop exception.
+
+        Args:
+          ref: The ref that caused the loop
+          depth: Depth at which the loop was detected
+        """
         self.ref = ref
         self.depth = depth
 
@@ -137,6 +143,11 @@ class RefsContainer:
     """A container for refs."""
 
     def __init__(self, logger=None) -> None:
+        """Initialize a RefsContainer.
+
+        Args:
+          logger: Optional logger for reflog updates
+        """
         self._logger = logger
 
     def _log(
@@ -213,6 +224,17 @@ class RefsContainer:
         message: Optional[bytes] = None,
         prune: bool = False,
     ) -> None:
+        """Import refs from another repository.
+
+        Args:
+          base: Base ref to import into (e.g., b'refs/remotes/origin')
+          other: Dictionary of refs to import
+          committer: Optional committer for reflog
+          timestamp: Optional timestamp for reflog
+          timezone: Optional timezone for reflog
+          message: Optional message for reflog
+          prune: If True, remove refs not in other
+        """
         if prune:
             to_delete = set(self.subkeys(base))
         else:
@@ -237,6 +259,7 @@ class RefsContainer:
         raise NotImplementedError(self.allkeys)
 
     def __iter__(self):
+        """Iterate over all ref names."""
         return iter(self.allkeys())
 
     def keys(self, base=None):
@@ -346,6 +369,14 @@ class RefsContainer:
         return refnames, contents
 
     def __contains__(self, refname) -> bool:
+        """Check if a ref exists.
+
+        Args:
+          refname: Name of the ref to check
+
+        Returns:
+          True if the ref exists
+        """
         if self.read_ref(refname):
             return True
         return False
@@ -492,12 +523,30 @@ class DictRefsContainer(RefsContainer):
         self._watchers: set[Any] = set()
 
     def allkeys(self):
+        """Get all ref names.
+
+        Returns:
+          All ref names in the container
+        """
         return self._refs.keys()
 
     def read_loose_ref(self, name):
+        """Read a reference from the refs dictionary.
+
+        Args:
+          name: The ref name to read
+
+        Returns:
+          The ref value or None if not found
+        """
         return self._refs.get(name, None)
 
     def get_packed_refs(self):
+        """Get packed refs (always empty for DictRefsContainer).
+
+        Returns:
+          Empty dictionary
+        """
         return {}
 
     def _notify(self, ref, newsha) -> None:
@@ -513,6 +562,16 @@ class DictRefsContainer(RefsContainer):
         timezone=None,
         message=None,
     ) -> None:
+        """Make a ref point at another ref.
+
+        Args:
+          name: Name of the ref to set
+          other: Name of the ref to point at
+          committer: Optional committer name for reflog
+          timestamp: Optional timestamp for reflog
+          timezone: Optional timezone for reflog
+          message: Optional message for reflog
+        """
         old = self.follow(name)[-1]
         new = SYMREF + other
         self._refs[name] = new
@@ -537,6 +596,24 @@ class DictRefsContainer(RefsContainer):
         timezone=None,
         message=None,
     ) -> bool:
+        """Set a refname to new_ref only if it currently equals old_ref.
+
+        This method follows all symbolic references, and can be used to perform
+        an atomic compare-and-swap operation.
+
+        Args:
+          name: The refname to set.
+          old_ref: The old sha the refname must refer to, or None to set
+            unconditionally.
+          new_ref: The new sha the refname will refer to.
+          committer: Optional committer name for reflog
+          timestamp: Optional timestamp for reflog
+          timezone: Optional timezone for reflog
+          message: Optional message for reflog
+
+        Returns:
+          True if the set was successful, False otherwise.
+        """
         if old_ref is not None and self._refs.get(name, ZERO_SHA) != old_ref:
             return False
         # Only update the specific ref requested, not the whole chain
@@ -564,6 +641,19 @@ class DictRefsContainer(RefsContainer):
         timezone=None,
         message: Optional[bytes] = None,
     ) -> bool:
+        """Add a new reference only if it does not already exist.
+
+        Args:
+          name: Ref name
+          ref: Ref value
+          committer: Optional committer name for reflog
+          timestamp: Optional timestamp for reflog
+          timezone: Optional timezone for reflog
+          message: Optional message for reflog
+
+        Returns:
+          True if the add was successful, False otherwise.
+        """
         if name in self._refs:
             return False
         self._refs[name] = ref
@@ -588,6 +678,23 @@ class DictRefsContainer(RefsContainer):
         timezone=None,
         message=None,
     ) -> bool:
+        """Remove a refname only if it currently equals old_ref.
+
+        This method does not follow symbolic references. It can be used to
+        perform an atomic compare-and-delete operation.
+
+        Args:
+          name: The refname to delete.
+          old_ref: The old sha the refname must refer to, or None to
+            delete unconditionally.
+          committer: Optional committer name for reflog
+          timestamp: Optional timestamp for reflog
+          timezone: Optional timezone for reflog
+          message: Optional message for reflog
+
+        Returns:
+          True if the delete was successful, False otherwise.
+        """
         if old_ref is not None and self._refs.get(name, ZERO_SHA) != old_ref:
             return False
         try:
@@ -608,6 +715,14 @@ class DictRefsContainer(RefsContainer):
         return True
 
     def get_peeled(self, name):
+        """Get the peeled value of a ref.
+
+        Args:
+          name: Ref name to get peeled value for
+
+        Returns:
+          The peeled SHA or None if not available
+        """
         return self._peeled.get(name)
 
     def _update(self, refs) -> None:
@@ -626,21 +741,55 @@ class InfoRefsContainer(RefsContainer):
     """Refs container that reads refs from a info/refs file."""
 
     def __init__(self, f) -> None:
+        """Initialize an InfoRefsContainer.
+
+        Args:
+          f: File-like object containing info/refs data
+        """
         self._refs = {}
         self._peeled = {}
         refs = read_info_refs(f)
         (self._refs, self._peeled) = split_peeled_refs(refs)
 
     def allkeys(self):
+        """Get all ref names.
+
+        Returns:
+          All ref names in the info/refs file
+        """
         return self._refs.keys()
 
     def read_loose_ref(self, name):
+        """Read a reference from the parsed info/refs.
+
+        Args:
+          name: The ref name to read
+
+        Returns:
+          The ref value or None if not found
+        """
         return self._refs.get(name, None)
 
     def get_packed_refs(self):
+        """Get packed refs (always empty for InfoRefsContainer).
+
+        Returns:
+          Empty dictionary
+        """
         return {}
 
     def get_peeled(self, name):
+        """Get the peeled value of a ref.
+
+        Args:
+          name: Ref name to get peeled value for
+
+        Returns:
+          The peeled SHA if available, otherwise the ref value itself
+
+        Raises:
+          KeyError: If the ref doesn't exist
+        """
         try:
             return self._peeled[name]
         except KeyError:
@@ -667,9 +816,18 @@ class DiskRefsContainer(RefsContainer):
         self._peeled_refs = None
 
     def __repr__(self) -> str:
+        """Return string representation of DiskRefsContainer."""
         return f"{self.__class__.__name__}({self.path!r})"
 
     def subkeys(self, base):
+        """Get all ref names under a base ref.
+
+        Args:
+          base: Base ref path to search under
+
+        Returns:
+          Set of ref names under the base (without base prefix)
+        """
         subkeys = set()
         path = self.refpath(base)
         for root, unused_dirs, files in os.walk(path):
@@ -689,6 +847,11 @@ class DiskRefsContainer(RefsContainer):
         return subkeys
 
     def allkeys(self):
+        """Get all ref names from disk.
+
+        Returns:
+          Set of all ref names (both loose and packed)
+        """
         allkeys = set()
         if os.path.exists(self.refpath(HEADREF)):
             allkeys.add(HEADREF)
@@ -1211,6 +1374,14 @@ def write_packed_refs(f, packed_refs, peeled_refs=None) -> None:
 
 
 def read_info_refs(f):
+    """Read info/refs file.
+
+    Args:
+      f: File-like object to read from
+
+    Returns:
+      Dictionary mapping ref names to SHA1s
+    """
     ret = {}
     for line in f.readlines():
         (sha, name) = line.rstrip(b"\r\n").split(b"\t", 1)
@@ -1239,6 +1410,14 @@ def write_info_refs(refs, store: ObjectContainer):
 
 
 def is_local_branch(x):
+    """Check if a ref name refers to a local branch.
+
+    Args:
+      x: Ref name to check
+
+    Returns:
+      True if ref is a local branch (refs/heads/...)
+    """
     return x.startswith(LOCAL_BRANCH_PREFIX)
 
 
@@ -1356,6 +1535,15 @@ def _import_remote_refs(
 
 
 def serialize_refs(store, refs):
+    """Serialize refs with peeled refs.
+
+    Args:
+      store: Object store to peel refs from
+      refs: Dictionary of ref names to SHAs
+
+    Returns:
+      Dictionary with refs and peeled refs (marked with ^{})
+    """
     # TODO: Avoid recursive import :(
     from .object_store import peel_sha
 
@@ -1385,6 +1573,12 @@ class locked_ref:
     """
 
     def __init__(self, refs_container: DiskRefsContainer, refname: Ref) -> None:
+        """Initialize a locked ref.
+
+        Args:
+          refs_container: The DiskRefsContainer to lock the ref in
+          refname: The ref name to lock
+        """
         self._refs_container = refs_container
         self._refname = refname
         self._file: Optional[_GitFile] = None
@@ -1392,6 +1586,14 @@ class locked_ref:
         self._deleted = False
 
     def __enter__(self) -> "locked_ref":
+        """Enter the context manager and acquire the lock.
+
+        Returns:
+          This locked_ref instance
+
+        Raises:
+          OSError: If the lock cannot be acquired
+        """
         self._refs_container._check_refname(self._refname)
         try:
             realnames, _ = self._refs_container.follow(self._refname)
@@ -1411,6 +1613,13 @@ class locked_ref:
         exc_value: Optional[BaseException],
         traceback: Optional[types.TracebackType],
     ) -> None:
+        """Exit the context manager and release the lock.
+
+        Args:
+          exc_type: Type of exception if one occurred
+          exc_value: Exception instance if one occurred
+          traceback: Traceback if an exception occurred
+        """
         if self._file:
             if exc_type is not None or self._deleted:
                 self._file.abort()

+ 63 - 0
dulwich/repo.py

@@ -331,6 +331,8 @@ def _set_filesystem_hidden(path) -> None:
 
 
 class ParentsProvider:
+    """Provides parents for commits, handling grafts and shallow commits."""
+
     def __init__(self, store, grafts={}, shallows=[]) -> None:
         self.store = store
         self.grafts = grafts
@@ -340,6 +342,15 @@ class ParentsProvider:
         self.commit_graph = store.get_commit_graph()
 
     def get_parents(self, commit_id, commit=None):
+        """Get the parents of a commit.
+
+        Args:
+          commit_id: The commit SHA to get parents for
+          commit: Optional commit object to avoid fetching
+
+        Returns:
+          List of parent commit SHAs
+        """
         try:
             return self.grafts[commit_id]
         except KeyError:
@@ -581,7 +592,14 @@ class BaseRepo:
                 return None
 
             class DummyMissingObjectFinder:
+                """Dummy finder that returns no missing objects."""
+
                 def get_remote_has(self) -> None:
+                    """Get remote has (always returns None).
+
+                    Returns:
+                      None
+                    """
                     return None
 
                 def __len__(self) -> int:
@@ -607,6 +625,14 @@ class BaseRepo:
         parents_provider = ParentsProvider(self.object_store, shallows=current_shallow)
 
         def get_parents(commit):
+            """Get parents for a commit using the parents provider.
+
+            Args:
+              commit: Commit object
+
+            Returns:
+              List of parent commit SHAs
+            """
             return parents_provider.get_parents(commit.id, commit)
 
         return MissingObjectFinder(
@@ -708,6 +734,11 @@ class BaseRepo:
         return self.object_store[sha]
 
     def parents_provider(self) -> ParentsProvider:
+        """Get a parents provider for this repository.
+
+        Returns:
+          ParentsProvider instance configured with grafts and shallows
+        """
         return ParentsProvider(
             self.object_store,
             grafts=self._graftpoints,
@@ -1581,6 +1612,15 @@ class Repo(BaseRepo):
 
         # Add gitdir matchers
         def match_gitdir(pattern: str, case_sensitive: bool = True) -> bool:
+            """Match gitdir against a pattern.
+
+            Args:
+              pattern: Pattern to match against
+              case_sensitive: Whether to match case-sensitively
+
+            Returns:
+              True if gitdir matches pattern
+            """
             # Handle relative patterns (starting with ./)
             if pattern.startswith("./"):
                 # Can't handle relative patterns without config directory context
@@ -1618,6 +1658,14 @@ class Repo(BaseRepo):
 
         # Add onbranch matcher
         def match_onbranch(pattern: str) -> bool:
+            """Match current branch against a pattern.
+
+            Args:
+              pattern: Pattern to match against
+
+            Returns:
+              True if current branch matches pattern
+            """
             try:
                 # Get the current branch using refs
                 ref_chain, _ = self.refs.follow(b"HEAD")
@@ -1640,6 +1688,11 @@ class Repo(BaseRepo):
         return matchers
 
     def get_worktree_config(self) -> "ConfigFile":
+        """Get the worktree-specific config.
+
+        Returns:
+          ConfigFile object for the worktree config
+        """
         from .config import ConfigFile
 
         path = os.path.join(self.commondir(), "config.worktree")
@@ -2060,9 +2113,19 @@ class MemoryRepo(BaseRepo):
         self._reflog.append(args)
 
     def set_description(self, description) -> None:
+        """Set the description for this repository.
+
+        Args:
+          description: Text to set as description
+        """
         self._description = description
 
     def get_description(self):
+        """Get the description of this repository.
+
+        Returns:
+          Repository description as bytes
+        """
         return self._description
 
     def _determine_file_mode(self):

+ 166 - 0
dulwich/server.py

@@ -277,6 +277,11 @@ class PackHandler(Handler):
 
     @classmethod
     def innocuous_capabilities(cls) -> Iterable[bytes]:
+        """Return capabilities that don't affect protocol behavior.
+
+        Returns:
+            List of innocuous capability names
+        """
         return [
             CAPABILITY_INCLUDE_TAG,
             CAPABILITY_THIN_PACK,
@@ -600,6 +605,11 @@ class AckGraphWalkerImpl:
     """Base class for acknowledgment graph walker implementations."""
 
     def __init__(self, graph_walker):
+        """Initialize acknowledgment graph walker.
+
+        Args:
+            graph_walker: Graph walker to wrap
+        """
         raise NotImplementedError
 
     def ack(self, have_ref: ObjectID) -> None:
@@ -909,11 +919,21 @@ class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
         self._common: list[bytes] = []
 
     def ack(self, have_ref) -> None:
+        """Acknowledge a have reference.
+
+        Args:
+            have_ref: Object ID to acknowledge
+        """
         if not self._common:
             self.walker.send_ack(have_ref)
             self._common.append(have_ref)
 
     def next(self):
+        """Get next SHA from graph walker.
+
+        Returns:
+            SHA bytes or None if done
+        """
         command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
         if command in (None, COMMAND_DONE):
             # defer the handling of done
@@ -925,6 +945,15 @@ class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
     __next__ = next
 
     def handle_done(self, done_required, done_received) -> bool:
+        """Handle done command.
+
+        Args:
+            done_required: Whether done is required
+            done_received: Whether done was received
+
+        Returns:
+            True if handling completed successfully
+        """
         if not self._common:
             self.walker.send_nak()
 
@@ -949,11 +978,21 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
     """Graph walker implementation that speaks the multi-ack protocol."""
 
     def __init__(self, walker) -> None:
+        """Initialize multi-ack graph walker.
+
+        Args:
+            walker: Parent ProtocolGraphWalker instance
+        """
         self.walker = walker
         self._found_base = False
         self._common: list[bytes] = []
 
     def ack(self, have_ref) -> None:
+        """Acknowledge a have reference.
+
+        Args:
+            have_ref: Object ID to acknowledge
+        """
         self._common.append(have_ref)
         if not self._found_base:
             self.walker.send_ack(have_ref, b"continue")
@@ -962,6 +1001,11 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
         # else we blind ack within next
 
     def next(self):
+        """Get next SHA from graph walker.
+
+        Returns:
+            SHA bytes or None if done
+        """
         while True:
             command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
             if command is None:
@@ -981,6 +1025,15 @@ class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
     __next__ = next
 
     def handle_done(self, done_required, done_received) -> bool:
+        """Handle done command.
+
+        Args:
+            done_required: Whether done is required
+            done_received: Whether done was received
+
+        Returns:
+            True if handling completed successfully
+        """
         if done_required and not done_received:
             # we are not done, especially when done is required; skip
             # the pack for this request and especially do not handle
@@ -1008,15 +1061,30 @@ class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
     """Graph walker implementation speaking the multi-ack-detailed protocol."""
 
     def __init__(self, walker) -> None:
+        """Initialize multi-ack-detailed graph walker.
+
+        Args:
+            walker: Parent ProtocolGraphWalker instance
+        """
         self.walker = walker
         self._common: list[bytes] = []
 
     def ack(self, have_ref) -> None:
+        """Acknowledge a have reference.
+
+        Args:
+            have_ref: Object ID to acknowledge
+        """
         # Should only be called iff have_ref is common
         self._common.append(have_ref)
         self.walker.send_ack(have_ref, b"common")
 
     def next(self):
+        """Get next SHA from graph walker.
+
+        Returns:
+            SHA bytes or None if done
+        """
         while True:
             command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
             if command is None:
@@ -1046,6 +1114,15 @@ class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
     __next__ = next
 
     def handle_done(self, done_required, done_received) -> bool:
+        """Handle done command.
+
+        Args:
+            done_required: Whether done is required
+            done_received: Whether done was received
+
+        Returns:
+            True if handling completed successfully
+        """
         if done_required and not done_received:
             # we are not done, especially when done is required; skip
             # the pack for this request and especially do not handle
@@ -1075,12 +1152,26 @@ class ReceivePackHandler(PackHandler):
     def __init__(
         self, backend, args, proto, stateless_rpc=False, advertise_refs=False
     ) -> None:
+        """Initialize receive-pack handler.
+
+        Args:
+            backend: Backend instance
+            args: Command arguments
+            proto: Protocol instance
+            stateless_rpc: Whether to use stateless RPC
+            advertise_refs: Whether to advertise refs
+        """
         super().__init__(backend, proto, stateless_rpc=stateless_rpc)
         self.repo = backend.open_repository(args[0])
         self.advertise_refs = advertise_refs
 
     @classmethod
     def capabilities(cls) -> Iterable[bytes]:
+        """Return supported capabilities.
+
+        Returns:
+            List of capability names
+        """
         return [
             CAPABILITY_REPORT_STATUS,
             CAPABILITY_DELETE_REFS,
@@ -1093,6 +1184,14 @@ class ReceivePackHandler(PackHandler):
     def _apply_pack(
         self, refs: list[tuple[ObjectID, ObjectID, Ref]]
     ) -> Iterator[tuple[bytes, bytes]]:
+        """Apply received pack to repository.
+
+        Args:
+            refs: List of (old_sha, new_sha, ref_name) tuples
+
+        Yields:
+            Tuples of (ref_name, status_message) for each ref
+        """
         all_exceptions = (
             IOError,
             OSError,
@@ -1147,6 +1246,11 @@ class ReceivePackHandler(PackHandler):
             yield (ref, ref_status)
 
     def _report_status(self, status: list[tuple[bytes, bytes]]) -> None:
+        """Report status to client.
+
+        Args:
+            status: List of (ref_name, status_message) tuples
+        """
         if self.has_capability(CAPABILITY_SIDE_BAND_64K):
             writer = BufferedPktLineWriter(
                 lambda d: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, d)
@@ -1174,6 +1278,11 @@ class ReceivePackHandler(PackHandler):
         flush()
 
     def _on_post_receive(self, client_refs) -> None:
+        """Run post-receive hook.
+
+        Args:
+            client_refs: Dictionary of ref changes from client
+        """
         hook = self.repo.hooks.get("post-receive", None)
         if not hook:
             return
@@ -1185,6 +1294,7 @@ class ReceivePackHandler(PackHandler):
             self.proto.write_sideband(SIDE_BAND_CHANNEL_FATAL, str(err).encode("utf-8"))
 
     def handle(self) -> None:
+        """Handle receive-pack request."""
         if self.advertise_refs or not self.stateless_rpc:
             refs = sorted(self.repo.get_refs().items())
             symrefs = sorted(self.repo.refs.get_symrefs().items())
@@ -1235,11 +1345,23 @@ class ReceivePackHandler(PackHandler):
 
 
 class UploadArchiveHandler(Handler):
+    """Handler for git-upload-archive requests."""
+
     def __init__(self, backend, args, proto, stateless_rpc=False) -> None:
+        """Initialize upload-archive handler.
+
+        Args:
+            backend: Backend instance
+            args: Command arguments
+            proto: Protocol instance
+            stateless_rpc: Whether to use stateless RPC
+        """
         super().__init__(backend, proto, stateless_rpc)
         self.repo = backend.open_repository(args[0])
 
     def handle(self) -> None:
+        """Handle upload-archive request."""
+
         def write(x):
             return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
 
@@ -1287,11 +1409,21 @@ DEFAULT_HANDLERS = {
 
 
 class TCPGitRequestHandler(socketserver.StreamRequestHandler):
+    """TCP request handler for git protocol."""
+
     def __init__(self, handlers, *args, **kwargs) -> None:
+        """Initialize TCP request handler.
+
+        Args:
+            handlers: Dictionary mapping commands to handler classes
+            *args: Additional arguments for StreamRequestHandler
+            **kwargs: Additional keyword arguments for StreamRequestHandler
+        """
         self.handlers = handlers
         socketserver.StreamRequestHandler.__init__(self, *args, **kwargs)
 
     def handle(self) -> None:
+        """Handle TCP git request."""
         proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
         command, args = proto.read_cmd()
         logger.info("Handling %s request, args=%s", command, args)
@@ -1304,13 +1436,32 @@ class TCPGitRequestHandler(socketserver.StreamRequestHandler):
 
 
 class TCPGitServer(socketserver.TCPServer):
+    """TCP server for git protocol."""
+
     allow_reuse_address = True
     serve = socketserver.TCPServer.serve_forever
 
     def _make_handler(self, *args, **kwargs):
+        """Create request handler instance.
+
+        Args:
+            *args: Handler arguments
+            **kwargs: Handler keyword arguments
+
+        Returns:
+            TCPGitRequestHandler instance
+        """
         return TCPGitRequestHandler(self.handlers, *args, **kwargs)
 
     def __init__(self, backend, listen_addr, port=TCP_GIT_PORT, handlers=None) -> None:
+        """Initialize TCP git server.
+
+        Args:
+            backend: Backend instance
+            listen_addr: Address to listen on
+            port: Port to listen on (default: TCP_GIT_PORT)
+            handlers: Optional dictionary of custom handlers
+        """
         self.handlers = dict(DEFAULT_HANDLERS)
         if handlers is not None:
             self.handlers.update(handlers)
@@ -1319,10 +1470,25 @@ class TCPGitServer(socketserver.TCPServer):
         socketserver.TCPServer.__init__(self, (listen_addr, port), self._make_handler)
 
     def verify_request(self, request, client_address) -> bool:
+        """Verify incoming request.
+
+        Args:
+            request: Request socket
+            client_address: Client address tuple
+
+        Returns:
+            True to accept request
+        """
         logger.info("Handling request from %s", client_address)
         return True
 
     def handle_error(self, request, client_address) -> None:
+        """Handle request processing errors.
+
+        Args:
+            request: Request socket
+            client_address: Client address tuple
+        """
         logger.exception(
             "Exception happened during processing of request from %s",
             client_address,

+ 134 - 0
dulwich/web.py

@@ -67,6 +67,14 @@ NO_CACHE_HEADERS = [
 
 
 def cache_forever_headers(now: Optional[float] = None) -> list[tuple[str, str]]:
+    """Generate headers for caching forever.
+
+    Args:
+      now: Timestamp to use as base (defaults to current time)
+
+    Returns:
+      List of (header_name, header_value) tuples for caching forever
+    """
     if now is None:
         now = time.time()
     return [
@@ -77,6 +85,14 @@ def cache_forever_headers(now: Optional[float] = None) -> list[tuple[str, str]]:
 
 
 def date_time_string(timestamp: Optional[float] = None) -> str:
+    """Convert a timestamp to an HTTP date string.
+
+    Args:
+      timestamp: Unix timestamp to convert (defaults to current time)
+
+    Returns:
+      HTTP date string in RFC 1123 format
+    """
     # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
     # Python 2.6.5 standard library, following modifications:
     #  - Made a global rather than an instance method.
@@ -164,6 +180,16 @@ def _url_to_path(url: str) -> str:
 def get_text_file(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Send a plain text file from the repository.
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match for the requested path
+
+    Returns:
+      Iterator yielding file contents as bytes
+    """
     req.nocache()
     path = _url_to_path(mat.group())
     logger.info("Sending plain text file %s", path)
@@ -173,6 +199,16 @@ def get_text_file(
 def get_loose_object(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Send a loose git object.
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match containing object path segments
+
+    Returns:
+      Iterator yielding object contents as bytes
+    """
     sha = (mat.group(1) + mat.group(2)).encode("ascii")
     logger.info("Sending loose object %s", sha)
     object_store = get_repo(backend, mat).object_store
@@ -192,6 +228,16 @@ def get_loose_object(
 def get_pack_file(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Send a git pack file.
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match for the requested pack file
+
+    Returns:
+      Iterator yielding pack file contents as bytes
+    """
     req.cache_forever()
     path = _url_to_path(mat.group())
     logger.info("Sending pack file %s", path)
@@ -205,6 +251,16 @@ def get_pack_file(
 def get_idx_file(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Send a git pack index file.
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match for the requested index file
+
+    Returns:
+      Iterator yielding index file contents as bytes
+    """
     req.cache_forever()
     path = _url_to_path(mat.group())
     logger.info("Sending pack file %s", path)
@@ -218,6 +274,16 @@ def get_idx_file(
 def get_info_refs(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Send git info/refs for discovery.
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match for the info/refs request
+
+    Returns:
+      Iterator yielding refs advertisement or info/refs contents
+    """
     params = parse_qs(req.environ["QUERY_STRING"])
     service = params.get("service", [None])[0]
     try:
@@ -255,6 +321,16 @@ def get_info_refs(
 def get_info_packs(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Send git info/packs file listing available packs.
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match for the info/packs request
+
+    Returns:
+      Iterator yielding pack listing as bytes
+    """
     req.nocache()
     req.respond(HTTP_OK, "text/plain")
     logger.info("Emulating dumb info/packs")
@@ -279,6 +355,14 @@ class ChunkReader:
         self._buffer: list[bytes] = []
 
     def read(self, n: int) -> bytes:
+        """Read n bytes from the chunked stream.
+
+        Args:
+          n: Number of bytes to read
+
+        Returns:
+          Up to n bytes of data
+        """
         while sum(map(len, self._buffer)) < n:
             try:
                 self._buffer.append(next(self._iter))
@@ -303,6 +387,14 @@ class _LengthLimitedFile:
         self._bytes_avail = max_bytes
 
     def read(self, size: int = -1) -> bytes:
+        """Read up to size bytes from the limited input.
+
+        Args:
+          size: Maximum number of bytes to read, or -1 for all available
+
+        Returns:
+          Up to size bytes of data
+        """
         if self._bytes_avail <= 0:
             return b""
         if size == -1 or size > self._bytes_avail:
@@ -316,6 +408,16 @@ class _LengthLimitedFile:
 def handle_service_request(
     req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
 ) -> Iterator[bytes]:
+    """Handle a git service request (upload-pack or receive-pack).
+
+    Args:
+      req: The HTTP request object
+      backend: The git backend
+      mat: The regex match for the service request
+
+    Returns:
+      Iterator yielding service response as bytes
+    """
     service = mat.group().lstrip("/")
     logger.info("Handling service request for %s", service)
     handler_cls = req.handlers.get(service.encode("ascii"), None)
@@ -531,15 +633,31 @@ class ServerHandlerLogger(ServerHandler):
     """ServerHandler that uses dulwich's logger for logging exceptions."""
 
     def log_exception(self, exc_info) -> None:
+        """Log an exception using dulwich's logger.
+
+        Args:
+          exc_info: Exception information tuple
+        """
         logger.exception(
             "Exception happened during processing of request",
             exc_info=exc_info,
         )
 
     def log_message(self, format, *args) -> None:
+        """Log a message using dulwich's logger.
+
+        Args:
+          format: Format string for the message
+          *args: Arguments for the format string
+        """
         logger.info(format, *args)
 
     def log_error(self, *args) -> None:
+        """Log an error using dulwich's logger.
+
+        Args:
+          *args: Error message components
+        """
         logger.error(*args)
 
 
@@ -547,15 +665,31 @@ class WSGIRequestHandlerLogger(WSGIRequestHandler):
     """WSGIRequestHandler that uses dulwich's logger for logging exceptions."""
 
     def log_exception(self, exc_info) -> None:
+        """Log an exception using dulwich's logger.
+
+        Args:
+          exc_info: Exception information tuple
+        """
         logger.exception(
             "Exception happened during processing of request",
             exc_info=exc_info,
         )
 
     def log_message(self, format, *args) -> None:
+        """Log a message using dulwich's logger.
+
+        Args:
+          format: Format string for the message
+          *args: Arguments for the format string
+        """
         logger.info(format, *args)
 
     def log_error(self, *args) -> None:
+        """Log an error using dulwich's logger.
+
+        Args:
+          *args: Error message components
+        """
         logger.error(*args)
 
     def handle(self) -> None: