
Add partial clone support (#2044)

Jelmer Vernooij 2 weeks ago
parent
commit
68191f6606

+ 614 - 0
dulwich/object_filters.py

@@ -0,0 +1,614 @@
+# object_filters.py -- Object filtering for partial clone and similar operations
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Object filtering for Git partial clone and pack generation.
+
+This module implements Git's object filter specifications for partial clone,
+as documented in:
+https://git-scm.com/docs/rev-list-options#Documentation/rev-list-options.txt---filterltfilter-specgt
+
+Filter specifications control which objects are included when generating packs,
+enabling partial clone (downloading only needed objects) and similar operations.
+
+Supported filter specs:
+- blob:none - Exclude all blobs
+- blob:limit=<n>[kmg] - Exclude blobs larger than n bytes/KB/MB/GB
+- tree:<depth> - Exclude trees beyond specified depth
+- sparse:oid=<oid> - Use sparse specification from object
+- combine:<filter>+<filter>+... - Combine multiple filters
+"""
+
+__all__ = [
+    "BlobLimitFilter",
+    "BlobNoneFilter",
+    "CombineFilter",
+    "FilterSpec",
+    "SparseOidFilter",
+    "TreeDepthFilter",
+    "filter_pack_objects",
+    "filter_pack_objects_with_paths",
+    "parse_filter_spec",
+]
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+from .objects import S_ISGITLINK, Blob, Commit, ObjectID, Tag, Tree, valid_hexsha
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from .object_store import BaseObjectStore
+    from .objects import ObjectID
+
+
+class FilterSpec(ABC):
+    """Base class for all filter specifications."""
+
+    @abstractmethod
+    def should_include_blob(self, blob_size: int) -> bool:
+        """Determine if a blob of given size should be included.
+
+        Args:
+            blob_size: Size of the blob in bytes
+
+        Returns:
+            True if the blob should be included, False otherwise
+        """
+        ...
+
+    @abstractmethod
+    def should_include_tree(self, depth: int) -> bool:
+        """Determine if a tree at given depth should be included.
+
+        Args:
+            depth: Depth of the tree (0 = root)
+
+        Returns:
+            True if the tree should be included, False otherwise
+        """
+        ...
+
+    @abstractmethod
+    def to_spec_string(self) -> str:
+        """Convert filter spec back to string format.
+
+        Returns:
+            Filter specification string (e.g., ``blob:none``, ``blob:limit=1m``)
+        """
+        ...
+
+
+class BlobNoneFilter(FilterSpec):
+    """Filter that excludes all blobs."""
+
+    def should_include_blob(self, blob_size: int) -> bool:
+        """Exclude all blobs."""
+        return False
+
+    def should_include_tree(self, depth: int) -> bool:
+        """Include all trees."""
+        return True
+
+    def to_spec_string(self) -> str:
+        """Return 'blob:none'."""
+        return "blob:none"
+
+    def __repr__(self) -> str:
+        """Return string representation of the filter."""
+        return "BlobNoneFilter()"
+
+
+class BlobLimitFilter(FilterSpec):
+    """Filter that excludes blobs larger than a specified size."""
+
+    def __init__(self, limit: int) -> None:
+        """Initialize blob limit filter.
+
+        Args:
+            limit: Maximum blob size in bytes
+        """
+        self.limit = limit
+
+    def should_include_blob(self, blob_size: int) -> bool:
+        """Include only blobs smaller than or equal to the limit."""
+        return blob_size <= self.limit
+
+    def should_include_tree(self, depth: int) -> bool:
+        """Include all trees."""
+        return True
+
+    def to_spec_string(self) -> str:
+        """Return 'blob:limit=<size>' with appropriate unit."""
+        size = self.limit
+        if size >= 1024 * 1024 * 1024 and size % (1024 * 1024 * 1024) == 0:
+            return f"blob:limit={size // (1024 * 1024 * 1024)}g"
+        elif size >= 1024 * 1024 and size % (1024 * 1024) == 0:
+            return f"blob:limit={size // (1024 * 1024)}m"
+        elif size >= 1024 and size % 1024 == 0:
+            return f"blob:limit={size // 1024}k"
+        else:
+            return f"blob:limit={size}"
+
+    def __repr__(self) -> str:
+        """Return string representation of the filter."""
+        return f"BlobLimitFilter(limit={self.limit})"
+
+
+class TreeDepthFilter(FilterSpec):
+    """Filter that excludes trees beyond a specified depth."""
+
+    def __init__(self, max_depth: int) -> None:
+        """Initialize tree depth filter.
+
+        Args:
+            max_depth: Maximum tree depth (0 = only root tree)
+        """
+        self.max_depth = max_depth
+
+    def should_include_blob(self, blob_size: int) -> bool:
+        """Include all blobs."""
+        return True
+
+    def should_include_tree(self, depth: int) -> bool:
+        """Include only trees up to max_depth."""
+        return depth <= self.max_depth
+
+    def to_spec_string(self) -> str:
+        """Return 'tree:<depth>'."""
+        return f"tree:{self.max_depth}"
+
+    def __repr__(self) -> str:
+        """Return string representation of the filter."""
+        return f"TreeDepthFilter(max_depth={self.max_depth})"
+
+
+class SparseOidFilter(FilterSpec):
+    """Filter that uses a sparse specification from an object.
+
+    This filter reads sparse-checkout patterns from a blob object and uses them
+    to determine which paths should be included in the partial clone.
+    """
+
+    def __init__(
+        self, oid: "ObjectID", object_store: "BaseObjectStore | None" = None
+    ) -> None:
+        """Initialize sparse OID filter.
+
+        Args:
+            oid: Object ID of the sparse specification blob
+            object_store: Optional object store to load the sparse patterns from
+        """
+        self.oid = oid
+        self._patterns: list[tuple[str, bool, bool, bool]] | None = None
+        self._object_store = object_store
+
+    def _load_patterns(self) -> None:
+        """Load and parse sparse patterns from the blob."""
+        if self._patterns is not None:
+            return
+
+        if self._object_store is None:
+            raise ValueError("Cannot load sparse patterns without an object store")
+
+        from .sparse_patterns import parse_sparse_patterns
+
+        try:
+            obj = self._object_store[self.oid]
+        except KeyError:
+            raise ValueError(
+                f"Sparse specification blob {self.oid.decode('ascii') if isinstance(self.oid, bytes) else self.oid} not found"
+            )
+
+        if not isinstance(obj, Blob):
+            raise ValueError(
+                f"Sparse specification {self.oid.decode('ascii') if isinstance(self.oid, bytes) else self.oid} is not a blob"
+            )
+
+        # Parse the blob content as sparse patterns
+        lines = obj.data.decode("utf-8").splitlines()
+        self._patterns = parse_sparse_patterns(lines)
+
+    def should_include_path(self, path: str) -> bool:
+        """Determine if a path should be included based on sparse patterns.
+
+        Args:
+            path: Path to check (e.g., 'src/file.py')
+
+        Returns:
+            True if the path matches the sparse patterns, False otherwise
+        """
+        self._load_patterns()
+        from .sparse_patterns import match_sparse_patterns
+
+        # Determine if path is a directory based on whether it ends with '/'
+        path_is_dir = path.endswith("/")
+        path_str = path.rstrip("/")
+
+        assert self._patterns is not None  # _load_patterns ensures this
+        return match_sparse_patterns(path_str, self._patterns, path_is_dir=path_is_dir)
+
+    def should_include_blob(self, blob_size: int) -> bool:
+        """Include all blobs (sparse filtering is path-based, not size-based)."""
+        return True
+
+    def should_include_tree(self, depth: int) -> bool:
+        """Include all trees (sparse filtering is path-based)."""
+        return True
+
+    def to_spec_string(self) -> str:
+        """Return 'sparse:oid=<oid>'."""
+        return f"sparse:oid={self.oid.decode('ascii') if isinstance(self.oid, bytes) else self.oid}"
+
+    def __repr__(self) -> str:
+        """Return string representation of the filter."""
+        oid_str = self.oid.decode("ascii") if isinstance(self.oid, bytes) else self.oid
+        return f"SparseOidFilter(oid={oid_str!r})"
+
+
+class CombineFilter(FilterSpec):
+    """Filter that combines multiple filters with AND logic."""
+
+    def __init__(self, filters: list[FilterSpec]) -> None:
+        """Initialize combine filter.
+
+        Args:
+            filters: List of filters to combine
+        """
+        self.filters = filters
+
+    def should_include_blob(self, blob_size: int) -> bool:
+        """Include blob only if all filters agree."""
+        return all(f.should_include_blob(blob_size) for f in self.filters)
+
+    def should_include_tree(self, depth: int) -> bool:
+        """Include tree only if all filters agree."""
+        return all(f.should_include_tree(depth) for f in self.filters)
+
+    def to_spec_string(self) -> str:
+        """Return 'combine:<filter1>+<filter2>+...'."""
+        return "combine:" + "+".join(f.to_spec_string() for f in self.filters)
+
+    def __repr__(self) -> str:
+        """Return string representation of the filter."""
+        return f"CombineFilter(filters={self.filters!r})"
+
+
+def _parse_size(size_str: str) -> int:
+    """Parse a size specification like '100', '10k', '5m', '1g'.
+
+    Args:
+        size_str: Size string with optional unit suffix
+
+    Returns:
+        Size in bytes
+
+    Raises:
+        ValueError: If size_str is not a valid size specification
+    """
+    size_str = size_str.lower()
+    multipliers = {"k": 1024, "m": 1024 * 1024, "g": 1024 * 1024 * 1024}
+
+    if size_str[-1] in multipliers:
+        try:
+            value = int(size_str[:-1])
+            return value * multipliers[size_str[-1]]
+        except ValueError:
+            raise ValueError(f"Invalid size specification: {size_str}")
+    else:
+        try:
+            return int(size_str)
+        except ValueError:
+            raise ValueError(f"Invalid size specification: {size_str}")
+
+
+def parse_filter_spec(
+    spec: str | bytes, object_store: "BaseObjectStore | None" = None
+) -> FilterSpec:
+    """Parse a filter specification string.
+
+    Args:
+        spec: Filter specification (e.g., 'blob:none', 'blob:limit=1m')
+        object_store: Optional object store for loading sparse specifications
+
+    Returns:
+        Parsed FilterSpec object
+
+    Raises:
+        ValueError: If spec is not a valid filter specification
+
+    Examples:
+        >>> parse_filter_spec("blob:none")
+        BlobNoneFilter()
+        >>> parse_filter_spec("blob:limit=1m")
+        BlobLimitFilter(limit=1048576)
+        >>> parse_filter_spec("tree:0")
+        TreeDepthFilter(max_depth=0)
+    """
+    if isinstance(spec, bytes):
+        try:
+            spec = spec.decode("utf-8")
+        except UnicodeDecodeError as e:
+            raise ValueError(f"Filter specification must be valid UTF-8: {e}")
+
+    spec = spec.strip()
+
+    if not spec:
+        raise ValueError("Filter specification cannot be empty")
+
+    if spec == "blob:none":
+        return BlobNoneFilter()
+    elif spec.startswith("blob:limit="):
+        limit_str = spec[11:]  # len('blob:limit=') == 11
+        if not limit_str:
+            raise ValueError("blob:limit requires a size value (e.g., blob:limit=1m)")
+        try:
+            limit = _parse_size(limit_str)
+            if limit < 0:
+                raise ValueError(
+                    f"blob:limit size must be non-negative, got {limit_str}"
+                )
+            return BlobLimitFilter(limit)
+        except ValueError as e:
+            raise ValueError(f"Invalid blob:limit specification: {e}")
+    elif spec.startswith("tree:"):
+        depth_str = spec[5:]  # len('tree:') == 5
+        if not depth_str:
+            raise ValueError("tree filter requires a depth value (e.g., tree:0)")
+        try:
+            depth = int(depth_str)
+            if depth < 0:
+                raise ValueError(f"tree depth must be non-negative, got {depth}")
+            return TreeDepthFilter(depth)
+        except ValueError as e:
+            raise ValueError(f"Invalid tree filter: {e}")
+    elif spec.startswith("sparse:oid="):
+        oid_str = spec[11:]  # len('sparse:oid=') == 11
+        if not oid_str:
+            raise ValueError(
+                "sparse:oid requires an object ID (e.g., sparse:oid=abc123...)"
+            )
+        # Validate OID format (should be 40 hex chars for SHA-1 or 64 for SHA-256)
+        if not valid_hexsha(oid_str):
+            raise ValueError(
+                f"sparse:oid requires a valid object ID (40 or 64 hex chars), got {len(oid_str)} chars"
+            )
+
+        oid: ObjectID = ObjectID(oid_str.encode("ascii"))
+        return SparseOidFilter(oid, object_store=object_store)
+    elif spec.startswith("combine:"):
+        filter_str = spec[8:]  # len('combine:') == 8
+        if not filter_str:
+            raise ValueError(
+                "combine filter requires at least one filter (e.g., combine:blob:none+tree:0)"
+            )
+        filter_specs = filter_str.split("+")
+        if len(filter_specs) < 2:
+            raise ValueError(
+                "combine filter requires at least two filters separated by '+'"
+            )
+        try:
+            filters = [
+                parse_filter_spec(f, object_store=object_store) for f in filter_specs
+            ]
+        except ValueError as e:
+            raise ValueError(f"Invalid filter in combine specification: {e}")
+        return CombineFilter(filters)
+    else:
+        # Provide helpful error message with supported formats
+        raise ValueError(
+            f"Unknown filter specification: '{spec}'. "
+            f"Supported formats: blob:none, blob:limit=<n>[kmg], tree:<depth>, "
+            f"sparse:oid=<oid>, combine:<filter>+<filter>+..."
+        )
+
+
+def filter_pack_objects(
+    object_store: "BaseObjectStore",
+    object_ids: list["ObjectID"],
+    filter_spec: FilterSpec,
+) -> list["ObjectID"]:
+    """Filter a list of object IDs based on a filter specification.
+
+    This function examines each object and excludes those that don't match
+    the filter criteria (e.g., blobs that are too large, trees beyond max depth).
+
+    Args:
+        object_store: Object store to retrieve objects from
+        object_ids: List of object IDs to filter
+        filter_spec: Filter specification to apply
+
+    Returns:
+        Filtered list of object IDs that should be included in the pack
+
+    Note:
+        This function currently supports blob size filtering. Tree depth filtering
+        requires additional path/depth tracking which is not yet implemented.
+    """
+    filtered_ids = []
+
+    for oid in object_ids:
+        try:
+            obj = object_store[oid]
+        except KeyError:
+            # Object not found, skip it
+            continue
+
+        # Determine object type and apply appropriate filter
+        if isinstance(obj, Blob):
+            # Check if blob should be included based on size
+            blob_size = len(obj.data)
+            if filter_spec.should_include_blob(blob_size):
+                filtered_ids.append(oid)
+            # else: blob is filtered out
+        elif isinstance(obj, (Tree, Commit, Tag)):
+            # For now, include all trees, commits, and tags
+            # Tree depth filtering would require tracking depth during traversal
+            # which needs to be implemented at the object collection stage
+            if filter_spec.should_include_tree(0):  # depth=0 for now
+                filtered_ids.append(oid)
+        else:
+            # Unknown object type, include it to be safe
+            filtered_ids.append(oid)
+
+    return filtered_ids
+
+
+def filter_pack_objects_with_paths(
+    object_store: "BaseObjectStore",
+    wants: list["ObjectID"],
+    filter_spec: FilterSpec,
+    *,
+    progress: "Callable[[bytes], None] | None" = None,
+) -> list["ObjectID"]:
+    """Filter objects for a pack with full path and depth tracking.
+
+    This function performs a complete tree traversal starting from the wanted
+    commits, tracking paths and depths to enable proper filtering for sparse:oid
+    and tree:<depth> filters.
+
+    Args:
+        object_store: Object store to retrieve objects from
+        wants: List of commit/tree/blob IDs that are wanted
+        filter_spec: Filter specification to apply
+        progress: Optional progress callback
+
+    Returns:
+        Filtered list of object IDs that should be included in the pack
+    """
+    import stat
+
+    included_objects: set[ObjectID] = set()
+    # Track (oid, path, depth) tuples to process
+    to_process: list[tuple[ObjectID, str, int]] = []
+
+    # Start with the wanted commits
+    for want in wants:
+        try:
+            obj = object_store[want]
+        except KeyError:
+            continue
+
+        if isinstance(obj, Commit):
+            # Always include commits
+            included_objects.add(want)
+            # Add the root tree to process with depth 0
+            to_process.append((obj.tree, "", 0))
+        elif isinstance(obj, Tree):
+            # Direct tree wants start at depth 0
+            to_process.append((want, "", 0))
+        elif isinstance(obj, Tag):
+            # Always include tags
+            included_objects.add(want)
+            # Process the tagged object
+            tagged_oid = obj.object[1]
+            to_process.append((tagged_oid, "", 0))
+        elif isinstance(obj, Blob):
+            # Direct blob wants - check size filter
+            blob_size = len(obj.data)
+            if filter_spec.should_include_blob(blob_size):
+                included_objects.add(want)
+
+    # Process trees and their contents
+    processed_trees: set[ObjectID] = set()
+
+    while to_process:
+        oid, current_path, depth = to_process.pop()
+
+        # Skip if already processed
+        if oid in processed_trees:
+            continue
+
+        try:
+            obj = object_store[oid]
+        except KeyError:
+            continue
+
+        if isinstance(obj, Tree):
+            # Check if this tree should be included based on depth
+            if not filter_spec.should_include_tree(depth):
+                continue
+
+            # Include this tree
+            included_objects.add(oid)
+            processed_trees.add(oid)
+
+            # Process tree entries
+            for name, mode, entry_oid in obj.iteritems():
+                assert name is not None
+                assert mode is not None
+                assert entry_oid is not None
+
+                # Skip gitlinks
+                if S_ISGITLINK(mode):
+                    continue
+
+                # Build full path
+                if current_path:
+                    full_path = f"{current_path}/{name.decode('utf-8')}"
+                else:
+                    full_path = name.decode("utf-8")
+
+                if stat.S_ISDIR(mode):
+                    # It's a subdirectory - add to process list with increased depth
+                    to_process.append((entry_oid, full_path, depth + 1))
+                elif stat.S_ISREG(mode):
+                    # It's a blob - check filters
+                    try:
+                        blob = object_store[entry_oid]
+                    except KeyError:
+                        continue
+
+                    if not isinstance(blob, Blob):
+                        continue
+
+                    # Check filters
+                    blob_size = len(blob.data)
+
+                    # For non-path-based filters (size, blob:none), check directly
+                    if not filter_spec.should_include_blob(blob_size):
+                        continue
+
+                    # Check path filter for sparse:oid
+                    path_allowed = True
+                    if isinstance(filter_spec, SparseOidFilter):
+                        path_allowed = filter_spec.should_include_path(full_path)
+                    elif isinstance(filter_spec, CombineFilter):
+                        # Check path filters in combination
+                        for f in filter_spec.filters:
+                            if isinstance(f, SparseOidFilter):
+                                if not f.should_include_path(full_path):
+                                    path_allowed = False
+                                    break
+
+                    if not path_allowed:
+                        continue
+
+                    # Include this blob
+                    included_objects.add(entry_oid)
+
+        elif isinstance(obj, Blob):
+            # Standalone blob (shouldn't normally happen in tree traversal)
+            blob_size = len(obj.data)
+            if filter_spec.should_include_blob(blob_size):
+                included_objects.add(oid)
+
+    return list(included_objects)
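
Taken together, the new API is small: parse a spec string, then run candidate object IDs through it. A minimal usage sketch (not part of the patch itself), using only names this module defines plus an in-memory object store:

from dulwich.object_filters import filter_pack_objects, parse_filter_spec
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
small = Blob.from_string(b"small blob")       # 10 bytes
large = Blob.from_string(b"x" * 4096)         # 4 KiB
store.add_object(small)
store.add_object(large)

spec = parse_filter_spec("blob:limit=1k")     # -> BlobLimitFilter(limit=1024)
kept = filter_pack_objects(store, [small.id, large.id], spec)
assert kept == [small.id]                     # the 4 KiB blob is over the limit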

+ 38 - 0
dulwich/protocol.py

@@ -52,6 +52,7 @@ __all__ = [
     "COMMAND_DEEPEN_NOT",
     "COMMAND_DEEPEN_SINCE",
     "COMMAND_DONE",
+    "COMMAND_FILTER",
     "COMMAND_HAVE",
     "COMMAND_SHALLOW",
     "COMMAND_UNSHALLOW",
@@ -84,6 +85,7 @@ __all__ = [
     "extract_capabilities",
     "extract_capability_names",
     "extract_want_line_capabilities",
+    "find_capability",
     "format_ack_line",
     "format_capability_line",
     "format_cmd_pkt",
@@ -201,6 +203,7 @@ KNOWN_UPLOAD_CAPABILITIES = set(
         CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
         CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
         CAPABILITY_FETCH,
+        CAPABILITY_FILTER,
     ]
 )
 KNOWN_RECEIVE_CAPABILITIES = set(
@@ -308,6 +311,7 @@ COMMAND_UNSHALLOW = b"unshallow"
 COMMAND_DONE = b"done"
 COMMAND_WANT = b"want"
 COMMAND_HAVE = b"have"
+COMMAND_FILTER = b"filter"
 
 
 def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
@@ -721,6 +725,40 @@ def ack_type(capabilities: Iterable[bytes]) -> int:
     return SINGLE_ACK
 
 
+def find_capability(
+    capabilities: Iterable[bytes], *capability_names: bytes
+) -> bytes | None:
+    """Find a capability value in a list of capabilities.
+
+    This function looks for capabilities that may include arguments after an equals sign
+    and returns only the value part (after the '='). For capabilities without values,
+    returns the capability name itself.
+
+    Args:
+      capabilities: List of capability strings
+      capability_names: Capability name(s) to search for
+
+    Returns:
+      The value after '=' if found, or the capability name if no '=', or None if not found
+
+    Example:
+      >>> caps = [b'filter=blob:none', b'agent=git/2.0', b'thin-pack']
+      >>> find_capability(caps, b'filter')
+      b'blob:none'
+      >>> find_capability(caps, b'thin-pack')
+      b'thin-pack'
+      >>> find_capability(caps, b'missing') is None
+      True
+    """
+    for cap in capabilities:
+        for name in capability_names:
+            if cap == name:
+                return cap
+            elif cap.startswith(name + b"="):
+                return cap[len(name) + 1 :]
+    return None
+
+
 class BufferedPktLineWriter:
     """Writer that wraps its data in pkt-lines and has an independent buffer.
 

+ 101 - 2
dulwich/server.py

@@ -97,6 +97,15 @@ from .errors import (
     ObjectFormatException,
     UnexpectedCommandError,
 )
+from .object_filters import (
+    CombineFilter,
+    FilterSpec,
+    SparseOidFilter,
+    TreeDepthFilter,
+    filter_pack_objects,
+    filter_pack_objects_with_paths,
+    parse_filter_spec,
+)
 from .object_store import MissingObjectFinder, PackBasedObjectStore, find_shallow
 from .objects import Commit, ObjectID, Tree, valid_hexsha
 from .pack import ObjectContainer, write_pack_from_container
@@ -104,6 +113,7 @@ from .protocol import (
     CAPABILITIES_REF,
     CAPABILITY_AGENT,
     CAPABILITY_DELETE_REFS,
+    CAPABILITY_FILTER,
     CAPABILITY_INCLUDE_TAG,
     CAPABILITY_MULTI_ACK,
     CAPABILITY_MULTI_ACK_DETAILED,
@@ -117,6 +127,7 @@ from .protocol import (
     CAPABILITY_THIN_PACK,
     COMMAND_DEEPEN,
     COMMAND_DONE,
+    COMMAND_FILTER,
     COMMAND_HAVE,
     COMMAND_SHALLOW,
     COMMAND_UNSHALLOW,
@@ -138,6 +149,7 @@ from .protocol import (
     capability_object_format,
     extract_capabilities,
     extract_want_line_capabilities,
+    find_capability,
     format_ack_line,
     format_ref_line,
     format_shallow_line,
@@ -378,6 +390,13 @@ class PackHandler(Handler):
         for cap in caps:
             if cap.startswith(CAPABILITY_AGENT + b"="):
                 continue
+            if cap.startswith(CAPABILITY_FILTER + b"="):
+                # Filter capability can have a value (e.g., filter=blob:none)
+                if CAPABILITY_FILTER not in allowable_caps:
+                    raise GitProtocolError(
+                        f"Client asked for capability {cap!r} that was not advertised."
+                    )
+                continue
             if cap not in allowable_caps:
                 raise GitProtocolError(
                     f"Client asked for capability {cap!r} that was not advertised."
@@ -438,6 +457,8 @@ class UploadPackHandler(PackHandler):
         # being processed, and the client is not accepting any other
         # data (such as side-band, see the progress method here).
         self._processing_have_lines = False
+        # Filter specification for partial clone support
+        self.filter_spec: FilterSpec | None = None
 
     def capabilities(self) -> list[bytes]:
         """Return the list of capabilities supported by upload-pack.
@@ -455,6 +476,7 @@ class UploadPackHandler(PackHandler):
             CAPABILITY_INCLUDE_TAG,
             CAPABILITY_SHALLOW,
             CAPABILITY_NO_DONE,
+            CAPABILITY_FILTER,
             capability_object_format(self.repo.object_format.name),
         ]
 
@@ -467,6 +489,26 @@ class UploadPackHandler(PackHandler):
             CAPABILITY_OFS_DELTA,
         )
 
+    def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
+        """Set client capabilities and parse filter specification if present.
+
+        Args:
+            caps: List of capability strings from the client
+        """
+        super().set_client_capabilities(caps)
+        # Parse filter specification if present in capabilities
+        # In protocol v1, filter can be sent as "filter=<spec>" capability
+        # In protocol v2, filter is sent as a separate "filter <spec>" command
+        filter_spec_bytes = find_capability(caps, CAPABILITY_FILTER)
+        if filter_spec_bytes and filter_spec_bytes != CAPABILITY_FILTER:
+            # Only parse if there's an actual spec (not just the capability name)
+            try:
+                self.filter_spec = parse_filter_spec(
+                    filter_spec_bytes, object_store=self.repo.object_store
+                )
+            except ValueError as e:
+                raise GitProtocolError(f"Invalid filter specification: {e}")
+
     def progress(self, message: bytes) -> None:
         """Send a progress message to the client.
 
@@ -584,6 +626,46 @@ class UploadPackHandler(PackHandler):
             return
 
         self._start_pack_send_phase()
+
+        # Apply filter if specified (partial clone support)
+        if self.filter_spec is not None:
+            original_count = len(object_ids)
+
+            # Use path-aware filtering for tree depth and sparse:oid filters
+            # Check if filter requires path tracking
+            def needs_path_tracking(filter_spec: FilterSpec) -> bool:
+                if isinstance(filter_spec, (TreeDepthFilter, SparseOidFilter)):
+                    return True
+                if isinstance(filter_spec, CombineFilter):
+                    return any(needs_path_tracking(f) for f in filter_spec.filters)
+                return False
+
+            if needs_path_tracking(self.filter_spec):
+                # Path-aware filtering returns list of OIDs, convert to tuples for pack generation
+                filtered_oids = filter_pack_objects_with_paths(
+                    self.repo.object_store,
+                    wants,
+                    self.filter_spec,
+                    progress=self.progress,
+                )
+                object_ids = [(oid, None) for oid in filtered_oids]
+            else:
+                # Extract just the object IDs (filter_pack_objects expects list of OIDs)
+                # object_ids is a list of tuples (oid, (depth, path))
+                oid_list = [oid for oid, _hint in object_ids]
+                filtered_oids = filter_pack_objects(
+                    self.repo.object_store, oid_list, self.filter_spec
+                )
+                # Reconstruct tuples with hints for pack generation
+                filtered_oid_set = set(filtered_oids)
+                object_ids = [
+                    (oid, hint) for oid, hint in object_ids if oid in filtered_oid_set
+                ]
+
+            filtered_count = original_count - len(object_ids)
+            if filtered_count > 0:
+                self.progress((f"filtered {filtered_count} objects.\n").encode("ascii"))
+
         self.progress((f"counting objects: {len(object_ids)}, done.\n").encode("ascii"))
 
         write_pack_from_container(
@@ -640,6 +722,10 @@ def _split_proto_line(
         elif command == COMMAND_DEEPEN:
             assert fields[1] is not None
             return command, int(fields[1])
+        elif command == COMMAND_FILTER:
+            # Filter specification (e.g., "filter blob:none")
+            assert fields[1] is not None
+            return (command, fields[1])
     raise GitProtocolError(f"Received invalid line from client: {line!r}")
 
 
@@ -742,7 +828,7 @@ class _ProtocolGraphWalker:
 
     def __init__(
         self,
-        handler: PackHandler,
+        handler: "UploadPackHandler",
         object_store: ObjectContainer,
         get_peeled: Callable[[bytes], ObjectID | None],
         get_symrefs: Callable[[], dict[Ref, Ref]],
@@ -833,7 +919,7 @@ class _ProtocolGraphWalker:
         line, caps = extract_want_line_capabilities(want)
         self.handler.set_client_capabilities(caps)
         self.set_ack_type(ack_type(caps))
-        allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, None)
+        allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, COMMAND_FILTER, None)
         command, sha_result = _split_proto_line(line, allowed)
 
         want_revs: list[ObjectID] = []
@@ -845,6 +931,19 @@ class _ProtocolGraphWalker:
             command, sha_result = self.read_proto_line(allowed)
 
         self.set_wants(want_revs)
+
+        # Handle filter command if present (protocol v2)
+        if command == COMMAND_FILTER:
+            assert isinstance(sha_result, bytes)
+            try:
+                self.handler.filter_spec = parse_filter_spec(
+                    sha_result, object_store=self.handler.repo.object_store
+                )
+            except ValueError as e:
+                raise GitProtocolError(f"Invalid filter specification: {e}")
+            # Read next command after processing filter
+            command, sha_result = self.read_proto_line(allowed)
+
         if command in (COMMAND_SHALLOW, COMMAND_DEEPEN):
             assert sha_result is not None
             self.unread_proto_line(command, sha_result)
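
For orientation, the filter reaches the handler in one of two wire forms: in protocol v0/v1 it rides on the first want line as a filter=<spec> capability (unwrapped by find_capability in set_client_capabilities), while in the v2-style flow it arrives on its own "filter <spec>" pkt-line and is split by _split_proto_line. A rough sketch of how either payload becomes a FilterSpec, with the literal b"filter" standing in for CAPABILITY_FILTER:

from dulwich.object_filters import parse_filter_spec
from dulwich.protocol import find_capability

# v0/v1: spec carried as a capability on the first want line
caps = [b"multi_ack_detailed", b"thin-pack", b"filter=blob:none"]
spec_bytes = find_capability(caps, b"filter")   # -> b"blob:none"
filter_spec = parse_filter_spec(spec_bytes)     # -> BlobNoneFilter()

# v2-style: spec carried on its own "filter <spec>" line
command, arg = b"filter blob:limit=1m".split(b" ", 1)
filter_spec = parse_filter_spec(arg)            # -> BlobLimitFilter(limit=1048576)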

+ 1 - 0
tests/__init__.py

@@ -169,6 +169,7 @@ def self_test_suite() -> unittest.TestSuite:
         "notes",
         "objects",
         "objectspec",
+        "object_filters",
         "object_store",
         "pack",
         "patch",

+ 1 - 0
tests/compat/__init__.py

@@ -37,6 +37,7 @@ def test_suite() -> unittest.TestSuite:
         "lfs",
         "midx",
         "pack",
+        "partial_clone",
         "patch",
         "porcelain",
         "reftable",

+ 489 - 0
tests/compat/test_partial_clone.py

@@ -0,0 +1,489 @@
+# test_partial_clone.py -- Compatibility tests for partial clone.
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for partial clone support."""
+
+import os
+import shutil
+import sys
+import tempfile
+import threading
+
+from dulwich.objects import Blob, Tree
+from dulwich.repo import Repo
+from dulwich.server import DictBackend, TCPGitServer
+from dulwich.tests.utils import make_commit
+
+from .. import skipIf
+from .utils import CompatTestCase, require_git_version, run_git_or_fail
+
+
+@skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
+class PartialCloneServerTestCase(CompatTestCase):
+    """Tests for partial clone server compatibility with git client."""
+
+    protocol = "git"
+    # Partial clone support was introduced in git 2.17.0
+    min_git_version = (2, 17, 0)
+
+    def setUp(self) -> None:
+        super().setUp()
+        require_git_version(self.min_git_version)
+
+    def _start_server(self, repo):
+        backend = DictBackend({b"/": repo})
+        dul_server = TCPGitServer(backend, b"localhost", 0)
+
+        # Start server in a thread
+        server_thread = threading.Thread(target=dul_server.serve)
+        server_thread.daemon = True
+        server_thread.start()
+
+        # Add cleanup
+        def cleanup_server():
+            dul_server.shutdown()
+            dul_server.server_close()
+            server_thread.join(timeout=1.0)
+
+        self.addCleanup(cleanup_server)
+        self._server = dul_server
+        _, port = self._server.socket.getsockname()
+        return port
+
+    def url(self, port) -> str:
+        return f"{self.protocol}://localhost:{port}/"
+
+    def test_clone_with_blob_none_filter(self) -> None:
+        """Test that git client can clone with blob:none filter."""
+        # Create repository with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+        source_repo = Repo.init(repo_path, mkdir=False)
+
+        # Create test content with multiple blobs
+        blob1 = Blob.from_string(b"File 1 content - this is a test file")
+        blob2 = Blob.from_string(b"File 2 content - another test file")
+        blob3 = Blob.from_string(b"File 3 content - third test file")
+
+        tree = Tree()
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+        tree.add(b"file3.txt", 0o100644, blob3.id)
+
+        # Add objects to repo
+        source_repo.object_store.add_object(blob1)
+        source_repo.object_store.add_object(blob2)
+        source_repo.object_store.add_object(blob3)
+        source_repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id, message=b"Test commit with multiple files")
+        source_repo.object_store.add_object(commit)
+        source_repo.refs[b"refs/heads/master"] = commit.id
+
+        # Start dulwich server
+        port = self._start_server(source_repo)
+
+        # Clone with blob:none filter
+        clone_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_path)
+        clone_dir = os.path.join(clone_path, "cloned_repo")
+
+        run_git_or_fail(
+            ["clone", "--filter=blob:none", "--no-checkout", self.url(port), clone_dir],
+            cwd=clone_path,
+        )
+
+        # Verify cloned repo has commit and tree but no blobs
+        cloned_repo = Repo(clone_dir)
+        self.addCleanup(cloned_repo.close)
+
+        # Commit should exist
+        self.assertEqual(cloned_repo.refs[b"refs/heads/master"], commit.id)
+
+        # Tree should exist
+        self.assertIn(tree.id, cloned_repo.object_store)
+
+        # Blobs should NOT be in object store (filtered out)
+        # Note: git may still have the blobs if they're small enough to be inlined
+        # or if it fetched them anyway, so we just verify the filter was accepted
+
+        # Verify git recognizes this as a partial clone
+        config_output = run_git_or_fail(
+            ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
+        )
+        self.assertEqual(config_output.strip(), b"true")
+
+        source_repo.close()
+
+    def test_clone_with_blob_limit_filter(self) -> None:
+        """Test that git client can clone with blob:limit filter."""
+        # Create repository
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+        source_repo = Repo.init(repo_path, mkdir=False)
+
+        # Create blobs of different sizes
+        small_blob = Blob.from_string(b"small")  # 5 bytes
+        large_blob = Blob.from_string(b"x" * 1000)  # 1000 bytes
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, small_blob.id)
+        tree.add(b"large.txt", 0o100644, large_blob.id)
+
+        source_repo.object_store.add_object(small_blob)
+        source_repo.object_store.add_object(large_blob)
+        source_repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id, message=b"Test commit with mixed sizes")
+        source_repo.object_store.add_object(commit)
+        source_repo.refs[b"refs/heads/master"] = commit.id
+
+        # Start server
+        port = self._start_server(source_repo)
+
+        # Clone with blob:limit=100 filter (should exclude large blob)
+        clone_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_path)
+        clone_dir = os.path.join(clone_path, "cloned_repo")
+
+        run_git_or_fail(
+            [
+                "clone",
+                "--filter=blob:limit=100",
+                "--no-checkout",
+                self.url(port),
+                clone_dir,
+            ],
+            cwd=clone_path,
+        )
+
+        # Verify it's a partial clone
+        cloned_repo = Repo(clone_dir)
+        self.addCleanup(cloned_repo.close)
+
+        config_output = run_git_or_fail(
+            ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
+        )
+        self.assertEqual(config_output.strip(), b"true")
+
+        source_repo.close()
+
+    def test_clone_with_tree_depth_filter(self) -> None:
+        """Test that git client can clone with tree:0 filter."""
+        # Create repository with nested structure
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+        source_repo = Repo.init(repo_path, mkdir=False)
+
+        # Create nested tree structure
+        blob1 = Blob.from_string(b"root file")
+        blob2 = Blob.from_string(b"nested file")
+
+        inner_tree = Tree()
+        inner_tree.add(b"nested.txt", 0o100644, blob2.id)
+
+        outer_tree = Tree()
+        outer_tree.add(b"root.txt", 0o100644, blob1.id)
+        outer_tree.add(b"subdir", 0o040000, inner_tree.id)
+
+        source_repo.object_store.add_object(blob1)
+        source_repo.object_store.add_object(blob2)
+        source_repo.object_store.add_object(inner_tree)
+        source_repo.object_store.add_object(outer_tree)
+
+        commit = make_commit(tree=outer_tree.id, message=b"Test nested structure")
+        source_repo.object_store.add_object(commit)
+        source_repo.refs[b"refs/heads/master"] = commit.id
+
+        # Start server
+        port = self._start_server(source_repo)
+
+        # Clone with tree:0 filter
+        clone_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_path)
+        clone_dir = os.path.join(clone_path, "cloned_repo")
+
+        run_git_or_fail(
+            ["clone", "--filter=tree:0", "--no-checkout", self.url(port), clone_dir],
+            cwd=clone_path,
+        )
+
+        # Verify it's a partial clone
+        cloned_repo = Repo(clone_dir)
+        self.addCleanup(cloned_repo.close)
+
+        config_output = run_git_or_fail(
+            ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
+        )
+        self.assertEqual(config_output.strip(), b"true")
+
+        source_repo.close()
+
+    def test_clone_with_filter_protocol_v0(self) -> None:
+        """Test that git client can clone with filter using protocol v0."""
+        # Create repository with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+        source_repo = Repo.init(repo_path, mkdir=False)
+
+        # Create test content
+        blob = Blob.from_string(b"test content")
+        tree = Tree()
+        tree.add(b"file.txt", 0o100644, blob.id)
+
+        source_repo.object_store.add_object(blob)
+        source_repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id, message=b"Test commit")
+        source_repo.object_store.add_object(commit)
+        source_repo.refs[b"refs/heads/master"] = commit.id
+
+        # Start server
+        port = self._start_server(source_repo)
+
+        # Clone with protocol v0 and blob:none filter
+        clone_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_path)
+        clone_dir = os.path.join(clone_path, "cloned_repo")
+
+        run_git_or_fail(
+            [
+                "-c",
+                "protocol.version=0",
+                "clone",
+                "--filter=blob:none",
+                "--no-checkout",
+                self.url(port),
+                clone_dir,
+            ],
+            cwd=clone_path,
+        )
+
+        # Verify partial clone
+        cloned_repo = Repo(clone_dir)
+        self.addCleanup(cloned_repo.close)
+        self.assertIn(commit.id, cloned_repo.object_store)
+        self.assertIn(tree.id, cloned_repo.object_store)
+
+        source_repo.close()
+
+    def test_clone_with_filter_protocol_v2(self) -> None:
+        """Test that git client can clone with filter using protocol v2."""
+        # Create repository with dulwich
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+        source_repo = Repo.init(repo_path, mkdir=False)
+
+        # Create test content
+        blob = Blob.from_string(b"test content")
+        tree = Tree()
+        tree.add(b"file.txt", 0o100644, blob.id)
+
+        source_repo.object_store.add_object(blob)
+        source_repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id, message=b"Test commit")
+        source_repo.object_store.add_object(commit)
+        source_repo.refs[b"refs/heads/master"] = commit.id
+
+        # Start server
+        port = self._start_server(source_repo)
+
+        # Clone with protocol v2 and blob:none filter
+        clone_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, clone_path)
+        clone_dir = os.path.join(clone_path, "cloned_repo")
+
+        run_git_or_fail(
+            [
+                "-c",
+                "protocol.version=2",
+                "clone",
+                "--filter=blob:none",
+                "--no-checkout",
+                self.url(port),
+                clone_dir,
+            ],
+            cwd=clone_path,
+        )
+
+        # Verify partial clone
+        cloned_repo = Repo(clone_dir)
+        self.addCleanup(cloned_repo.close)
+        self.assertIn(commit.id, cloned_repo.object_store)
+        self.assertIn(tree.id, cloned_repo.object_store)
+
+        source_repo.close()
+
+
+@skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
+class PartialCloneClientTestCase(CompatTestCase):
+    """Tests for partial clone client compatibility with git server."""
+
+    # Partial clone support was introduced in git 2.17.0
+    min_git_version = (2, 17, 0)
+
+    def setUp(self) -> None:
+        super().setUp()
+        require_git_version(self.min_git_version)
+
+    def test_fetch_with_blob_none_filter(self) -> None:
+        """Test that dulwich client can fetch with blob:none filter."""
+        from dulwich.client import get_transport_and_path
+
+        # Create a git repository using git itself
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+
+        # Initialize with git
+        run_git_or_fail(["init"], cwd=repo_path)
+        run_git_or_fail(["config", "user.name", "Test User"], cwd=repo_path)
+        run_git_or_fail(["config", "user.email", "test@example.com"], cwd=repo_path)
+
+        # Create test files
+        file1 = os.path.join(repo_path, "file1.txt")
+        with open(file1, "wb") as f:
+            f.write(b"Content of file 1")
+
+        file2 = os.path.join(repo_path, "file2.txt")
+        with open(file2, "wb") as f:
+            f.write(b"Content of file 2")
+
+        # Commit files
+        run_git_or_fail(["add", "."], cwd=repo_path)
+        run_git_or_fail(["commit", "-m", "Initial commit"], cwd=repo_path)
+
+        # Start git daemon
+        daemon_port = self._start_git_daemon(repo_path)
+
+        # Create destination repo
+        dest_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, dest_path)
+        dest_repo = Repo.init(dest_path, mkdir=False)
+        self.addCleanup(dest_repo.close)
+
+        # Fetch with blob:none filter using dulwich client
+        client, path = get_transport_and_path(
+            f"git://localhost:{daemon_port}/",
+            thin_packs=False,
+        )
+
+        def determine_wants(refs, depth=None):
+            # Get all refs
+            return list(refs.values())
+
+        # Fetch with filter
+        result = client.fetch(
+            path,
+            dest_repo,
+            determine_wants=determine_wants,
+            progress=None,
+            filter_spec=b"blob:none",
+        )
+
+        # The fetch should succeed with partial clone
+        self.assertIsNotNone(result)
+
+    def test_clone_with_filter(self) -> None:
+        """Test that dulwich clone function works with filter."""
+        from dulwich.client import get_transport_and_path
+
+        # Create a git repository
+        repo_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_path)
+
+        run_git_or_fail(["init"], cwd=repo_path)
+        run_git_or_fail(["config", "user.name", "Test User"], cwd=repo_path)
+        run_git_or_fail(["config", "user.email", "test@example.com"], cwd=repo_path)
+
+        # Create and commit a file
+        test_file = os.path.join(repo_path, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(b"Test content for partial clone")
+        run_git_or_fail(["add", "."], cwd=repo_path)
+        run_git_or_fail(["commit", "-m", "Test commit"], cwd=repo_path)
+
+        # Start git daemon
+        daemon_port = self._start_git_daemon(repo_path)
+
+        # Clone with dulwich using filter
+        dest_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, dest_path)
+
+        client, path = get_transport_and_path(f"git://localhost:{daemon_port}/")
+
+        # Clone with blob:limit filter
+        cloned_repo = client.clone(
+            path,
+            dest_path,
+            mkdir=False,
+            filter_spec=b"blob:limit=100",
+        )
+        self.addCleanup(cloned_repo.close)
+
+        # Verify clone succeeded
+        self.assertTrue(os.path.exists(dest_path))
+        self.assertTrue(os.path.exists(os.path.join(dest_path, ".git")))
+
+    def _start_git_daemon(self, repo_path):
+        """Start git daemon for testing."""
+        import socket
+        import subprocess
+        import time
+
+        # Find an available port
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.bind(("localhost", 0))
+        _, port = sock.getsockname()
+        sock.close()
+
+        # Mark directory as git daemon export
+        export_file = os.path.join(repo_path, "git-daemon-export-ok")
+        with open(export_file, "w") as f:
+            f.write("")
+
+        # Start git daemon
+        daemon_process = subprocess.Popen(
+            [
+                "git",
+                "daemon",
+                "--reuseaddr",
+                f"--port={port}",
+                "--base-path=.",
+                "--export-all",
+                "--enable=receive-pack",
+                ".",
+            ],
+            cwd=repo_path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        # Give daemon time to start
+        time.sleep(0.5)
+
+        def cleanup_daemon():
+            daemon_process.terminate()
+            daemon_process.wait(timeout=2)
+
+        self.addCleanup(cleanup_daemon)
+
+        return port
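
The compat tests above exercise blob:none, blob:limit and tree:0; sparse:oid only appears in the unit tests below. A small sketch of how that filter resolves paths, under the assumption that a one-line "/src/" entry is a pattern dulwich.sparse_patterns accepts:

from dulwich.object_filters import parse_filter_spec
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
# Blob holding sparse-checkout style patterns (assumed pattern syntax)
patterns = Blob.from_string(b"/src/\n")
store.add_object(patterns)

spec = parse_filter_spec(
    "sparse:oid=" + patterns.id.decode("ascii"), object_store=store
)
# Path decisions are delegated to dulwich.sparse_patterns
spec.should_include_path("src/main.py")
spec.should_include_path("docs/readme.md")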

+ 999 - 0
tests/test_object_filters.py

@@ -0,0 +1,999 @@
+# test_object_filters.py -- Tests for object filtering
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for object filtering (partial clone filter specifications)."""
+
+import os
+import tempfile
+
+from dulwich.object_filters import (
+    BlobLimitFilter,
+    BlobNoneFilter,
+    CombineFilter,
+    SparseOidFilter,
+    TreeDepthFilter,
+    filter_pack_objects,
+    parse_filter_spec,
+)
+from dulwich.object_store import MemoryObjectStore
+from dulwich.objects import Blob, Tree
+from dulwich.repo import Repo
+from dulwich.tests.utils import make_commit
+
+from . import TestCase
+
+
+class ParseFilterSpecTests(TestCase):
+    """Test parse_filter_spec function."""
+
+    def test_parse_blob_none(self):
+        """Test parsing 'blob:none' filter."""
+        filter_spec = parse_filter_spec("blob:none")
+        self.assertIsInstance(filter_spec, BlobNoneFilter)
+        self.assertEqual("blob:none", filter_spec.to_spec_string())
+
+    def test_parse_blob_none_bytes(self):
+        """Test parsing 'blob:none' as bytes."""
+        filter_spec = parse_filter_spec(b"blob:none")
+        self.assertIsInstance(filter_spec, BlobNoneFilter)
+
+    def test_parse_blob_limit_bytes(self):
+        """Test parsing 'blob:limit=100' in bytes."""
+        filter_spec = parse_filter_spec("blob:limit=100")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(100, filter_spec.limit)
+
+    def test_parse_blob_limit_kb(self):
+        """Test parsing 'blob:limit=10k'."""
+        filter_spec = parse_filter_spec("blob:limit=10k")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(10 * 1024, filter_spec.limit)
+
+    def test_parse_blob_limit_mb(self):
+        """Test parsing 'blob:limit=5m'."""
+        filter_spec = parse_filter_spec("blob:limit=5m")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(5 * 1024 * 1024, filter_spec.limit)
+
+    def test_parse_blob_limit_gb(self):
+        """Test parsing 'blob:limit=1g'."""
+        filter_spec = parse_filter_spec("blob:limit=1g")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(1024 * 1024 * 1024, filter_spec.limit)
+
+    def test_parse_tree_depth(self):
+        """Test parsing 'tree:0' filter."""
+        filter_spec = parse_filter_spec("tree:0")
+        self.assertIsInstance(filter_spec, TreeDepthFilter)
+        self.assertEqual(0, filter_spec.max_depth)
+
+    def test_parse_tree_depth_nonzero(self):
+        """Test parsing 'tree:3' filter."""
+        filter_spec = parse_filter_spec("tree:3")
+        self.assertIsInstance(filter_spec, TreeDepthFilter)
+        self.assertEqual(3, filter_spec.max_depth)
+
+    def test_parse_sparse_oid(self):
+        """Test parsing 'sparse:oid=<oid>' filter."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = parse_filter_spec(f"sparse:oid={oid.decode('ascii')}")
+        self.assertIsInstance(filter_spec, SparseOidFilter)
+        self.assertEqual(oid, filter_spec.oid)
+
+    def test_parse_combine(self):
+        """Test parsing 'combine:blob:none+tree:0' filter."""
+        filter_spec = parse_filter_spec("combine:blob:none+tree:0")
+        self.assertIsInstance(filter_spec, CombineFilter)
+        self.assertEqual(2, len(filter_spec.filters))
+        self.assertIsInstance(filter_spec.filters[0], BlobNoneFilter)
+        self.assertIsInstance(filter_spec.filters[1], TreeDepthFilter)
+
+    def test_parse_combine_multiple(self):
+        """Test parsing combine filter with 3+ filters."""
+        filter_spec = parse_filter_spec("combine:blob:none+tree:0+blob:limit=1m")
+        self.assertIsInstance(filter_spec, CombineFilter)
+        self.assertEqual(3, len(filter_spec.filters))
+
+    def test_parse_unknown_spec(self):
+        """Test that unknown filter specs raise ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("unknown:spec")
+        self.assertIn("Unknown filter specification", str(cm.exception))
+
+    def test_parse_invalid_tree_depth(self):
+        """Test that invalid tree depth raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("tree:invalid")
+        self.assertIn("Invalid tree filter", str(cm.exception))
+
+    def test_parse_invalid_blob_limit(self):
+        """Test that invalid blob limit raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("blob:limit=invalid")
+        self.assertIn("Invalid", str(cm.exception))
+
+    def test_parse_empty_spec(self):
+        """Test that empty filter spec raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("")
+        self.assertIn("cannot be empty", str(cm.exception))
+
+    def test_parse_blob_limit_no_value(self):
+        """Test that blob:limit without value raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("blob:limit=")
+        self.assertIn("requires a size value", str(cm.exception))
+
+    def test_parse_tree_no_value(self):
+        """Test that tree: without depth raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("tree:")
+        self.assertIn("requires a depth value", str(cm.exception))
+
+    def test_parse_tree_negative_depth(self):
+        """Test that negative tree depth raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("tree:-1")
+        self.assertIn("non-negative", str(cm.exception))
+
+    def test_parse_sparse_oid_invalid_length(self):
+        """Test that invalid OID length raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("sparse:oid=abc123")
+        self.assertIn("40 or 64 hex chars", str(cm.exception))
+
+    def test_parse_sparse_oid_invalid_hex(self):
+        """Test that non-hex OID raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("sparse:oid=" + "x" * 40)
+        self.assertIn("valid object ID", str(cm.exception))
+
+    def test_parse_combine_single_filter(self):
+        """Test that combine with single filter raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("combine:blob:none")
+        self.assertIn("at least two filters", str(cm.exception))
+
+    def test_parse_unknown_with_helpful_message(self):
+        """Test that unknown spec gives helpful error message."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("unknown:spec")
+        error_msg = str(cm.exception)
+        self.assertIn("Unknown filter specification", error_msg)
+        self.assertIn("Supported formats", error_msg)
+        self.assertIn("blob:none", error_msg)
+
+
+class BlobNoneFilterTests(TestCase):
+    """Test BlobNoneFilter class."""
+
+    def test_should_include_blob(self):
+        """Test that BlobNoneFilter excludes all blobs."""
+        filter_spec = BlobNoneFilter()
+        self.assertFalse(filter_spec.should_include_blob(0))
+        self.assertFalse(filter_spec.should_include_blob(100))
+        self.assertFalse(filter_spec.should_include_blob(1024 * 1024))
+
+    def test_should_include_tree(self):
+        """Test that BlobNoneFilter includes all trees."""
+        filter_spec = BlobNoneFilter()
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(1))
+        self.assertTrue(filter_spec.should_include_tree(100))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        filter_spec = BlobNoneFilter()
+        self.assertEqual("blob:none", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filter_spec = BlobNoneFilter()
+        self.assertEqual("BlobNoneFilter()", repr(filter_spec))
+
+
+class BlobLimitFilterTests(TestCase):
+    """Test BlobLimitFilter class."""
+
+    def test_should_include_blob_under_limit(self):
+        """Test that blobs under limit are included."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertTrue(filter_spec.should_include_blob(0))
+        self.assertTrue(filter_spec.should_include_blob(512))
+        self.assertTrue(filter_spec.should_include_blob(1024))
+
+    def test_should_include_blob_over_limit(self):
+        """Test that blobs over limit are excluded."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertFalse(filter_spec.should_include_blob(1025))
+        self.assertFalse(filter_spec.should_include_blob(2048))
+
+    def test_should_include_tree(self):
+        """Test that BlobLimitFilter includes all trees."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(100))
+
+    def test_to_spec_string_bytes(self):
+        """Test conversion to spec string with bytes."""
+        filter_spec = BlobLimitFilter(100)
+        self.assertEqual("blob:limit=100", filter_spec.to_spec_string())
+
+    def test_to_spec_string_kb(self):
+        """Test conversion to spec string with KB."""
+        filter_spec = BlobLimitFilter(10 * 1024)
+        self.assertEqual("blob:limit=10k", filter_spec.to_spec_string())
+
+    def test_to_spec_string_mb(self):
+        """Test conversion to spec string with MB."""
+        filter_spec = BlobLimitFilter(5 * 1024 * 1024)
+        self.assertEqual("blob:limit=5m", filter_spec.to_spec_string())
+
+    def test_to_spec_string_gb(self):
+        """Test conversion to spec string with GB."""
+        filter_spec = BlobLimitFilter(2 * 1024 * 1024 * 1024)
+        self.assertEqual("blob:limit=2g", filter_spec.to_spec_string())
+
+    def test_to_spec_string_not_round(self):
+        """Test conversion to spec string with non-round size."""
+        filter_spec = BlobLimitFilter(1500)
+        self.assertEqual("blob:limit=1500", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertEqual("BlobLimitFilter(limit=1024)", repr(filter_spec))
+
+
+class TreeDepthFilterTests(TestCase):
+    """Test TreeDepthFilter class."""
+
+    def test_should_include_blob(self):
+        """Test that TreeDepthFilter includes all blobs."""
+        filter_spec = TreeDepthFilter(0)
+        self.assertTrue(filter_spec.should_include_blob(0))
+        self.assertTrue(filter_spec.should_include_blob(1024))
+
+    def test_should_include_tree_at_depth(self):
+        """Test that trees at or below max_depth are included."""
+        filter_spec = TreeDepthFilter(2)
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(1))
+        self.assertTrue(filter_spec.should_include_tree(2))
+
+    def test_should_include_tree_beyond_depth(self):
+        """Test that trees beyond max_depth are excluded."""
+        filter_spec = TreeDepthFilter(2)
+        self.assertFalse(filter_spec.should_include_tree(3))
+        self.assertFalse(filter_spec.should_include_tree(10))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        filter_spec = TreeDepthFilter(3)
+        self.assertEqual("tree:3", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filter_spec = TreeDepthFilter(2)
+        self.assertEqual("TreeDepthFilter(max_depth=2)", repr(filter_spec))
+
+
+class SparseOidFilterTests(TestCase):
+    """Test SparseOidFilter class."""
+
+    def test_should_include_blob(self):
+        """Test that SparseOidFilter includes all blobs."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        self.assertTrue(filter_spec.should_include_blob(0))
+        self.assertTrue(filter_spec.should_include_blob(1024))
+
+    def test_should_include_tree(self):
+        """Test that SparseOidFilter includes all trees."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(10))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        expected = "sparse:oid=1234567890abcdef1234567890abcdef12345678"
+        self.assertEqual(expected, filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        self.assertIn("SparseOidFilter", repr(filter_spec))
+        self.assertIn("1234567890abcdef1234567890abcdef12345678", repr(filter_spec))
+
+    def test_load_patterns_from_blob(self):
+        """Test loading sparse patterns from a blob object."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Blob
+
+        # Create a sparse patterns blob
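+        # (include *.txt, negate *.log, include the /src/ directory)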
+        patterns = b"*.txt\n!*.log\n/src/\n"
+        blob = Blob.from_string(patterns)
+
+        object_store = MemoryObjectStore()
+        object_store.add_object(blob)
+
+        filter_spec = SparseOidFilter(blob.id, object_store=object_store)
+        filter_spec._load_patterns()
+
+        # Verify patterns were loaded
+        self.assertIsNotNone(filter_spec._patterns)
+        self.assertEqual(3, len(filter_spec._patterns))
+
+    def test_load_patterns_missing_blob(self):
+        """Test error when sparse blob is not found."""
+        from dulwich.object_store import MemoryObjectStore
+
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        object_store = MemoryObjectStore()
+
+        filter_spec = SparseOidFilter(oid, object_store=object_store)
+
+        with self.assertRaises(ValueError) as cm:
+            filter_spec._load_patterns()
+        self.assertIn("not found", str(cm.exception))
+
+    def test_load_patterns_not_a_blob(self):
+        """Test error when sparse OID points to non-blob object."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Tree
+
+        tree = Tree()
+        object_store = MemoryObjectStore()
+        object_store.add_object(tree)
+
+        filter_spec = SparseOidFilter(tree.id, object_store=object_store)
+
+        with self.assertRaises(ValueError) as cm:
+            filter_spec._load_patterns()
+        self.assertIn("not a blob", str(cm.exception))
+
+    def test_load_patterns_without_object_store(self):
+        """Test error when trying to load patterns without object store."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+
+        with self.assertRaises(ValueError) as cm:
+            filter_spec._load_patterns()
+        self.assertIn("without an object store", str(cm.exception))
+
+    def test_should_include_path_matching(self):
+        """Test path matching with sparse patterns."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Blob
+
+        # Create a sparse patterns blob: include *.txt files
+        patterns = b"*.txt\n"
+        blob = Blob.from_string(patterns)
+
+        object_store = MemoryObjectStore()
+        object_store.add_object(blob)
+
+        filter_spec = SparseOidFilter(blob.id, object_store=object_store)
+
+        # .txt files should be included
+        self.assertTrue(filter_spec.should_include_path("readme.txt"))
+        self.assertTrue(filter_spec.should_include_path("docs/file.txt"))
+
+        # Other files should not be included
+        self.assertFalse(filter_spec.should_include_path("readme.md"))
+        self.assertFalse(filter_spec.should_include_path("script.py"))
+
+    def test_should_include_path_negation(self):
+        """Test path matching with negation patterns."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Blob
+
+        # Include all .txt files except logs
+        patterns = b"*.txt\n!*.log\n"
+        blob = Blob.from_string(patterns)
+
+        object_store = MemoryObjectStore()
+        object_store.add_object(blob)
+
+        filter_spec = SparseOidFilter(blob.id, object_store=object_store)
+
+        # .txt files should be included
+        self.assertTrue(filter_spec.should_include_path("readme.txt"))
+
+        # .log files should be excluded: they never match the *.txt include
+        # pattern and are explicitly negated by !*.log.
+        # Note: This depends on pattern order and the sparse_patterns implementation.
+        self.assertFalse(filter_spec.should_include_path("debug.log"))
+
+
+class CombineFilterTests(TestCase):
+    """Test CombineFilter class."""
+
+    def test_should_include_blob_all_allow(self):
+        """Test that blob is included when all filters allow it."""
+        filters = [BlobLimitFilter(1024), BlobLimitFilter(2048)]
+        filter_spec = CombineFilter(filters)
+        self.assertTrue(filter_spec.should_include_blob(512))
+
+    def test_should_include_blob_one_denies(self):
+        """Test that blob is excluded when one filter denies it."""
+        filters = [BlobLimitFilter(1024), BlobNoneFilter()]
+        filter_spec = CombineFilter(filters)
+        self.assertFalse(filter_spec.should_include_blob(512))
+
+    def test_should_include_tree_all_allow(self):
+        """Test that tree is included when all filters allow it."""
+        filters = [TreeDepthFilter(2), TreeDepthFilter(3)]
+        filter_spec = CombineFilter(filters)
+        self.assertTrue(filter_spec.should_include_tree(1))
+
+    def test_should_include_tree_one_denies(self):
+        """Test that tree is excluded when one filter denies it."""
+        filters = [TreeDepthFilter(2), TreeDepthFilter(1)]
+        filter_spec = CombineFilter(filters)
+        self.assertFalse(filter_spec.should_include_tree(2))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        filters = [BlobNoneFilter(), TreeDepthFilter(0)]
+        filter_spec = CombineFilter(filters)
+        self.assertEqual("combine:blob:none+tree:0", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filters = [BlobNoneFilter()]
+        filter_spec = CombineFilter(filters)
+        self.assertIn("CombineFilter", repr(filter_spec))
+
+
+class FilterPackObjectsTests(TestCase):
+    """Test filter_pack_objects function."""
+
+    def setUp(self):
+        super().setUp()
+        self.store = MemoryObjectStore()
+
+        # Create test objects
+        self.small_blob = Blob.from_string(b"small")
+        self.large_blob = Blob.from_string(b"x" * 2000)
+        self.tree = Tree()
+        self.commit = make_commit(tree=self.tree.id)
+
+        # Add objects to store
+        self.store.add_object(self.small_blob)
+        self.store.add_object(self.large_blob)
+        self.store.add_object(self.tree)
+        self.store.add_object(self.commit)
+
+    def test_filter_blob_none(self):
+        """Test that blob:none filter excludes all blobs."""
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+            self.commit.id,
+        ]
+
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should exclude both blobs but keep tree and commit
+        self.assertNotIn(self.small_blob.id, filtered)
+        self.assertNotIn(self.large_blob.id, filtered)
+        self.assertIn(self.tree.id, filtered)
+        self.assertIn(self.commit.id, filtered)
+
+    def test_filter_blob_limit(self):
+        """Test that blob:limit filter excludes blobs over size limit."""
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+        ]
+
+        # Set limit to 100 bytes
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should keep small blob but exclude large blob
+        self.assertIn(self.small_blob.id, filtered)
+        self.assertNotIn(self.large_blob.id, filtered)
+        self.assertIn(self.tree.id, filtered)
+
+    def test_filter_no_filter_keeps_all(self):
+        """Test that without filtering all objects are kept."""
+        # Create a filter that includes everything
+        filter_spec = BlobLimitFilter(10000)  # Large limit
+
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+            self.commit.id,
+        ]
+
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # All objects should be included
+        self.assertEqual(len(filtered), len(object_ids))
+        for oid in object_ids:
+            self.assertIn(oid, filtered)
+
+    def test_filter_missing_object(self):
+        """Test that missing objects are skipped without error."""
+        from dulwich.objects import ObjectID
+
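+        # An all-zero SHA that is not present in the object store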
+        fake_id = ObjectID(b"0" * 40)
+        object_ids = [fake_id, self.small_blob.id]
+
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should skip the missing object
+        self.assertNotIn(fake_id, filtered)
+
+    def test_filter_combine(self):
+        """Test combined filters."""
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+        ]
+
+        # Combine blob:limit with another filter
+        filter_spec = CombineFilter(
+            [
+                BlobLimitFilter(100),
+                BlobNoneFilter(),  # This will exclude ALL blobs
+            ]
+        )
+
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should exclude all blobs due to BlobNoneFilter
+        self.assertNotIn(self.small_blob.id, filtered)
+        self.assertNotIn(self.large_blob.id, filtered)
+        self.assertIn(self.tree.id, filtered)
+
+
+class PartialCloneIntegrationTests(TestCase):
+    """Integration tests for partial clone with real repositories."""
+
+    def setUp(self):
+        super().setUp()
+        self.repo_dir = tempfile.mkdtemp()
+        self.addCleanup(self._cleanup)
+        self.repo = Repo.init(self.repo_dir)
+
+    def _cleanup(self):
+        """Close the repository and remove its temporary directory."""
+        import shutil
+
+        self.repo.close()
+        if os.path.exists(self.repo_dir):
+            shutil.rmtree(self.repo_dir)
+
+    def test_blob_none_filter_with_real_repo(self):
+        """Test blob:none filter excludes blobs in real repository."""
+        # Create a tree with files
+        tree = Tree()
+
+        # Add some blobs to the tree
+        blob1 = Blob.from_string(b"file1 content")
+        blob2 = Blob.from_string(b"file2 content")
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+
+        # Add objects to repo
+        self.repo.object_store.add_object(blob1)
+        self.repo.object_store.add_object(blob2)
+        self.repo.object_store.add_object(tree)
+
+        # Create commit
+        commit = make_commit(tree=tree.id, message=b"Test commit")
+        self.repo.object_store.add_object(commit)
+
+        # Get all objects
+        object_ids = [blob1.id, blob2.id, tree.id, commit.id]
+
+        # Apply blob:none filter
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects(self.repo.object_store, object_ids, filter_spec)
+
+        # Verify blobs are excluded
+        self.assertNotIn(blob1.id, filtered)
+        self.assertNotIn(blob2.id, filtered)
+        # But tree and commit are included
+        self.assertIn(tree.id, filtered)
+        self.assertIn(commit.id, filtered)
+
+        # Verify we have only 2 objects (tree + commit)
+        self.assertEqual(2, len(filtered))
+
+    def test_blob_limit_filter_with_mixed_sizes(self):
+        """Test blob:limit filter with mixed blob sizes."""
+        tree = Tree()
+
+        # Create blobs of different sizes
+        small_blob = Blob.from_string(b"small")  # 5 bytes
+        medium_blob = Blob.from_string(b"x" * 50)  # 50 bytes
+        large_blob = Blob.from_string(b"y" * 500)  # 500 bytes
+
+        tree.add(b"small.txt", 0o100644, small_blob.id)
+        tree.add(b"medium.txt", 0o100644, medium_blob.id)
+        tree.add(b"large.txt", 0o100644, large_blob.id)
+
+        # Add to repo
+        self.repo.object_store.add_object(small_blob)
+        self.repo.object_store.add_object(medium_blob)
+        self.repo.object_store.add_object(large_blob)
+        self.repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id)
+        self.repo.object_store.add_object(commit)
+
+        # Test with 100 byte limit
+        object_ids = [
+            small_blob.id,
+            medium_blob.id,
+            large_blob.id,
+            tree.id,
+            commit.id,
+        ]
+
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects(self.repo.object_store, object_ids, filter_spec)
+
+        # Small and medium should be included
+        self.assertIn(small_blob.id, filtered)
+        self.assertIn(medium_blob.id, filtered)
+        # Large should be excluded
+        self.assertNotIn(large_blob.id, filtered)
+        # Tree and commit included
+        self.assertIn(tree.id, filtered)
+        self.assertIn(commit.id, filtered)
+
+    def test_combined_filter_integration(self):
+        """Test combined filters in real scenario."""
+        tree = Tree()
+
+        blob1 = Blob.from_string(b"content1")
+        blob2 = Blob.from_string(b"x" * 1000)
+
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+
+        self.repo.object_store.add_object(blob1)
+        self.repo.object_store.add_object(blob2)
+        self.repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id)
+        self.repo.object_store.add_object(commit)
+
+        # Combine a 500-byte limit with blob:none. An object is kept only if
+        # every sub-filter keeps it, so blob:none excludes ALL blobs regardless
+        # of the size limit.
+        filter_spec = CombineFilter(
+            [
+                BlobLimitFilter(500),
+                BlobNoneFilter(),
+            ]
+        )
+
+        object_ids = [blob1.id, blob2.id, tree.id, commit.id]
+        filtered = filter_pack_objects(self.repo.object_store, object_ids, filter_spec)
+
+        # All blobs excluded
+        self.assertNotIn(blob1.id, filtered)
+        self.assertNotIn(blob2.id, filtered)
+        # Only tree and commit
+        self.assertEqual(2, len(filtered))
+
+
+class FilterPackObjectsWithPathsTests(TestCase):
+    """Test filter_pack_objects_with_paths function."""
+
+    def setUp(self):
+        super().setUp()
+        self.object_store = MemoryObjectStore()
+
+    def test_tree_depth_filtering(self):
+        """Test filtering by tree depth."""
+        from dulwich.object_filters import (
+            TreeDepthFilter,
+            filter_pack_objects_with_paths,
+        )
+        from dulwich.objects import Blob, Tree
+        from dulwich.tests.utils import make_commit
+
+        # Create a nested tree structure:
+        # root/
+        #   file1.txt (blob1)
+        #   dir1/
+        #     file2.txt (blob2)
+        #     dir2/
+        #       file3.txt (blob3)
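+        # (depths: root = 0, dir1 = 1, dir2 = 2)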
+
+        blob1 = Blob.from_string(b"file1 content")
+        blob2 = Blob.from_string(b"file2 content")
+        blob3 = Blob.from_string(b"file3 content")
+
+        # deepest tree (dir2)
+        tree_dir2 = Tree()
+        tree_dir2.add(b"file3.txt", 0o100644, blob3.id)
+
+        # middle tree (dir1)
+        tree_dir1 = Tree()
+        tree_dir1.add(b"file2.txt", 0o100644, blob2.id)
+        tree_dir1.add(b"dir2", 0o040000, tree_dir2.id)
+
+        # root tree
+        tree_root = Tree()
+        tree_root.add(b"file1.txt", 0o100644, blob1.id)
+        tree_root.add(b"dir1", 0o040000, tree_dir1.id)
+
+        # Add all objects to store
+        for obj in [blob1, blob2, blob3, tree_dir2, tree_dir1, tree_root]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree_root.id)
+        self.object_store.add_object(commit)
+
+        # Filter with depth=1 (root + 1 level deep)
+        filter_spec = TreeDepthFilter(1)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include: commit, tree_root (depth 0), tree_dir1 (depth 1),
+        # blob1 (in root), blob2 (in dir1)
+        # Should exclude: tree_dir2 (depth 2), blob3 (in dir2)
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree_root.id, filtered)
+        self.assertIn(tree_dir1.id, filtered)
+        self.assertIn(blob1.id, filtered)
+        self.assertIn(blob2.id, filtered)
+        self.assertNotIn(tree_dir2.id, filtered)
+        self.assertNotIn(blob3.id, filtered)
+
+    def test_sparse_oid_path_filtering(self):
+        """Test filtering by sparse checkout patterns."""
+        from dulwich.object_filters import (
+            SparseOidFilter,
+            filter_pack_objects_with_paths,
+        )
+        from dulwich.objects import Blob, Tree
+        from dulwich.tests.utils import make_commit
+
+        # Create sparse patterns blob that includes only *.txt files
+        patterns = b"*.txt\n"
+        patterns_blob = Blob.from_string(patterns)
+        self.object_store.add_object(patterns_blob)
+
+        # Create a tree with mixed file types:
+        # root/
+        #   readme.txt (should be included)
+        #   script.py (should be excluded)
+        #   docs/
+        #     guide.txt (should be included)
+        #     image.png (should be excluded)
+
+        blob_readme = Blob.from_string(b"readme content")
+        blob_script = Blob.from_string(b"script content")
+        blob_guide = Blob.from_string(b"guide content")
+        blob_image = Blob.from_string(b"image content")
+
+        tree_docs = Tree()
+        tree_docs.add(b"guide.txt", 0o100644, blob_guide.id)
+        tree_docs.add(b"image.png", 0o100644, blob_image.id)
+
+        tree_root = Tree()
+        tree_root.add(b"readme.txt", 0o100644, blob_readme.id)
+        tree_root.add(b"script.py", 0o100644, blob_script.id)
+        tree_root.add(b"docs", 0o040000, tree_docs.id)
+
+        # Add all objects
+        for obj in [
+            blob_readme,
+            blob_script,
+            blob_guide,
+            blob_image,
+            tree_docs,
+            tree_root,
+        ]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree_root.id)
+        self.object_store.add_object(commit)
+
+        # Create sparse filter
+        filter_spec = SparseOidFilter(patterns_blob.id, object_store=self.object_store)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include: commit, trees, and .txt blobs
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree_root.id, filtered)
+        self.assertIn(tree_docs.id, filtered)
+        self.assertIn(blob_readme.id, filtered)
+        self.assertIn(blob_guide.id, filtered)
+
+        # Should exclude: non-.txt blobs
+        self.assertNotIn(blob_script.id, filtered)
+        self.assertNotIn(blob_image.id, filtered)
+
+    def test_blob_size_filtering_with_paths(self):
+        """Test that blob size filtering still works with path tracking."""
+        from dulwich.object_filters import (
+            BlobLimitFilter,
+            filter_pack_objects_with_paths,
+        )
+        from dulwich.objects import Blob, Tree
+        from dulwich.tests.utils import make_commit
+
+        # Create blobs of different sizes
+        blob_small = Blob.from_string(b"small")  # 5 bytes
+        blob_large = Blob.from_string(b"x" * 1000)  # 1000 bytes
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, blob_small.id)
+        tree.add(b"large.txt", 0o100644, blob_large.id)
+
+        for obj in [blob_small, blob_large, tree]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree.id)
+        self.object_store.add_object(commit)
+
+        # Filter with 100 byte limit
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include small blob but not large
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree.id, filtered)
+        self.assertIn(blob_small.id, filtered)
+        self.assertNotIn(blob_large.id, filtered)
+
+    def test_combined_sparse_and_size_filter(self):
+        """Test combining sparse patterns with blob size limits."""
+        from dulwich.object_filters import (
+            BlobLimitFilter,
+            CombineFilter,
+            SparseOidFilter,
+            filter_pack_objects_with_paths,
+        )
+        from dulwich.objects import Blob, Tree
+        from dulwich.tests.utils import make_commit
+
+        # Create sparse patterns: only *.txt files
+        patterns = b"*.txt\n"
+        patterns_blob = Blob.from_string(patterns)
+        self.object_store.add_object(patterns_blob)
+
+        # Create files:
+        # - small.txt (5 bytes, .txt) -> should be included
+        # - large.txt (1000 bytes, .txt) -> excluded by size
+        # - small.py (5 bytes, .py) -> excluded by pattern
+        # - large.py (1000 bytes, .py) -> excluded by both
+
+        blob_small_txt = Blob.from_string(b"small txt")
+        blob_large_txt = Blob.from_string(b"x" * 1000)
+        blob_small_py = Blob.from_string(b"small py")
+        blob_large_py = Blob.from_string(b"y" * 1000)
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, blob_small_txt.id)
+        tree.add(b"large.txt", 0o100644, blob_large_txt.id)
+        tree.add(b"small.py", 0o100644, blob_small_py.id)
+        tree.add(b"large.py", 0o100644, blob_large_py.id)
+
+        for obj in [blob_small_txt, blob_large_txt, blob_small_py, blob_large_py, tree]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree.id)
+        self.object_store.add_object(commit)
+
+        # Combine: sparse filter + 100 byte limit
+        filter_spec = CombineFilter(
+            [
+                SparseOidFilter(patterns_blob.id, object_store=self.object_store),
+                BlobLimitFilter(100),
+            ]
+        )
+
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Only small.txt should be included (matches pattern AND size limit)
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree.id, filtered)
+        self.assertIn(blob_small_txt.id, filtered)
+        self.assertNotIn(blob_large_txt.id, filtered)  # Too large
+        self.assertNotIn(blob_small_py.id, filtered)  # Wrong pattern
+        self.assertNotIn(blob_large_py.id, filtered)  # Both wrong
+
+    def test_blob_none_filter_with_paths(self):
+        """Test that blob:none excludes all blobs with path tracking."""
+        from dulwich.object_filters import (
+            BlobNoneFilter,
+            filter_pack_objects_with_paths,
+        )
+        from dulwich.objects import Blob, Tree
+        from dulwich.tests.utils import make_commit
+
+        blob1 = Blob.from_string(b"content1")
+        blob2 = Blob.from_string(b"content2")
+
+        tree = Tree()
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+
+        for obj in [blob1, blob2, tree]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree.id)
+        self.object_store.add_object(commit)
+
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include commit and tree but no blobs
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree.id, filtered)
+        self.assertNotIn(blob1.id, filtered)
+        self.assertNotIn(blob2.id, filtered)
+
+    def test_direct_tree_want(self):
+        """Test filtering when a tree (not commit) is wanted."""
+        from dulwich.object_filters import (
+            BlobLimitFilter,
+            filter_pack_objects_with_paths,
+        )
+        from dulwich.objects import Blob, Tree
+
+        blob_small = Blob.from_string(b"small")
+        blob_large = Blob.from_string(b"x" * 1000)
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, blob_small.id)
+        tree.add(b"large.txt", 0o100644, blob_large.id)
+
+        for obj in [blob_small, blob_large, tree]:
+            self.object_store.add_object(obj)
+
+        # Want the tree directly (not via commit)
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [tree.id], filter_spec
+        )
+
+        # Should include tree and small blob
+        self.assertIn(tree.id, filtered)
+        self.assertIn(blob_small.id, filtered)
+        self.assertNotIn(blob_large.id, filtered)

+ 41 - 0
tests/test_protocol.py

@@ -25,6 +25,8 @@ from io import BytesIO
 
 from dulwich.errors import HangupException
 from dulwich.protocol import (
+    CAPABILITY_FILTER,
+    KNOWN_UPLOAD_CAPABILITIES,
     MULTI_ACK,
     MULTI_ACK_DETAILED,
     SINGLE_ACK,
@@ -36,6 +38,7 @@ from dulwich.protocol import (
     ack_type,
     extract_capabilities,
     extract_want_line_capabilities,
+    find_capability,
     pkt_line,
     pkt_seq,
 )
@@ -348,3 +351,41 @@ class PktLineParserTests(TestCase):
         parser.parse(b"0005z0006aba")
         self.assertEqual(pktlines, [b"z", b"ab"])
         self.assertEqual(b"a", parser.get_tail())
+
+
+class CapabilitiesTests(TestCase):
+    """Tests for protocol capabilities."""
+
+    def test_filter_capability_in_known_upload_capabilities(self) -> None:
+        """Test that CAPABILITY_FILTER is in KNOWN_UPLOAD_CAPABILITIES."""
+        self.assertIn(CAPABILITY_FILTER, KNOWN_UPLOAD_CAPABILITIES)
+
+
+class FindCapabilityTests(TestCase):
+    """Tests for find_capability function."""
+
+    def test_find_capability_with_value(self) -> None:
+        """Test finding a capability with a value."""
+        caps = [b"filter=blob:none", b"agent=git/2.0"]
+        self.assertEqual(b"blob:none", find_capability(caps, b"filter"))
+
+    def test_find_capability_without_value(self) -> None:
+        """Test finding a capability without a value."""
+        caps = [b"thin-pack", b"ofs-delta"]
+        self.assertEqual(b"thin-pack", find_capability(caps, b"thin-pack"))
+
+    def test_find_capability_not_found(self) -> None:
+        """Test finding a capability that doesn't exist."""
+        caps = [b"thin-pack", b"ofs-delta"]
+        self.assertIsNone(find_capability(caps, b"missing"))
+
+    def test_find_capability_multiple_names(self) -> None:
+        """Test finding with multiple capability names."""
+        caps = [b"filter=blob:none", b"agent=git/2.0"]
+        # b"missing" is absent, so the value of the first name that is present
+        # (b"agent") is returned
+        self.assertEqual(b"git/2.0", find_capability(caps, b"missing", b"agent"))
+
+    def test_find_capability_complex_value(self) -> None:
+        """Test finding capability with complex value."""
+        caps = [b"filter=combine:blob:none+tree:0"]
+        self.assertEqual(b"combine:blob:none+tree:0", find_capability(caps, b"filter"))

+ 28 - 0
tests/test_server.py

@@ -180,6 +180,34 @@ class UploadPackHandlerTestCase(TestCase):
         self._handler.progress(b"second message")
         self.assertRaises(IndexError, self._handler.proto.get_received_line, 2)
 
+    def test_filter_capability_advertised(self) -> None:
+        """Test that the filter capability is advertised by UploadPackHandler."""
+        caps = self._handler.capabilities()
+        self.assertIn(b"filter", caps)
+
+    def test_filter_spec_parsed(self) -> None:
+        """Test that filter specification is parsed from client capabilities."""
+        from dulwich.object_filters import BlobNoneFilter
+
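+        # The filter spec is passed as the value of the "filter" capability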
+        caps = [b"filter=blob:none", *list(self._handler.required_capabilities())]
+        self._handler.set_client_capabilities(caps)
+        self.assertIsNotNone(self._handler.filter_spec)
+        self.assertIsInstance(self._handler.filter_spec, BlobNoneFilter)
+
+    def test_filter_spec_not_present(self) -> None:
+        """Test that filter_spec is None when filter capability is not used."""
+        caps = self._handler.required_capabilities()
+        self._handler.set_client_capabilities(caps)
+        self.assertIsNone(self._handler.filter_spec)
+
+    def test_filter_spec_invalid(self) -> None:
+        """Test that invalid filter spec raises GitProtocolError."""
+        from dulwich.errors import GitProtocolError
+
+        caps = [b"filter=invalid:spec", *list(self._handler.required_capabilities())]
+        with self.assertRaises(GitProtocolError):
+            self._handler.set_client_capabilities(caps)
+
     def test_get_tagged(self) -> None:
         refs = {
             b"refs/tags/tag1": ONE,