@@ -0,0 +1,999 @@
+# test_object_filters.py -- Tests for object filtering
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for object filtering (partial clone filter specifications)."""
+
+import os
+import shutil
+import tempfile
+
+from dulwich.object_filters import (
+    BlobLimitFilter,
+    BlobNoneFilter,
+    CombineFilter,
+    SparseOidFilter,
+    TreeDepthFilter,
+    filter_pack_objects,
+    filter_pack_objects_with_paths,
+    parse_filter_spec,
+)
+from dulwich.object_store import MemoryObjectStore
+from dulwich.objects import Blob, ObjectID, Tree
+from dulwich.repo import Repo
+from dulwich.tests.utils import make_commit
+
+from . import TestCase
+
+
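+# The filter specifications exercised below follow git's partial-clone filter
+# grammar: blob:none, blob:limit=<n>[kmg], tree:<depth>, sparse:oid=<oid>, and
+# combine:<filter>+<filter>[+...].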
+class ParseFilterSpecTests(TestCase):
+    """Test parse_filter_spec function."""
+
+    def test_parse_blob_none(self):
+        """Test parsing 'blob:none' filter."""
+        filter_spec = parse_filter_spec("blob:none")
+        self.assertIsInstance(filter_spec, BlobNoneFilter)
+        self.assertEqual("blob:none", filter_spec.to_spec_string())
+
+    def test_parse_blob_none_bytes(self):
+        """Test parsing 'blob:none' as bytes."""
+        filter_spec = parse_filter_spec(b"blob:none")
+        self.assertIsInstance(filter_spec, BlobNoneFilter)
+
+    def test_parse_blob_limit_bytes(self):
+        """Test parsing 'blob:limit=100' (a plain byte count)."""
+        filter_spec = parse_filter_spec("blob:limit=100")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(100, filter_spec.limit)
+
+    def test_parse_blob_limit_kb(self):
+        """Test parsing 'blob:limit=10k'."""
+        filter_spec = parse_filter_spec("blob:limit=10k")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(10 * 1024, filter_spec.limit)
+
+    def test_parse_blob_limit_mb(self):
+        """Test parsing 'blob:limit=5m'."""
+        filter_spec = parse_filter_spec("blob:limit=5m")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(5 * 1024 * 1024, filter_spec.limit)
+
+    def test_parse_blob_limit_gb(self):
+        """Test parsing 'blob:limit=1g'."""
+        filter_spec = parse_filter_spec("blob:limit=1g")
+        self.assertIsInstance(filter_spec, BlobLimitFilter)
+        self.assertEqual(1024 * 1024 * 1024, filter_spec.limit)
+
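+    # Note: the k/m/g suffixes above are binary multipliers (powers of 1024),
+    # the same interpretation git applies to blob:limit sizes.
+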
+    def test_parse_tree_depth(self):
+        """Test parsing 'tree:0' filter."""
+        filter_spec = parse_filter_spec("tree:0")
+        self.assertIsInstance(filter_spec, TreeDepthFilter)
+        self.assertEqual(0, filter_spec.max_depth)
+
+    def test_parse_tree_depth_nonzero(self):
+        """Test parsing 'tree:3' filter."""
+        filter_spec = parse_filter_spec("tree:3")
+        self.assertIsInstance(filter_spec, TreeDepthFilter)
+        self.assertEqual(3, filter_spec.max_depth)
+
+    def test_parse_sparse_oid(self):
+        """Test parsing 'sparse:oid=<oid>' filter."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = parse_filter_spec(f"sparse:oid={oid.decode('ascii')}")
+        self.assertIsInstance(filter_spec, SparseOidFilter)
+        self.assertEqual(oid, filter_spec.oid)
+
+    def test_parse_combine(self):
+        """Test parsing 'combine:blob:none+tree:0' filter."""
+        filter_spec = parse_filter_spec("combine:blob:none+tree:0")
+        self.assertIsInstance(filter_spec, CombineFilter)
+        self.assertEqual(2, len(filter_spec.filters))
+        self.assertIsInstance(filter_spec.filters[0], BlobNoneFilter)
+        self.assertIsInstance(filter_spec.filters[1], TreeDepthFilter)
+
+    def test_parse_combine_multiple(self):
+        """Test parsing combine filter with 3+ filters."""
+        filter_spec = parse_filter_spec("combine:blob:none+tree:0+blob:limit=1m")
+        self.assertIsInstance(filter_spec, CombineFilter)
+        self.assertEqual(3, len(filter_spec.filters))
+
+    def test_parse_unknown_spec(self):
+        """Test that unknown filter specs raise ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("unknown:spec")
+        self.assertIn("Unknown filter specification", str(cm.exception))
+
+    def test_parse_invalid_tree_depth(self):
+        """Test that invalid tree depth raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("tree:invalid")
+        self.assertIn("Invalid tree filter", str(cm.exception))
+
+    def test_parse_invalid_blob_limit(self):
+        """Test that invalid blob limit raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("blob:limit=invalid")
+        self.assertIn("Invalid", str(cm.exception))
+
+    def test_parse_empty_spec(self):
+        """Test that empty filter spec raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("")
+        self.assertIn("cannot be empty", str(cm.exception))
+
+    def test_parse_blob_limit_no_value(self):
+        """Test that blob:limit without value raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("blob:limit=")
+        self.assertIn("requires a size value", str(cm.exception))
+
+    def test_parse_tree_no_value(self):
+        """Test that tree: without depth raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("tree:")
+        self.assertIn("requires a depth value", str(cm.exception))
+
+    def test_parse_tree_negative_depth(self):
+        """Test that negative tree depth raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("tree:-1")
+        self.assertIn("non-negative", str(cm.exception))
+
+    def test_parse_sparse_oid_invalid_length(self):
+        """Test that invalid OID length raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("sparse:oid=abc123")
+        self.assertIn("40 or 64 hex chars", str(cm.exception))
+
+    def test_parse_sparse_oid_invalid_hex(self):
+        """Test that non-hex OID raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("sparse:oid=" + "x" * 40)
+        self.assertIn("valid object ID", str(cm.exception))
+
+    def test_parse_combine_single_filter(self):
+        """Test that combine with single filter raises ValueError."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("combine:blob:none")
+        self.assertIn("at least two filters", str(cm.exception))
+
+    def test_parse_unknown_with_helpful_message(self):
+        """Test that unknown spec gives helpful error message."""
+        with self.assertRaises(ValueError) as cm:
+            parse_filter_spec("unknown:spec")
+        error_msg = str(cm.exception)
+        self.assertIn("Unknown filter specification", error_msg)
+        self.assertIn("Supported formats", error_msg)
+        self.assertIn("blob:none", error_msg)
+
+
+class BlobNoneFilterTests(TestCase):
+    """Test BlobNoneFilter class."""
+
+    def test_should_include_blob(self):
+        """Test that BlobNoneFilter excludes all blobs."""
+        filter_spec = BlobNoneFilter()
+        self.assertFalse(filter_spec.should_include_blob(0))
+        self.assertFalse(filter_spec.should_include_blob(100))
+        self.assertFalse(filter_spec.should_include_blob(1024 * 1024))
+
+    def test_should_include_tree(self):
+        """Test that BlobNoneFilter includes all trees."""
+        filter_spec = BlobNoneFilter()
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(1))
+        self.assertTrue(filter_spec.should_include_tree(100))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        filter_spec = BlobNoneFilter()
+        self.assertEqual("blob:none", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filter_spec = BlobNoneFilter()
+        self.assertEqual("BlobNoneFilter()", repr(filter_spec))
+
+
+class BlobLimitFilterTests(TestCase):
+    """Test BlobLimitFilter class."""
+
+    def test_should_include_blob_under_limit(self):
+        """Test that blobs under limit are included."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertTrue(filter_spec.should_include_blob(0))
+        self.assertTrue(filter_spec.should_include_blob(512))
+        self.assertTrue(filter_spec.should_include_blob(1024))
+
+    def test_should_include_blob_over_limit(self):
+        """Test that blobs over limit are excluded."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertFalse(filter_spec.should_include_blob(1025))
+        self.assertFalse(filter_spec.should_include_blob(2048))
+
+    def test_should_include_tree(self):
+        """Test that BlobLimitFilter includes all trees."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(100))
+
+    def test_to_spec_string_bytes(self):
+        """Test conversion to spec string with bytes."""
+        filter_spec = BlobLimitFilter(100)
+        self.assertEqual("blob:limit=100", filter_spec.to_spec_string())
+
+    def test_to_spec_string_kb(self):
+        """Test conversion to spec string with KB."""
+        filter_spec = BlobLimitFilter(10 * 1024)
+        self.assertEqual("blob:limit=10k", filter_spec.to_spec_string())
+
+    def test_to_spec_string_mb(self):
+        """Test conversion to spec string with MB."""
+        filter_spec = BlobLimitFilter(5 * 1024 * 1024)
+        self.assertEqual("blob:limit=5m", filter_spec.to_spec_string())
+
+    def test_to_spec_string_gb(self):
+        """Test conversion to spec string with GB."""
+        filter_spec = BlobLimitFilter(2 * 1024 * 1024 * 1024)
+        self.assertEqual("blob:limit=2g", filter_spec.to_spec_string())
+
+    def test_to_spec_string_not_round(self):
+        """Test conversion to spec string with non-round size."""
+        filter_spec = BlobLimitFilter(1500)
+        self.assertEqual("blob:limit=1500", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filter_spec = BlobLimitFilter(1024)
+        self.assertEqual("BlobLimitFilter(limit=1024)", repr(filter_spec))
+
+
+class TreeDepthFilterTests(TestCase):
+    """Test TreeDepthFilter class."""
+
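+    # Depth is counted from the root tree: the root is depth 0, its immediate
+    # subtrees are depth 1, and so on; trees at depth <= max_depth pass.
+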
+    def test_should_include_blob(self):
+        """Test that TreeDepthFilter includes all blobs."""
+        filter_spec = TreeDepthFilter(0)
+        self.assertTrue(filter_spec.should_include_blob(0))
+        self.assertTrue(filter_spec.should_include_blob(1024))
+
+    def test_should_include_tree_at_depth(self):
+        """Test that trees at or below max_depth are included."""
+        filter_spec = TreeDepthFilter(2)
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(1))
+        self.assertTrue(filter_spec.should_include_tree(2))
+
+    def test_should_include_tree_beyond_depth(self):
+        """Test that trees beyond max_depth are excluded."""
+        filter_spec = TreeDepthFilter(2)
+        self.assertFalse(filter_spec.should_include_tree(3))
+        self.assertFalse(filter_spec.should_include_tree(10))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        filter_spec = TreeDepthFilter(3)
+        self.assertEqual("tree:3", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filter_spec = TreeDepthFilter(2)
+        self.assertEqual("TreeDepthFilter(max_depth=2)", repr(filter_spec))
+
+
+class SparseOidFilterTests(TestCase):
+    """Test SparseOidFilter class."""
+
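+    # sparse:oid reads gitignore-style patterns (the sparse-checkout format)
+    # from the blob identified by the given object ID.
+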
+    def test_should_include_blob(self):
+        """Test that SparseOidFilter includes all blobs."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        self.assertTrue(filter_spec.should_include_blob(0))
+        self.assertTrue(filter_spec.should_include_blob(1024))
+
+    def test_should_include_tree(self):
+        """Test that SparseOidFilter includes all trees."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        self.assertTrue(filter_spec.should_include_tree(0))
+        self.assertTrue(filter_spec.should_include_tree(10))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        expected = "sparse:oid=1234567890abcdef1234567890abcdef12345678"
+        self.assertEqual(expected, filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+        self.assertIn("SparseOidFilter", repr(filter_spec))
+        self.assertIn("1234567890abcdef1234567890abcdef12345678", repr(filter_spec))
+
+    def test_load_patterns_from_blob(self):
+        """Test loading sparse patterns from a blob object."""
+        # Create a sparse patterns blob
+        patterns = b"*.txt\n!*.log\n/src/\n"
+        blob = Blob.from_string(patterns)
+
+        object_store = MemoryObjectStore()
+        object_store.add_object(blob)
+
+        filter_spec = SparseOidFilter(blob.id, object_store=object_store)
+        filter_spec._load_patterns()
+
+        # Verify patterns were loaded
+        self.assertIsNotNone(filter_spec._patterns)
+        self.assertEqual(3, len(filter_spec._patterns))
+
+    def test_load_patterns_missing_blob(self):
+        """Test error when sparse blob is not found."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        object_store = MemoryObjectStore()
+
+        filter_spec = SparseOidFilter(oid, object_store=object_store)
+
+        with self.assertRaises(ValueError) as cm:
+            filter_spec._load_patterns()
+        self.assertIn("not found", str(cm.exception))
+
+    def test_load_patterns_not_a_blob(self):
+        """Test error when sparse OID points to non-blob object."""
+        tree = Tree()
+        object_store = MemoryObjectStore()
+        object_store.add_object(tree)
+
+        filter_spec = SparseOidFilter(tree.id, object_store=object_store)
+
+        with self.assertRaises(ValueError) as cm:
+            filter_spec._load_patterns()
+        self.assertIn("not a blob", str(cm.exception))
+
+    def test_load_patterns_without_object_store(self):
+        """Test error when trying to load patterns without object store."""
+        oid = b"1234567890abcdef1234567890abcdef12345678"
+        filter_spec = SparseOidFilter(oid)
+
+        with self.assertRaises(ValueError) as cm:
+            filter_spec._load_patterns()
+        self.assertIn("without an object store", str(cm.exception))
+
+    def test_should_include_path_matching(self):
+        """Test path matching with sparse patterns."""
+        # Create a sparse patterns blob: include *.txt files
+        patterns = b"*.txt\n"
+        blob = Blob.from_string(patterns)
+
+        object_store = MemoryObjectStore()
+        object_store.add_object(blob)
+
+        filter_spec = SparseOidFilter(blob.id, object_store=object_store)
+
+        # .txt files should be included
+        self.assertTrue(filter_spec.should_include_path("readme.txt"))
+        self.assertTrue(filter_spec.should_include_path("docs/file.txt"))
+
+        # Other files should not be included
+        self.assertFalse(filter_spec.should_include_path("readme.md"))
+        self.assertFalse(filter_spec.should_include_path("script.py"))
+
+    def test_should_include_path_negation(self):
+        """Test path matching with negation patterns."""
+        # Include all .txt files except logs
+        patterns = b"*.txt\n!*.log\n"
+        blob = Blob.from_string(patterns)
+
+        object_store = MemoryObjectStore()
+        object_store.add_object(blob)
+
+        filter_spec = SparseOidFilter(blob.id, object_store=object_store)
+
+        # .txt files should be included
+        self.assertTrue(filter_spec.should_include_path("readme.txt"))
+
+        # But .log files should be excluded by the '!*.log' negation pattern.
+        # Note: this depends on pattern order and the sparse_patterns
+        # implementation.
+        self.assertFalse(filter_spec.should_include_path("debug.log"))
+
+
+class CombineFilterTests(TestCase):
+    """Test CombineFilter class."""
+
+    def test_should_include_blob_all_allow(self):
+        """Test that blob is included when all filters allow it."""
+        filters = [BlobLimitFilter(1024), BlobLimitFilter(2048)]
+        filter_spec = CombineFilter(filters)
+        self.assertTrue(filter_spec.should_include_blob(512))
+
+    def test_should_include_blob_one_denies(self):
+        """Test that blob is excluded when one filter denies it."""
+        filters = [BlobLimitFilter(1024), BlobNoneFilter()]
+        filter_spec = CombineFilter(filters)
+        self.assertFalse(filter_spec.should_include_blob(512))
+
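+    # CombineFilter uses AND semantics: an object is included only if every
+    # sub-filter includes it, as the two blob tests above demonstrate.
+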
+    def test_should_include_tree_all_allow(self):
+        """Test that tree is included when all filters allow it."""
+        filters = [TreeDepthFilter(2), TreeDepthFilter(3)]
+        filter_spec = CombineFilter(filters)
+        self.assertTrue(filter_spec.should_include_tree(1))
+
+    def test_should_include_tree_one_denies(self):
+        """Test that tree is excluded when one filter denies it."""
+        filters = [TreeDepthFilter(2), TreeDepthFilter(1)]
+        filter_spec = CombineFilter(filters)
+        self.assertFalse(filter_spec.should_include_tree(2))
+
+    def test_to_spec_string(self):
+        """Test conversion back to spec string."""
+        filters = [BlobNoneFilter(), TreeDepthFilter(0)]
+        filter_spec = CombineFilter(filters)
+        self.assertEqual("combine:blob:none+tree:0", filter_spec.to_spec_string())
+
+    def test_repr(self):
+        """Test repr output."""
+        filters = [BlobNoneFilter()]
+        filter_spec = CombineFilter(filters)
+        self.assertIn("CombineFilter", repr(filter_spec))
+
+
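+# filter_pack_objects() applies a filter to a flat list of object IDs, looking
+# each one up in the store and keeping or dropping it by object type and size
+# alone (no path information). A typical composition with parse_filter_spec():
+#
+#     filter_spec = parse_filter_spec("blob:limit=1m")
+#     filtered = filter_pack_objects(store, object_ids, filter_spec)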
+class FilterPackObjectsTests(TestCase):
+    """Test filter_pack_objects function."""
+
+    def setUp(self):
+        super().setUp()
+        self.store = MemoryObjectStore()
+
+        # Create test objects
+        self.small_blob = Blob.from_string(b"small")
+        self.large_blob = Blob.from_string(b"x" * 2000)
+        self.tree = Tree()
+        self.commit = make_commit(tree=self.tree.id)
+
+        # Add objects to store
+        self.store.add_object(self.small_blob)
+        self.store.add_object(self.large_blob)
+        self.store.add_object(self.tree)
+        self.store.add_object(self.commit)
+
+    def test_filter_blob_none(self):
+        """Test that blob:none filter excludes all blobs."""
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+            self.commit.id,
+        ]
+
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should exclude both blobs but keep tree and commit
+        self.assertNotIn(self.small_blob.id, filtered)
+        self.assertNotIn(self.large_blob.id, filtered)
+        self.assertIn(self.tree.id, filtered)
+        self.assertIn(self.commit.id, filtered)
+
+    def test_filter_blob_limit(self):
+        """Test that blob:limit filter excludes blobs over size limit."""
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+        ]
+
+        # Set limit to 100 bytes
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should keep small blob but exclude large blob
+        self.assertIn(self.small_blob.id, filtered)
+        self.assertNotIn(self.large_blob.id, filtered)
+        self.assertIn(self.tree.id, filtered)
+
+    def test_filter_no_filter_keeps_all(self):
+        """Test that a fully permissive filter keeps all objects."""
+        # Create a filter that includes everything
+        filter_spec = BlobLimitFilter(10000)  # Large limit
+
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+            self.commit.id,
+        ]
+
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # All objects should be included
+        self.assertEqual(len(filtered), len(object_ids))
+        for oid in object_ids:
+            self.assertIn(oid, filtered)
+
+    def test_filter_missing_object(self):
+        """Test that missing objects are skipped without error."""
+        fake_id = ObjectID(b"0" * 40)
+        object_ids = [fake_id, self.small_blob.id]
+
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should skip the missing object
+        self.assertNotIn(fake_id, filtered)
+
+    def test_filter_combine(self):
+        """Test combined filters."""
+        object_ids = [
+            self.small_blob.id,
+            self.large_blob.id,
+            self.tree.id,
+        ]
+
+        # Combine blob:limit with another filter
+        filter_spec = CombineFilter(
+            [
+                BlobLimitFilter(100),
+                BlobNoneFilter(),  # This will exclude ALL blobs
+            ]
+        )
+
+        filtered = filter_pack_objects(self.store, object_ids, filter_spec)
+
+        # Should exclude all blobs due to BlobNoneFilter
+        self.assertNotIn(self.small_blob.id, filtered)
+        self.assertNotIn(self.large_blob.id, filtered)
+        self.assertIn(self.tree.id, filtered)
+
+
+class PartialCloneIntegrationTests(TestCase):
+    """Integration tests for partial clone with real repositories."""
+
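+    # Unlike the MemoryObjectStore tests above, these tests run against an
+    # on-disk repository created in a temporary directory.
+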
+    def setUp(self):
+        super().setUp()
+        self.repo_dir = tempfile.mkdtemp()
+        self.addCleanup(self._cleanup)
+        self.repo = Repo.init(self.repo_dir)
+        self.addCleanup(self.repo.close)
+
+    def _cleanup(self):
+        """Clean up test repository."""
+        if os.path.exists(self.repo_dir):
+            shutil.rmtree(self.repo_dir)
+
+    def test_blob_none_filter_with_real_repo(self):
+        """Test that blob:none filter excludes blobs in a real repository."""
+        # Create a tree with files
+        tree = Tree()
+
+        # Add some blobs to the tree
+        blob1 = Blob.from_string(b"file1 content")
+        blob2 = Blob.from_string(b"file2 content")
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+
+        # Add objects to repo
+        self.repo.object_store.add_object(blob1)
+        self.repo.object_store.add_object(blob2)
+        self.repo.object_store.add_object(tree)
+
+        # Create commit
+        commit = make_commit(tree=tree.id, message=b"Test commit")
+        self.repo.object_store.add_object(commit)
+
+        # Get all objects
+        object_ids = [blob1.id, blob2.id, tree.id, commit.id]
+
+        # Apply blob:none filter
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects(self.repo.object_store, object_ids, filter_spec)
+
+        # Verify blobs are excluded
+        self.assertNotIn(blob1.id, filtered)
+        self.assertNotIn(blob2.id, filtered)
+        # But tree and commit are included
+        self.assertIn(tree.id, filtered)
+        self.assertIn(commit.id, filtered)
+
+        # Verify we have only 2 objects (tree + commit)
+        self.assertEqual(2, len(filtered))
+
+    def test_blob_limit_filter_with_mixed_sizes(self):
+        """Test blob:limit filter with mixed blob sizes."""
+        tree = Tree()
+
+        # Create blobs of different sizes
+        small_blob = Blob.from_string(b"small")  # 5 bytes
+        medium_blob = Blob.from_string(b"x" * 50)  # 50 bytes
+        large_blob = Blob.from_string(b"y" * 500)  # 500 bytes
+
+        tree.add(b"small.txt", 0o100644, small_blob.id)
+        tree.add(b"medium.txt", 0o100644, medium_blob.id)
+        tree.add(b"large.txt", 0o100644, large_blob.id)
+
+        # Add to repo
+        self.repo.object_store.add_object(small_blob)
+        self.repo.object_store.add_object(medium_blob)
+        self.repo.object_store.add_object(large_blob)
+        self.repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id)
+        self.repo.object_store.add_object(commit)
+
+        # Test with 100 byte limit
+        object_ids = [
+            small_blob.id,
+            medium_blob.id,
+            large_blob.id,
+            tree.id,
+            commit.id,
+        ]
+
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects(self.repo.object_store, object_ids, filter_spec)
+
+        # Small and medium should be included
+        self.assertIn(small_blob.id, filtered)
+        self.assertIn(medium_blob.id, filtered)
+        # Large should be excluded
+        self.assertNotIn(large_blob.id, filtered)
+        # Tree and commit included
+        self.assertIn(tree.id, filtered)
+        self.assertIn(commit.id, filtered)
+
+    def test_combined_filter_integration(self):
+        """Test combined filters in a real scenario."""
+        tree = Tree()
+
+        blob1 = Blob.from_string(b"content1")
+        blob2 = Blob.from_string(b"x" * 1000)
+
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+
+        self.repo.object_store.add_object(blob1)
+        self.repo.object_store.add_object(blob2)
+        self.repo.object_store.add_object(tree)
+
+        commit = make_commit(tree=tree.id)
+        self.repo.object_store.add_object(commit)
+
+        # Combine: limit to 500 bytes, but also apply blob:none.
+        # Combined filters use AND semantics, so blob:none excludes ALL blobs
+        # regardless of the size limit.
+        filter_spec = CombineFilter(
+            [
+                BlobLimitFilter(500),
+                BlobNoneFilter(),
+            ]
+        )
+
+        object_ids = [blob1.id, blob2.id, tree.id, commit.id]
+        filtered = filter_pack_objects(self.repo.object_store, object_ids, filter_spec)
+
+        # All blobs excluded
+        self.assertNotIn(blob1.id, filtered)
+        self.assertNotIn(blob2.id, filtered)
+        # Only tree and commit
+        self.assertEqual(2, len(filtered))
+
+
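+# filter_pack_objects_with_paths() walks the object graph from the given wants
+# (commits or trees), tracking each object's path and tree depth along the way,
+# so that depth-based (tree:<n>) and path-based (sparse:oid) filters can apply
+# in addition to the size-based blob filters, as the tests below exercise.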
+class FilterPackObjectsWithPathsTests(TestCase):
+    """Test filter_pack_objects_with_paths function."""
+
+    def setUp(self):
+        super().setUp()
+        self.object_store = MemoryObjectStore()
+
+    def test_tree_depth_filtering(self):
+        """Test filtering by tree depth."""
+        # Create a nested tree structure:
+        # root/
+        #   file1.txt (blob1)
+        #   dir1/
+        #     file2.txt (blob2)
+        #     dir2/
+        #       file3.txt (blob3)
+
+        blob1 = Blob.from_string(b"file1 content")
+        blob2 = Blob.from_string(b"file2 content")
+        blob3 = Blob.from_string(b"file3 content")
+
+        # deepest tree (dir2)
+        tree_dir2 = Tree()
+        tree_dir2.add(b"file3.txt", 0o100644, blob3.id)
+
+        # middle tree (dir1)
+        tree_dir1 = Tree()
+        tree_dir1.add(b"file2.txt", 0o100644, blob2.id)
+        tree_dir1.add(b"dir2", 0o040000, tree_dir2.id)
+
+        # root tree
+        tree_root = Tree()
+        tree_root.add(b"file1.txt", 0o100644, blob1.id)
+        tree_root.add(b"dir1", 0o040000, tree_dir1.id)
+
+        # Add all objects to store
+        for obj in [blob1, blob2, blob3, tree_dir2, tree_dir1, tree_root]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree_root.id)
+        self.object_store.add_object(commit)
+
+        # Filter with depth=1 (root + 1 level deep)
+        filter_spec = TreeDepthFilter(1)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include: commit, tree_root (depth 0), tree_dir1 (depth 1),
+        # blob1 (in root), blob2 (in dir1)
+        # Should exclude: tree_dir2 (depth 2), blob3 (in dir2)
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree_root.id, filtered)
+        self.assertIn(tree_dir1.id, filtered)
+        self.assertIn(blob1.id, filtered)
+        self.assertIn(blob2.id, filtered)
+        self.assertNotIn(tree_dir2.id, filtered)
+        self.assertNotIn(blob3.id, filtered)
+
+    def test_sparse_oid_path_filtering(self):
+        """Test filtering by sparse checkout patterns."""
+        # Create sparse patterns blob that includes only *.txt files
+        patterns = b"*.txt\n"
+        patterns_blob = Blob.from_string(patterns)
+        self.object_store.add_object(patterns_blob)
+
+        # Create a tree with mixed file types:
+        # root/
+        #   readme.txt (should be included)
+        #   script.py (should be excluded)
+        #   docs/
+        #     guide.txt (should be included)
+        #     image.png (should be excluded)
+
+        blob_readme = Blob.from_string(b"readme content")
+        blob_script = Blob.from_string(b"script content")
+        blob_guide = Blob.from_string(b"guide content")
+        blob_image = Blob.from_string(b"image content")
+
+        tree_docs = Tree()
+        tree_docs.add(b"guide.txt", 0o100644, blob_guide.id)
+        tree_docs.add(b"image.png", 0o100644, blob_image.id)
+
+        tree_root = Tree()
+        tree_root.add(b"readme.txt", 0o100644, blob_readme.id)
+        tree_root.add(b"script.py", 0o100644, blob_script.id)
+        tree_root.add(b"docs", 0o040000, tree_docs.id)
+
+        # Add all objects
+        for obj in [
+            blob_readme,
+            blob_script,
+            blob_guide,
+            blob_image,
+            tree_docs,
+            tree_root,
+        ]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree_root.id)
+        self.object_store.add_object(commit)
+
+        # Create sparse filter
+        filter_spec = SparseOidFilter(patterns_blob.id, object_store=self.object_store)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include: commit, trees, and .txt blobs
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree_root.id, filtered)
+        self.assertIn(tree_docs.id, filtered)
+        self.assertIn(blob_readme.id, filtered)
+        self.assertIn(blob_guide.id, filtered)
+
+        # Should exclude: non-.txt blobs
+        self.assertNotIn(blob_script.id, filtered)
+        self.assertNotIn(blob_image.id, filtered)
+
+    def test_blob_size_filtering_with_paths(self):
+        """Test that blob size filtering still works with path tracking."""
+        # Create blobs of different sizes
+        blob_small = Blob.from_string(b"small")  # 5 bytes
+        blob_large = Blob.from_string(b"x" * 1000)  # 1000 bytes
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, blob_small.id)
+        tree.add(b"large.txt", 0o100644, blob_large.id)
+
+        for obj in [blob_small, blob_large, tree]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree.id)
+        self.object_store.add_object(commit)
+
+        # Filter with 100 byte limit
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include small blob but not large
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree.id, filtered)
+        self.assertIn(blob_small.id, filtered)
+        self.assertNotIn(blob_large.id, filtered)
+
+    def test_combined_sparse_and_size_filter(self):
+        """Test combining sparse patterns with blob size limits."""
+        # Create sparse patterns: only *.txt files
+        patterns = b"*.txt\n"
+        patterns_blob = Blob.from_string(patterns)
+        self.object_store.add_object(patterns_blob)
+
+        # Create files:
+        # - small.txt (5 bytes, .txt) -> should be included
+        # - large.txt (1000 bytes, .txt) -> excluded by size
+        # - small.py (5 bytes, .py) -> excluded by pattern
+        # - large.py (1000 bytes, .py) -> excluded by both
+
+        blob_small_txt = Blob.from_string(b"small txt")
+        blob_large_txt = Blob.from_string(b"x" * 1000)
+        blob_small_py = Blob.from_string(b"small py")
+        blob_large_py = Blob.from_string(b"y" * 1000)
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, blob_small_txt.id)
+        tree.add(b"large.txt", 0o100644, blob_large_txt.id)
+        tree.add(b"small.py", 0o100644, blob_small_py.id)
+        tree.add(b"large.py", 0o100644, blob_large_py.id)
+
+        for obj in [blob_small_txt, blob_large_txt, blob_small_py, blob_large_py, tree]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree.id)
+        self.object_store.add_object(commit)
+
+        # Combine: sparse filter + 100 byte limit
+        filter_spec = CombineFilter(
+            [
+                SparseOidFilter(patterns_blob.id, object_store=self.object_store),
+                BlobLimitFilter(100),
+            ]
+        )
+
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Only small.txt should be included (matches pattern AND size limit)
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree.id, filtered)
+        self.assertIn(blob_small_txt.id, filtered)
+        self.assertNotIn(blob_large_txt.id, filtered)  # Too large
+        self.assertNotIn(blob_small_py.id, filtered)  # Wrong pattern
+        self.assertNotIn(blob_large_py.id, filtered)  # Both wrong
+
+    def test_blob_none_filter_with_paths(self):
+        """Test that blob:none excludes all blobs with path tracking."""
+        blob1 = Blob.from_string(b"content1")
+        blob2 = Blob.from_string(b"content2")
+
+        tree = Tree()
+        tree.add(b"file1.txt", 0o100644, blob1.id)
+        tree.add(b"file2.txt", 0o100644, blob2.id)
+
+        for obj in [blob1, blob2, tree]:
+            self.object_store.add_object(obj)
+
+        commit = make_commit(tree=tree.id)
+        self.object_store.add_object(commit)
+
+        filter_spec = BlobNoneFilter()
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [commit.id], filter_spec
+        )
+
+        # Should include commit and tree but no blobs
+        self.assertIn(commit.id, filtered)
+        self.assertIn(tree.id, filtered)
+        self.assertNotIn(blob1.id, filtered)
+        self.assertNotIn(blob2.id, filtered)
+
+    def test_direct_tree_want(self):
+        """Test filtering when a tree (not a commit) is wanted."""
+        blob_small = Blob.from_string(b"small")
+        blob_large = Blob.from_string(b"x" * 1000)
+
+        tree = Tree()
+        tree.add(b"small.txt", 0o100644, blob_small.id)
+        tree.add(b"large.txt", 0o100644, blob_large.id)
+
+        for obj in [blob_small, blob_large, tree]:
+            self.object_store.add_object(obj)
+
+        # Want the tree directly (not via commit)
+        filter_spec = BlobLimitFilter(100)
+        filtered = filter_pack_objects_with_paths(
+            self.object_store, [tree.id], filter_spec
+        )
+
+        # Should include tree and small blob
+        self.assertIn(tree.id, filtered)
+        self.assertIn(blob_small.id, filtered)
+        self.assertNotIn(blob_large.id, filtered)