Преглед изворни кода

Add filter specification parsing for partial clone support

Jelmer Vernooij пре 3 недеља
родитељ
комит
a742846e24
6 измењених фајлова са 715 додато и 0 уклоњено
  1. 293 0
      dulwich/partial_clone.py
  2. 33 0
      dulwich/protocol.py
  3. 26 0
      dulwich/server.py
  4. 309 0
      tests/test_partial_clone.py
  5. 31 0
      tests/test_protocol.py
  6. 23 0
      tests/test_server.py

+ 293 - 0
dulwich/partial_clone.py

@@ -0,0 +1,293 @@
+# partial_clone.py -- Partial clone filter specification handling
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Partial clone filter specification parsing and handling.
+
+This module implements Git's partial clone filter specifications as documented in:
+https://git-scm.com/docs/rev-list-options#Documentation/rev-list-options.txt---filterltfilter-specgt
+
+Supported filter specs:
+- blob:none - Exclude all blobs
+- blob:limit=<n>[kmg] - Exclude blobs larger than n bytes/KB/MB/GB
+- tree:<depth> - Exclude trees beyond specified depth
+- sparse:oid=<oid> - Use sparse specification from object
+- combine:<filter>+<filter>+... - Combine multiple filters
+"""
+
+__all__ = [
+    "FilterSpec",
+    "BlobNoneFilter",
+    "BlobLimitFilter",
+    "TreeDepthFilter",
+    "SparseOidFilter",
+    "CombineFilter",
+    "parse_filter_spec",
+]
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .objects import ObjectID, ShaFile
+
+
class FilterSpec(ABC):
    """Abstract base class for partial clone filter specifications.

    Concrete subclasses implement one Git filter-spec variant each and
    answer include/exclude questions for blobs and trees.
    """

    @abstractmethod
    def should_include_blob(self, blob_size: int) -> bool:
        """Decide whether a blob of the given size is included.

        Args:
            blob_size: Blob size in bytes

        Returns:
            True when the blob passes the filter, False when it is excluded
        """
        ...

    @abstractmethod
    def should_include_tree(self, depth: int) -> bool:
        """Decide whether a tree at the given nesting depth is included.

        Args:
            depth: Nesting depth of the tree (0 = root)

        Returns:
            True when the tree passes the filter, False when it is excluded
        """
        ...

    @abstractmethod
    def to_spec_string(self) -> str:
        """Serialize this filter back to its spec-string form.

        Returns:
            Filter specification string (e.g., 'blob:none', 'blob:limit=1m')
        """
        ...
+
+
class BlobNoneFilter(FilterSpec):
    """Filter that excludes all blobs (the ``blob:none`` spec)."""

    def should_include_blob(self, blob_size: int) -> bool:
        """Exclude every blob regardless of size."""
        return False

    def should_include_tree(self, depth: int) -> bool:
        """Include all trees; this filter only affects blobs."""
        return True

    def to_spec_string(self) -> str:
        """Return 'blob:none'."""
        return "blob:none"

    def __repr__(self) -> str:
        # Plain literal: the previous f-string had no placeholders (ruff F541).
        return "BlobNoneFilter()"
+
+
class BlobLimitFilter(FilterSpec):
    """Filter that excludes blobs above a size threshold (``blob:limit=<n>``)."""

    # Unit suffixes ordered largest-first so the biggest exact unit wins
    # when serializing the limit back to a spec string.
    _UNITS = (
        ("g", 1024 * 1024 * 1024),
        ("m", 1024 * 1024),
        ("k", 1024),
    )

    def __init__(self, limit: int) -> None:
        """Create the filter.

        Args:
            limit: Largest blob size, in bytes, that is still included
        """
        self.limit = limit

    def should_include_blob(self, blob_size: int) -> bool:
        """Keep blobs whose size does not exceed the limit."""
        return self.limit >= blob_size

    def should_include_tree(self, depth: int) -> bool:
        """Trees are never filtered by a blob size limit."""
        return True

    def to_spec_string(self) -> str:
        """Return 'blob:limit=<size>' using the largest exact unit suffix."""
        for suffix, factor in self._UNITS:
            if self.limit >= factor and self.limit % factor == 0:
                return f"blob:limit={self.limit // factor}{suffix}"
        # No unit divides the limit evenly; emit the raw byte count.
        return f"blob:limit={self.limit}"

    def __repr__(self) -> str:
        return f"BlobLimitFilter(limit={self.limit})"
+
+
class TreeDepthFilter(FilterSpec):
    """Filter that drops trees nested deeper than a maximum depth (``tree:<n>``)."""

    def __init__(self, max_depth: int) -> None:
        """Create the filter.

        Args:
            max_depth: Deepest tree level to keep (0 keeps only the root tree)
        """
        self.max_depth = max_depth

    def should_include_blob(self, blob_size: int) -> bool:
        """Blobs are never filtered by tree depth."""
        return True

    def should_include_tree(self, depth: int) -> bool:
        """Keep trees whose depth does not exceed the configured maximum."""
        return not depth > self.max_depth

    def to_spec_string(self) -> str:
        """Return 'tree:<depth>'."""
        return f"tree:{self.max_depth}"

    def __repr__(self) -> str:
        return f"TreeDepthFilter(max_depth={self.max_depth})"
+
+
class SparseOidFilter(FilterSpec):
    """Filter driven by a sparse specification stored in a repository object."""

    def __init__(self, oid: "ObjectID") -> None:
        """Create the filter.

        Args:
            oid: Object ID naming the sparse specification object
        """
        self.oid = oid

    def _oid_text(self) -> str:
        # The OID may be held as bytes or str; normalize to str for display.
        if isinstance(self.oid, bytes):
            return self.oid.decode("ascii")
        return self.oid

    def should_include_blob(self, blob_size: int) -> bool:
        """All blobs pass; sparse filtering is path-based, not size-based."""
        return True

    def should_include_tree(self, depth: int) -> bool:
        """All trees pass; sparse filtering is path-based, not depth-based."""
        return True

    def to_spec_string(self) -> str:
        """Return 'sparse:oid=<oid>'."""
        return f"sparse:oid={self._oid_text()}"

    def __repr__(self) -> str:
        return f"SparseOidFilter(oid={self._oid_text()!r})"
+
+
class CombineFilter(FilterSpec):
    """Filter combining several sub-filters; every one must agree to include."""

    def __init__(self, filters: list[FilterSpec]) -> None:
        """Create the combined filter.

        Args:
            filters: Sub-filters whose decisions are AND-ed together
        """
        self.filters = filters

    def should_include_blob(self, blob_size: int) -> bool:
        """A blob is kept only when every sub-filter keeps it."""
        for sub in self.filters:
            if not sub.should_include_blob(blob_size):
                return False
        return True

    def should_include_tree(self, depth: int) -> bool:
        """A tree is kept only when every sub-filter keeps it."""
        for sub in self.filters:
            if not sub.should_include_tree(depth):
                return False
        return True

    def to_spec_string(self) -> str:
        """Return 'combine:<filter1>+<filter2>+...'."""
        parts = [sub.to_spec_string() for sub in self.filters]
        return "combine:" + "+".join(parts)

    def __repr__(self) -> str:
        return f"CombineFilter(filters={self.filters!r})"
+
+
+def _parse_size(size_str: str) -> int:
+    """Parse a size specification like '100', '10k', '5m', '1g'.
+
+    Args:
+        size_str: Size string with optional unit suffix
+
+    Returns:
+        Size in bytes
+
+    Raises:
+        ValueError: If size_str is not a valid size specification
+    """
+    size_str = size_str.lower()
+    multipliers = {"k": 1024, "m": 1024 * 1024, "g": 1024 * 1024 * 1024}
+
+    if size_str[-1] in multipliers:
+        try:
+            value = int(size_str[:-1])
+            return value * multipliers[size_str[-1]]
+        except ValueError:
+            raise ValueError(f"Invalid size specification: {size_str}")
+    else:
+        try:
+            return int(size_str)
+        except ValueError:
+            raise ValueError(f"Invalid size specification: {size_str}")
+
+
def parse_filter_spec(spec: str | bytes) -> FilterSpec:
    """Parse a filter specification string.

    Args:
        spec: Filter specification (e.g. 'blob:none', 'blob:limit=1m',
            'tree:2', 'sparse:oid=<oid>', 'combine:<f1>+<f2>')

    Returns:
        Parsed FilterSpec object

    Raises:
        ValueError: If spec is not a valid filter specification
    """
    if isinstance(spec, bytes):
        spec = spec.decode("utf-8")

    spec = spec.strip()

    if spec == "blob:none":
        return BlobNoneFilter()
    elif spec.startswith("blob:limit="):
        return BlobLimitFilter(_parse_size(spec.removeprefix("blob:limit=")))
    elif spec.startswith("tree:"):
        depth_str = spec.removeprefix("tree:")
        try:
            depth = int(depth_str)
        except ValueError:
            # Suppress the chained int() traceback; ours is the useful one.
            raise ValueError(f"Invalid tree depth: {depth_str}") from None
        if depth < 0:
            # Depth counts tree levels below the root; it cannot be negative.
            raise ValueError(f"Invalid tree depth: {depth_str}")
        return TreeDepthFilter(depth)
    elif spec.startswith("sparse:oid="):
        # OIDs are handled as ASCII bytes throughout dulwich.
        return SparseOidFilter(spec.removeprefix("sparse:oid=").encode("ascii"))
    elif spec.startswith("combine:"):
        parts = spec.removeprefix("combine:").split("+")
        # Each part is itself a full filter spec; parse recursively.
        return CombineFilter([parse_filter_spec(part) for part in parts])
    else:
        raise ValueError(f"Unknown filter specification: {spec}")

+ 33 - 0
dulwich/protocol.py

@@ -84,6 +84,7 @@ __all__ = [
     "extract_capabilities",
     "extract_capability_names",
     "extract_want_line_capabilities",
+    "find_capability",
     "format_ack_line",
     "format_capability_line",
     "format_cmd_pkt",
@@ -722,6 +723,38 @@ def ack_type(capabilities: Iterable[bytes]) -> int:
     return SINGLE_ACK
 
 
+def find_capability(capabilities: Iterable[bytes], *capability_names: bytes) -> bytes | None:
+    """Find a capability value in a list of capabilities.
+
+    This function looks for capabilities that may include arguments after an equals sign
+    and returns only the value part (after the '='). For capabilities without values,
+    returns the capability name itself.
+
+    Args:
+      capabilities: List of capability strings
+      capability_names: Capability name(s) to search for
+
+    Returns:
+      The value after '=' if found, or the capability name if no '=', or None if not found
+
+    Example:
+      >>> caps = [b'filter=blob:none', b'agent=git/2.0', b'thin-pack']
+      >>> find_capability(caps, b'filter')
+      b'blob:none'
+      >>> find_capability(caps, b'thin-pack')
+      b'thin-pack'
+      >>> find_capability(caps, b'missing')
+      None
+    """
+    for cap in capabilities:
+        for name in capability_names:
+            if cap == name:
+                return cap
+            elif cap.startswith(name + b"="):
+                return cap[len(name) + 1:]
+    return None
+
+
 class BufferedPktLineWriter:
     """Writer that wraps its data in pkt-lines and has an independent buffer.
 

+ 26 - 0
dulwich/server.py

@@ -100,6 +100,7 @@ from .errors import (
 from .object_store import MissingObjectFinder, PackBasedObjectStore, find_shallow
 from .objects import Commit, ObjectID, Tree, valid_hexsha
 from .pack import ObjectContainer, write_pack_from_container
+from .partial_clone import FilterSpec, parse_filter_spec
 from .protocol import (
     CAPABILITIES_REF,
     CAPABILITY_AGENT,
@@ -139,6 +140,7 @@ from .protocol import (
     capability_object_format,
     extract_capabilities,
     extract_want_line_capabilities,
+    find_capability,
     format_ack_line,
     format_ref_line,
     format_shallow_line,
@@ -379,6 +381,13 @@ class PackHandler(Handler):
         for cap in caps:
             if cap.startswith(CAPABILITY_AGENT + b"="):
                 continue
+            if cap.startswith(CAPABILITY_FILTER + b"="):
+                # Filter capability can have a value (e.g., filter=blob:none)
+                if CAPABILITY_FILTER not in allowable_caps:
+                    raise GitProtocolError(
+                        f"Client asked for capability {cap!r} that was not advertised."
+                    )
+                continue
             if cap not in allowable_caps:
                 raise GitProtocolError(
                     f"Client asked for capability {cap!r} that was not advertised."
@@ -439,6 +448,8 @@ class UploadPackHandler(PackHandler):
         # being processed, and the client is not accepting any other
         # data (such as side-band, see the progress method here).
         self._processing_have_lines = False
+        # Filter specification for partial clone support
+        self.filter_spec = None
 
     def capabilities(self) -> list[bytes]:
         """Return the list of capabilities supported by upload-pack.
@@ -469,6 +480,21 @@ class UploadPackHandler(PackHandler):
             CAPABILITY_OFS_DELTA,
         )
 
+    def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
+        """Set client capabilities and parse filter specification if present.
+
+        Args:
+            caps: List of capability strings from the client
+        """
+        super().set_client_capabilities(caps)
+        # Parse filter specification if present
+        filter_spec_bytes = find_capability(caps, CAPABILITY_FILTER)
+        if filter_spec_bytes:
+            try:
+                self.filter_spec = parse_filter_spec(filter_spec_bytes)
+            except ValueError as e:
+                raise GitProtocolError(f"Invalid filter specification: {e}")
+
     def progress(self, message: bytes) -> None:
         """Send a progress message to the client.
 

+ 309 - 0
tests/test_partial_clone.py

@@ -0,0 +1,309 @@
+# test_partial_clone.py -- Tests for partial clone filter specifications
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for partial clone filter specifications."""
+
+from dulwich.partial_clone import (
+    BlobLimitFilter,
+    BlobNoneFilter,
+    CombineFilter,
+    SparseOidFilter,
+    TreeDepthFilter,
+    parse_filter_spec,
+)
+
+from . import TestCase
+
+
class ParseFilterSpecTests(TestCase):
    """Test parse_filter_spec function."""

    def test_parse_blob_none(self) -> None:
        """Test parsing 'blob:none' filter."""
        filter_spec = parse_filter_spec("blob:none")
        self.assertIsInstance(filter_spec, BlobNoneFilter)
        self.assertEqual("blob:none", filter_spec.to_spec_string())

    def test_parse_blob_none_bytes(self) -> None:
        """Test parsing 'blob:none' as bytes."""
        filter_spec = parse_filter_spec(b"blob:none")
        self.assertIsInstance(filter_spec, BlobNoneFilter)

    def test_parse_blob_limit_bytes(self) -> None:
        """Test parsing 'blob:limit=100' in bytes."""
        filter_spec = parse_filter_spec("blob:limit=100")
        self.assertIsInstance(filter_spec, BlobLimitFilter)
        self.assertEqual(100, filter_spec.limit)

    def test_parse_blob_limit_kb(self) -> None:
        """Test parsing 'blob:limit=10k'."""
        filter_spec = parse_filter_spec("blob:limit=10k")
        self.assertIsInstance(filter_spec, BlobLimitFilter)
        self.assertEqual(10 * 1024, filter_spec.limit)

    def test_parse_blob_limit_mb(self) -> None:
        """Test parsing 'blob:limit=5m'."""
        filter_spec = parse_filter_spec("blob:limit=5m")
        self.assertIsInstance(filter_spec, BlobLimitFilter)
        self.assertEqual(5 * 1024 * 1024, filter_spec.limit)

    def test_parse_blob_limit_gb(self) -> None:
        """Test parsing 'blob:limit=1g'."""
        filter_spec = parse_filter_spec("blob:limit=1g")
        self.assertIsInstance(filter_spec, BlobLimitFilter)
        self.assertEqual(1024 * 1024 * 1024, filter_spec.limit)

    def test_parse_tree_depth(self) -> None:
        """Test parsing 'tree:0' filter."""
        filter_spec = parse_filter_spec("tree:0")
        self.assertIsInstance(filter_spec, TreeDepthFilter)
        self.assertEqual(0, filter_spec.max_depth)

    def test_parse_tree_depth_nonzero(self) -> None:
        """Test parsing 'tree:3' filter."""
        filter_spec = parse_filter_spec("tree:3")
        self.assertIsInstance(filter_spec, TreeDepthFilter)
        self.assertEqual(3, filter_spec.max_depth)

    def test_parse_sparse_oid(self) -> None:
        """Test parsing 'sparse:oid=<oid>' filter."""
        oid = b"1234567890abcdef1234567890abcdef12345678"
        filter_spec = parse_filter_spec(f"sparse:oid={oid.decode('ascii')}")
        self.assertIsInstance(filter_spec, SparseOidFilter)
        self.assertEqual(oid, filter_spec.oid)

    def test_parse_combine(self) -> None:
        """Test parsing 'combine:blob:none+tree:0' filter."""
        filter_spec = parse_filter_spec("combine:blob:none+tree:0")
        self.assertIsInstance(filter_spec, CombineFilter)
        self.assertEqual(2, len(filter_spec.filters))
        self.assertIsInstance(filter_spec.filters[0], BlobNoneFilter)
        self.assertIsInstance(filter_spec.filters[1], TreeDepthFilter)

    def test_parse_combine_multiple(self) -> None:
        """Test parsing combine filter with 3+ filters."""
        filter_spec = parse_filter_spec("combine:blob:none+tree:0+blob:limit=1m")
        self.assertIsInstance(filter_spec, CombineFilter)
        self.assertEqual(3, len(filter_spec.filters))

    def test_parse_unknown_spec(self) -> None:
        """Test that unknown filter specs raise ValueError."""
        with self.assertRaises(ValueError) as cm:
            parse_filter_spec("unknown:spec")
        self.assertIn("Unknown filter specification", str(cm.exception))

    def test_parse_invalid_tree_depth(self) -> None:
        """Test that invalid tree depth raises ValueError."""
        with self.assertRaises(ValueError) as cm:
            parse_filter_spec("tree:invalid")
        self.assertIn("Invalid tree depth", str(cm.exception))

    def test_parse_invalid_blob_limit(self) -> None:
        """Test that invalid blob limit raises ValueError."""
        with self.assertRaises(ValueError) as cm:
            parse_filter_spec("blob:limit=invalid")
        self.assertIn("Invalid size specification", str(cm.exception))
+
+
class BlobNoneFilterTests(TestCase):
    """Test BlobNoneFilter class."""

    def test_should_include_blob(self) -> None:
        """Test that BlobNoneFilter excludes all blobs."""
        filter_spec = BlobNoneFilter()
        self.assertFalse(filter_spec.should_include_blob(0))
        self.assertFalse(filter_spec.should_include_blob(100))
        self.assertFalse(filter_spec.should_include_blob(1024 * 1024))

    def test_should_include_tree(self) -> None:
        """Test that BlobNoneFilter includes all trees."""
        filter_spec = BlobNoneFilter()
        self.assertTrue(filter_spec.should_include_tree(0))
        self.assertTrue(filter_spec.should_include_tree(1))
        self.assertTrue(filter_spec.should_include_tree(100))

    def test_to_spec_string(self) -> None:
        """Test conversion back to spec string."""
        filter_spec = BlobNoneFilter()
        self.assertEqual("blob:none", filter_spec.to_spec_string())

    def test_repr(self) -> None:
        """Test repr output."""
        filter_spec = BlobNoneFilter()
        self.assertEqual("BlobNoneFilter()", repr(filter_spec))
+
+
class BlobLimitFilterTests(TestCase):
    """Test BlobLimitFilter class."""

    def test_should_include_blob_under_limit(self) -> None:
        """Test that blobs under limit are included."""
        filter_spec = BlobLimitFilter(1024)
        self.assertTrue(filter_spec.should_include_blob(0))
        self.assertTrue(filter_spec.should_include_blob(512))
        # The limit itself is inclusive.
        self.assertTrue(filter_spec.should_include_blob(1024))

    def test_should_include_blob_over_limit(self) -> None:
        """Test that blobs over limit are excluded."""
        filter_spec = BlobLimitFilter(1024)
        self.assertFalse(filter_spec.should_include_blob(1025))
        self.assertFalse(filter_spec.should_include_blob(2048))

    def test_should_include_tree(self) -> None:
        """Test that BlobLimitFilter includes all trees."""
        filter_spec = BlobLimitFilter(1024)
        self.assertTrue(filter_spec.should_include_tree(0))
        self.assertTrue(filter_spec.should_include_tree(100))

    def test_to_spec_string_bytes(self) -> None:
        """Test conversion to spec string with bytes."""
        filter_spec = BlobLimitFilter(100)
        self.assertEqual("blob:limit=100", filter_spec.to_spec_string())

    def test_to_spec_string_kb(self) -> None:
        """Test conversion to spec string with KB."""
        filter_spec = BlobLimitFilter(10 * 1024)
        self.assertEqual("blob:limit=10k", filter_spec.to_spec_string())

    def test_to_spec_string_mb(self) -> None:
        """Test conversion to spec string with MB."""
        filter_spec = BlobLimitFilter(5 * 1024 * 1024)
        self.assertEqual("blob:limit=5m", filter_spec.to_spec_string())

    def test_to_spec_string_gb(self) -> None:
        """Test conversion to spec string with GB."""
        filter_spec = BlobLimitFilter(2 * 1024 * 1024 * 1024)
        self.assertEqual("blob:limit=2g", filter_spec.to_spec_string())

    def test_to_spec_string_not_round(self) -> None:
        """Test conversion to spec string with non-round size."""
        # 1500 is not a multiple of 1024, so no unit suffix is used.
        filter_spec = BlobLimitFilter(1500)
        self.assertEqual("blob:limit=1500", filter_spec.to_spec_string())

    def test_repr(self) -> None:
        """Test repr output."""
        filter_spec = BlobLimitFilter(1024)
        self.assertEqual("BlobLimitFilter(limit=1024)", repr(filter_spec))
+
+
class TreeDepthFilterTests(TestCase):
    """Test TreeDepthFilter class."""

    def test_should_include_blob(self) -> None:
        """Test that TreeDepthFilter includes all blobs."""
        filter_spec = TreeDepthFilter(0)
        self.assertTrue(filter_spec.should_include_blob(0))
        self.assertTrue(filter_spec.should_include_blob(1024))

    def test_should_include_tree_at_depth(self) -> None:
        """Test that trees at or below max_depth are included."""
        filter_spec = TreeDepthFilter(2)
        self.assertTrue(filter_spec.should_include_tree(0))
        self.assertTrue(filter_spec.should_include_tree(1))
        # max_depth itself is inclusive.
        self.assertTrue(filter_spec.should_include_tree(2))

    def test_should_include_tree_beyond_depth(self) -> None:
        """Test that trees beyond max_depth are excluded."""
        filter_spec = TreeDepthFilter(2)
        self.assertFalse(filter_spec.should_include_tree(3))
        self.assertFalse(filter_spec.should_include_tree(10))

    def test_to_spec_string(self) -> None:
        """Test conversion back to spec string."""
        filter_spec = TreeDepthFilter(3)
        self.assertEqual("tree:3", filter_spec.to_spec_string())

    def test_repr(self) -> None:
        """Test repr output."""
        filter_spec = TreeDepthFilter(2)
        self.assertEqual("TreeDepthFilter(max_depth=2)", repr(filter_spec))
+
+
class SparseOidFilterTests(TestCase):
    """Test SparseOidFilter class."""

    def test_should_include_blob(self) -> None:
        """Test that SparseOidFilter includes all blobs."""
        oid = b"1234567890abcdef1234567890abcdef12345678"
        filter_spec = SparseOidFilter(oid)
        self.assertTrue(filter_spec.should_include_blob(0))
        self.assertTrue(filter_spec.should_include_blob(1024))

    def test_should_include_tree(self) -> None:
        """Test that SparseOidFilter includes all trees."""
        oid = b"1234567890abcdef1234567890abcdef12345678"
        filter_spec = SparseOidFilter(oid)
        self.assertTrue(filter_spec.should_include_tree(0))
        self.assertTrue(filter_spec.should_include_tree(10))

    def test_to_spec_string(self) -> None:
        """Test conversion back to spec string."""
        oid = b"1234567890abcdef1234567890abcdef12345678"
        filter_spec = SparseOidFilter(oid)
        # The bytes OID is decoded to ASCII text for the spec string.
        expected = "sparse:oid=1234567890abcdef1234567890abcdef12345678"
        self.assertEqual(expected, filter_spec.to_spec_string())

    def test_repr(self) -> None:
        """Test repr output."""
        oid = b"1234567890abcdef1234567890abcdef12345678"
        filter_spec = SparseOidFilter(oid)
        self.assertIn("SparseOidFilter", repr(filter_spec))
        self.assertIn("1234567890abcdef1234567890abcdef12345678", repr(filter_spec))
+
+
class CombineFilterTests(TestCase):
    """Test CombineFilter class."""

    def test_should_include_blob_all_allow(self) -> None:
        """Test that blob is included when all filters allow it."""
        filters = [BlobLimitFilter(1024), BlobLimitFilter(2048)]
        filter_spec = CombineFilter(filters)
        self.assertTrue(filter_spec.should_include_blob(512))

    def test_should_include_blob_one_denies(self) -> None:
        """Test that blob is excluded when one filter denies it."""
        # BlobNoneFilter vetoes the blob even though the limit would allow it.
        filters = [BlobLimitFilter(1024), BlobNoneFilter()]
        filter_spec = CombineFilter(filters)
        self.assertFalse(filter_spec.should_include_blob(512))

    def test_should_include_tree_all_allow(self) -> None:
        """Test that tree is included when all filters allow it."""
        filters = [TreeDepthFilter(2), TreeDepthFilter(3)]
        filter_spec = CombineFilter(filters)
        self.assertTrue(filter_spec.should_include_tree(1))

    def test_should_include_tree_one_denies(self) -> None:
        """Test that tree is excluded when one filter denies it."""
        # The stricter max_depth=1 filter vetoes depth 2.
        filters = [TreeDepthFilter(2), TreeDepthFilter(1)]
        filter_spec = CombineFilter(filters)
        self.assertFalse(filter_spec.should_include_tree(2))

    def test_to_spec_string(self) -> None:
        """Test conversion back to spec string."""
        filters = [BlobNoneFilter(), TreeDepthFilter(0)]
        filter_spec = CombineFilter(filters)
        self.assertEqual("combine:blob:none+tree:0", filter_spec.to_spec_string())

    def test_repr(self) -> None:
        """Test repr output."""
        filters = [BlobNoneFilter()]
        filter_spec = CombineFilter(filters)
        self.assertIn("CombineFilter", repr(filter_spec))

+ 31 - 0
tests/test_protocol.py

@@ -38,6 +38,7 @@ from dulwich.protocol import (
     ack_type,
     extract_capabilities,
     extract_want_line_capabilities,
+    find_capability,
     pkt_line,
     pkt_seq,
 )
@@ -358,3 +359,33 @@ class CapabilitiesTests(TestCase):
     def test_filter_capability_in_known_upload_capabilities(self) -> None:
         """Test that CAPABILITY_FILTER is in KNOWN_UPLOAD_CAPABILITIES."""
         self.assertIn(CAPABILITY_FILTER, KNOWN_UPLOAD_CAPABILITIES)
+
+
class FindCapabilityTests(TestCase):
    """Tests for find_capability function."""

    def test_find_capability_with_value(self) -> None:
        """Test finding a capability with a value."""
        caps = [b"filter=blob:none", b"agent=git/2.0"]
        # Only the part after '=' is returned for a valued capability.
        self.assertEqual(b"blob:none", find_capability(caps, b"filter"))

    def test_find_capability_without_value(self) -> None:
        """Test finding a capability without a value."""
        caps = [b"thin-pack", b"ofs-delta"]
        # A bare capability is returned as-is on an exact name match.
        self.assertEqual(b"thin-pack", find_capability(caps, b"thin-pack"))

    def test_find_capability_not_found(self) -> None:
        """Test finding a capability that doesn't exist."""
        caps = [b"thin-pack", b"ofs-delta"]
        self.assertIsNone(find_capability(caps, b"missing"))

    def test_find_capability_multiple_names(self) -> None:
        """Test finding with multiple capability names."""
        caps = [b"filter=blob:none", b"agent=git/2.0"]
        # Should find first match
        self.assertEqual(b"git/2.0", find_capability(caps, b"missing", b"agent"))

    def test_find_capability_complex_value(self) -> None:
        """Test finding capability with complex value."""
        # The value may itself contain ':' and '+'; everything after the
        # first '=' following the name is returned verbatim.
        caps = [b"filter=combine:blob:none+tree:0"]
        self.assertEqual(b"combine:blob:none+tree:0", find_capability(caps, b"filter"))

+ 23 - 0
tests/test_server.py

@@ -185,6 +185,29 @@ class UploadPackHandlerTestCase(TestCase):
         caps = self._handler.capabilities()
         self.assertIn(b"filter", caps)
 
    def test_filter_spec_parsed(self) -> None:
        """Test that filter specification is parsed from client capabilities."""
        from dulwich.partial_clone import BlobNoneFilter

        # Send the filter capability together with the handler's required
        # capabilities so set_client_capabilities accepts the request.
        caps = [b"filter=blob:none", *list(self._handler.required_capabilities())]
        self._handler.set_client_capabilities(caps)
        self.assertIsNotNone(self._handler.filter_spec)
        self.assertIsInstance(self._handler.filter_spec, BlobNoneFilter)
+
    def test_filter_spec_not_present(self) -> None:
        """Test that filter_spec is None when filter capability is not used."""
        # No b"filter=..." entry, so filter_spec keeps its default of None.
        caps = self._handler.required_capabilities()
        self._handler.set_client_capabilities(caps)
        self.assertIsNone(self._handler.filter_spec)
+
    def test_filter_spec_invalid(self) -> None:
        """Test that invalid filter spec raises GitProtocolError."""
        from dulwich.errors import GitProtocolError

        # "invalid:spec" is not a recognized filter type; the parse error is
        # surfaced to the client as a GitProtocolError.
        caps = [b"filter=invalid:spec", *list(self._handler.required_capabilities())]
        with self.assertRaises(GitProtocolError):
            self._handler.set_client_capabilities(caps)
+
     def test_get_tagged(self) -> None:
         refs = {
             b"refs/tags/tag1": ONE,