
Improve line ending handling

Jelmer Vernooij, 1 month ago
parent commit 95033faf2b
2 changed files with 479 additions and 100 deletions
  1. dulwich/line_ending.py (+200 -80)
  2. tests/test_line_ending.py (+279 -20)

+ 200 - 80
dulwich/line_ending.py

@@ -26,11 +26,11 @@ about how it seems to work.
 The normalization is a two-fold process that happens at two moments:
 
 - When reading a file from the index and to the working directory. For example
-  when doing a ``git clone`` or ``git checkout`` call. We call this process the
-  read filter in this module.
+  when doing a ``git clone`` or ``git checkout`` call. This is called the
+  smudge filter (repository -> working tree).
 - When writing a file to the index from the working directory. For example
-  when doing a ``git add`` call. We call this process the write filter in this
-  module.
+  when doing a ``git add`` call. This is called the clean filter (working tree
+  -> repository).
 
 Note that when checking status (getting unstaged changes), whether or not
 normalization is done on write depends on whether or not the file in the
@@ -108,13 +108,13 @@ attribute defined in ``.gitattributes``; it takes three possible values:
       line-endings in the working directory and convert line-endings to LF
       when writing to the index. When autocrlf is set to true, eol value is
       ignored.
-    - ``input``: Quite similar to the ``true`` value but only force the write
+    - ``input``: Quite similar to the ``true`` value but only applies the clean
       filter, ie line-ending of new files added to the index will get their
       line-endings converted to LF.
     - ``false`` (default): No normalization is done.
 
 ``core.eol`` is the top-level configuration to define the line-ending to use
-when applying the read_filer. It takes three possible values:
+when applying the smudge filter. It takes three possible values:
 
     - ``lf``: When normalization is done, force line-endings to be ``LF`` in the
       working directory.
@@ -143,6 +143,9 @@ if TYPE_CHECKING:
     from .config import StackedConfig
     from .object_store import BaseObjectStore
 
+from . import replace_me
+from .attrs import GitAttributes, Pattern
+from .filters import FilterBlobNormalizer, FilterDriver, FilterRegistry
 from .object_store import iter_tree_contents
 from .objects import Blob, ObjectID
 from .patch import is_binary
@@ -151,6 +154,42 @@ CRLF = b"\r\n"
 LF = b"\n"
 
 
+class LineEndingFilter(FilterDriver):
+    """Filter driver for line ending conversion."""
+
+    def __init__(
+        self,
+        clean_conversion: Optional[Callable[[bytes], bytes]] = None,
+        smudge_conversion: Optional[Callable[[bytes], bytes]] = None,
+        binary_detection: bool = True,
+    ):
+        self.clean_conversion = clean_conversion
+        self.smudge_conversion = smudge_conversion
+        self.binary_detection = binary_detection
+
+    def clean(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkin (working tree -> repository)."""
+        if self.clean_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.clean_conversion(data)
+
+    def smudge(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkout (repository -> working tree)."""
+        if self.smudge_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.smudge_conversion(data)
+
+
 def convert_crlf_to_lf(text_hunk: bytes) -> bytes:
     """Convert CRLF in text hunk into LF.
 
@@ -181,46 +220,26 @@ def convert_lf_to_crlf(text_hunk: bytes) -> bytes:
     return CRLF.join(cleaned_parts)
 
 
-def get_checkout_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_smudge_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkout_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkout_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct smudge filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_smudge_filter_autocrlf(core_autocrlf)
 
 
-def get_checkin_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_clean_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkin_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkin_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct clean filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_clean_filter_autocrlf(core_autocrlf)
 
 
-def get_checkout_filter_autocrlf(
+def get_smudge_filter_autocrlf(
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter base on autocrlf value.
+    """Returns the correct smudge filter base on autocrlf value.
 
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -234,10 +253,10 @@ def get_checkout_filter_autocrlf(
     return None
 
 
-def get_checkin_filter_autocrlf(
+def get_clean_filter_autocrlf(
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter base on autocrlf value.
+    """Returns the correct clean filter base on autocrlf value.
 
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -252,63 +271,162 @@ def get_checkin_filter_autocrlf(
     return None
 
 
-class BlobNormalizer:
+# Backwards compatibility wrappers
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_smudge_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_clean_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter_autocrlf instead."""
+    return get_smudge_filter_autocrlf(core_autocrlf)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter_autocrlf instead."""
+    return get_clean_filter_autocrlf(core_autocrlf)
+
+
+class BlobNormalizer(FilterBlobNormalizer):
     """An object to store computation result of which filter to apply based
     on configuration, gitattributes, path and operation (checkin or checkout).
+
+    This class maintains backward compatibility while using the filter infrastructure.
     """
 
     def __init__(
-        self, config_stack: "StackedConfig", gitattributes: dict[str, Any]
+        self,
+        config_stack: "StackedConfig",
+        gitattributes: dict[str, Any],
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
-        self.config_stack = config_stack
-        self.gitattributes = gitattributes
-
-        # Compute which filters we needs based on parameters
-        try:
-            core_eol_raw = config_stack.get("core", "eol")
-            core_eol: str = (
-                core_eol_raw.decode("ascii")
-                if isinstance(core_eol_raw, bytes)
-                else core_eol_raw
-            )
-        except KeyError:
-            core_eol = "native"
-
-        try:
-            core_autocrlf_raw = config_stack.get("core", "autocrlf")
-            if isinstance(core_autocrlf_raw, bytes):
-                core_autocrlf: Union[bool, str] = core_autocrlf_raw.decode(
-                    "ascii"
-                ).lower()
+        # Set up a filter registry with line ending filters
+        filter_registry = FilterRegistry(config_stack)
+
+        # Create line ending filter if needed
+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
+        clean_filter = get_clean_filter(core_eol, autocrlf)
+
+        # Always register a text filter that can be used by gitattributes
+        # Even if autocrlf is false, gitattributes text=true should work
+        line_ending_filter = LineEndingFilter(
+            clean_conversion=clean_filter or convert_crlf_to_lf,
+            smudge_conversion=smudge_filter or convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        filter_registry.register_driver("text", line_ending_filter)
+
+        # Convert dict gitattributes to GitAttributes object for parent class
+        git_attrs_patterns = []
+        for pattern_str, attrs in gitattributes.items():
+            if isinstance(pattern_str, str):
+                pattern_bytes = pattern_str.encode("utf-8")
             else:
-                core_autocrlf = core_autocrlf_raw.lower()
-        except KeyError:
-            core_autocrlf = False
+                pattern_bytes = pattern_str
+            pattern = Pattern(pattern_bytes)
+            git_attrs_patterns.append((pattern, attrs))
 
-        self.fallback_read_filter = get_checkout_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
-        self.fallback_write_filter = get_checkin_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
+        git_attributes = GitAttributes(git_attrs_patterns)
+
+        # Initialize parent class with gitattributes
+        # The filter infrastructure will handle gitattributes processing
+        super().__init__(config_stack, git_attributes, filter_registry)
+
+        # Store original filters for backward compatibility
+        self.fallback_read_filter = smudge_filter
+        self.fallback_write_filter = clean_filter
 
     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkin operation."""
-        if self.fallback_write_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_write_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkin_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_write_filter is not None:
+            # Apply the clean filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=self.fallback_write_filter,
+                smudge_conversion=None,
+                binary_detection=True,
             )
+            filtered_data = line_ending_filter.clean(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
-        return blob
+        return result
 
     def checkout_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkout operation."""
-        if self.fallback_read_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_read_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkout_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_read_filter is not None:
+            # Apply the smudge filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=None,
+                smudge_conversion=self.fallback_read_filter,
+                binary_detection=True,
             )
+            filtered_data = line_ending_filter.smudge(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
-        return blob
+        return result
 
 
 def normalize_blob(
@@ -344,8 +462,10 @@ class TreeBlobNormalizer(BlobNormalizer):
         git_attributes: dict[str, Any],
         object_store: "BaseObjectStore",
         tree: Optional[ObjectID] = None,
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
-        super().__init__(config_stack, git_attributes)
+        super().__init__(config_stack, git_attributes, core_eol, autocrlf)
         if tree:
             self.existing_paths = {
                 name for name, _, _ in iter_tree_contents(object_store, tree)
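
A minimal usage sketch of the reworked BlobNormalizer, mirroring the new tests below. The path and file contents are illustrative, ConfigDict stands in for a real config stack, and conversion here is driven purely by autocrlf since no gitattributes are passed:

    from dulwich.config import ConfigDict
    from dulwich.line_ending import BlobNormalizer
    from dulwich.objects import Blob

    # Resolved config values are now passed in directly instead of being
    # read from the config stack inside __init__.
    normalizer = BlobNormalizer(ConfigDict(), {}, autocrlf=b"true")

    blob = Blob()
    blob.data = b"line1\r\nline2\r\n"

    # Clean (checkin, working tree -> repository): CRLF becomes LF.
    staged = normalizer.checkin_normalize(blob, b"example.txt")
    assert staged.data == b"line1\nline2\n"

    # Smudge (checkout, repository -> working tree): LF becomes CRLF.
    checked_out = normalizer.checkout_normalize(staged, b"example.txt")
    assert checked_out.data == b"line1\r\nline2\r\n"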

+ 279 - 20
tests/test_line_ending.py

@@ -22,10 +22,12 @@
 """Tests for the line ending conversion."""
 
 from dulwich.line_ending import (
+    BlobNormalizer,
+    LineEndingFilter,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
-    get_checkin_filter_autocrlf,
-    get_checkout_filter_autocrlf,
+    get_clean_filter_autocrlf,
+    get_smudge_filter_autocrlf,
     normalize_blob,
 )
 from dulwich.objects import Blob
@@ -56,35 +58,35 @@ class LineEndingConversion(TestCase):
 
 
 class GetLineEndingAutocrlfFilters(TestCase):
-    def test_get_checkin_filter_autocrlf_default(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"false")
+    def test_get_clean_filter_autocrlf_default(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"false")
 
-        self.assertEqual(checkin_filter, None)
+        self.assertEqual(clean_filter, None)
 
-    def test_get_checkin_filter_autocrlf_true(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"true")
+    def test_get_clean_filter_autocrlf_true(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"true")
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
-    def test_get_checkin_filter_autocrlf_input(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"input")
+    def test_get_clean_filter_autocrlf_input(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"input")
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
-    def test_get_checkout_filter_autocrlf_default(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"false")
+    def test_get_smudge_filter_autocrlf_default(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"false")
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
-    def test_get_checkout_filter_autocrlf_true(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"true")
+    def test_get_smudge_filter_autocrlf_true(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"true")
 
-        self.assertEqual(checkout_filter, convert_lf_to_crlf)
+        self.assertEqual(smudge_filter, convert_lf_to_crlf)
 
-    def test_get_checkout_filter_autocrlf_input(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"input")
+    def test_get_smudge_filter_autocrlf_input(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"input")
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
 
 class NormalizeBlobTestCase(TestCase):
@@ -195,3 +197,260 @@ class NormalizeBlobTestCase(TestCase):
 
         self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
         self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+
+class LineEndingFilterTests(TestCase):
+    """Test the LineEndingFilter class."""
+
+    def test_clean_no_conversion(self) -> None:
+        """Test clean with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), data)
+
+    def test_clean_with_conversion(self) -> None:
+        """Test clean with CRLF to LF conversion."""
+        filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), b"test\ndata")
+
+    def test_clean_binary_detection(self) -> None:
+        """Test clean skips binary files."""
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\r\n\x00data"
+        self.assertEqual(filter.clean(data), data)  # Should not convert
+
+    def test_smudge_no_conversion(self) -> None:
+        """Test smudge with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), data)
+
+    def test_smudge_with_conversion(self) -> None:
+        """Test smudge with LF to CRLF conversion."""
+        filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), b"test\r\ndata")
+
+    def test_smudge_binary_detection(self) -> None:
+        """Test smudge skips binary files."""
+        filter = LineEndingFilter(
+            smudge_conversion=convert_lf_to_crlf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\n\x00data"
+        self.assertEqual(filter.smudge(data), data)  # Should not convert
+
+
+class BlobNormalizerTests(TestCase):
+    """Test the BlobNormalizer class integration with filters."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+
+        self.config = ConfigDict()
+        self.gitattributes = {}
+
+    def test_autocrlf_true_checkin(self) -> None:
+        """Test checkin with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_true_checkout(self) -> None:
+        """Test checkout with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should convert to CRLF on checkout
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\r\nline2\r\n")
+
+    def test_autocrlf_input_checkin(self) -> None:
+        """Test checkin with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_input_checkout(self) -> None:
+        """Test checkout with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should NOT convert on checkout with input mode
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)  # Same object, no conversion
+
+    def test_autocrlf_false(self) -> None:
+        """Test with autocrlf=false (no conversion)."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        # Create blob with mixed line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\nline3"
+
+        # Should not convert on either operation
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+    def test_gitattributes_text_attr(self) -> None:
+        """Test gitattributes text attribute overrides autocrlf."""
+        # Set gitattributes to force text conversion
+        self.gitattributes[b"*.txt"] = {b"text": True}
+
+        # Even with autocrlf=false, should convert based on gitattributes
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should still convert because of gitattributes
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        # Note: with just text=true and no eol setting, it follows platform defaults
+        # For checkin, it should always normalize to LF
+        self.assertIsNot(result, blob)
+
+    def test_gitattributes_binary_attr(self) -> None:
+        """Test gitattributes -text attribute prevents conversion."""
+        # Set gitattributes to force binary (no conversion)
+        self.gitattributes[b"*.bin"] = {b"text": False}
+
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should not convert despite autocrlf=true
+        result = normalizer.checkin_normalize(blob, b"test.bin")
+        self.assertIs(result, blob)
+
+    def test_binary_file_detection(self) -> None:
+        """Test that binary files are not converted."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with binary content
+        blob = Blob()
+        blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
+
+        # Should not convert binary files
+        result = normalizer.checkin_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+
+class LineEndingIntegrationTests(TestCase):
+    """Integration tests for line ending conversion with the filter system."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+        from dulwich.filters import FilterRegistry
+
+        self.config = ConfigDict()
+        self.registry = FilterRegistry(self.config)
+
+    def test_filter_registry_with_line_endings(self) -> None:
+        """Test that line ending filters work through the registry."""
+        # Register a custom text filter that does line ending conversion
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        self.registry.register_driver("text", filter)
+
+        # Set up gitattributes
+        # Create GitAttributes
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
+        gitattributes = GitAttributes(patterns)
+
+        # Create normalizer
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Test round trip
+        blob = Blob()
+        blob.data = b"Hello\r\nWorld\r\n"
+
+        # Checkin should convert CRLF to LF
+        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(checked_in.data, b"Hello\nWorld\n")
+
+        # Checkout should convert LF to CRLF
+        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
+        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
+
+    def test_mixed_filters(self) -> None:
+        """Test multiple filters can coexist (line endings and LFS)."""
+        # This would be a more complex test requiring LFS setup
+        # For now, just verify the structure works
+        text_filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+        )
+        self.registry.register_driver("text", text_filter)
+
+        # Mock LFS filter
+        class MockLFSFilter:
+            def clean(self, data):
+                return b"LFS pointer"
+
+            def smudge(self, data):
+                return b"LFS content"
+
+        self.registry.register_driver("lfs", MockLFSFilter())
+
+        # Different files use different filters
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [
+            (Pattern(b"*.txt"), {b"filter": b"text"}),
+            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
+        ]
+        gitattributes = GitAttributes(patterns)
+
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Text file gets line ending conversion
+        text_blob = Blob()
+        text_blob.data = b"text\r\nfile"
+        result = normalizer.checkin_normalize(text_blob, b"test.txt")
+        self.assertEqual(result.data, b"text\nfile")
+
+        # Binary file gets LFS conversion
+        bin_blob = Blob()
+        bin_blob.data = b"binary content"
+        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
+        self.assertEqual(result.data, b"LFS pointer")
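
The deprecated names remain importable and delegate to the new ones; a small sketch of the migration path follows. The DeprecationWarning behaviour of @replace_me is an assumption, not shown in this diff:

    import warnings

    from dulwich.line_ending import (
        convert_lf_to_crlf,
        get_checkout_filter_autocrlf,  # deprecated alias
        get_smudge_filter_autocrlf,    # new name
    )

    # New name: core.autocrlf=true maps the smudge (checkout) side to LF -> CRLF.
    assert get_smudge_filter_autocrlf(b"true") is convert_lf_to_crlf

    # Old name still works until its removal in 0.25.0.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert get_checkout_filter_autocrlf(b"true") is convert_lf_to_crlf
        # Assumption: @replace_me emits a DeprecationWarning pointing at the new name.
        assert any(issubclass(w.category, DeprecationWarning) for w in caught)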