Browse Source

Improve line ending handling

Jelmer Vernooij 1 month ago
parent
commit
95033faf2b
2 changed files with 479 additions and 100 deletions
  1. 200 80
      dulwich/line_ending.py
  2. 279 20
      tests/test_line_ending.py

+ 200 - 80
dulwich/line_ending.py

@@ -26,11 +26,11 @@ about how it seems to work.
 The normalization is a two-fold process that happens at two moments:
 The normalization is a two-fold process that happens at two moments:
 
 
 - When reading a file from the index into the working directory. For example
 - When reading a file from the index into the working directory. For example
-  when doing a ``git clone`` or ``git checkout`` call. We call this process the
-  read filter in this module.
+  when doing a ``git clone`` or ``git checkout`` call. This is called the
+  smudge filter (repository -> working tree).
 - When writing a file to the index from the working directory. For example
 - When writing a file to the index from the working directory. For example
-  when doing a ``git add`` call. We call this process the write filter in this
-  module.
+  when doing a ``git add`` call. This is called the clean filter (working tree
+  -> repository).
 
 
 Note that when checking status (getting unstaged changes), whether or not
 Note that when checking status (getting unstaged changes), whether or not
 normalization is done on write depends on whether or not the file in the
 normalization is done on write depends on whether or not the file in the
@@ -108,13 +108,13 @@ attribute defined in ``.gitattributes``; it takes three possible values:
       line-endings in the working directory and convert line-endings to LF
       line-endings in the working directory and convert line-endings to LF
       when writing to the index. When autocrlf is set to true, eol value is
       when writing to the index. When autocrlf is set to true, eol value is
       ignored.
       ignored.
-    - ``input``: Quite similar to the ``true`` value but only force the write
+    - ``input``: Quite similar to the ``true`` value but only applies the clean
       filter, i.e. new files added to the index will get their
       filter, i.e. new files added to the index will get their
       line-endings converted to LF.
       line-endings converted to LF.
     - ``false`` (default): No normalization is done.
     - ``false`` (default): No normalization is done.
 
 
 ``core.eol`` is the top-level configuration to define the line-ending to use
 ``core.eol`` is the top-level configuration to define the line-ending to use
-when applying the read_filer. It takes three possible values:
+when applying the smudge filter. It takes three possible values:
 
 
     - ``lf``: When normalization is done, force line-endings to be ``LF`` in the
     - ``lf``: When normalization is done, force line-endings to be ``LF`` in the
       working directory.
       working directory.
@@ -143,6 +143,9 @@ if TYPE_CHECKING:
     from .config import StackedConfig
     from .config import StackedConfig
     from .object_store import BaseObjectStore
     from .object_store import BaseObjectStore
 
 
+from . import replace_me
+from .attrs import GitAttributes, Pattern
+from .filters import FilterBlobNormalizer, FilterDriver, FilterRegistry
 from .object_store import iter_tree_contents
 from .object_store import iter_tree_contents
 from .objects import Blob, ObjectID
 from .objects import Blob, ObjectID
 from .patch import is_binary
 from .patch import is_binary
@@ -151,6 +154,42 @@ CRLF = b"\r\n"
 LF = b"\n"
 LF = b"\n"
 
 
 
 
+class LineEndingFilter(FilterDriver):
+    """Filter driver for line ending conversion."""
+
+    def __init__(
+        self,
+        clean_conversion: Optional[Callable[[bytes], bytes]] = None,
+        smudge_conversion: Optional[Callable[[bytes], bytes]] = None,
+        binary_detection: bool = True,
+    ):
+        self.clean_conversion = clean_conversion
+        self.smudge_conversion = smudge_conversion
+        self.binary_detection = binary_detection
+
+    def clean(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkin (working tree -> repository)."""
+        if self.clean_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.clean_conversion(data)
+
+    def smudge(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkout (repository -> working tree)."""
+        if self.smudge_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.smudge_conversion(data)
+
+
 def convert_crlf_to_lf(text_hunk: bytes) -> bytes:
 def convert_crlf_to_lf(text_hunk: bytes) -> bytes:
     """Convert CRLF in text hunk into LF.
     """Convert CRLF in text hunk into LF.
 
 
@@ -181,46 +220,26 @@ def convert_lf_to_crlf(text_hunk: bytes) -> bytes:
     return CRLF.join(cleaned_parts)
     return CRLF.join(cleaned_parts)
 
 
 
 
-def get_checkout_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_smudge_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkout_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkout_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct smudge filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_smudge_filter_autocrlf(core_autocrlf)
 
 
 
 
-def get_checkin_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_clean_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkin_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkin_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct clean filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_clean_filter_autocrlf(core_autocrlf)
 
 
 
 
-def get_checkout_filter_autocrlf(
+def get_smudge_filter_autocrlf(
     core_autocrlf: bytes,
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter base on autocrlf value.
+    """Returns the correct smudge filter based on the autocrlf value.
 
 
     Args:
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -234,10 +253,10 @@ def get_checkout_filter_autocrlf(
     return None
     return None
 
 
 
 
-def get_checkin_filter_autocrlf(
+def get_clean_filter_autocrlf(
     core_autocrlf: bytes,
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter base on autocrlf value.
+    """Returns the correct clean filter based on the autocrlf value.
 
 
     Args:
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -252,63 +271,162 @@ def get_checkin_filter_autocrlf(
     return None
     return None
 
 
 
 
-class BlobNormalizer:
+# Backwards compatibility wrappers
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_smudge_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_clean_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter_autocrlf instead."""
+    return get_smudge_filter_autocrlf(core_autocrlf)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter_autocrlf instead."""
+    return get_clean_filter_autocrlf(core_autocrlf)
+
+
+class BlobNormalizer(FilterBlobNormalizer):
     """An object to store computation result of which filter to apply based
     """An object to store computation result of which filter to apply based
     on configuration, gitattributes, path and operation (checkin or checkout).
     on configuration, gitattributes, path and operation (checkin or checkout).
+
+    This class maintains backward compatibility while using the filter infrastructure.
     """
     """
 
 
     def __init__(
     def __init__(
-        self, config_stack: "StackedConfig", gitattributes: dict[str, Any]
+        self,
+        config_stack: "StackedConfig",
+        gitattributes: dict[str, Any],
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
     ) -> None:
-        self.config_stack = config_stack
-        self.gitattributes = gitattributes
-
-        # Compute which filters we needs based on parameters
-        try:
-            core_eol_raw = config_stack.get("core", "eol")
-            core_eol: str = (
-                core_eol_raw.decode("ascii")
-                if isinstance(core_eol_raw, bytes)
-                else core_eol_raw
-            )
-        except KeyError:
-            core_eol = "native"
-
-        try:
-            core_autocrlf_raw = config_stack.get("core", "autocrlf")
-            if isinstance(core_autocrlf_raw, bytes):
-                core_autocrlf: Union[bool, str] = core_autocrlf_raw.decode(
-                    "ascii"
-                ).lower()
+        # Set up a filter registry with line ending filters
+        filter_registry = FilterRegistry(config_stack)
+
+        # Create line ending filter if needed
+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
+        clean_filter = get_clean_filter(core_eol, autocrlf)
+
+        # Always register a text filter that can be used by gitattributes
+        # Even if autocrlf is false, gitattributes text=true should work
+        line_ending_filter = LineEndingFilter(
+            clean_conversion=clean_filter or convert_crlf_to_lf,
+            smudge_conversion=smudge_filter or convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        filter_registry.register_driver("text", line_ending_filter)
+
+        # Convert dict gitattributes to GitAttributes object for parent class
+        git_attrs_patterns = []
+        for pattern_str, attrs in gitattributes.items():
+            if isinstance(pattern_str, str):
+                pattern_bytes = pattern_str.encode("utf-8")
             else:
             else:
-                core_autocrlf = core_autocrlf_raw.lower()
-        except KeyError:
-            core_autocrlf = False
+                pattern_bytes = pattern_str
+            pattern = Pattern(pattern_bytes)
+            git_attrs_patterns.append((pattern, attrs))
 
 
-        self.fallback_read_filter = get_checkout_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
-        self.fallback_write_filter = get_checkin_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
+        git_attributes = GitAttributes(git_attrs_patterns)
+
+        # Initialize parent class with gitattributes
+        # The filter infrastructure will handle gitattributes processing
+        super().__init__(config_stack, git_attributes, filter_registry)
+
+        # Store original filters for backward compatibility
+        self.fallback_read_filter = smudge_filter
+        self.fallback_write_filter = clean_filter
 
 
     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkin operation."""
         """Normalize a blob during a checkin operation."""
-        if self.fallback_write_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_write_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkin_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_write_filter is not None:
+            # Apply the clean filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=self.fallback_write_filter,
+                smudge_conversion=None,
+                binary_detection=True,
             )
             )
+            filtered_data = line_ending_filter.clean(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
 
-        return blob
+        return result
 
 
     def checkout_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
     def checkout_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkout operation."""
         """Normalize a blob during a checkout operation."""
-        if self.fallback_read_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_read_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkout_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_read_filter is not None:
+            # Apply the smudge filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=None,
+                smudge_conversion=self.fallback_read_filter,
+                binary_detection=True,
             )
             )
+            filtered_data = line_ending_filter.smudge(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
 
-        return blob
+        return result
 
 
 
 
 def normalize_blob(
 def normalize_blob(
@@ -344,8 +462,10 @@ class TreeBlobNormalizer(BlobNormalizer):
         git_attributes: dict[str, Any],
         git_attributes: dict[str, Any],
         object_store: "BaseObjectStore",
         object_store: "BaseObjectStore",
         tree: Optional[ObjectID] = None,
         tree: Optional[ObjectID] = None,
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
     ) -> None:
-        super().__init__(config_stack, git_attributes)
+        super().__init__(config_stack, git_attributes, core_eol, autocrlf)
         if tree:
         if tree:
             self.existing_paths = {
             self.existing_paths = {
                 name for name, _, _ in iter_tree_contents(object_store, tree)
                 name for name, _, _ in iter_tree_contents(object_store, tree)

+ 279 - 20
tests/test_line_ending.py

@@ -22,10 +22,12 @@
 """Tests for the line ending conversion."""
 """Tests for the line ending conversion."""
 
 
 from dulwich.line_ending import (
 from dulwich.line_ending import (
+    BlobNormalizer,
+    LineEndingFilter,
     convert_crlf_to_lf,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
     convert_lf_to_crlf,
-    get_checkin_filter_autocrlf,
-    get_checkout_filter_autocrlf,
+    get_clean_filter_autocrlf,
+    get_smudge_filter_autocrlf,
     normalize_blob,
     normalize_blob,
 )
 )
 from dulwich.objects import Blob
 from dulwich.objects import Blob
@@ -56,35 +58,35 @@ class LineEndingConversion(TestCase):
 
 
 
 
 class GetLineEndingAutocrlfFilters(TestCase):
 class GetLineEndingAutocrlfFilters(TestCase):
-    def test_get_checkin_filter_autocrlf_default(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"false")
+    def test_get_clean_filter_autocrlf_default(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"false")
 
 
-        self.assertEqual(checkin_filter, None)
+        self.assertEqual(clean_filter, None)
 
 
-    def test_get_checkin_filter_autocrlf_true(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"true")
+    def test_get_clean_filter_autocrlf_true(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"true")
 
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
 
-    def test_get_checkin_filter_autocrlf_input(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"input")
+    def test_get_clean_filter_autocrlf_input(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"input")
 
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
 
-    def test_get_checkout_filter_autocrlf_default(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"false")
+    def test_get_smudge_filter_autocrlf_default(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"false")
 
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
 
-    def test_get_checkout_filter_autocrlf_true(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"true")
+    def test_get_smudge_filter_autocrlf_true(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"true")
 
 
-        self.assertEqual(checkout_filter, convert_lf_to_crlf)
+        self.assertEqual(smudge_filter, convert_lf_to_crlf)
 
 
-    def test_get_checkout_filter_autocrlf_input(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"input")
+    def test_get_smudge_filter_autocrlf_input(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"input")
 
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
 
 
 
 class NormalizeBlobTestCase(TestCase):
 class NormalizeBlobTestCase(TestCase):
@@ -195,3 +197,260 @@ class NormalizeBlobTestCase(TestCase):
 
 
         self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
         self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
         self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
         self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+
+class LineEndingFilterTests(TestCase):
+    """Test the LineEndingFilter class."""
+
+    def test_clean_no_conversion(self) -> None:
+        """Test clean with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), data)
+
+    def test_clean_with_conversion(self) -> None:
+        """Test clean with CRLF to LF conversion."""
+        filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), b"test\ndata")
+
+    def test_clean_binary_detection(self) -> None:
+        """Test clean skips binary files."""
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\r\n\x00data"
+        self.assertEqual(filter.clean(data), data)  # Should not convert
+
+    def test_smudge_no_conversion(self) -> None:
+        """Test smudge with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), data)
+
+    def test_smudge_with_conversion(self) -> None:
+        """Test smudge with LF to CRLF conversion."""
+        filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), b"test\r\ndata")
+
+    def test_smudge_binary_detection(self) -> None:
+        """Test smudge skips binary files."""
+        filter = LineEndingFilter(
+            smudge_conversion=convert_lf_to_crlf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\n\x00data"
+        self.assertEqual(filter.smudge(data), data)  # Should not convert
+
+
+class BlobNormalizerTests(TestCase):
+    """Test the BlobNormalizer class integration with filters."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+
+        self.config = ConfigDict()
+        self.gitattributes = {}
+
+    def test_autocrlf_true_checkin(self) -> None:
+        """Test checkin with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_true_checkout(self) -> None:
+        """Test checkout with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should convert to CRLF on checkout
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\r\nline2\r\n")
+
+    def test_autocrlf_input_checkin(self) -> None:
+        """Test checkin with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_input_checkout(self) -> None:
+        """Test checkout with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should NOT convert on checkout with input mode
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)  # Same object, no conversion
+
+    def test_autocrlf_false(self) -> None:
+        """Test with autocrlf=false (no conversion)."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        # Create blob with mixed line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\nline3"
+
+        # Should not convert on either operation
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+    def test_gitattributes_text_attr(self) -> None:
+        """Test gitattributes text attribute overrides autocrlf."""
+        # Set gitattributes to force text conversion
+        self.gitattributes[b"*.txt"] = {b"text": True}
+
+        # Even with autocrlf=false, should convert based on gitattributes
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should still convert because of gitattributes
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        # Note: with just text=true and no eol setting, it follows platform defaults
+        # For checkin, it should always normalize to LF
+        self.assertIsNot(result, blob)
+
+    def test_gitattributes_binary_attr(self) -> None:
+        """Test gitattributes -text attribute prevents conversion."""
+        # Set gitattributes to force binary (no conversion)
+        self.gitattributes[b"*.bin"] = {b"text": False}
+
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should not convert despite autocrlf=true
+        result = normalizer.checkin_normalize(blob, b"test.bin")
+        self.assertIs(result, blob)
+
+    def test_binary_file_detection(self) -> None:
+        """Test that binary files are not converted."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with binary content
+        blob = Blob()
+        blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
+
+        # Should not convert binary files
+        result = normalizer.checkin_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+
+class LineEndingIntegrationTests(TestCase):
+    """Integration tests for line ending conversion with the filter system."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+        from dulwich.filters import FilterRegistry
+
+        self.config = ConfigDict()
+        self.registry = FilterRegistry(self.config)
+
+    def test_filter_registry_with_line_endings(self) -> None:
+        """Test that line ending filters work through the registry."""
+        # Register a custom text filter that does line ending conversion
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        self.registry.register_driver("text", filter)
+
+        # Set up gitattributes
+        # Create GitAttributes
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
+        gitattributes = GitAttributes(patterns)
+
+        # Create normalizer
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Test round trip
+        blob = Blob()
+        blob.data = b"Hello\r\nWorld\r\n"
+
+        # Checkin should convert CRLF to LF
+        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(checked_in.data, b"Hello\nWorld\n")
+
+        # Checkout should convert LF to CRLF
+        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
+        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
+
+    def test_mixed_filters(self) -> None:
+        """Test multiple filters can coexist (line endings and LFS)."""
+        # This would be a more complex test requiring LFS setup
+        # For now, just verify the structure works
+        text_filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+        )
+        self.registry.register_driver("text", text_filter)
+
+        # Mock LFS filter
+        class MockLFSFilter:
+            def clean(self, data):
+                return b"LFS pointer"
+
+            def smudge(self, data):
+                return b"LFS content"
+
+        self.registry.register_driver("lfs", MockLFSFilter())
+
+        # Different files use different filters
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [
+            (Pattern(b"*.txt"), {b"filter": b"text"}),
+            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
+        ]
+        gitattributes = GitAttributes(patterns)
+
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Text file gets line ending conversion
+        text_blob = Blob()
+        text_blob.data = b"text\r\nfile"
+        result = normalizer.checkin_normalize(text_blob, b"test.txt")
+        self.assertEqual(result.data, b"text\nfile")
+
+        # Binary file gets LFS conversion
+        bin_blob = Blob()
+        bin_blob.data = b"binary content"
+        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
+        self.assertEqual(result.data, b"LFS pointer")