
Improve line ending handling

Jelmer Vernooij, 1 month ago
parent commit 95033faf2b
2 changed files with 479 additions and 100 deletions
  1. dulwich/line_ending.py (+200 -80)
  2. tests/test_line_ending.py (+279 -20)

+ 200 - 80
dulwich/line_ending.py

@@ -26,11 +26,11 @@ about how it seems to work.
 The normalization is a two-fold process that happens at two moments:
 
 - When reading a file from the index and to the working directory. For example
-  when doing a ``git clone`` or ``git checkout`` call. We call this process the
-  read filter in this module.
+  when doing a ``git clone`` or ``git checkout`` call. This is called the
+  smudge filter (repository -> working tree).
 - When writing a file to the index from the working directory. For example
-  when doing a ``git add`` call. We call this process the write filter in this
-  module.
+  when doing a ``git add`` call. This is called the clean filter (working tree
+  -> repository).
 
 Note that when checking status (getting unstaged changes), whether or not
 normalization is done on write depends on whether or not the file in the
@@ -108,13 +108,13 @@ attribute defined in ``.gitattributes``; it takes three possible values:
       line-endings in the working directory and convert line-endings to LF
       when writing to the index. When autocrlf is set to true, eol value is
       ignored.
-    - ``input``: Quite similar to the ``true`` value but only force the write
+    - ``input``: Quite similar to the ``true`` value but only applies the clean
       filter, ie line-ending of new files added to the index will get their
       line-endings converted to LF.
     - ``false`` (default): No normalization is done.
 
 ``core.eol`` is the top-level configuration to define the line-ending to use
-when applying the read_filer. It takes three possible values:
+when applying the smudge filter. It takes three possible values:
 
     - ``lf``: When normalization is done, force line-endings to be ``LF`` in the
       working directory.
@@ -143,6 +143,9 @@ if TYPE_CHECKING:
     from .config import StackedConfig
     from .object_store import BaseObjectStore
 
+from . import replace_me
+from .attrs import GitAttributes, Pattern
+from .filters import FilterBlobNormalizer, FilterDriver, FilterRegistry
 from .object_store import iter_tree_contents
 from .objects import Blob, ObjectID
 from .patch import is_binary
@@ -151,6 +154,42 @@ CRLF = b"\r\n"
 LF = b"\n"
 
 
+class LineEndingFilter(FilterDriver):
+    """Filter driver for line ending conversion."""
+
+    def __init__(
+        self,
+        clean_conversion: Optional[Callable[[bytes], bytes]] = None,
+        smudge_conversion: Optional[Callable[[bytes], bytes]] = None,
+        binary_detection: bool = True,
+    ):
+        self.clean_conversion = clean_conversion
+        self.smudge_conversion = smudge_conversion
+        self.binary_detection = binary_detection
+
+    def clean(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkin (working tree -> repository)."""
+        if self.clean_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.clean_conversion(data)
+
+    def smudge(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkout (repository -> working tree)."""
+        if self.smudge_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.smudge_conversion(data)
+
+
 def convert_crlf_to_lf(text_hunk: bytes) -> bytes:
     """Convert CRLF in text hunk into LF.
 
@@ -181,46 +220,26 @@ def convert_lf_to_crlf(text_hunk: bytes) -> bytes:
     return CRLF.join(cleaned_parts)
 
 
-def get_checkout_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_smudge_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkout_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkout_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct smudge filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_smudge_filter_autocrlf(core_autocrlf)
 
 
-def get_checkin_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_clean_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkin_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkin_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct clean filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_clean_filter_autocrlf(core_autocrlf)
 
 
-def get_checkout_filter_autocrlf(
+def get_smudge_filter_autocrlf(
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter base on autocrlf value.
+    """Returns the correct smudge filter base on autocrlf value.
 
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -234,10 +253,10 @@ def get_checkout_filter_autocrlf(
     return None
 
 
-def get_checkin_filter_autocrlf(
+def get_clean_filter_autocrlf(
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter base on autocrlf value.
+    """Returns the correct clean filter base on autocrlf value.
 
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -252,63 +271,162 @@ def get_checkin_filter_autocrlf(
     return None
 
 
-class BlobNormalizer:
+# Backwards compatibility wrappers
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_smudge_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_clean_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter_autocrlf instead."""
+    return get_smudge_filter_autocrlf(core_autocrlf)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter_autocrlf instead."""
+    return get_clean_filter_autocrlf(core_autocrlf)
+
+
+class BlobNormalizer(FilterBlobNormalizer):
     """An object to store computation result of which filter to apply based
     on configuration, gitattributes, path and operation (checkin or checkout).
+
+    This class maintains backward compatibility while using the filter infrastructure.
     """
 
     def __init__(
-        self, config_stack: "StackedConfig", gitattributes: dict[str, Any]
+        self,
+        config_stack: "StackedConfig",
+        gitattributes: dict[str, Any],
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
-        self.config_stack = config_stack
-        self.gitattributes = gitattributes
-
-        # Compute which filters we needs based on parameters
-        try:
-            core_eol_raw = config_stack.get("core", "eol")
-            core_eol: str = (
-                core_eol_raw.decode("ascii")
-                if isinstance(core_eol_raw, bytes)
-                else core_eol_raw
-            )
-        except KeyError:
-            core_eol = "native"
-
-        try:
-            core_autocrlf_raw = config_stack.get("core", "autocrlf")
-            if isinstance(core_autocrlf_raw, bytes):
-                core_autocrlf: Union[bool, str] = core_autocrlf_raw.decode(
-                    "ascii"
-                ).lower()
+        # Set up a filter registry with line ending filters
+        filter_registry = FilterRegistry(config_stack)
+
+        # Create line ending filter if needed
+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
+        clean_filter = get_clean_filter(core_eol, autocrlf)
+
+        # Always register a text filter that can be used by gitattributes
+        # Even if autocrlf is false, gitattributes text=true should work
+        line_ending_filter = LineEndingFilter(
+            clean_conversion=clean_filter or convert_crlf_to_lf,
+            smudge_conversion=smudge_filter or convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        filter_registry.register_driver("text", line_ending_filter)
+
+        # Convert dict gitattributes to GitAttributes object for parent class
+        git_attrs_patterns = []
+        for pattern_str, attrs in gitattributes.items():
+            if isinstance(pattern_str, str):
+                pattern_bytes = pattern_str.encode("utf-8")
             else:
-                core_autocrlf = core_autocrlf_raw.lower()
-        except KeyError:
-            core_autocrlf = False
+                pattern_bytes = pattern_str
+            pattern = Pattern(pattern_bytes)
+            git_attrs_patterns.append((pattern, attrs))
 
-        self.fallback_read_filter = get_checkout_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
-        self.fallback_write_filter = get_checkin_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
+        git_attributes = GitAttributes(git_attrs_patterns)
+
+        # Initialize parent class with gitattributes
+        # The filter infrastructure will handle gitattributes processing
+        super().__init__(config_stack, git_attributes, filter_registry)
+
+        # Store original filters for backward compatibility
+        self.fallback_read_filter = smudge_filter
+        self.fallback_write_filter = clean_filter
 
     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkin operation."""
-        if self.fallback_write_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_write_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkin_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_write_filter is not None:
+            # Apply the clean filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=self.fallback_write_filter,
+                smudge_conversion=None,
+                binary_detection=True,
             )
+            filtered_data = line_ending_filter.clean(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
-        return blob
+        return result
 
     def checkout_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkout operation."""
-        if self.fallback_read_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_read_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkout_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_read_filter is not None:
+            # Apply the smudge filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=None,
+                smudge_conversion=self.fallback_read_filter,
+                binary_detection=True,
             )
+            filtered_data = line_ending_filter.smudge(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
-        return blob
+        return result
 
 
 def normalize_blob(
@@ -344,8 +462,10 @@ class TreeBlobNormalizer(BlobNormalizer):
         git_attributes: dict[str, Any],
         object_store: "BaseObjectStore",
         tree: Optional[ObjectID] = None,
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
-        super().__init__(config_stack, git_attributes)
+        super().__init__(config_stack, git_attributes, core_eol, autocrlf)
         if tree:
             self.existing_paths = {
                 name for name, _, _ in iter_tree_contents(object_store, tree)
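
A minimal usage sketch of the reworked BlobNormalizer, mirroring the new tests below. The path and file contents are illustrative, ConfigDict stands in for a real config stack, and conversion here is driven purely by autocrlf since no gitattributes are passed:

    from dulwich.config import ConfigDict
    from dulwich.line_ending import BlobNormalizer
    from dulwich.objects import Blob

    # Resolved config values are now passed in directly instead of being
    # read from the config stack inside __init__.
    normalizer = BlobNormalizer(ConfigDict(), {}, autocrlf=b"true")

    blob = Blob()
    blob.data = b"line1\r\nline2\r\n"

    # Clean (checkin, working tree -> repository): CRLF becomes LF.
    staged = normalizer.checkin_normalize(blob, b"example.txt")
    assert staged.data == b"line1\nline2\n"

    # Smudge (checkout, repository -> working tree): LF becomes CRLF.
    checked_out = normalizer.checkout_normalize(staged, b"example.txt")
    assert checked_out.data == b"line1\r\nline2\r\n"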

+ 279 - 20
tests/test_line_ending.py

@@ -22,10 +22,12 @@
 """Tests for the line ending conversion."""
 
 from dulwich.line_ending import (
+    BlobNormalizer,
+    LineEndingFilter,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
-    get_checkin_filter_autocrlf,
-    get_checkout_filter_autocrlf,
+    get_clean_filter_autocrlf,
+    get_smudge_filter_autocrlf,
     normalize_blob,
 )
 from dulwich.objects import Blob
@@ -56,35 +58,35 @@ class LineEndingConversion(TestCase):
 
 
 class GetLineEndingAutocrlfFilters(TestCase):
-    def test_get_checkin_filter_autocrlf_default(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"false")
+    def test_get_clean_filter_autocrlf_default(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"false")
 
-        self.assertEqual(checkin_filter, None)
+        self.assertEqual(clean_filter, None)
 
-    def test_get_checkin_filter_autocrlf_true(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"true")
+    def test_get_clean_filter_autocrlf_true(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"true")
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
-    def test_get_checkin_filter_autocrlf_input(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"input")
+    def test_get_clean_filter_autocrlf_input(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"input")
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
-    def test_get_checkout_filter_autocrlf_default(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"false")
+    def test_get_smudge_filter_autocrlf_default(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"false")
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
-    def test_get_checkout_filter_autocrlf_true(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"true")
+    def test_get_smudge_filter_autocrlf_true(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"true")
 
-        self.assertEqual(checkout_filter, convert_lf_to_crlf)
+        self.assertEqual(smudge_filter, convert_lf_to_crlf)
 
-    def test_get_checkout_filter_autocrlf_input(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"input")
+    def test_get_smudge_filter_autocrlf_input(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"input")
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
 
 class NormalizeBlobTestCase(TestCase):
@@ -195,3 +197,260 @@ class NormalizeBlobTestCase(TestCase):
 
         self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
         self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+
+class LineEndingFilterTests(TestCase):
+    """Test the LineEndingFilter class."""
+
+    def test_clean_no_conversion(self) -> None:
+        """Test clean with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), data)
+
+    def test_clean_with_conversion(self) -> None:
+        """Test clean with CRLF to LF conversion."""
+        filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), b"test\ndata")
+
+    def test_clean_binary_detection(self) -> None:
+        """Test clean skips binary files."""
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\r\n\x00data"
+        self.assertEqual(filter.clean(data), data)  # Should not convert
+
+    def test_smudge_no_conversion(self) -> None:
+        """Test smudge with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), data)
+
+    def test_smudge_with_conversion(self) -> None:
+        """Test smudge with LF to CRLF conversion."""
+        filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), b"test\r\ndata")
+
+    def test_smudge_binary_detection(self) -> None:
+        """Test smudge skips binary files."""
+        filter = LineEndingFilter(
+            smudge_conversion=convert_lf_to_crlf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\n\x00data"
+        self.assertEqual(filter.smudge(data), data)  # Should not convert
+
+
+class BlobNormalizerTests(TestCase):
+    """Test the BlobNormalizer class integration with filters."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+
+        self.config = ConfigDict()
+        self.gitattributes = {}
+
+    def test_autocrlf_true_checkin(self) -> None:
+        """Test checkin with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_true_checkout(self) -> None:
+        """Test checkout with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should convert to CRLF on checkout
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\r\nline2\r\n")
+
+    def test_autocrlf_input_checkin(self) -> None:
+        """Test checkin with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_input_checkout(self) -> None:
+        """Test checkout with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should NOT convert on checkout with input mode
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)  # Same object, no conversion
+
+    def test_autocrlf_false(self) -> None:
+        """Test with autocrlf=false (no conversion)."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        # Create blob with mixed line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\nline3"
+
+        # Should not convert on either operation
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+    def test_gitattributes_text_attr(self) -> None:
+        """Test gitattributes text attribute overrides autocrlf."""
+        # Set gitattributes to force text conversion
+        self.gitattributes[b"*.txt"] = {b"text": True}
+
+        # Even with autocrlf=false, should convert based on gitattributes
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should still convert because of gitattributes
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        # Note: with just text=true and no eol setting, it follows platform defaults
+        # For checkin, it should always normalize to LF
+        self.assertIsNot(result, blob)
+
+    def test_gitattributes_binary_attr(self) -> None:
+        """Test gitattributes -text attribute prevents conversion."""
+        # Set gitattributes to force binary (no conversion)
+        self.gitattributes[b"*.bin"] = {b"text": False}
+
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should not convert despite autocrlf=true
+        result = normalizer.checkin_normalize(blob, b"test.bin")
+        self.assertIs(result, blob)
+
+    def test_binary_file_detection(self) -> None:
+        """Test that binary files are not converted."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with binary content
+        blob = Blob()
+        blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
+
+        # Should not convert binary files
+        result = normalizer.checkin_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+
+class LineEndingIntegrationTests(TestCase):
+    """Integration tests for line ending conversion with the filter system."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+        from dulwich.filters import FilterRegistry
+
+        self.config = ConfigDict()
+        self.registry = FilterRegistry(self.config)
+
+    def test_filter_registry_with_line_endings(self) -> None:
+        """Test that line ending filters work through the registry."""
+        # Register a custom text filter that does line ending conversion
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        self.registry.register_driver("text", filter)
+
+        # Set up gitattributes
+        # Create GitAttributes
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
+        gitattributes = GitAttributes(patterns)
+
+        # Create normalizer
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Test round trip
+        blob = Blob()
+        blob.data = b"Hello\r\nWorld\r\n"
+
+        # Checkin should convert CRLF to LF
+        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(checked_in.data, b"Hello\nWorld\n")
+
+        # Checkout should convert LF to CRLF
+        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
+        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
+
+    def test_mixed_filters(self) -> None:
+        """Test multiple filters can coexist (line endings and LFS)."""
+        # This would be a more complex test requiring LFS setup
+        # For now, just verify the structure works
+        text_filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+        )
+        self.registry.register_driver("text", text_filter)
+
+        # Mock LFS filter
+        class MockLFSFilter:
+            def clean(self, data):
+                return b"LFS pointer"
+
+            def smudge(self, data):
+                return b"LFS content"
+
+        self.registry.register_driver("lfs", MockLFSFilter())
+
+        # Different files use different filters
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [
+            (Pattern(b"*.txt"), {b"filter": b"text"}),
+            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
+        ]
+        gitattributes = GitAttributes(patterns)
+
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Text file gets line ending conversion
+        text_blob = Blob()
+        text_blob.data = b"text\r\nfile"
+        result = normalizer.checkin_normalize(text_blob, b"test.txt")
+        self.assertEqual(result.data, b"text\nfile")
+
+        # Binary file gets LFS conversion
+        bin_blob = Blob()
+        bin_blob.data = b"binary content"
+        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
+        self.assertEqual(result.data, b"LFS pointer")
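
The deprecated names remain importable and delegate to the new ones; a small sketch of the migration path follows. The DeprecationWarning behaviour of @replace_me is an assumption, not shown in this diff:

    import warnings

    from dulwich.line_ending import (
        convert_lf_to_crlf,
        get_checkout_filter_autocrlf,  # deprecated alias
        get_smudge_filter_autocrlf,    # new name
    )

    # New name: core.autocrlf=true maps the smudge (checkout) side to LF -> CRLF.
    assert get_smudge_filter_autocrlf(b"true") is convert_lf_to_crlf

    # Old name still works until its removal in 0.25.0.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert get_checkout_filter_autocrlf(b"true") is convert_lf_to_crlf
        # Assumption: @replace_me emits a DeprecationWarning pointing at the new name.
        assert any(issubclass(w.category, DeprecationWarning) for w in caught)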