1 month ago · 95033faf2b
--- a/dulwich/line_ending.py
+++ b/dulwich/line_ending.py
@@ -26,11 +26,11 @@ about how it seems to work.
 
															 The normalization is a two-fold process that happens at two moments:
														
 
															 - When reading a file from the index and to the working directory. For example
														
 
															-  when doing a ``git clone`` or ``git checkout`` call. We call this process the
														
 
															-  read filter in this module.
														
 
															+  when doing a ``git clone`` or ``git checkout`` call. This is called the
														
 
															+  smudge filter (repository -> working tree).
														
 
															 - When writing a file to the index from the working directory. For example
														
 
															-  when doing a ``git add`` call. We call this process the write filter in this
														
 
															-  module.
														
 
															+  when doing a ``git add`` call. This is called the clean filter (working tree
														
 
															+  -> repository).
														
 
															 Note that when checking status (getting unstaged changes), whether or not
														
 
															 normalization is done on write depends on whether or not the file in the
														
@@ -108,13 +108,13 @@ attribute defined in ``.gitattributes``; it takes three possible values:
 
															       line-endings in the working directory and convert line-endings to LF
														
 
															       when writing to the index. When autocrlf is set to true, eol value is
														
 
															       ignored.
														
 
															-    - ``input``: Quite similar to the ``true`` value but only force the write
														
 
															+    - ``input``: Quite similar to the ``true`` value but only applies the clean
														
 
															       filter, ie line-ending of new files added to the index will get their
														
 
															       line-endings converted to LF.
														
 
															     - ``false`` (default): No normalization is done.
														
 
															 ``core.eol`` is the top-level configuration to define the line-ending to use
														
 
															-when applying the read_filer. It takes three possible values:
														
 
															+when applying the smudge filter. It takes three possible values:
														
 
															     - ``lf``: When normalization is done, force line-endings to be ``LF`` in the
														
 
															       working directory.
														
@@ -143,6 +143,9 @@ if TYPE_CHECKING:
 
															     from .config import StackedConfig
														
 
															     from .object_store import BaseObjectStore
														
 
															+from . import replace_me
														
 
															+from .attrs import GitAttributes, Pattern
														
 
															+from .filters import FilterBlobNormalizer, FilterDriver, FilterRegistry
														
 
															 from .object_store import iter_tree_contents
														
 
															 from .objects import Blob, ObjectID
														
 
															 from .patch import is_binary
														
@@ -151,6 +154,42 @@ CRLF = b"\r\n"
 
															 LF = b"\n"
														
 
															+class LineEndingFilter(FilterDriver):
														
 
															+    """Filter driver for line ending conversion."""
														
 
															+
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        clean_conversion: Optional[Callable[[bytes], bytes]] = None,
														
 
															+        smudge_conversion: Optional[Callable[[bytes], bytes]] = None,
														
 
															+        binary_detection: bool = True,
														
 
															+    ):
														
 
															+        self.clean_conversion = clean_conversion
														
 
															+        self.smudge_conversion = smudge_conversion
														
 
															+        self.binary_detection = binary_detection
														
 
															+
														
 
															+    def clean(self, data: bytes) -> bytes:
														
 
															+        """Apply line ending conversion for checkin (working tree -> repository)."""
														
 
															+        if self.clean_conversion is None:
														
 
															+            return data
														
 
															+
														
 
															+        # Skip binary files if detection is enabled
														
 
															+        if self.binary_detection and is_binary(data):
														
 
															+            return data
														
 
															+
														
 
															+        return self.clean_conversion(data)
														
 
															+
														
 
															+    def smudge(self, data: bytes) -> bytes:
														
 
															+        """Apply line ending conversion for checkout (repository -> working tree)."""
														
 
															+        if self.smudge_conversion is None:
														
 
															+            return data
														
 
															+
														
 
															+        # Skip binary files if detection is enabled
														
 
															+        if self.binary_detection and is_binary(data):
														
 
															+            return data
														
 
															+
														
 
															+        return self.smudge_conversion(data)
														
 
															+
														
 
															+
														
 
															 def convert_crlf_to_lf(text_hunk: bytes) -> bytes:
														
 
															     """Convert CRLF in text hunk into LF.
														
@@ -181,46 +220,26 @@ def convert_lf_to_crlf(text_hunk: bytes) -> bytes:
 
															     return CRLF.join(cleaned_parts)
														
 
															-def get_checkout_filter(
														
 
															-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
														
 
															+def get_smudge_filter(
														
 
															+    core_eol: str, core_autocrlf: bytes
														
 
															 ) -> Optional[Callable[[bytes], bytes]]:
														
 
															-    """Returns the correct checkout filter based on the passed arguments."""
														
 
															-    # TODO this function should process the git_attributes for the path and if
														
 
															-    # the text attribute is not defined, fallback on the
														
 
															-    # get_checkout_filter_autocrlf function with the autocrlf value
														
 
															-    if isinstance(core_autocrlf, bool):
														
 
															-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
														
 
															-    else:
														
 
															-        autocrlf_bytes = (
														
 
															-            core_autocrlf.encode("ascii")
														
 
															-            if isinstance(core_autocrlf, str)
														
 
															-            else core_autocrlf
														
 
															-        )
														
 
															-    return get_checkout_filter_autocrlf(autocrlf_bytes)
														
 
															+    """Returns the correct smudge filter based on the passed arguments."""
														
 
															+    # Git attributes handling is done by the filter infrastructure
														
 
															+    return get_smudge_filter_autocrlf(core_autocrlf)
														
 
															-def get_checkin_filter(
														
 
															-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
														
 
															+def get_clean_filter(
														
 
															+    core_eol: str, core_autocrlf: bytes
														
 
															 ) -> Optional[Callable[[bytes], bytes]]:
														
 
															-    """Returns the correct checkin filter based on the passed arguments."""
														
 
															-    # TODO this function should process the git_attributes for the path and if
														
 
															-    # the text attribute is not defined, fallback on the
														
 
															-    # get_checkin_filter_autocrlf function with the autocrlf value
														
 
															-    if isinstance(core_autocrlf, bool):
														
 
															-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
														
 
															-    else:
														
 
															-        autocrlf_bytes = (
														
 
															-            core_autocrlf.encode("ascii")
														
 
															-            if isinstance(core_autocrlf, str)
														
 
															-            else core_autocrlf
														
 
															-        )
														
 
															-    return get_checkin_filter_autocrlf(autocrlf_bytes)
														
 
															+    """Returns the correct clean filter based on the passed arguments."""
														
 
															+    # Git attributes handling is done by the filter infrastructure
														
 
															+    return get_clean_filter_autocrlf(core_autocrlf)
														
 
															-def get_checkout_filter_autocrlf(
														
 
															+def get_smudge_filter_autocrlf(
														
 
															     core_autocrlf: bytes,
														
 
															 ) -> Optional[Callable[[bytes], bytes]]:
														
 
															-    """Returns the correct checkout filter base on autocrlf value.
														
 
															+    """Returns the correct smudge filter based on autocrlf value.
														
 
															     Args:
														
 
															       core_autocrlf: The bytes configuration value of core.autocrlf.
														
@@ -234,10 +253,10 @@ def get_checkout_filter_autocrlf(
 
															     return None
														
 
															-def get_checkin_filter_autocrlf(
														
 
															+def get_clean_filter_autocrlf(
														
 
															     core_autocrlf: bytes,
														
 
															 ) -> Optional[Callable[[bytes], bytes]]:
														
 
															-    """Returns the correct checkin filter base on autocrlf value.
														
 
															+    """Returns the correct clean filter based on autocrlf value.
														
 
															     Args:
														
 
															       core_autocrlf: The bytes configuration value of core.autocrlf.
														
@@ -252,63 +271,162 @@ def get_checkin_filter_autocrlf(
 
															     return None
														
 
															-class BlobNormalizer:
														
 
															+# Backwards compatibility wrappers
														
 
															+@replace_me(since="0.23.1", remove_in="0.25.0")
														
 
															+def get_checkout_filter(
														
 
															+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
														
 
															+) -> Optional[Callable[[bytes], bytes]]:
														
 
															+    """Deprecated: Use get_smudge_filter instead."""
														
 
															+    # Convert core_autocrlf to bytes for compatibility
														
 
															+    if isinstance(core_autocrlf, bool):
														
 
															+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
														
 
															+    else:
														
 
															+        autocrlf_bytes = (
														
 
															+            core_autocrlf.encode("utf-8")
														
 
															+            if isinstance(core_autocrlf, str)
														
 
															+            else core_autocrlf
														
 
															+        )
														
 
															+    return get_smudge_filter(core_eol, autocrlf_bytes)
														
 
															+
														
 
															+
														
 
															+@replace_me(since="0.23.1", remove_in="0.25.0")
														
 
															+def get_checkin_filter(
														
 
															+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
														
 
															+) -> Optional[Callable[[bytes], bytes]]:
														
 
															+    """Deprecated: Use get_clean_filter instead."""
														
 
															+    # Convert core_autocrlf to bytes for compatibility
														
 
															+    if isinstance(core_autocrlf, bool):
														
 
															+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
														
 
															+    else:
														
 
															+        autocrlf_bytes = (
														
 
															+            core_autocrlf.encode("utf-8")
														
 
															+            if isinstance(core_autocrlf, str)
														
 
															+            else core_autocrlf
														
 
															+        )
														
 
															+    return get_clean_filter(core_eol, autocrlf_bytes)
														
 
															+
														
 
															+
														
 
															+@replace_me(since="0.23.1", remove_in="0.25.0")
														
 
															+def get_checkout_filter_autocrlf(
														
 
															+    core_autocrlf: bytes,
														
 
															+) -> Optional[Callable[[bytes], bytes]]:
														
 
															+    """Deprecated: Use get_smudge_filter_autocrlf instead."""
														
 
															+    return get_smudge_filter_autocrlf(core_autocrlf)
														
 
															+
														
 
															+
														
 
															+@replace_me(since="0.23.1", remove_in="0.25.0")
														
 
															+def get_checkin_filter_autocrlf(
														
 
															+    core_autocrlf: bytes,
														
 
															+) -> Optional[Callable[[bytes], bytes]]:
														
 
															+    """Deprecated: Use get_clean_filter_autocrlf instead."""
														
 
															+    return get_clean_filter_autocrlf(core_autocrlf)
														
 
															+
														
 
															+
														
 
															+class BlobNormalizer(FilterBlobNormalizer):
														
 
															     """An object to store computation result of which filter to apply based
														
 
															     on configuration, gitattributes, path and operation (checkin or checkout).
														
 
															+
														
 
															+    This class maintains backward compatibility while using the filter infrastructure.
														
 
															     """
														
 
															     def __init__(
														
 
															-        self, config_stack: "StackedConfig", gitattributes: dict[str, Any]
														
 
															+        self,
														
 
															+        config_stack: "StackedConfig",
														
 
															+        gitattributes: dict[str, Any],
														
 
															+        core_eol: str = "native",
														
 
															+        autocrlf: bytes = b"false",
														
 
															     ) -> None:
														
 
															-        self.config_stack = config_stack
														
 
															-        self.gitattributes = gitattributes
														
 
															-
														
 
															-        # Compute which filters we needs based on parameters
														
 
															-        try:
														
 
															-            core_eol_raw = config_stack.get("core", "eol")
														
 
															-            core_eol: str = (
														
 
															-                core_eol_raw.decode("ascii")
														
 
															-                if isinstance(core_eol_raw, bytes)
														
 
															-                else core_eol_raw
														
 
															-            )
														
 
															-        except KeyError:
														
 
															-            core_eol = "native"
														
 
															-
														
 
															-        try:
														
 
															-            core_autocrlf_raw = config_stack.get("core", "autocrlf")
														
 
															-            if isinstance(core_autocrlf_raw, bytes):
														
 
															-                core_autocrlf: Union[bool, str] = core_autocrlf_raw.decode(
														
 
															-                    "ascii"
														
 
															-                ).lower()
														
 
															+        # Set up a filter registry with line ending filters
														
 
															+        filter_registry = FilterRegistry(config_stack)
														
 
															+
														
 
															+        # Create line ending filter if needed
														
 
															+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
														
 
															+        clean_filter = get_clean_filter(core_eol, autocrlf)
														
 
															+
														
 
															+        # Always register a text filter that can be used by gitattributes
														
 
															+        # Even if autocrlf is false, gitattributes text=true should work
														
 
															+        line_ending_filter = LineEndingFilter(
														
 
															+            clean_conversion=clean_filter or convert_crlf_to_lf,
														
 
															+            smudge_conversion=smudge_filter or convert_lf_to_crlf,
														
 
															+            binary_detection=True,
														
 
															+        )
														
 
															+        filter_registry.register_driver("text", line_ending_filter)
														
 
															+
														
 
															+        # Convert dict gitattributes to GitAttributes object for parent class
														
 
															+        git_attrs_patterns = []
														
 
															+        for pattern_str, attrs in gitattributes.items():
														
 
															+            if isinstance(pattern_str, str):
														
 
															+                pattern_bytes = pattern_str.encode("utf-8")
														
 
															             else:
														
 
															-                core_autocrlf = core_autocrlf_raw.lower()
														
 
															-        except KeyError:
														
 
															-            core_autocrlf = False
														
 
															+                pattern_bytes = pattern_str
														
 
															+            pattern = Pattern(pattern_bytes)
														
 
															+            git_attrs_patterns.append((pattern, attrs))
														
 
															-        self.fallback_read_filter = get_checkout_filter(
														
 
															-            core_eol, core_autocrlf, self.gitattributes
														
 
															-        )
														
 
															-        self.fallback_write_filter = get_checkin_filter(
														
 
															-            core_eol, core_autocrlf, self.gitattributes
														
 
															-        )
														
 
															+        git_attributes = GitAttributes(git_attrs_patterns)
														
 
															+
														
 
															+        # Initialize parent class with gitattributes
														
 
															+        # The filter infrastructure will handle gitattributes processing
														
 
															+        super().__init__(config_stack, git_attributes, filter_registry)
														
 
															+
														
 
															+        # Store original filters for backward compatibility
														
 
															+        self.fallback_read_filter = smudge_filter
														
 
															+        self.fallback_write_filter = clean_filter
														
 
															     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
														
 
															         """Normalize a blob during a checkin operation."""
														
 
															-        if self.fallback_write_filter is not None:
														
 
															-            return normalize_blob(
														
 
															-                blob, self.fallback_write_filter, binary_detection=True
														
 
															+        # First try to get filter from gitattributes (handled by parent)
														
 
															+        result = super().checkin_normalize(blob, tree_path)
														
 
															+
														
 
															+        # Check if gitattributes explicitly disabled text conversion
														
 
															+        attrs = self.gitattributes.match_path(tree_path)
														
 
															+        if b"text" in attrs and attrs[b"text"] is False:
														
 
															+            # Explicitly marked as binary, no conversion
														
 
															+            return blob
														
 
															+
														
 
															+        # If no filter was applied via gitattributes and we have a fallback filter
														
 
															+        # (autocrlf is enabled), apply it to all files
														
 
															+        if result is blob and self.fallback_write_filter is not None:
														
 
															+            # Apply the clean filter with binary detection
														
 
															+            line_ending_filter = LineEndingFilter(
														
 
															+                clean_conversion=self.fallback_write_filter,
														
 
															+                smudge_conversion=None,
														
 
															+                binary_detection=True,
														
 
															             )
														
 
															+            filtered_data = line_ending_filter.clean(blob.data)
														
 
															+            if filtered_data != blob.data:
														
 
															+                new_blob = Blob()
														
 
															+                new_blob.data = filtered_data
														
 
															+                return new_blob
														
 
															-        return blob
														
 
															+        return result
														
 
															     def checkout_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
														
 
															         """Normalize a blob during a checkout operation."""
														
 
															-        if self.fallback_read_filter is not None:
														
 
															-            return normalize_blob(
														
 
															-                blob, self.fallback_read_filter, binary_detection=True
														
 
															+        # First try to get filter from gitattributes (handled by parent)
														
 
															+        result = super().checkout_normalize(blob, tree_path)
														
 
															+
														
 
															+        # Check if gitattributes explicitly disabled text conversion
														
 
															+        attrs = self.gitattributes.match_path(tree_path)
														
 
															+        if b"text" in attrs and attrs[b"text"] is False:
														
 
															+            # Explicitly marked as binary, no conversion
														
 
															+            return blob
														
 
															+
														
 
															+        # If no filter was applied via gitattributes and we have a fallback filter
														
 
															+        # (autocrlf is enabled), apply it to all files
														
 
															+        if result is blob and self.fallback_read_filter is not None:
														
 
															+            # Apply the smudge filter with binary detection
														
 
															+            line_ending_filter = LineEndingFilter(
														
 
															+                clean_conversion=None,
														
 
															+                smudge_conversion=self.fallback_read_filter,
														
 
															+                binary_detection=True,
														
 
															             )
														
 
															+            filtered_data = line_ending_filter.smudge(blob.data)
														
 
															+            if filtered_data != blob.data:
														
 
															+                new_blob = Blob()
														
 
															+                new_blob.data = filtered_data
														
 
															+                return new_blob
														
 
															-        return blob
														
 
															+        return result
														
 
															 def normalize_blob(
														
@@ -344,8 +462,10 @@ class TreeBlobNormalizer(BlobNormalizer):
 
															         git_attributes: dict[str, Any],
														
 
															         object_store: "BaseObjectStore",
														
 
															         tree: Optional[ObjectID] = None,
														
 
															+        core_eol: str = "native",
														
 
															+        autocrlf: bytes = b"false",
														
 
															     ) -> None:
														
 
															-        super().__init__(config_stack, git_attributes)
														
 
															+        super().__init__(config_stack, git_attributes, core_eol, autocrlf)
														
 
															         if tree:
														
 
															             self.existing_paths = {
														
 
															                 name for name, _, _ in iter_tree_contents(object_store, tree)
														
--- a/tests/test_line_ending.py
+++ b/tests/test_line_ending.py
@@ -22,10 +22,12 @@
 
															 """Tests for the line ending conversion."""
														
 
															 from dulwich.line_ending import (
														
 
															+    BlobNormalizer,
														
 
															+    LineEndingFilter,
														
 
															     convert_crlf_to_lf,
														
 
															     convert_lf_to_crlf,
														
 
															-    get_checkin_filter_autocrlf,
														
 
															-    get_checkout_filter_autocrlf,
														
 
															+    get_clean_filter_autocrlf,
														
 
															+    get_smudge_filter_autocrlf,
														
 
															     normalize_blob,
														
 
															 )
														
 
															 from dulwich.objects import Blob
														
@@ -56,35 +58,35 @@ class LineEndingConversion(TestCase):
 
															 class GetLineEndingAutocrlfFilters(TestCase):
														
 
															-    def test_get_checkin_filter_autocrlf_default(self) -> None:
														
 
															-        checkin_filter = get_checkin_filter_autocrlf(b"false")
														
 
															+    def test_get_clean_filter_autocrlf_default(self) -> None:
														
 
															+        clean_filter = get_clean_filter_autocrlf(b"false")
														
 
															-        self.assertEqual(checkin_filter, None)
														
 
															+        self.assertEqual(clean_filter, None)
														
 
															-    def test_get_checkin_filter_autocrlf_true(self) -> None:
														
 
															-        checkin_filter = get_checkin_filter_autocrlf(b"true")
														
 
															+    def test_get_clean_filter_autocrlf_true(self) -> None:
														
 
															+        clean_filter = get_clean_filter_autocrlf(b"true")
														
 
															-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
														
 
															+        self.assertEqual(clean_filter, convert_crlf_to_lf)
														
 
															-    def test_get_checkin_filter_autocrlf_input(self) -> None:
														
 
															-        checkin_filter = get_checkin_filter_autocrlf(b"input")
														
 
															+    def test_get_clean_filter_autocrlf_input(self) -> None:
														
 
															+        clean_filter = get_clean_filter_autocrlf(b"input")
														
 
															-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
														
 
															+        self.assertEqual(clean_filter, convert_crlf_to_lf)
														
 
															-    def test_get_checkout_filter_autocrlf_default(self) -> None:
														
 
															-        checkout_filter = get_checkout_filter_autocrlf(b"false")
														
 
															+    def test_get_smudge_filter_autocrlf_default(self) -> None:
														
 
															+        smudge_filter = get_smudge_filter_autocrlf(b"false")
														
 
															-        self.assertEqual(checkout_filter, None)
														
 
															+        self.assertEqual(smudge_filter, None)
														
 
															-    def test_get_checkout_filter_autocrlf_true(self) -> None:
														
 
															-        checkout_filter = get_checkout_filter_autocrlf(b"true")
														
 
															+    def test_get_smudge_filter_autocrlf_true(self) -> None:
														
 
															+        smudge_filter = get_smudge_filter_autocrlf(b"true")
														
 
															-        self.assertEqual(checkout_filter, convert_lf_to_crlf)
														
 
															+        self.assertEqual(smudge_filter, convert_lf_to_crlf)
														
 
															-    def test_get_checkout_filter_autocrlf_input(self) -> None:
														
 
															-        checkout_filter = get_checkout_filter_autocrlf(b"input")
														
 
															+    def test_get_smudge_filter_autocrlf_input(self) -> None:
														
 
															+        smudge_filter = get_smudge_filter_autocrlf(b"input")
														
 
															-        self.assertEqual(checkout_filter, None)
														
 
															+        self.assertEqual(smudge_filter, None)
														
 
															 class NormalizeBlobTestCase(TestCase):
														
@@ -195,3 +197,260 @@ class NormalizeBlobTestCase(TestCase):
 
															         self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
														
 
															         self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
														
 
															+
														
 
															+
														
 
															+class LineEndingFilterTests(TestCase):
														
 
															+    """Test the LineEndingFilter class."""
														
 
															+
														
 
															+    def test_clean_no_conversion(self) -> None:
														
 
															+        """Test clean with no conversion function."""
														
 
															+        filter = LineEndingFilter()
														
 
															+        data = b"test\r\ndata"
														
 
															+        self.assertEqual(filter.clean(data), data)
														
 
															+
														
 
															+    def test_clean_with_conversion(self) -> None:
														
 
															+        """Test clean with CRLF to LF conversion."""
														
 
															+        filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
														
 
															+        data = b"test\r\ndata"
														
 
															+        self.assertEqual(filter.clean(data), b"test\ndata")
														
 
															+
														
 
															+    def test_clean_binary_detection(self) -> None:
														
 
															+        """Test clean skips binary files."""
														
 
															+        filter = LineEndingFilter(
														
 
															+            clean_conversion=convert_crlf_to_lf, binary_detection=True
														
 
															+        )
														
 
															+        # Binary data with null byte
														
 
															+        data = b"test\r\n\x00data"
														
 
															+        self.assertEqual(filter.clean(data), data)  # Should not convert
														
 
															+
														
 
															+    def test_smudge_no_conversion(self) -> None:
														
 
															+        """Test smudge with no conversion function."""
														
 
															+        filter = LineEndingFilter()
														
 
															+        data = b"test\ndata"
														
 
															+        self.assertEqual(filter.smudge(data), data)
														
 
															+
														
 
															+    def test_smudge_with_conversion(self) -> None:
														
 
															+        """Test smudge with LF to CRLF conversion."""
														
 
															+        filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
														
 
															+        data = b"test\ndata"
														
 
															+        self.assertEqual(filter.smudge(data), b"test\r\ndata")
														
 
															+
														
 
															+    def test_smudge_binary_detection(self) -> None:
														
 
															+        """Test smudge skips binary files."""
														
 
															+        filter = LineEndingFilter(
														
 
															+            smudge_conversion=convert_lf_to_crlf, binary_detection=True
														
 
															+        )
														
 
															+        # Binary data with null byte
														
 
															+        data = b"test\n\x00data"
														
 
															+        self.assertEqual(filter.smudge(data), data)  # Should not convert
														
 
															+
														
 
															+
														
 
															+class BlobNormalizerTests(TestCase):
														
 
															+    """Test the BlobNormalizer class integration with filters."""
														
 
															+
														
 
															+    def setUp(self) -> None:
														
 
															+        super().setUp()
														
 
															+        from dulwich.config import ConfigDict
														
 
															+
														
 
															+        self.config = ConfigDict()
														
 
															+        self.gitattributes = {}
														
 
															+
														
 
															+    def test_autocrlf_true_checkin(self) -> None:
														
 
															+        """Test checkin with autocrlf=true."""
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
														
 
															+
														
 
															+        # Create blob with CRLF
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\r\nline2\r\n"
														
 
															+
														
 
															+        # Should convert to LF on checkin
														
 
															+        result = normalizer.checkin_normalize(blob, b"test.txt")
														
 
															+        self.assertEqual(result.data, b"line1\nline2\n")
														
 
															+
														
 
															+    def test_autocrlf_true_checkout(self) -> None:
														
 
															+        """Test checkout with autocrlf=true."""
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
														
 
															+
														
 
															+        # Create blob with LF
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\nline2\n"
														
 
															+
														
 
															+        # Should convert to CRLF on checkout
														
 
															+        result = normalizer.checkout_normalize(blob, b"test.txt")
														
 
															+        self.assertEqual(result.data, b"line1\r\nline2\r\n")
														
 
															+
														
 
															+    def test_autocrlf_input_checkin(self) -> None:
														
 
															+        """Test checkin with autocrlf=input."""
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
														
 
															+
														
 
															+        # Create blob with CRLF
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\r\nline2\r\n"
														
 
															+
														
 
															+        # Should convert to LF on checkin
														
 
															+        result = normalizer.checkin_normalize(blob, b"test.txt")
														
 
															+        self.assertEqual(result.data, b"line1\nline2\n")
														
 
															+
														
 
															+    def test_autocrlf_input_checkout(self) -> None:
														
 
															+        """Test checkout with autocrlf=input."""
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
														
 
															+
														
 
															+        # Create blob with LF
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\nline2\n"
														
 
															+
														
 
															+        # Should NOT convert on checkout with input mode
														
 
															+        result = normalizer.checkout_normalize(blob, b"test.txt")
														
 
															+        self.assertIs(result, blob)  # Same object, no conversion
														
 
															+
														
 
															+    def test_autocrlf_false(self) -> None:
														
 
															+        """Test with autocrlf=false (no conversion)."""
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
														
 
															+
														
 
															+        # Create blob with mixed line endings
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\r\nline2\nline3"
														
 
															+
														
 
															+        # Should not convert on either operation
														
 
															+        result = normalizer.checkin_normalize(blob, b"test.txt")
														
 
															+        self.assertIs(result, blob)
														
 
															+
														
 
															+        result = normalizer.checkout_normalize(blob, b"test.txt")
														
 
															+        self.assertIs(result, blob)
														
 
															+
														
 
															+    def test_gitattributes_text_attr(self) -> None:
														
 
															+        """Test gitattributes text attribute overrides autocrlf."""
														
 
															+        # Set gitattributes to force text conversion
														
 
															+        self.gitattributes[b"*.txt"] = {b"text": True}
														
 
															+
														
 
															+        # Even with autocrlf=false, should convert based on gitattributes
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
														
 
															+
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\r\nline2\r\n"
														
 
															+
														
 
															+        # Should still convert because of gitattributes
														
 
															+        result = normalizer.checkin_normalize(blob, b"test.txt")
														
 
															+        # Note: with just text=true and no eol setting, it follows platform defaults
														
 
															+        # For checkin, it should always normalize to LF
														
 
															+        self.assertIsNot(result, blob)
														
 
															+
														
 
															+    def test_gitattributes_binary_attr(self) -> None:
														
 
															+        """Test gitattributes -text attribute prevents conversion."""
														
 
															+        # Set gitattributes to force binary (no conversion)
														
 
															+        self.gitattributes[b"*.bin"] = {b"text": False}
														
 
															+
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
														
 
															+
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\r\nline2\r\n"
														
 
															+
														
 
															+        # Should not convert despite autocrlf=true
														
 
															+        result = normalizer.checkin_normalize(blob, b"test.bin")
														
 
															+        self.assertIs(result, blob)
														
 
															+
														
 
															+    def test_binary_file_detection(self) -> None:
														
 
															+        """Test that binary files are not converted."""
														
 
															+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
														
 
															+
														
 
															+        # Create blob with binary content
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
														
 
															+
														
 
															+        # Should not convert binary files
														
 
															+        result = normalizer.checkin_normalize(blob, b"binary.dat")
														
 
															+        self.assertIs(result, blob)
														
 
															+
														
 
															+        result = normalizer.checkout_normalize(blob, b"binary.dat")
														
 
															+        self.assertIs(result, blob)
														
 
															+
														
 
															+
														
 
															+class LineEndingIntegrationTests(TestCase):
														
 
															+    """Integration tests for line ending conversion with the filter system."""
														
 
															+
														
 
															+    def setUp(self) -> None:
														
 
															+        super().setUp()
														
 
															+        from dulwich.config import ConfigDict
														
 
															+        from dulwich.filters import FilterRegistry
														
 
															+
														
 
															+        self.config = ConfigDict()
														
 
															+        self.registry = FilterRegistry(self.config)
														
 
															+
														
 
															+    def test_filter_registry_with_line_endings(self) -> None:
														
 
															+        """Test that line ending filters work through the registry."""
														
 
															+        # Register a custom text filter that does line ending conversion
														
 
															+        filter = LineEndingFilter(
														
 
															+            clean_conversion=convert_crlf_to_lf,
														
 
															+            smudge_conversion=convert_lf_to_crlf,
														
 
															+            binary_detection=True,
														
 
															+        )
														
 
															+        self.registry.register_driver("text", filter)
														
 
															+
														
 
															+        # Set up gitattributes
														
 
															+        # Create GitAttributes
														
 
															+        from dulwich.attrs import GitAttributes, Pattern
														
 
															+
														
 
															+        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
														
 
															+        gitattributes = GitAttributes(patterns)
														
 
															+
														
 
															+        # Create normalizer
														
 
															+        from dulwich.filters import FilterBlobNormalizer
														
 
															+
														
 
															+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
														
 
															+
														
 
															+        # Test round trip
														
 
															+        blob = Blob()
														
 
															+        blob.data = b"Hello\r\nWorld\r\n"
														
 
															+
														
 
															+        # Checkin should convert CRLF to LF
														
 
															+        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
														
 
															+        self.assertEqual(checked_in.data, b"Hello\nWorld\n")
														
 
															+
														
 
															+        # Checkout should convert LF to CRLF
														
 
															+        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
														
 
															+        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
														
 
															+
														
 
															+    def test_mixed_filters(self) -> None:
														
 
															+        """Test multiple filters can coexist (line endings and LFS)."""
														
 
															+        # This would be a more complex test requiring LFS setup
														
 
															+        # For now, just verify the structure works
														
 
															+        text_filter = LineEndingFilter(
														
 
															+            clean_conversion=convert_crlf_to_lf,
														
 
															+            smudge_conversion=convert_lf_to_crlf,
														
 
															+        )
														
 
															+        self.registry.register_driver("text", text_filter)
														
 
															+
														
 
															+        # Mock LFS filter
														
 
															+        class MockLFSFilter:
														
 
															+            def clean(self, data):
														
 
															+                return b"LFS pointer"
														
 
															+
														
 
															+            def smudge(self, data):
														
 
															+                return b"LFS content"
														
 
															+
														
 
															+        self.registry.register_driver("lfs", MockLFSFilter())
														
 
															+
														
 
															+        # Different files use different filters
														
 
															+        from dulwich.attrs import GitAttributes, Pattern
														
 
															+
														
 
															+        patterns = [
														
 
															+            (Pattern(b"*.txt"), {b"filter": b"text"}),
														
 
															+            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
														
 
															+        ]
														
 
															+        gitattributes = GitAttributes(patterns)
														
 
															+
														
 
															+        from dulwich.filters import FilterBlobNormalizer
														
 
															+
														
 
															+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
														
 
															+
														
 
															+        # Text file gets line ending conversion
														
 
															+        text_blob = Blob()
														
 
															+        text_blob.data = b"text\r\nfile"
														
 
															+        result = normalizer.checkin_normalize(text_blob, b"test.txt")
														
 
															+        self.assertEqual(result.data, b"text\nfile")
														
 
															+
														
 
															+        # Binary file gets LFS conversion
														
 
															+        bin_blob = Blob()
														
 
															+        bin_blob.data = b"binary content"
														
 
															+        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
														
 
															+        self.assertEqual(result.data, b"LFS pointer")