il y a 2 mois · d1ee4f1a18
--- a/dulwich/attrs.py
+++ b/dulwich/attrs.py
@@ -345,28 +345,31 @@ class GitAttributes:
 
				         """
			
 
				         # Find existing pattern
			
 
				         pattern_obj = None
			
 
				-        pattern_index = None
			
 
				+        attrs_dict: Optional[dict[bytes, AttributeValue]] = None
			
 
				+        pattern_index = -1
			
 
				+
			
 
				         for i, (p, attrs) in enumerate(self._patterns):
			
 
				             if p.pattern == pattern:
			
 
				                 pattern_obj = p
			
 
				+                # Convert to mutable dict
			
 
				+                attrs_dict = dict(attrs)
			
 
				                 pattern_index = i
			
 
				                 break
			
 
				 
			
 
				         if pattern_obj is None:
			
 
				             # Create new pattern
			
 
				             pattern_obj = Pattern(pattern)
			
 
				-            attrs_dict: dict[bytes, AttributeValue] = {name: value}
			
 
				+            attrs_dict = {name: value}
			
 
				             self._patterns.append((pattern_obj, attrs_dict))
			
 
				         else:
			
 
				-            # Update existing pattern
			
 
				-            # Create a new dict with updated attributes
			
 
				-            assert (
			
 
				-                pattern_index is not None
			
 
				-            )  # pattern_index is set when pattern_obj is found
			
 
				-            old_attrs = self._patterns[pattern_index][1]
			
 
				-            new_attrs = dict(old_attrs)
			
 
				-            new_attrs[name] = value
			
 
				-            self._patterns[pattern_index] = (pattern_obj, new_attrs)
			
 
				+            # Update the existing pattern in the list
			
 
				+            assert pattern_index >= 0
			
 
				+            self._patterns[pattern_index] = (pattern_obj, attrs_dict)
			
 
				+
			
 
				+        # Update the attribute
			
 
				+        if attrs_dict is None:
			
 
				+            raise AssertionError("attrs_dict should not be None at this point")
			
 
				+        attrs_dict[name] = value
			
 
				 
			
 
				     def remove_pattern(self, pattern: bytes) -> None:
			
 
				         """Remove all attributes for a pattern.
			
--- a/dulwich/filters.py
+++ b/dulwich/filters.py
@@ -22,10 +22,9 @@
 
				 """Implementation of Git filter drivers (clean/smudge filters)."""
			
 
				 
			
 
				 import subprocess
			
 
				-from collections.abc import Mapping
			
 
				 from typing import TYPE_CHECKING, Callable, Optional, Protocol
			
 
				 
			
 
				-from .attrs import AttributeValue, Pattern, match_path
			
 
				+from .attrs import GitAttributes
			
 
				 from .objects import Blob
			
 
				 
			
 
				 if TYPE_CHECKING:
			
@@ -93,6 +92,10 @@ class FilterRegistry:
 
				 
			
 
				         # Register built-in filter factories
			
 
				         self.register_factory("lfs", self._create_lfs_filter)
			
 
				+        self.register_factory("text", self._create_text_filter)
			
 
				+
			
 
				+        # Auto-register line ending filter if autocrlf is enabled
			
 
				+        self._setup_line_ending_filter()
			
 
				 
			
 
				     def register_factory(
			
 
				         self, name: str, factory: Callable[["FilterRegistry"], FilterDriver]
			
@@ -112,9 +115,9 @@ class FilterRegistry:
 
				 
			
 
				         # Try to create from factory
			
 
				         if name in self._factories:
			
 
				-            driver = self._factories[name](self)
			
 
				-            self._drivers[name] = driver
			
 
				-            return driver
			
 
				+            factory_driver = self._factories[name](self)
			
 
				+            self._drivers[name] = factory_driver
			
 
				+            return factory_driver
			
 
				 
			
 
				         # Try to create from config
			
 
				         if self.config is not None:
			
@@ -135,21 +138,21 @@ class FilterRegistry:
 
				 
			
 
				         # Get clean command
			
 
				         try:
			
 
				-            clean_value = self.config.get(("filter", name), "clean")
			
 
				-            if isinstance(clean_value, bytes):
			
 
				-                clean_cmd = clean_value.decode("utf-8")
			
 
				+            clean_cmd_raw = self.config.get(("filter", name), "clean")
			
 
				+            if isinstance(clean_cmd_raw, bytes):
			
 
				+                clean_cmd = clean_cmd_raw.decode("utf-8")
			
 
				             else:
			
 
				-                clean_cmd = clean_value
			
 
				+                clean_cmd = clean_cmd_raw
			
 
				         except KeyError:
			
 
				             pass
			
 
				 
			
 
				         # Get smudge command
			
 
				         try:
			
 
				-            smudge_value = self.config.get(("filter", name), "smudge")
			
 
				-            if isinstance(smudge_value, bytes):
			
 
				-                smudge_cmd = smudge_value.decode("utf-8")
			
 
				+            smudge_cmd_raw = self.config.get(("filter", name), "smudge")
			
 
				+            if isinstance(smudge_cmd_raw, bytes):
			
 
				+                smudge_cmd = smudge_cmd_raw.decode("utf-8")
			
 
				             else:
			
 
				-                smudge_cmd = smudge_value
			
 
				+                smudge_cmd = smudge_cmd_raw
			
 
				         except KeyError:
			
 
				             pass
			
 
				 
			
@@ -174,30 +177,99 @@ class FilterRegistry:
 
				 
			
 
				         return LFSFilterDriver(lfs_store)
			
 
				 
			
 
				+    def _create_text_filter(self, registry: "FilterRegistry") -> FilterDriver:
			
 
				+        """Create text filter driver for line ending conversion.
			
 
				+
			
 
				+        This filter is used when files have the 'text' attribute set explicitly.
			
 
				+        It always normalizes line endings on checkin (CRLF -> LF).
			
 
				+        """
			
 
				+        from .line_ending import (
			
 
				+            LineEndingFilter,
			
 
				+            convert_crlf_to_lf,
			
 
				+            get_smudge_filter,
			
 
				+        )
			
 
				+
			
 
				+        if self.config is None:
			
 
				+            # Default text filter: always normalize on checkin
			
 
				+            return LineEndingFilter(
			
 
				+                clean_conversion=convert_crlf_to_lf,
			
 
				+                smudge_conversion=None,
			
 
				+                binary_detection=True,
			
 
				+            )
			
 
				+
			
 
				+        # Get core.eol and core.autocrlf settings for smudge behavior
			
 
				+        try:
			
 
				+            core_eol_raw = self.config.get("core", "eol")
			
 
				+            core_eol: str = (
			
 
				+                core_eol_raw.decode("ascii")
			
 
				+                if isinstance(core_eol_raw, bytes)
			
 
				+                else core_eol_raw
			
 
				+            )
			
 
				+        except KeyError:
			
 
				+            core_eol = "native"
			
 
				+
			
 
				+        # Parse autocrlf as bytes (can be b"true", b"input", or b"false")
			
 
				+        try:
			
 
				+            autocrlf_raw = self.config.get("core", "autocrlf")
			
 
				+            autocrlf: bytes = (
			
 
				+                autocrlf_raw.lower()
			
 
				+                if isinstance(autocrlf_raw, bytes)
			
 
				+                else str(autocrlf_raw).lower().encode("ascii")
			
 
				+            )
			
 
				+        except KeyError:
			
 
				+            autocrlf = b"false"
			
 
				+
			
 
				+        # For explicit text attribute:
			
 
				+        # - Always normalize to LF on checkin (clean)
			
 
				+        # - Smudge behavior depends on core.eol and core.autocrlf
			
 
				+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
			
 
				+        clean_filter = convert_crlf_to_lf
			
 
				+
			
 
				+        return LineEndingFilter(
			
 
				+            clean_conversion=clean_filter,
			
 
				+            smudge_conversion=smudge_filter,
			
 
				+            binary_detection=True,
			
 
				+        )
			
 
				+
			
 
				+    def _setup_line_ending_filter(self) -> None:
			
 
				+        """Automatically register line ending filter if configured."""
			
 
				+        if self.config is None:
			
 
				+            return
			
 
				+
			
 
				+        # Parse autocrlf as bytes
			
 
				+        try:
			
 
				+            autocrlf_raw = self.config.get("core", "autocrlf")
			
 
				+            autocrlf: bytes = (
			
 
				+                autocrlf_raw.lower()
			
 
				+                if isinstance(autocrlf_raw, bytes)
			
 
				+                else str(autocrlf_raw).lower().encode("ascii")
			
 
				+            )
			
 
				+        except KeyError:
			
 
				+            return
			
 
				+
			
 
				+        # If autocrlf is enabled, register the text filter
			
 
				+        if autocrlf in (b"true", b"input"):
			
 
				+            # Pre-create the text filter so it's available
			
 
				+            self.get_driver("text")
			
 
				+
			
 
				 
			
 
				 def get_filter_for_path(
			
 
				     path: bytes,
			
 
				-    gitattributes: dict[bytes, dict[bytes, AttributeValue]],
			
 
				+    gitattributes: "GitAttributes",
			
 
				     filter_registry: FilterRegistry,
			
 
				 ) -> Optional[FilterDriver]:
			
 
				     """Get the appropriate filter driver for a given path.
			
 
				 
			
 
				     Args:
			
 
				         path: Path to check
			
 
				-        gitattributes: Parsed gitattributes (pattern -> attributes mapping)
			
 
				+        gitattributes: GitAttributes object with parsed patterns
			
 
				         filter_registry: Registry of filter drivers
			
 
				 
			
 
				     Returns:
			
 
				         FilterDriver instance or None
			
 
				     """
			
 
				-    # Convert gitattributes dict to list of (Pattern, attrs) tuples
			
 
				-    patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] = []
			
 
				-    for pattern_bytes, attrs in gitattributes.items():
			
 
				-        pattern = Pattern(pattern_bytes)
			
 
				-        patterns.append((pattern, attrs))
			
 
				-
			
 
				     # Get all attributes for this path
			
 
				-    attributes = match_path(patterns, path)
			
 
				+    attributes = gitattributes.match_path(path)
			
 
				 
			
 
				     # Check if there's a filter attribute
			
 
				     filter_name = attributes.get(b"filter")
			
@@ -209,6 +281,31 @@ def get_filter_for_path(
 
				             return filter_registry.get_driver(filter_name_str)
			
 
				         return None
			
 
				 
			
 
				+    # Check for text attribute
			
 
				+    text_attr = attributes.get(b"text")
			
 
				+    if text_attr is True:
			
 
				+        # Use the text filter for line ending conversion
			
 
				+        return filter_registry.get_driver("text")
			
 
				+    elif text_attr is False:
			
 
				+        # -text means binary, no conversion
			
 
				+        return None
			
 
				+
			
 
				+    # If no explicit text attribute, check if autocrlf is enabled
			
 
				+    # When autocrlf is true/input, files are treated as text by default
			
 
				+    if filter_registry.config is not None:
			
 
				+        try:
			
 
				+            autocrlf_raw = filter_registry.config.get("core", "autocrlf")
			
 
				+            autocrlf: bytes = (
			
 
				+                autocrlf_raw.lower()
			
 
				+                if isinstance(autocrlf_raw, bytes)
			
 
				+                else str(autocrlf_raw).lower().encode("ascii")
			
 
				+            )
			
 
				+            if autocrlf in (b"true", b"input"):
			
 
				+                # Use text filter for files without explicit attributes
			
 
				+                return filter_registry.get_driver("text")
			
 
				+        except KeyError:
			
 
				+            pass
			
 
				+
			
 
				     return None
			
 
				 
			
 
				 
			
@@ -221,7 +318,7 @@ class FilterBlobNormalizer:
 
				     def __init__(
			
 
				         self,
			
 
				         config_stack: Optional["StackedConfig"],
			
 
				-        gitattributes: dict[bytes, dict[bytes, AttributeValue]],
			
 
				+        gitattributes: GitAttributes,
			
 
				         filter_registry: Optional[FilterRegistry] = None,
			
 
				         repo=None,
			
 
				     ) -> None:
			
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -1,4 +1,4 @@
 
				-# test_filters.py -- tests for filter drivers
			
 
				+# test_filters.py -- Tests for filters
			
 
				 # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
			
 
				 #
			
 
				 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
			
@@ -19,239 +19,181 @@
 
				 # License, Version 2.0.
			
 
				 #
			
 
				 
			
 
				-"""Tests for filter drivers support."""
			
 
				+"""Tests for filters."""
			
 
				 
			
 
				-import sys
			
 
				-from unittest import skipIf
			
 
				+import os
			
 
				+import tempfile
			
 
				+import unittest
			
 
				 
			
 
				-from dulwich.config import ConfigDict
			
 
				-from dulwich.filters import (
			
 
				-    FilterBlobNormalizer,
			
 
				-    FilterRegistry,
			
 
				-    ProcessFilterDriver,
			
 
				-    get_filter_for_path,
			
 
				-)
			
 
				-from dulwich.objects import Blob
			
 
				+from dulwich import porcelain
			
 
				+from dulwich.repo import Repo
			
 
				 
			
 
				 from . import TestCase
			
 
				 
			
 
				 
			
 
				-class ProcessFilterDriverTests(TestCase):
			
 
				-    @skipIf(sys.platform == "win32", "Unix shell commands")
			
 
				-    def test_clean_filter(self) -> None:
			
 
				-        """Test clean filter with external command."""
			
 
				-        # Use a simple command that converts to uppercase
			
 
				-        driver = ProcessFilterDriver(clean_cmd="tr '[:lower:]' '[:upper:]'")
			
 
				-        result = driver.clean(b"hello world")
			
 
				-        self.assertEqual(result, b"HELLO WORLD")
			
 
				-
			
 
				-    @skipIf(sys.platform == "win32", "Unix shell commands")
			
 
				-    def test_smudge_filter(self) -> None:
			
 
				-        """Test smudge filter with external command."""
			
 
				-        # Use a simple command that converts to lowercase
			
 
				-        driver = ProcessFilterDriver(smudge_cmd="tr '[:upper:]' '[:lower:]'")
			
 
				-        result = driver.smudge(b"HELLO WORLD")
			
 
				-        self.assertEqual(result, b"hello world")
			
 
				-
			
 
				-    def test_no_filters(self) -> None:
			
 
				-        """Test driver with no filters configured."""
			
 
				-        driver = ProcessFilterDriver()
			
 
				-        data = b"test data"
			
 
				-        self.assertEqual(driver.clean(data), data)
			
 
				-        self.assertEqual(driver.smudge(data), data)
			
 
				-
			
 
				-    @skipIf(sys.platform == "win32", "Unix shell commands")
			
 
				-    def test_failing_filter(self) -> None:
			
 
				-        """Test that failing filter propagates the error."""
			
 
				-        import subprocess
			
 
				-
			
 
				-        # Use a command that will fail
			
 
				-        driver = ProcessFilterDriver(clean_cmd="false")
			
 
				-        data = b"test data"
			
 
				-        # Should raise CalledProcessError
			
 
				-        with self.assertRaises(subprocess.CalledProcessError):
			
 
				-            driver.clean(data)
			
 
				-
			
 
				-        # Test smudge filter too
			
 
				-        driver = ProcessFilterDriver(smudge_cmd="false")
			
 
				-        with self.assertRaises(subprocess.CalledProcessError):
			
 
				-            driver.smudge(data)
			
 
				-
			
 
				-
			
 
				-class FilterRegistryTests(TestCase):
			
 
				-    def setUp(self) -> None:
			
 
				-        super().setUp()
			
 
				-        self.config = ConfigDict()
			
 
				-        self.registry = FilterRegistry(self.config)
			
 
				-
			
 
				-    def test_register_and_get_driver(self) -> None:
			
 
				-        """Test registering and retrieving a driver."""
			
 
				-        driver = ProcessFilterDriver(clean_cmd="cat")
			
 
				-        self.registry.register_driver("test", driver)
			
 
				-
			
 
				-        retrieved = self.registry.get_driver("test")
			
 
				-        self.assertIs(retrieved, driver)
			
 
				-
			
 
				-    def test_get_nonexistent_driver(self) -> None:
			
 
				-        """Test getting a non-existent driver."""
			
 
				-        result = self.registry.get_driver("nonexistent")
			
 
				-        self.assertIsNone(result)
			
 
				-
			
 
				-    def test_register_factory(self) -> None:
			
 
				-        """Test registering a driver factory."""
			
 
				-        created_driver = ProcessFilterDriver(clean_cmd="cat")
			
 
				-
			
 
				-        def factory(registry):
			
 
				-            return created_driver
			
 
				-
			
 
				-        self.registry.register_factory("test", factory)
			
 
				-
			
 
				-        # Getting driver should invoke factory
			
 
				-        retrieved = self.registry.get_driver("test")
			
 
				-        self.assertIs(retrieved, created_driver)
			
 
				-
			
 
				-        # Second get should return cached instance
			
 
				-        retrieved2 = self.registry.get_driver("test")
			
 
				-        self.assertIs(retrieved2, created_driver)
			
 
				-
			
 
				-    def test_create_from_config(self) -> None:
			
 
				-        """Test creating driver from config."""
			
 
				-        # Set up config using the proper Config interface
			
 
				-        self.config.set(("filter", "test"), "clean", b"cat")
			
 
				-        self.config.set(("filter", "test"), "smudge", b"tac")
			
 
				-
			
 
				-        # Get driver (should be created from config)
			
 
				-        driver = self.registry.get_driver("test")
			
 
				-        self.assertIsNotNone(driver)
			
 
				-        self.assertIsInstance(driver, ProcessFilterDriver)
			
 
				-        self.assertEqual(driver.clean_cmd, "cat")
			
 
				-        self.assertEqual(driver.smudge_cmd, "tac")
			
 
				-
			
 
				-    def test_builtin_lfs_factory(self) -> None:
			
 
				-        """Test that LFS filter is available as a built-in."""
			
 
				-        from dulwich.lfs import LFSFilterDriver
			
 
				-
			
 
				-        # Should be able to get LFS filter without explicit registration
			
 
				-        driver = self.registry.get_driver("lfs")
			
 
				-        self.assertIsNotNone(driver)
			
 
				-        self.assertIsInstance(driver, LFSFilterDriver)
			
 
				-
			
 
				-
			
 
				-class GetFilterForPathTests(TestCase):
			
 
				-    def setUp(self) -> None:
			
 
				-        super().setUp()
			
 
				-        self.registry = FilterRegistry()
			
 
				-        self.driver = ProcessFilterDriver(clean_cmd="cat")
			
 
				-        self.registry.register_driver("test", self.driver)
			
 
				-
			
 
				-    def test_get_filter_for_path(self) -> None:
			
 
				-        """Test getting filter for a path with filter attribute."""
			
 
				-        gitattributes = {
			
 
				-            b"*.txt": {b"filter": b"test"},
			
 
				-        }
			
 
				-
			
 
				-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
			
 
				-        self.assertIs(result, self.driver)
			
 
				-
			
 
				-    def test_no_filter_attribute(self) -> None:
			
 
				-        """Test path with no filter attribute."""
			
 
				-        gitattributes = {
			
 
				-            b"*.txt": {b"text": b"auto"},
			
 
				-        }
			
 
				-
			
 
				-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
			
 
				-        self.assertIsNone(result)
			
 
				+class GitAttributesFilterIntegrationTests(TestCase):
			
 
				+    """Test gitattributes integration with filter drivers."""
			
 
				 
			
 
				-    def test_no_matching_pattern(self) -> None:
			
 
				-        """Test path with no matching pattern."""
			
 
				-        gitattributes = {
			
 
				-            b"*.jpg": {b"filter": b"test"},
			
 
				-        }
			
 
				-
			
 
				-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
			
 
				-        self.assertIsNone(result)
			
 
				-
			
 
				-    def test_filter_not_registered(self) -> None:
			
 
				-        """Test path with filter that's not registered."""
			
 
				-        gitattributes = {
			
 
				-            b"*.txt": {b"filter": b"nonexistent"},
			
 
				-        }
			
 
				-
			
 
				-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
			
 
				-        self.assertIsNone(result)
			
 
				-
			
 
				-
			
 
				-class FilterBlobNormalizerTests(TestCase):
			
 
				     def setUp(self) -> None:
			
 
				         super().setUp()
			
 
				-        self.config = ConfigDict()
			
 
				-        self.registry = FilterRegistry(self.config)
			
 
				-        self.gitattributes = {}
			
 
				-        self.normalizer = FilterBlobNormalizer(
			
 
				-            self.config, self.gitattributes, self.registry
			
 
				-        )
			
 
				-
			
 
				-    def test_no_filter(self) -> None:
			
 
				-        """Test normalizer with no filter defined."""
			
 
				-        blob = Blob()
			
 
				-        blob.data = b"test content"
			
 
				-
			
 
				-        # Both checkin and checkout should return blob unchanged
			
 
				-        result = self.normalizer.checkin_normalize(blob, b"file.txt")
			
 
				-        self.assertIs(result, blob)
			
 
				-
			
 
				-        result = self.normalizer.checkout_normalize(blob, b"file.txt")
			
 
				-        self.assertIs(result, blob)
			
 
				-
			
 
				-    def test_with_filter(self) -> None:
			
 
				-        """Test normalizer with a filter defined."""
			
 
				-
			
 
				-        # Create a simple filter that converts to uppercase on clean
			
 
				-        # and lowercase on smudge
			
 
				-        class TestFilter:
			
 
				-            def clean(self, data):
			
 
				-                return data.upper()
			
 
				-
			
 
				-            def smudge(self, data):
			
 
				-                return data.lower()
			
 
				-
			
 
				-        # Register the filter and set it in gitattributes
			
 
				-        self.registry.register_driver("test", TestFilter())
			
 
				-        self.gitattributes[b"*.txt"] = {b"filter": b"test"}
			
 
				-
			
 
				-        blob = Blob()
			
 
				-        blob.data = b"Test Content"
			
 
				-
			
 
				-        # Checkin should uppercase
			
 
				-        result = self.normalizer.checkin_normalize(blob, b"file.txt")
			
 
				-        self.assertEqual(result.data, b"TEST CONTENT")
			
 
				-        self.assertIsNot(result, blob)  # Should be a new blob
			
 
				-
			
 
				-        # Checkout should lowercase
			
 
				-        result = self.normalizer.checkout_normalize(blob, b"file.txt")
			
 
				-        self.assertEqual(result.data, b"test content")
			
 
				-        self.assertIsNot(result, blob)  # Should be a new blob
			
 
				-
			
 
				-    def test_filter_returns_same_data(self) -> None:
			
 
				-        """Test that normalizer returns same blob if filter doesn't change data."""
			
 
				-
			
 
				-        # Create a filter that returns data unchanged
			
 
				-        class NoOpFilter:
			
 
				-            def clean(self, data):
			
 
				-                return data
			
 
				-
			
 
				-            def smudge(self, data):
			
 
				-                return data
			
 
				-
			
 
				-        self.registry.register_driver("noop", NoOpFilter())
			
 
				-        self.gitattributes[b"*.txt"] = {b"filter": b"noop"}
			
 
				-
			
 
				-        blob = Blob()
			
 
				-        blob.data = b"unchanged content"
			
 
				-
			
 
				-        # Both operations should return the same blob instance
			
 
				-        result = self.normalizer.checkin_normalize(blob, b"file.txt")
			
 
				-        self.assertIs(result, blob)
			
 
				-
			
 
				-        result = self.normalizer.checkout_normalize(blob, b"file.txt")
			
 
				-        self.assertIs(result, blob)
			
 
				+        self.test_dir = tempfile.mkdtemp()
			
 
				+        self.addCleanup(self._cleanup_test_dir)
			
 
				+        self.repo = Repo.init(self.test_dir)
			
 
				+
			
 
				+    def _cleanup_test_dir(self) -> None:
			
 
				+        """Clean up test directory."""
			
 
				+        import shutil
			
 
				+
			
 
				+        shutil.rmtree(self.test_dir)
			
 
				+
			
 
				+    def test_gitattributes_text_filter(self) -> None:
			
 
				+        """Test that text attribute triggers line ending conversion."""
			
 
				+        # Configure autocrlf first
			
 
				+        config = self.repo.get_config()
			
 
				+        config.set((b"core",), b"autocrlf", b"true")
			
 
				+        config.write_to_path()
			
 
				+
			
 
				+        # Create .gitattributes with text attribute
			
 
				+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
			
 
				+        with open(gitattributes_path, "wb") as f:
			
 
				+            f.write(b"*.txt text\n")
			
 
				+            f.write(b"*.bin -text\n")
			
 
				+
			
 
				+        # Add .gitattributes
			
 
				+        porcelain.add(self.repo, paths=[".gitattributes"])
			
 
				+        porcelain.commit(self.repo, message=b"Add gitattributes")
			
 
				+
			
 
				+        # Create text file with CRLF
			
 
				+        text_file = os.path.join(self.test_dir, "test.txt")
			
 
				+        with open(text_file, "wb") as f:
			
 
				+            f.write(b"line1\r\nline2\r\n")
			
 
				+
			
 
				+        # Create binary file with CRLF
			
 
				+        bin_file = os.path.join(self.test_dir, "test.bin")
			
 
				+        with open(bin_file, "wb") as f:
			
 
				+            f.write(b"binary\r\ndata\r\n")
			
 
				+
			
 
				+        # Add files
			
 
				+        porcelain.add(self.repo, paths=["test.txt", "test.bin"])
			
 
				+
			
 
				+        # Check that text file was normalized
			
 
				+        index = self.repo.open_index()
			
 
				+        text_entry = index[b"test.txt"]
			
 
				+        text_blob = self.repo.object_store[text_entry.sha]
			
 
				+        self.assertEqual(text_blob.data, b"line1\nline2\n")
			
 
				+
			
 
				+        # Check that binary file was not normalized
			
 
				+        bin_entry = index[b"test.bin"]
			
 
				+        bin_blob = self.repo.object_store[bin_entry.sha]
			
 
				+        self.assertEqual(bin_blob.data, b"binary\r\ndata\r\n")
			
 
				+
			
 
				+    @unittest.skip("Custom process filters require external commands")
			
 
				+    def test_gitattributes_custom_filter(self) -> None:
			
 
				+        """Test custom filter specified in gitattributes."""
			
 
				+        # Create .gitattributes with custom filter
			
 
				+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
			
 
				+        with open(gitattributes_path, "wb") as f:
			
 
				+            f.write(b"*.secret filter=redact\n")
			
 
				+
			
 
				+        # Configure custom filter (use tr command for testing)
			
 
				+        config = self.repo.get_config()
			
 
				+        # This filter replaces all digits with X
			
 
				+        config.set((b"filter", b"redact"), b"clean", b"tr '0-9' 'X'")
			
 
				+        config.write_to_path()
			
 
				+
			
 
				+        # Add .gitattributes
			
 
				+        porcelain.add(self.repo, paths=[".gitattributes"])
			
 
				+
			
 
				+        # Create file with sensitive content
			
 
				+        secret_file = os.path.join(self.test_dir, "password.secret")
			
 
				+        with open(secret_file, "wb") as f:
			
 
				+            f.write(b"password123\ntoken456\n")
			
 
				+
			
 
				+        # Add file
			
 
				+        porcelain.add(self.repo, paths=["password.secret"])
			
 
				+
			
 
				+        # Check that content was filtered
			
 
				+        index = self.repo.open_index()
			
 
				+        entry = index[b"password.secret"]
			
 
				+        blob = self.repo.object_store[entry.sha]
			
 
				+        self.assertEqual(blob.data, b"passwordXXX\ntokenXXX\n")
			
 
				+
			
 
				+    def test_gitattributes_from_tree(self) -> None:
			
 
				+        """Test that gitattributes from tree are used when no working tree exists."""
			
 
				+        # Create .gitattributes with text attribute
			
 
				+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
			
 
				+        with open(gitattributes_path, "wb") as f:
			
 
				+            f.write(b"*.txt text\n")
			
 
				+
			
 
				+        # Add and commit .gitattributes
			
 
				+        porcelain.add(self.repo, paths=[".gitattributes"])
			
 
				+        porcelain.commit(self.repo, message=b"Add gitattributes")
			
 
				+
			
 
				+        # Remove .gitattributes from working tree
			
 
				+        os.remove(gitattributes_path)
			
 
				+
			
 
				+        # Get gitattributes - should still work from tree
			
 
				+        gitattributes = self.repo.get_gitattributes()
			
 
				+        attrs = gitattributes.match_path(b"test.txt")
			
 
				+        self.assertEqual(attrs.get(b"text"), True)
			
 
				+
			
 
				+    def test_gitattributes_info_attributes(self) -> None:
			
 
				+        """Test that .git/info/attributes is read."""
			
 
				+        # Create info/attributes
			
 
				+        info_dir = os.path.join(self.repo.controldir(), "info")
			
 
				+        if not os.path.exists(info_dir):
			
 
				+            os.makedirs(info_dir)
			
 
				+        info_attrs_path = os.path.join(info_dir, "attributes")
			
 
				+        with open(info_attrs_path, "wb") as f:
			
 
				+            f.write(b"*.log text\n")
			
 
				+
			
 
				+        # Get gitattributes
			
 
				+        gitattributes = self.repo.get_gitattributes()
			
 
				+        attrs = gitattributes.match_path(b"debug.log")
			
 
				+        self.assertEqual(attrs.get(b"text"), True)
			
 
				+
			
 
				+    @unittest.skip("Custom process filters require external commands")
			
 
				+    def test_filter_precedence(self) -> None:
			
 
				+        """Test that filter attribute takes precedence over text attribute."""
			
 
				+        # Create .gitattributes with both text and filter
			
 
				+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
			
 
				+        with open(gitattributes_path, "wb") as f:
			
 
				+            f.write(b"*.txt text filter=custom\n")
			
 
				+
			
 
				+        # Configure autocrlf and custom filter
			
 
				+        config = self.repo.get_config()
			
 
				+        config.set((b"core",), b"autocrlf", b"true")
			
 
				+        # This filter converts to uppercase
			
 
				+        config.set((b"filter", b"custom"), b"clean", b"tr '[:lower:]' '[:upper:]'")
			
 
				+        config.write_to_path()
			
 
				+
			
 
				+        # Add .gitattributes
			
 
				+        porcelain.add(self.repo, paths=[".gitattributes"])
			
 
				+
			
 
				+        # Create text file with lowercase and CRLF
			
 
				+        text_file = os.path.join(self.test_dir, "test.txt")
			
 
				+        with open(text_file, "wb") as f:
			
 
				+            f.write(b"hello\r\nworld\r\n")
			
 
				+
			
 
				+        # Add file
			
 
				+        porcelain.add(self.repo, paths=["test.txt"])
			
 
				+
			
 
				+        # Check that custom filter was applied (not just line ending conversion)
			
 
				+        index = self.repo.open_index()
			
 
				+        entry = index[b"test.txt"]
			
 
				+        blob = self.repo.object_store[entry.sha]
			
 
				+        # Should be uppercase with LF endings
			
 
				+        self.assertEqual(blob.data, b"HELLO\nWORLD\n")
			
 
				+
			
 
				+    def test_blob_normalizer_integration(self) -> None:
			
 
				+        """Test that get_blob_normalizer returns a FilterBlobNormalizer."""
			
 
				+        normalizer = self.repo.get_blob_normalizer()
			
 
				+
			
 
				+        # Check it's the right type
			
 
				+        from dulwich.filters import FilterBlobNormalizer
			
 
				+
			
 
				+        self.assertIsInstance(normalizer, FilterBlobNormalizer)
			
 
				+
			
 
				+        # Check it has access to gitattributes
			
 
				+        self.assertIsNotNone(normalizer.gitattributes)
			
 
				+        self.assertIsNotNone(normalizer.filter_registry)