Browse code

Fix sparse checkout not applying smudge filters during checkout

The issue was that apply_included_paths() created fresh FilterBlobNormalizer
instances each time, losing registered filter drivers. This fix caches the
FilterBlobNormalizer per repository to maintain filter state.

Changes:
- Cache FilterBlobNormalizer instances in Repository.get_blob_normalizer()
- Fix path type mismatch in sparse_patterns.py local_modifications_exist()
- Update test to use proper .gitattributes and driver registration
- Add proper cleanup in Repository.close()

This ensures smudge filters are consistently applied during sparse checkout
operations when files are materialized in the working tree.
Jelmer Vernooij 4 months ago
Parent
Commit
232fc2f37a
3 changed files with 37 additions and 37 deletions
  1. 20 16
      dulwich/repo.py
  2. 5 1
      dulwich/sparse_patterns.py
  3. 12 20
      tests/test_sparse_patterns.py

+ 20 - 16
dulwich/repo.py

@@ -1969,6 +1969,10 @@ class Repo(BaseRepo):
     def close(self) -> None:
         """Close any files opened by this repository."""
         self.object_store.close()
+        # Clean up cached blob normalizer
+        if hasattr(self, '_blob_normalizer'):
+            self._blob_normalizer.close()
+            del self._blob_normalizer
 
     def __enter__(self):
         """Enter context manager."""
@@ -2021,15 +2025,15 @@ class Repo(BaseRepo):
         """Return a BlobNormalizer object."""
         from .filters import FilterBlobNormalizer, FilterRegistry
 
-        # Get proper GitAttributes object
-        git_attributes = self.get_gitattributes()
-        config_stack = self.get_config_stack()
+        # Cache FilterBlobNormalizer per repository to maintain registered drivers
+        if not hasattr(self, '_blob_normalizer'):
+            # Get proper GitAttributes object
+            git_attributes = self.get_gitattributes()
+            config_stack = self.get_config_stack()
+            filter_registry = FilterRegistry(config_stack, self)
+            self._blob_normalizer = FilterBlobNormalizer(config_stack, git_attributes, filter_registry, self)
 
-        # Create FilterRegistry with repo reference
-        filter_registry = FilterRegistry(config_stack, self)
-
-        # Return FilterBlobNormalizer which handles all filters including line endings
-        return FilterBlobNormalizer(config_stack, git_attributes, filter_registry, self)
+        return self._blob_normalizer
 
     def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
         """Read gitattributes for the repository.
@@ -2256,15 +2260,15 @@ class MemoryRepo(BaseRepo):
         """Return a BlobNormalizer object for checkin/checkout operations."""
         from .filters import FilterBlobNormalizer, FilterRegistry
 
-        # Get GitAttributes object
-        git_attributes = self.get_gitattributes()
-        config_stack = self.get_config_stack()
-
-        # Create FilterRegistry with repo reference
-        filter_registry = FilterRegistry(config_stack, self)
+        # Cache FilterBlobNormalizer per repository to maintain registered drivers
+        if not hasattr(self, '_blob_normalizer'):
+            # Get GitAttributes object
+            git_attributes = self.get_gitattributes()
+            config_stack = self.get_config_stack()
+            filter_registry = FilterRegistry(config_stack, self)
+            self._blob_normalizer = FilterBlobNormalizer(config_stack, git_attributes, filter_registry, self)
 
-        # Return FilterBlobNormalizer which handles all filters
-        return FilterBlobNormalizer(config_stack, git_attributes, filter_registry, self)
+        return self._blob_normalizer
 
     def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
         """Read gitattributes for the repository."""

+ 5 - 1
dulwich/sparse_patterns.py

@@ -164,7 +164,11 @@ def apply_included_paths(
             blob_obj = repo.object_store[index_entry.sha]
         except KeyError:
             return True
-        norm_data = normalizer.checkin_normalize(disk_data, full_path)
+        # Create a temporary blob for normalization
+        temp_blob = Blob()
+        temp_blob.data = disk_data
+        norm_blob = normalizer.checkin_normalize(temp_blob, os.path.relpath(full_path, repo.path).encode())
+        norm_data = norm_blob.data
         if not isinstance(blob_obj, Blob):
             return True
         return bool(norm_data != blob_obj.data)

+ 12 - 20
tests/test_sparse_patterns.py

@@ -544,7 +544,7 @@ class ApplyIncludedPathsTests(TestCase):
 
     def test_checkout_normalization_applied(self):
         """Test that checkout normalization is applied when materializing files during sparse checkout."""
-
+        
         # Create a simple filter that converts content to uppercase
         class UppercaseFilter:
             def smudge(self, input_bytes, path=b""):
@@ -553,23 +553,18 @@ class ApplyIncludedPathsTests(TestCase):
             def clean(self, input_bytes):
                 return input_bytes.lower()
 
-        # Set up filter registry and normalizer
-        filter_registry = FilterRegistry()
-        filter_registry.register_driver("uppercase", UppercaseFilter())
-
-        # Create gitattributes object
-        from dulwich.attrs import GitAttributes, Pattern
-
-        patterns = [(Pattern(b"*.txt"), {b"filter": b"uppercase"})]
-        gitattributes = GitAttributes(patterns)
-
-        # Monkey patch the repo to use our filter registry
-        original_get_blob_normalizer = self.repo.get_blob_normalizer
+        # Create .gitattributes file
+        gitattributes_path = os.path.join(self.temp_dir, ".gitattributes")
+        with open(gitattributes_path, "w") as f:
+            f.write("*.txt filter=uppercase\n")
+        
+        # Add and commit .gitattributes
+        self.repo.stage([b".gitattributes"])
+        self.repo.do_commit(b"Add gitattributes", committer=b"Test <test@example.com>")
 
-        def get_blob_normalizer_with_filters():
-            return FilterBlobNormalizer(None, gitattributes, filter_registry)
-
-        self.repo.get_blob_normalizer = get_blob_normalizer_with_filters
+        # Register the filter with the repo's cached filter registry
+        normalizer = self.repo.get_blob_normalizer()
+        normalizer.filter_registry.register_driver("uppercase", UppercaseFilter())
 
         # Commit a file with lowercase content
         self._commit_blob("test.txt", b"hello world")
@@ -585,9 +580,6 @@ class ApplyIncludedPathsTests(TestCase):
             content = f.read()
             self.assertEqual(content, b"HELLO WORLD")
 
-        # Restore original method
-        self.repo.get_blob_normalizer = original_get_blob_normalizer
-
     def test_checkout_normalization_with_lf_to_crlf(self):
         """Test that line ending normalization is applied during sparse checkout."""
         # Commit a file with LF line endings