Bläddra i källkod

Fix get_unstaged_changes filter callback to expect Blob objects

Fixes #2010
Jelmer Vernooij 1 månad sedan
förälder
incheckning
bb830b45e5
4 ändrade filer med 67 tillägg och 27 borttagningar
  1. 5 0
      NEWS
  2. 2 2
      dulwich/index.py
  3. 5 23
      dulwich/porcelain.py
  4. 55 2
      tests/test_index.py

+ 5 - 0
NEWS

@@ -1,5 +1,10 @@
 0.25.0	UNRELEASED
 
+ * Fix ``get_unstaged_changes()`` to correctly pass Blob objects to filter
+   callbacks instead of raw bytes. This fixes crashes when using ``.gitattributes``
+   files with filter callbacks like ``checkin_normalize``.
+   (Jelmer Vernooij, #2010)
+
  * The ``ObjectID`` and ``Ref`` types are now newtypes, making it harder to
    accidentally pass the wrong type - as notified by mypy. Most of this is in
    lower-level code. (Jelmer Vernooij)

+ 2 - 2
dulwich/index.py

@@ -2797,7 +2797,7 @@ def _check_entry_for_changes(
     tree_path: bytes,
     entry: IndexEntry | ConflictedIndexEntry,
     root_path: bytes,
-    filter_blob_callback: Callable[[bytes, bytes], bytes] | None = None,
+    filter_blob_callback: Callable[[Blob, bytes], Blob] | None = None,
 ) -> bytes | None:
     """Check a single index entry for changes.
 
@@ -2836,7 +2836,7 @@ def _check_entry_for_changes(
         blob = blob_from_path_and_stat(full_path, st)
 
         if filter_blob_callback is not None:
-            blob.data = filter_blob_callback(blob.data, tree_path)
+            blob = filter_blob_callback(blob, tree_path)
     except FileNotFoundError:
         # The file was removed, so we assume that counts as
         # different from whatever file used to exist.

+ 5 - 23
dulwich/porcelain.py

@@ -836,15 +836,9 @@ def commit(
             index = r.open_index()
             normalizer = r.get_blob_normalizer()
 
-            # Create a wrapper that handles the bytes -> Blob conversion
+            # Pass the normalizer's checkin_normalize method directly
             if normalizer is not None:
-
-                def filter_callback(data: bytes, path: bytes) -> bytes:
-                    blob = Blob()
-                    blob.data = data
-                    normalized_blob = normalizer.checkin_normalize(blob, path)
-                    data_bytes: bytes = normalized_blob.data
-                    return data_bytes
+                filter_callback = normalizer.checkin_normalize
             else:
                 filter_callback = None
 
@@ -1299,13 +1293,7 @@ def add(
         index = r.open_index()
         normalizer = r.get_blob_normalizer()
         if normalizer is not None:
-
-            def filter_callback(data: bytes, path: bytes) -> bytes:
-                blob = Blob()
-                blob.data = data
-                normalized_blob = normalizer.checkin_normalize(blob, path)
-                data_bytes: bytes = normalized_blob.data
-                return data_bytes
+            filter_callback = normalizer.checkin_normalize
         else:
             filter_callback = None
 
@@ -3272,15 +3260,9 @@ def status(
         # 2. Get status of unstaged
         normalizer = r.get_blob_normalizer()
 
-        # Create a wrapper that handles the bytes -> Blob conversion
+        # Pass the normalizer's checkin_normalize method directly
         if normalizer is not None:
-
-            def filter_callback(data: bytes, path: bytes) -> bytes:
-                blob = Blob()
-                blob.data = data
-                normalized_blob = normalizer.checkin_normalize(blob, path)
-                result_data: bytes = normalized_blob.data
-                return result_data
+            filter_callback = normalizer.checkin_normalize
         else:
             filter_callback = None
 

+ 55 - 2
tests/test_index.py

@@ -1443,9 +1443,11 @@ class TestIndexEntryFromPath(TestCase):
         self.assertEqual(sorted(changes), [b"conflict", b"file1", b"file3", b"file4"])
 
         # Create a custom blob filter function
-        def filter_blob_callback(data, path):
+        def filter_blob_callback(blob, path):
             # Modify blob data to make it look changed
-            return b"modified " + data
+            result_blob = Blob()
+            result_blob.data = b"modified " + blob.data
+            return result_blob
 
         # Get unstaged changes with blob filter
         changes = list(get_unstaged_changes(index, repo_dir, filter_blob_callback))
@@ -1455,6 +1457,57 @@ class TestIndexEntryFromPath(TestCase):
             sorted(changes), [b"conflict", b"file1", b"file2", b"file3", b"file4"]
         )
 
+    def test_get_unstaged_changes_with_blob_filter(self) -> None:
+        """Test get_unstaged_changes with filter that expects Blob objects.
+
+        This reproduces issue #2010 where passing blob.data instead of blob
+        to the filter callback causes AttributeError when the callback expects
+        a Blob object (like checkin_normalize does).
+        """
+        repo_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_dir)
+        with Repo.init(repo_dir) as repo:
+            # Create and commit a test file
+            test_file = os.path.join(repo_dir, "test.txt")
+            with open(test_file, "wb") as f:
+                f.write(b"original content")
+
+            repo.get_worktree().stage(["test.txt"])
+            repo.get_worktree().commit(
+                message=b"Initial commit",
+                committer=b"Test <test@example.com>",
+                author=b"Test <test@example.com>",
+            )
+
+            # Create a .gitattributes file
+            gitattributes_file = os.path.join(repo_dir, ".gitattributes")
+            with open(gitattributes_file, "wb") as f:
+                f.write(b"*.txt text\n")
+
+            # Modify the test file
+            with open(test_file, "wb") as f:
+                f.write(b"modified content")
+
+            # Force mtime change to ensure stat doesn't match
+            os.utime(test_file, (0, 0))
+
+            # Create a filter callback that expects Blob objects (like checkin_normalize)
+            def blob_filter_callback(blob: Blob, path: bytes) -> Blob:
+                """Filter that expects a Blob object, not bytes."""
+                # This should receive a Blob object with a .data attribute
+                self.assertIsInstance(blob, Blob)
+                self.assertTrue(hasattr(blob, "data"))
+                # Return the blob unchanged for this test
+                return blob
+
+            # This should not raise AttributeError: 'bytes' object has no attribute 'data'
+            changes = list(
+                get_unstaged_changes(repo.open_index(), repo_dir, blob_filter_callback)
+            )
+
+            # Should detect the change in test.txt
+            self.assertIn(b"test.txt", changes)
+
 
 class TestManyFilesFeature(TestCase):
     """Tests for the manyFiles feature (index version 4 and skipHash)."""