Sfoglia il codice sorgente

Fix handling of CRLF line endings with core.autocrlf = input

When core.autocrlf = input is set, files with CRLF line endings were
incorrectly showing as unstaged even when their content hadn't changed.
This was because TreeBlobNormalizer.checkin_normalize() was not applying
the clean filter (CRLF to LF conversion) for existing files when
autocrlf=input was set.

The fix ensures that when autocrlf=input is configured, all text files
are normalized during checkin operations, including when checking status.
This makes Dulwich's behavior consistent with Git.

Fixes #1770
Jelmer Vernooij 5 mesi fa
parent
commit
00c9436b08
4 file modificati con 203 aggiunte e 2 eliminazioni
  1. 4 0
      NEWS
  2. 5 2
      dulwich/line_ending.py
  3. 91 0
      tests/test_line_ending.py
  4. 103 0
      tests/test_porcelain.py

+ 4 - 0
NEWS

@@ -26,6 +26,10 @@
    for pick, reword, edit, squash, fixup, drop, exec, and break commands.
    (Jelmer Vernooij, #1696)
 
+ * Fix handling of CRLF line endings with ``core.autocrlf = input`` to prevent
+   unchanged files from appearing as unstaged in status.
+   (Jelmer Vernooij, #1770)
+
 0.24.1	2025-08-01
 
  * Require ``typing_extensions`` on Python 3.10.

+ 5 - 2
dulwich/line_ending.py

@@ -476,10 +476,13 @@ class TreeBlobNormalizer(BlobNormalizer):
 
     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize blob for checkin, considering existing tree state."""
-        # Existing files should only be normalized on checkin if it was
-        # previously normalized on checkout
+        # A file is normalized on checkin if any of the following holds:
+        # 1. It was previously normalized on checkout (autocrlf=true), OR
+        # 2. We have a write filter (autocrlf=true or autocrlf=input), OR
+        # 3. It is a new file (not present in the existing tree)
         if (
             self.fallback_read_filter is not None
+            or self.fallback_write_filter is not None
             or tree_path not in self.existing_paths
         ):
             return super().checkin_normalize(blob, tree_path)

+ 91 - 0
tests/test_line_ending.py

@@ -24,6 +24,7 @@
 from dulwich.line_ending import (
     BlobNormalizer,
     LineEndingFilter,
+    TreeBlobNormalizer,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
     get_clean_filter_autocrlf,
@@ -365,6 +366,96 @@ class BlobNormalizerTests(TestCase):
         self.assertIs(result, blob)
 
 
+class TreeBlobNormalizerTests(TestCase):
+    """Test the TreeBlobNormalizer class for existing file handling."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+        from dulwich.object_store import MemoryObjectStore
+
+        self.config = ConfigDict()
+        self.gitattributes = {}
+        self.object_store = MemoryObjectStore()
+
+    def test_autocrlf_input_existing_files(self) -> None:
+        """Test that autocrlf=input normalizes existing files with CRLF."""
+        # Create a tree with an existing file
+        from dulwich.objects import Tree
+
+        tree = Tree()
+        tree[b"existing.txt"] = (0o100644, b"a" * 40)  # dummy sha
+        self.object_store.add_object(tree)
+
+        # Create normalizer with autocrlf=input
+        normalizer = TreeBlobNormalizer(
+            self.config,
+            self.gitattributes,
+            self.object_store,
+            tree.id,
+            autocrlf=b"input",
+        )
+
+        # Create blob with CRLF line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert CRLF to LF on checkin even for existing files
+        result = normalizer.checkin_normalize(blob, b"existing.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_false_existing_files(self) -> None:
+        """Test that autocrlf=false does not normalize existing files."""
+        # Create a tree with an existing file
+        from dulwich.objects import Tree
+
+        tree = Tree()
+        tree[b"existing.txt"] = (0o100644, b"a" * 40)  # dummy sha
+        self.object_store.add_object(tree)
+
+        # Create normalizer with autocrlf=false
+        normalizer = TreeBlobNormalizer(
+            self.config,
+            self.gitattributes,
+            self.object_store,
+            tree.id,
+            autocrlf=b"false",
+        )
+
+        # Create blob with CRLF line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should NOT convert for existing files when autocrlf=false
+        result = normalizer.checkin_normalize(blob, b"existing.txt")
+        self.assertIs(result, blob)
+
+    def test_autocrlf_input_new_files(self) -> None:
+        """Test that autocrlf=input normalizes new files."""
+        # Create empty tree (no existing files)
+        from dulwich.objects import Tree
+
+        tree = Tree()
+        self.object_store.add_object(tree)
+
+        # Create normalizer with autocrlf=input
+        normalizer = TreeBlobNormalizer(
+            self.config,
+            self.gitattributes,
+            self.object_store,
+            tree.id,
+            autocrlf=b"input",
+        )
+
+        # Create blob with CRLF line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert CRLF to LF for new files
+        result = normalizer.checkin_normalize(blob, b"new.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+
 class LineEndingIntegrationTests(TestCase):
     """Integration tests for line ending conversion with the filter system."""
 

+ 103 - 0
tests/test_porcelain.py

@@ -5731,9 +5731,112 @@ class StatusTests(PorcelainTestCase):
         self.assertDictEqual(
             {"add": [b"crlf-new"], "delete": [], "modify": []}, results.staged
         )
+        # A file committed with CRLF before autocrlf=input was enabled still
+        # appears as unstaged: the working tree copy is normalized to LF
+        # during comparison, but the index still holds the CRLF content
         self.assertListEqual(results.unstaged, [b"crlf-exists"])
         self.assertListEqual(results.untracked, [])
 
+    def test_status_autocrlf_input_modified(self) -> None:
+        """Test that modified files with CRLF are correctly detected with autocrlf=input."""
+        # Commit existing file with CRLF
+        file_path = os.path.join(self.repo.path, "crlf-file.txt")
+        with open(file_path, "wb") as f:
+            f.write(b"line1\r\nline2\r\nline3\r\n")
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"initial commit",
+            author=b"author <email>",
+            committer=b"committer <email>",
+        )
+
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", "input")
+        c.write_to_path()
+
+        # Modify the file content but keep CRLF
+        with open(file_path, "wb") as f:
+            f.write(b"line1\r\nline2 modified\r\nline3\r\n")
+
+        results = porcelain.status(self.repo)
+        # Modified file should be detected as unstaged
+        self.assertListEqual(results.unstaged, [b"crlf-file.txt"])
+
+    def test_status_autocrlf_input_binary(self) -> None:
+        """Test that binary files are not affected by autocrlf=input."""
+        # Set autocrlf=input first
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", "input")
+        c.write_to_path()
+
+        # Commit binary file with CRLF-like sequences
+        file_path = os.path.join(self.repo.path, "binary.dat")
+        with open(file_path, "wb") as f:
+            f.write(b"binary\r\ndata\x00\xff\r\nmore")
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"add binary",
+            author=b"author <email>",
+            committer=b"committer <email>",
+        )
+
+        # Status should be clean - binary files not normalized
+        results = porcelain.status(self.repo)
+        self.assertListEqual(results.unstaged, [])
+
+    def test_status_autocrlf_input_mixed_endings(self) -> None:
+        """Test files with mixed line endings with autocrlf=input."""
+        # Set autocrlf=input
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", "input")
+        c.write_to_path()
+
+        # Create file with mixed line endings
+        file_path = os.path.join(self.repo.path, "mixed.txt")
+        with open(file_path, "wb") as f:
+            f.write(b"line1\r\nline2\nline3\r\n")
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"add mixed",
+            author=b"author <email>",
+            committer=b"committer <email>",
+        )
+
+        # Blob was normalized on commit; status normalizes the working copy too
+        results = porcelain.status(self.repo)
+        self.assertListEqual(results.unstaged, [])
+
+    def test_status_autocrlf_input_issue_1770(self) -> None:
+        """Test the specific scenario from issue #1770.
+
+        Files with CRLF committed with autocrlf=input should not show as unstaged
+        when their content hasn't changed.
+        """
+        # Set autocrlf=input BEFORE committing
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", "input")
+        c.write_to_path()
+
+        # Create and commit file with CRLF endings
+        file_path = os.path.join(self.repo.path, "crlf-test.dsp")
+        with open(file_path, "wb") as f:
+            f.write(b"# Microsoft DSP file\r\n\r\nContent here\r\n")
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"add dsp file",
+            author=b"author <email>",
+            committer=b"committer <email>",
+        )
+
+        # File was normalized to LF in index, but working tree still has CRLF
+        # Status should be clean because comparison normalizes working tree too
+        results = porcelain.status(self.repo)
+        self.assertListEqual(results.unstaged, [])
+
     def test_get_tree_changes_add(self) -> None:
         """Unit test for get_tree_changes add."""
         # Make a dummy file, stage