Răsfoiți Sursa

"new file" state depends on original tree, not just index entry

Peter Rowlands 4 ani în urmă
părinte
comite
150a3c564d
4 fișiere modificate, cu 59 adăugiri și 55 ștergeri
  1. 1 7
      dulwich/index.py
  2. 24 5
      dulwich/line_ending.py
  3. 13 10
      dulwich/repo.py
  4. 21 33
      dulwich/tests/test_porcelain.py

+ 1 - 7
dulwich/index.py

@@ -827,13 +827,7 @@ def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
             blob = blob_from_path_and_stat(full_path, st)
 
             if filter_blob_callback is not None:
-                try:
-                    index[tree_path]
-                    new_file = False
-                except KeyError:
-                    new_file = True
-
-                blob = filter_blob_callback(blob, tree_path, new_file)
+                blob = filter_blob_callback(blob, tree_path)
         except FileNotFoundError:
             # The file was removed, so we assume that counts as
             # different from whatever file used to exist.

+ 24 - 5
dulwich/line_ending.py

@@ -240,14 +240,11 @@ class BlobNormalizer(object):
             core_eol, core_autocrlf, self.gitattributes
         )
 
-    def checkin_normalize(self, blob, tree_path, new_file=False):
+    def checkin_normalize(self, blob, tree_path):
         """Normalize a blob during a checkin operation"""
         # Existing files should only be normalized on checkin if they were
         # previously normalized on checkout
-        if (
-            self.fallback_write_filter is not None
-            and (self.fallback_read_filter is not None or new_file)
-        ):
+        if self.fallback_write_filter is not None:
             return normalize_blob(
                 blob, self.fallback_write_filter, binary_detection=True
             )
@@ -286,3 +283,25 @@ def normalize_blob(blob, conversion, binary_detection):
     new_blob.data = converted_data
 
     return new_blob
+
+
+class TreeBlobNormalizer(BlobNormalizer):
+    def __init__(self, config_stack, git_attributes, object_store, tree=None):
+        super().__init__(config_stack, git_attributes)
+        if tree:
+            self.existing_paths = {
+                name
+                for name, _, _ in object_store.iter_tree_contents(tree)
+            }
+        else:
+            self.existing_paths = set()
+
+    def checkin_normalize(self, blob, tree_path):
+        # Existing files should only be normalized on checkin if they were
+        # previously normalized on checkout
+        if (
+            self.fallback_read_filter is not None
+            or tree_path not in self.existing_paths
+        ):
+            return super().checkin_normalize(blob, tree_path)
+        return blob

+ 13 - 10
dulwich/repo.py

@@ -83,7 +83,7 @@ from dulwich.hooks import (
     PostReceiveShellHook,
 )
 
-from dulwich.line_ending import BlobNormalizer
+from dulwich.line_ending import BlobNormalizer, TreeBlobNormalizer
 
 from dulwich.refs import (  # noqa: F401
     ANNOTATED_TAG_SUFFIX,
@@ -1297,15 +1297,8 @@ class Repo(BaseRepo):
                     except KeyError:
                         pass
                 else:
-                    # Check if the file is already in the index
-                    try:
-                        index[tree_path]
-                        new_file = False
-                    except KeyError:
-                        new_file = True
-
                     blob = blob_from_path_and_stat(full_path, st)
-                    blob = blob_normalizer.checkin_normalize(blob, fs_path, new_file)
+                    blob = blob_normalizer.checkin_normalize(blob, fs_path)
                     self.object_store.add_object(blob)
                     index[tree_path] = index_entry_from_stat(st, blob.id, 0)
         index.write()
@@ -1539,7 +1532,17 @@ class Repo(BaseRepo):
         """Return a BlobNormalizer object"""
         # TODO Parse the git attributes files
         git_attributes = {}
-        return BlobNormalizer(self.get_config_stack(), git_attributes)
+        config_stack = self.get_config_stack()
+        try:
+            tree = self.object_store[self.refs[b"HEAD"]].tree
+            return TreeBlobNormalizer(
+                config_stack,
+                git_attributes,
+                self.object_store,
+                tree,
+            )
+        except KeyError:
+            return BlobNormalizer(config_stack, git_attributes)
 
 
 class MemoryRepo(BaseRepo):

+ 21 - 33
dulwich/tests/test_porcelain.py

@@ -1825,7 +1825,7 @@ class StatusTests(PorcelainTestCase):
         self.assertListEqual(results.unstaged, [b"crlf"])
         self.assertListEqual(results.untracked, [])
 
-    def test_status_crlf_no_convert(self):
+    def test_status_autocrlf_true(self):
         # First make a commit as if the file has been added on a Linux system
         # or with core.autocrlf=True
         file_path = os.path.join(self.repo.path, "crlf")
@@ -1849,48 +1849,36 @@ class StatusTests(PorcelainTestCase):
         c.set("core", "autocrlf", True)
         c.write_to_path()
 
-        # The file should be seen as modified as the line-ending conversion
-        # will NOT be applied since the file is already in the store and in
-        # the staging area
         results = porcelain.status(self.repo)
         self.assertDictEqual({"add": [], "delete": [], "modify": []}, results.staged)
-        self.assertListEqual(results.unstaged, [b"crlf"])
+        self.assertListEqual(results.unstaged, [])
         self.assertListEqual(results.untracked, [])
 
-    def test_status_crlf_convert(self):
-        # With an empty repo
-        file_path = os.path.join(self.repo.path, 'crlf')
-        repo = Repo(self.repo_path)
+    def test_status_autocrlf_input(self):
+        # Commit existing file with CRLF
+        file_path = os.path.join(self.repo.path, "crlf-exists")
+        with open(file_path, "wb") as f:
+            f.write(b"line1\r\nline2")
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"test status",
+            author=b"author <email>",
+            committer=b"committer <email>",
+        )
 
         c = self.repo.get_config()
-        c.set("core", "autocrlf", True)
+        c.set("core", "autocrlf", "input")
         c.write_to_path()
 
-        # Write the file as it would be on Windows
-        with open(file_path, 'wb') as f:
-            f.write(b'line1\r\nline2')
-
-        # Stage the file, the conversion should happens as the file is new
-        repo.stage([b"crlf"])
-
-        # So far, the file should be seen only as added
-        results = porcelain.status(self.repo)
-        self.assertDictEqual(
-            {'add': [b"crlf"], 'delete': [], 'modify': []},
-            results.staged)
-        self.assertListEqual(results.unstaged, [])
-        self.assertListEqual(results.untracked, [])
-
-        # Then update the file line-ending to Unix line endings
-        with open(file_path, 'wb') as f:
-            f.write(b'line1\nline2')
+        # Add new (untracked) file
+        file_path = os.path.join(self.repo.path, "crlf-new")
+        with open(file_path, "wb") as f:
+            f.write(b"line1\r\nline2")
+        porcelain.add(repo=self.repo.path, paths=[file_path])
 
-        # It should match the blob in staging area, so the file shouldn't be
-        # shown as unstaged
         results = porcelain.status(self.repo)
-        self.assertDictEqual(
-            {'add': [b"crlf"], 'delete': [], 'modify': []},
-            results.staged)
+        self.assertDictEqual({"add": [b"crlf-new"], "delete": [], "modify": []}, results.staged)
         self.assertListEqual(results.unstaged, [])
         self.assertListEqual(results.untracked, [])