소스 검색

Fix line-ending conversion support

The first support of line-ending conversion was too aggressive. After further
manual testing and investigation of
https://github.com/dulwich/dulwich/issues/693, I realized that line-ending
conversions doesn't apply on every checkin or checkout operation, only on
checkin or checkout operation resulting in a new file being created.

CGit actually tells you when it's applying the line-ending normalization with
a message that looks like: `warning: CRLF will be replaced by LF in BRANCH2.`

It should hopefully fix #693

Check if the file exists by checking the index rather than the store
Boris Feld 6 년 전
부모
커밋
0e6a037495
6개의 변경된 파일87개의 추가작업 그리고 13개의 파일을 삭제
  1. 10 2
      dulwich/index.py
  2. 23 4
      dulwich/line_ending.py
  3. 1 1
      dulwich/porcelain.py
  4. 8 1
      dulwich/repo.py
  5. 4 4
      dulwich/tests/test_index.py
  6. 41 1
      dulwich/tests/test_porcelain.py

+ 10 - 2
dulwich/index.py

@@ -800,7 +800,7 @@ def _has_directory_changed(tree_path, entry):
     return False
 
 
-def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
+def get_unstaged_changes(index: Index, repo, filter_blob_callback=None):
     """Walk through an index and check for differences against working tree.
 
     Args:
@@ -808,6 +808,7 @@ def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
       root_path: path in which to find files
     Returns: iterator over paths with unstaged changes
     """
+    root_path = repo.path
     # For each entry in the index check the sha1 & ensure not staged
     if not isinstance(root_path, bytes):
         root_path = os.fsencode(root_path)
@@ -827,7 +828,14 @@ def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
             blob = blob_from_path_and_stat(full_path, st)
 
             if filter_blob_callback is not None:
-                blob = filter_blob_callback(blob, tree_path)
+                # Check if the file is already in the index
+                try:
+                    index[tree_path]
+                    new_file = False
+                except KeyError:
+                    new_file = True
+
+                blob = filter_blob_callback(blob, tree_path, new_file)
         except FileNotFoundError:
             # The file was removed, so we assume that counts as
             # different from whatever file used to exist.

+ 23 - 4
dulwich/line_ending.py

@@ -31,6 +31,9 @@ The normalization is a two-fold process that happens at two moments:
   when doing a `git add` call. We call this process the write filter in this
   module.
 
+The normalization only happens when the resulting file does not exists yet.
+For the write filter, they are files that are shown as added in status.
+
 One thing to know is that Git does line-ending normalization only on text
 files. How does Git know that a file is text? We can either mark a file as a
 text file, a binary file or ask Git to automatically decides. Git has an
@@ -231,8 +234,16 @@ class BlobNormalizer(object):
             core_eol, core_autocrlf, self.gitattributes
         )
 
-    def checkin_normalize(self, blob, tree_path):
-        """Normalize a blob during a checkin operation"""
+    def checkin_normalize(self, blob, tree_path, new_file=True):
+        """ Normalize a blob during a checkin operation
+
+        new_file is set to True by default for backward-compatibility
+        """
+        if not new_file:
+            # Line-ending normalization only happens for new files, aka files
+            # not already commited
+            return blob
+
         if self.fallback_write_filter is not None:
             return normalize_blob(
                 blob, self.fallback_write_filter, binary_detection=True
@@ -240,8 +251,16 @@ class BlobNormalizer(object):
 
         return blob
 
-    def checkout_normalize(self, blob, tree_path):
-        """Normalize a blob during a checkout operation"""
+    def checkout_normalize(self, blob, tree_path, new_file=True):
+        """ Normalize a blob during a checkout operation
+
+        new_file is set to True by default for backward-compatibility
+        """
+        if not new_file:
+            # Line-ending normalization only happens for new files, aka files
+            # not already commited
+            return blob
+
         if self.fallback_read_filter is not None:
             return normalize_blob(
                 blob, self.fallback_read_filter, binary_detection=True

+ 1 - 1
dulwich/porcelain.py

@@ -1214,7 +1214,7 @@ def status(repo=".", ignored=False):
         index = r.open_index()
         normalizer = r.get_blob_normalizer()
         filter_callback = normalizer.checkin_normalize
-        unstaged_changes = list(get_unstaged_changes(index, r.path, filter_callback))
+        unstaged_changes = list(get_unstaged_changes(index, r, filter_callback))
 
         untracked_paths = get_untracked_paths(
             r.path, r.path, index, exclude_ignored=not ignored

+ 8 - 1
dulwich/repo.py

@@ -1297,8 +1297,15 @@ class Repo(BaseRepo):
                     except KeyError:
                         pass
                 else:
+                    # Check if the file is already in the index
+                    try:
+                        index[tree_path]
+                        new_file = False
+                    except KeyError:
+                        new_file = True
+
                     blob = blob_from_path_and_stat(full_path, st)
-                    blob = blob_normalizer.checkin_normalize(blob, fs_path)
+                    blob = blob_normalizer.checkin_normalize(blob, fs_path, new_file)
                     self.object_store.add_object(blob)
                     index[tree_path] = index_entry_from_stat(st, blob.id, 0)
         index.write()

+ 4 - 4
dulwich/tests/test_index.py

@@ -720,7 +720,7 @@ class GetUnstagedChangesTests(TestCase):
             # modify access and modify time of path
             os.utime(foo1_fullpath, (0, 0))
 
-            changes = get_unstaged_changes(repo.open_index(), repo_dir)
+            changes = get_unstaged_changes(repo.open_index(), repo)
 
             self.assertEqual(list(changes), [b"foo1"])
 
@@ -745,7 +745,7 @@ class GetUnstagedChangesTests(TestCase):
 
             os.unlink(foo1_fullpath)
 
-            changes = get_unstaged_changes(repo.open_index(), repo_dir)
+            changes = get_unstaged_changes(repo.open_index(), repo)
 
             self.assertEqual(list(changes), [b"foo1"])
 
@@ -771,7 +771,7 @@ class GetUnstagedChangesTests(TestCase):
             os.remove(foo1_fullpath)
             os.mkdir(foo1_fullpath)
 
-            changes = get_unstaged_changes(repo.open_index(), repo_dir)
+            changes = get_unstaged_changes(repo.open_index(), repo)
 
             self.assertEqual(list(changes), [b"foo1"])
 
@@ -798,7 +798,7 @@ class GetUnstagedChangesTests(TestCase):
             os.remove(foo1_fullpath)
             os.symlink(os.path.dirname(foo1_fullpath), foo1_fullpath)
 
-            changes = get_unstaged_changes(repo.open_index(), repo_dir)
+            changes = get_unstaged_changes(repo.open_index(), repo)
 
             self.assertEqual(list(changes), [b"foo1"])
 

+ 41 - 1
dulwich/tests/test_porcelain.py

@@ -1825,7 +1825,7 @@ class StatusTests(PorcelainTestCase):
         self.assertListEqual(results.unstaged, [b"crlf"])
         self.assertListEqual(results.untracked, [])
 
-    def test_status_crlf_convert(self):
+    def test_status_crlf_no_convert(self):
         # First make a commit as if the file has been added on a Linux system
         # or with core.autocrlf=True
         file_path = os.path.join(self.repo.path, "crlf")
@@ -1849,8 +1849,48 @@ class StatusTests(PorcelainTestCase):
         c.set("core", "autocrlf", True)
         c.write_to_path()
 
+        # The file should be seen as modified as the line-ending conversion
+        # will NOT be applied since the file is already in the store and in
+        # the staging area
         results = porcelain.status(self.repo)
         self.assertDictEqual({"add": [], "delete": [], "modify": []}, results.staged)
+        self.assertListEqual(results.unstaged, [b"crlf"])
+        self.assertListEqual(results.untracked, [])
+
+    def test_status_crlf_convert(self):
+        # With an empty repo
+        file_path = os.path.join(self.repo.path, 'crlf')
+        repo = Repo(self.repo_path)
+
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", True)
+        c.write_to_path()
+
+        # Write the file as it would be on Windows
+        with open(file_path, 'wb') as f:
+            f.write(b'line1\r\nline2')
+
+        # Stage the file, the conversion should happens as the file is new
+        repo.stage([b"crlf"])
+
+        # So far, the file should be seen only as added
+        results = porcelain.status(self.repo)
+        self.assertDictEqual(
+            {'add': [b"crlf"], 'delete': [], 'modify': []},
+            results.staged)
+        self.assertListEqual(results.unstaged, [])
+        self.assertListEqual(results.untracked, [])
+
+        # Then update the file line-ending to Unix line endings
+        with open(file_path, 'wb') as f:
+            f.write(b'line1\nline2')
+
+        # It should match the blob in staging area, so the file shouldn't be
+        # shown as unstaged
+        results = porcelain.status(self.repo)
+        self.assertDictEqual(
+            {'add': [b"crlf"], 'delete': [], 'modify': []},
+            results.staged)
         self.assertListEqual(results.unstaged, [])
         self.assertListEqual(results.untracked, [])