Explorar o código

Apply line-ending normalization during checkout operations

Fix build_index_from_tree to respect core.autocrlf configuration when
checking out files from the repository. This ensures consistent line
endings on Windows and other platforms that use CRLF.

Fixes #663
Jelmer Vernooij hai 1 mes
pai
achega
fa8caf9f52
Modificáronse 4 ficheiros con 56 adicións e 0 borrados
  1. 4 0
      NEWS
  2. 7 0
      dulwich/index.py
  3. 2 0
      dulwich/repo.py
  4. 43 0
      tests/test_index.py

+ 4 - 0
NEWS

@@ -53,6 +53,10 @@
    but leaves working tree unchanged. Soft reset only updates HEAD.
    (Jelmer Vernooij)
 
+ * Apply line-ending normalization in ``build_index_from_tree`` to respect
+   ``core.autocrlf`` configuration during checkout operations.
+   (Jelmer Vernooij, #663)
+
  * Add ``prune`` method to object stores for cleaning up orphaned temporary
    pack files. This is now called by ``garbage_collect()`` to match Git's
    behavior. Also added ``prune`` command to ``dulwich.porcelain``.

+ 7 - 0
dulwich/index.py

@@ -41,6 +41,7 @@ from typing import (
 
 if TYPE_CHECKING:
     from .file import _GitFile
+    from .line_ending import BlobNormalizer
     from .repo import BaseRepo
 
 from .file import GitFile
@@ -1321,6 +1322,7 @@ def build_index_from_tree(
     honor_filemode: bool = True,
     validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
     symlink_fn: Optional[Callable] = None,
+    blob_normalizer: Optional["BlobNormalizer"] = None,
 ) -> None:
     """Generate and materialize index from a tree.
 
@@ -1333,6 +1335,8 @@ def build_index_from_tree(
         config file, default is core.filemode=True, change executable bit
       validate_path_element: Function to validate path elements to check
         out; default just refuses .git and .. directories.
+      blob_normalizer: An optional BlobNormalizer to use for converting line
+        endings when writing blobs to the working directory.
 
     Note: existing index is wiped and contents are not merged
         in a working dir. Suitable only for fresh clones.
@@ -1358,6 +1362,9 @@ def build_index_from_tree(
         else:
             obj = object_store[entry.sha]
             assert isinstance(obj, Blob)
+            # Apply blob normalization for checkout if normalizer is provided
+            if blob_normalizer is not None:
+                obj = blob_normalizer.checkout_normalize(obj, entry.path)
             st = build_file_from_blob(
                 obj,
                 entry.mode,

+ 2 - 0
dulwich/repo.py

@@ -1720,6 +1720,7 @@ class Repo(BaseRepo):
                 ) as f:
                     f.write(source)
 
+        blob_normalizer = self.get_blob_normalizer()
         return build_index_from_tree(
             self.path,
             self.index_path(),
@@ -1728,6 +1729,7 @@ class Repo(BaseRepo):
             honor_filemode=honor_filemode,
             validate_path_element=validate_path_element,
             symlink_fn=symlink_fn,
+            blob_normalizer=blob_normalizer,
         )
 
     def _get_config_condition_matchers(self) -> dict[str, "ConditionMatcher"]:

+ 43 - 0
tests/test_index.py

@@ -691,6 +691,49 @@ class BuildIndexTests(TestCase):
             self.assertEqual(index[b"c"].mode, S_IFGITLINK)  # mode
             self.assertEqual(index[b"c"].sha, c.id)  # sha
 
+    def test_with_line_ending_normalization(self) -> None:
+        """Test that build_index_from_tree applies line-ending normalization."""
+        repo_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_dir)
+
+        from dulwich.line_ending import BlobNormalizer
+
+        with Repo.init(repo_dir) as repo:
+            # Enable core.autocrlf=true and persist it to the repository config
+            config = repo.get_config()
+            config.set((b"core",), b"autocrlf", b"true")
+            config.write_to_path()
+
+            # Create a blob whose stored content uses LF line endings only
+            content_lf = b"line1\nline2\nline3\n"
+            blob = Blob.from_string(content_lf)
+
+            # Single-entry tree pointing at the blob as a regular 0644 file
+            tree = Tree()
+            tree[b"test.txt"] = (stat.S_IFREG | 0o644, blob.id)
+
+            repo.object_store.add_objects([(blob, None), (tree, None)])
+
+            # Build the normalizer directly from the config object set up above.
+            # NOTE(review): the empty dict is presumably the gitattributes
+            # mapping -- confirm against BlobNormalizer's signature.
+            blob_normalizer = BlobNormalizer(config, {})
+
+            # Check out the tree, passing the normalizer explicitly
+            build_index_from_tree(
+                repo.path,
+                repo.index_path(),
+                repo.object_store,
+                tree.id,
+                blob_normalizer=blob_normalizer,
+            )
+
+            # Read the checked-out file back in binary mode so the raw line
+            # endings written to disk are compared as-is
+            test_file = os.path.join(repo.path, "test.txt")
+            with open(test_file, "rb") as f:
+                content = f.read()
+
+            # autocrlf=true means LF -> CRLF on checkout; this assertion is
+            # unconditional, so CRLF is expected on every platform, not only
+            # on Windows
+            expected_content = b"line1\r\nline2\r\nline3\r\n"
+            self.assertEqual(content, expected_content)
+
 
 class GetUnstagedChangesTests(TestCase):
     def test_get_unstaged_changes(self) -> None: