Bläddra i källkod

Add blob_normalizer support to update_working_tree (#1636)

- Add blob_normalizer parameter to update_working_tree function to
  support line ending conversions during checkout operations
- Update porcelain.pull() and porcelain.reset() to pass blob_normalizer
  from repository configuration

This enables proper line ending handling when checking out files from
the repository to the working directory.
Jelmer Vernooij 1 månad sedan
förälder
incheckning
2bf27a74cb
5 ändrade filer med 157 tillägg och 2 borttagningar
  1. 4 0
      NEWS
  2. 14 0
      dulwich/index.py
  3. 6 1
      dulwich/porcelain.py
  4. 6 1
      dulwich/repo.py
  5. 127 0
      tests/test_index.py

+ 4 - 0
NEWS

@@ -53,6 +53,10 @@
    but leaves working tree unchanged. Soft reset only updates HEAD.
    (Jelmer Vernooij)
 
+ * Apply line-ending normalization in ``build_index_from_tree`` to respect
+   ``core.autocrlf`` configuration during checkout operations.
+   (Jelmer Vernooij, #663)
+
  * Add ``prune`` method to object stores for cleaning up orphaned temporary
    pack files. This is now called by ``garbage_collect()`` to match Git's
    behavior. Also added ``prune`` command to ``dulwich.porcelain``.

+ 14 - 0
dulwich/index.py

@@ -41,6 +41,7 @@ from typing import (
 
 if TYPE_CHECKING:
     from .file import _GitFile
+    from .line_ending import BlobNormalizer
     from .repo import BaseRepo
 
 from .file import GitFile
@@ -1321,6 +1322,7 @@ def build_index_from_tree(
     honor_filemode: bool = True,
     validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
     symlink_fn: Optional[Callable] = None,
+    blob_normalizer: Optional["BlobNormalizer"] = None,
 ) -> None:
     """Generate and materialize index from a tree.
 
@@ -1333,6 +1335,8 @@ def build_index_from_tree(
         config file, default is core.filemode=True, change executable bit
       validate_path_element: Function to validate path elements to check
         out; default just refuses .git and .. directories.
+      blob_normalizer: An optional BlobNormalizer to use for converting line
+        endings when writing blobs to the working directory.
 
     Note: existing index is wiped and contents are not merged
         in a working dir. Suitable only for fresh clones.
@@ -1358,6 +1362,9 @@ def build_index_from_tree(
         else:
             obj = object_store[entry.sha]
             assert isinstance(obj, Blob)
+            # Apply blob normalization for checkout if normalizer is provided
+            if blob_normalizer is not None:
+                obj = blob_normalizer.checkout_normalize(obj, entry.path)
             st = build_file_from_blob(
                 obj,
                 entry.mode,
@@ -1485,6 +1492,7 @@ def update_working_tree(
     validate_path_element: Optional[Callable[[bytes], bool]] = None,
     symlink_fn: Optional[Callable] = None,
     force_remove_untracked: bool = False,
+    blob_normalizer: Optional["BlobNormalizer"] = None,
 ) -> None:
     """Update the working tree and index to match a new tree.
 
@@ -1503,6 +1511,8 @@ def update_working_tree(
       symlink_fn: Function to use for creating symlinks
       force_remove_untracked: If True, remove files that exist in working
         directory but not in target tree, even if old_tree_id is None
+      blob_normalizer: An optional BlobNormalizer to use for converting line
+        endings when writing blobs to the working directory.
     """
     import os
 
@@ -1545,6 +1555,10 @@ def update_working_tree(
         if not isinstance(blob_obj, Blob):
             raise ValueError(f"Object {entry.sha!r} is not a blob")
 
+        # Apply blob normalization for checkout if normalizer is provided
+        if blob_normalizer is not None:
+            blob_obj = blob_normalizer.checkout_normalize(blob_obj, entry.path)
+
         # Ensure parent directory exists
         parent_dir = os.path.dirname(full_path)
         if parent_dir and not os.path.exists(parent_dir):

+ 6 - 1
dulwich/porcelain.py

@@ -1575,6 +1575,7 @@ def reset(repo, mode, treeish="HEAD") -> None:
                         f.write(source)
 
             # Update working tree and index
+            blob_normalizer = r.get_blob_normalizer()
             update_working_tree(
                 r,
                 current_tree,
@@ -1583,6 +1584,7 @@ def reset(repo, mode, treeish="HEAD") -> None:
                 validate_path_element=validate_path_element,
                 symlink_fn=symlink_fn,
                 force_remove_untracked=True,
+                blob_normalizer=blob_normalizer,
             )
         else:
             raise Error(f"Invalid reset mode: {mode}")
@@ -1799,7 +1801,10 @@ def pull(
         # Skip if merge was performed as merge already updates the working tree
         if not merged and old_tree_id is not None:
             new_tree_id = r[b"HEAD"].tree
-            update_working_tree(r, old_tree_id, new_tree_id)
+            blob_normalizer = r.get_blob_normalizer()
+            update_working_tree(
+                r, old_tree_id, new_tree_id, blob_normalizer=blob_normalizer
+            )
         if remote_name is not None:
             _import_remote_refs(r.refs, remote_name, fetch_result.refs)
 

+ 6 - 1
dulwich/repo.py

@@ -1720,6 +1720,7 @@ class Repo(BaseRepo):
                 ) as f:
                     f.write(source)
 
+        blob_normalizer = self.get_blob_normalizer()
         return build_index_from_tree(
             self.path,
             self.index_path(),
@@ -1728,6 +1729,7 @@ class Repo(BaseRepo):
             honor_filemode=honor_filemode,
             validate_path_element=validate_path_element,
             symlink_fn=symlink_fn,
+            blob_normalizer=blob_normalizer,
         )
 
     def _get_config_condition_matchers(self) -> dict[str, "ConditionMatcher"]:
@@ -2037,7 +2039,10 @@ class Repo(BaseRepo):
         git_attributes = {}
         config_stack = self.get_config_stack()
         try:
-            tree = self.object_store[self.refs[b"HEAD"]].tree
+            head_sha = self.refs[b"HEAD"]
+            # Peel tags to get the underlying commit
+            _, obj = peel_sha(self.object_store, head_sha)
+            tree = obj.tree
             return TreeBlobNormalizer(
                 config_stack,
                 git_attributes,

+ 127 - 0
tests/test_index.py

@@ -691,6 +691,49 @@ class BuildIndexTests(TestCase):
             self.assertEqual(index[b"c"].mode, S_IFGITLINK)  # mode
             self.assertEqual(index[b"c"].sha, c.id)  # sha
 
+    def test_with_line_ending_normalization(self) -> None:
+        """Test that build_index_from_tree applies line-ending normalization."""
+        repo_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_dir)
+
+        from dulwich.line_ending import BlobNormalizer
+
+        with Repo.init(repo_dir) as repo:
+            # Set up autocrlf config
+            config = repo.get_config()
+            config.set((b"core",), b"autocrlf", b"true")
+            config.write_to_path()
+
+            # Create blob with LF line endings
+            content_lf = b"line1\nline2\nline3\n"
+            blob = Blob.from_string(content_lf)
+
+            tree = Tree()
+            tree[b"test.txt"] = (stat.S_IFREG | 0o644, blob.id)
+
+            repo.object_store.add_objects([(blob, None), (tree, None)])
+
+            # Create blob normalizer
+            blob_normalizer = BlobNormalizer(config, {})
+
+            # Build index with normalization
+            build_index_from_tree(
+                repo.path,
+                repo.index_path(),
+                repo.object_store,
+                tree.id,
+                blob_normalizer=blob_normalizer,
+            )
+
+            # Read the checked-out file back as raw bytes to inspect its line endings
+            test_file = os.path.join(repo.path, "test.txt")
+            with open(test_file, "rb") as f:
+                content = f.read()
+
+            # autocrlf=true converts LF to CRLF on checkout, regardless of the host platform
+            expected_content = b"line1\r\nline2\r\nline3\r\n"
+            self.assertEqual(content, expected_content)
+
 
 class GetUnstagedChangesTests(TestCase):
     def test_get_unstaged_changes(self) -> None:
@@ -1601,3 +1644,87 @@ class TestPathPrefixCompression(TestCase):
         compressed = _compress_path(b"short", b"very/long/path/file.txt")
         decompressed, _ = _decompress_path(compressed, 0, b"very/long/path/file.txt")
         self.assertEqual(b"short", decompressed)
+
+
+class TestUpdateWorkingTree(TestCase):
+    def setUp(self):
+        self.tempdir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.tempdir)
+        from dulwich.repo import Repo
+
+        self.repo = Repo.init(self.tempdir)
+
+    def test_update_working_tree_with_blob_normalizer(self):
+        """Test update_working_tree with a blob normalizer."""
+        from dulwich.index import update_working_tree
+        from dulwich.objects import Blob, Tree
+
+        # Create a simple blob normalizer that converts CRLF to LF
+        class TestBlobNormalizer:
+            def checkout_normalize(self, blob, path):
+                # Convert CRLF to LF during checkout
+                new_blob = Blob()
+                new_blob.data = blob.data.replace(b"\r\n", b"\n")
+                return new_blob
+
+        # Create a tree with a file containing CRLF
+        blob = Blob()
+        blob.data = b"Hello\r\nWorld\r\n"
+        self.repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree[b"test.txt"] = (0o100644, blob.id)
+        self.repo.object_store.add_object(tree)
+
+        # Update working tree with normalizer
+        normalizer = TestBlobNormalizer()
+        update_working_tree(
+            self.repo,
+            None,  # old_tree_id
+            tree.id,  # new_tree_id
+            blob_normalizer=normalizer,
+        )
+
+        # Check that the file was written with LF line endings
+        test_file = os.path.join(self.tempdir, "test.txt")
+        with open(test_file, "rb") as f:
+            content = f.read()
+
+        self.assertEqual(b"Hello\nWorld\n", content)
+
+        # Check that the index has the original blob SHA
+        index = self.repo.open_index()
+        self.assertEqual(blob.id, index[b"test.txt"].sha)
+
+    def test_update_working_tree_without_blob_normalizer(self):
+        """Test update_working_tree without a blob normalizer."""
+        from dulwich.index import update_working_tree
+        from dulwich.objects import Blob, Tree
+
+        # Create a tree with a file containing CRLF
+        blob = Blob()
+        blob.data = b"Hello\r\nWorld\r\n"
+        self.repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree[b"test.txt"] = (0o100644, blob.id)
+        self.repo.object_store.add_object(tree)
+
+        # Update working tree without normalizer
+        update_working_tree(
+            self.repo,
+            None,  # old_tree_id
+            tree.id,  # new_tree_id
+            blob_normalizer=None,
+        )
+
+        # Check that the file was written with original CRLF line endings
+        test_file = os.path.join(self.tempdir, "test.txt")
+        with open(test_file, "rb") as f:
+            content = f.read()
+
+        self.assertEqual(b"Hello\r\nWorld\r\n", content)
+
+        # Check that the index has the blob SHA
+        index = self.repo.open_index()
+        self.assertEqual(blob.id, index[b"test.txt"].sha)