1 månad sedan · 2bf27a74cb
--- a/NEWS
+++ b/NEWS
@@ -53,6 +53,10 @@
 
				    but leaves working tree unchanged. Soft reset only updates HEAD.
			
 
				    (Jelmer Vernooĳ)
			
 
				 
			
 
				+ * Apply line-ending normalization in ``build_index_from_tree`` and
			
 
				+   ``update_working_tree`` to respect ``core.autocrlf`` during checkout.
			
 
				+   (Jelmer Vernooĳ, #663)
			
 
				+
			
 
				  * Add ``prune`` method to object stores for cleaning up orphaned temporary
			
 
				    pack files. This is now called by ``garbage_collect()`` to match Git's
			
 
				    behavior. Also added ``prune`` command to ``dulwich.porcelain``.
			
--- a/dulwich/index.py
+++ b/dulwich/index.py
@@ -41,6 +41,7 @@ from typing import (
 
				 
			
 
				 if TYPE_CHECKING:
			
 
				     from .file import _GitFile
			
 
				+    from .line_ending import BlobNormalizer
			
 
				     from .repo import BaseRepo
			
 
				 
			
 
				 from .file import GitFile
			
@@ -1321,6 +1322,7 @@ def build_index_from_tree(
 
				     honor_filemode: bool = True,
			
 
				     validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
			
 
				     symlink_fn: Optional[Callable] = None,
			
 
				+    blob_normalizer: Optional["BlobNormalizer"] = None,
			
 
				 ) -> None:
			
 
				     """Generate and materialize index from a tree.
			
 
				 
			
@@ -1333,6 +1335,8 @@ def build_index_from_tree(
 
				         config file, default is core.filemode=True, change executable bit
			
 
				       validate_path_element: Function to validate path elements to check
			
 
				         out; default just refuses .git and .. directories.
			
 
				+      blob_normalizer: An optional BlobNormalizer to use for converting line
			
 
				+        endings when writing blobs to the working directory.
			
 
				 
			
 
				     Note: existing index is wiped and contents are not merged
			
 
				         in a working dir. Suitable only for fresh clones.
			
@@ -1358,6 +1362,9 @@ def build_index_from_tree(
 
				         else:
			
 
				             obj = object_store[entry.sha]
			
 
				             assert isinstance(obj, Blob)
			
 
				+            # Apply blob normalization for checkout if normalizer is provided
			
 
				+            if blob_normalizer is not None:
			
 
				+                obj = blob_normalizer.checkout_normalize(obj, entry.path)
			
 
				             st = build_file_from_blob(
			
 
				                 obj,
			
 
				                 entry.mode,
			
@@ -1485,6 +1492,7 @@ def update_working_tree(
 
				     validate_path_element: Optional[Callable[[bytes], bool]] = None,
			
 
				     symlink_fn: Optional[Callable] = None,
			
 
				     force_remove_untracked: bool = False,
			
 
				+    blob_normalizer: Optional["BlobNormalizer"] = None,
			
 
				 ) -> None:
			
 
				     """Update the working tree and index to match a new tree.
			
 
				 
			
@@ -1503,6 +1511,8 @@ def update_working_tree(
 
				       symlink_fn: Function to use for creating symlinks
			
 
				       force_remove_untracked: If True, remove files that exist in working
			
 
				         directory but not in target tree, even if old_tree_id is None
			
 
				+      blob_normalizer: An optional BlobNormalizer to use for converting line
			
 
				+        endings when writing blobs to the working directory.
			
 
				     """
			
 
				     import os
			
 
				 
			
@@ -1545,6 +1555,10 @@ def update_working_tree(
 
				         if not isinstance(blob_obj, Blob):
			
 
				             raise ValueError(f"Object {entry.sha!r} is not a blob")
			
 
				 
			
 
				+        # Apply blob normalization for checkout if normalizer is provided
			
 
				+        if blob_normalizer is not None:
			
 
				+            blob_obj = blob_normalizer.checkout_normalize(blob_obj, entry.path)
			
 
				+
			
 
				         # Ensure parent directory exists
			
 
				         parent_dir = os.path.dirname(full_path)
			
 
				         if parent_dir and not os.path.exists(parent_dir):
			
--- a/dulwich/porcelain.py
+++ b/dulwich/porcelain.py
@@ -1575,6 +1575,7 @@ def reset(repo, mode, treeish="HEAD") -> None:
 
				                         f.write(source)
			
 
				 
			
 
				             # Update working tree and index
			
 
				+            blob_normalizer = r.get_blob_normalizer()
			
 
				             update_working_tree(
			
 
				                 r,
			
 
				                 current_tree,
			
@@ -1583,6 +1584,7 @@ def reset(repo, mode, treeish="HEAD") -> None:
 
				                 validate_path_element=validate_path_element,
			
 
				                 symlink_fn=symlink_fn,
			
 
				                 force_remove_untracked=True,
			
 
				+                blob_normalizer=blob_normalizer,
			
 
				             )
			
 
				         else:
			
 
				             raise Error(f"Invalid reset mode: {mode}")
			
@@ -1799,7 +1801,10 @@ def pull(
 
				         # Skip if merge was performed as merge already updates the working tree
			
 
				         if not merged and old_tree_id is not None:
			
 
				             new_tree_id = r[b"HEAD"].tree
			
 
				-            update_working_tree(r, old_tree_id, new_tree_id)
			
 
				+            blob_normalizer = r.get_blob_normalizer()
			
 
				+            update_working_tree(
			
 
				+                r, old_tree_id, new_tree_id, blob_normalizer=blob_normalizer
			
 
				+            )
			
 
				         if remote_name is not None:
			
 
				             _import_remote_refs(r.refs, remote_name, fetch_result.refs)
			
 
				 
			
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -1720,6 +1720,7 @@ class Repo(BaseRepo):
 
				                 ) as f:
			
 
				                     f.write(source)
			
 
				 
			
 
				+        blob_normalizer = self.get_blob_normalizer()
			
 
				         return build_index_from_tree(
			
 
				             self.path,
			
 
				             self.index_path(),
			
@@ -1728,6 +1729,7 @@ class Repo(BaseRepo):
 
				             honor_filemode=honor_filemode,
			
 
				             validate_path_element=validate_path_element,
			
 
				             symlink_fn=symlink_fn,
			
 
				+            blob_normalizer=blob_normalizer,
			
 
				         )
			
 
				 
			
 
				     def _get_config_condition_matchers(self) -> dict[str, "ConditionMatcher"]:
			
@@ -2037,7 +2039,10 @@ class Repo(BaseRepo):
 
				         git_attributes = {}
			
 
				         config_stack = self.get_config_stack()
			
 
				         try:
			
 
				-            tree = self.object_store[self.refs[b"HEAD"]].tree
			
 
				+            head_sha = self.refs[b"HEAD"]
			
 
				+            # Peel tags to get the underlying commit
			
 
				+            _, obj = peel_sha(self.object_store, head_sha)
			
 
				+            tree = obj.tree
			
 
				             return TreeBlobNormalizer(
			
 
				                 config_stack,
			
 
				                 git_attributes,
			
--- a/tests/test_index.py
+++ b/tests/test_index.py
@@ -691,6 +691,49 @@ class BuildIndexTests(TestCase):
 
				             self.assertEqual(index[b"c"].mode, S_IFGITLINK)  # mode
			
 
				             self.assertEqual(index[b"c"].sha, c.id)  # sha
			
 
				 
			
 
				+    def test_with_line_ending_normalization(self) -> None:
			
 
				+        """Test that build_index_from_tree applies line-ending normalization."""
			
 
				+        repo_dir = tempfile.mkdtemp()
			
 
				+        self.addCleanup(shutil.rmtree, repo_dir)
			
 
				+
			
 
				+        from dulwich.line_ending import BlobNormalizer
			
 
				+
			
 
				+        with Repo.init(repo_dir) as repo:
			
 
				+            # Set up autocrlf config
			
 
				+            config = repo.get_config()
			
 
				+            config.set((b"core",), b"autocrlf", b"true")
			
 
				+            config.write_to_path()
			
 
				+
			
 
				+            # Create blob with LF line endings
			
 
				+            content_lf = b"line1\nline2\nline3\n"
			
 
				+            blob = Blob.from_string(content_lf)
			
 
				+
			
 
				+            tree = Tree()
			
 
				+            tree[b"test.txt"] = (stat.S_IFREG | 0o644, blob.id)
			
 
				+
			
 
				+            repo.object_store.add_objects([(blob, None), (tree, None)])
			
 
				+
			
 
				+            # Create blob normalizer
			
 
				+            blob_normalizer = BlobNormalizer(config, {})
			
 
				+
			
 
				+            # Build index with normalization
			
 
				+            build_index_from_tree(
			
 
				+                repo.path,
			
 
				+                repo.index_path(),
			
 
				+                repo.object_store,
			
 
				+                tree.id,
			
 
				+                blob_normalizer=blob_normalizer,
			
 
				+            )
			
 
				+
			
 
				+            # Read the checked-out file back as raw bytes to inspect its line endings
			
 
				+            test_file = os.path.join(repo.path, "test.txt")
			
 
				+            with open(test_file, "rb") as f:
			
 
				+                content = f.read()
			
 
				+
			
 
				+            # autocrlf=true means LF -> CRLF on checkout (on all platforms for testing)
			
 
				+            expected_content = b"line1\r\nline2\r\nline3\r\n"
			
 
				+            self.assertEqual(content, expected_content)
			
 
				+
			
 
				 
			
 
				 class GetUnstagedChangesTests(TestCase):
			
 
				     def test_get_unstaged_changes(self) -> None:
			
@@ -1601,3 +1644,87 @@ class TestPathPrefixCompression(TestCase):
 
				         compressed = _compress_path(b"short", b"very/long/path/file.txt")
			
 
				         decompressed, _ = _decompress_path(compressed, 0, b"very/long/path/file.txt")
			
 
				         self.assertEqual(b"short", decompressed)
			
 
				+
			
 
				+
			
 
				+class TestUpdateWorkingTree(TestCase):
			
 
				+    def setUp(self):
			
 
				+        self.tempdir = tempfile.mkdtemp()
			
 
				+        self.addCleanup(shutil.rmtree, self.tempdir)
			
 
				+        from dulwich.repo import Repo
			
 
				+
			
 
				+        self.repo = Repo.init(self.tempdir)
			
 
				+
			
 
				+    def test_update_working_tree_with_blob_normalizer(self):
			
 
				+        """Test update_working_tree with a blob normalizer."""
			
 
				+        from dulwich.index import update_working_tree
			
 
				+        from dulwich.objects import Blob, Tree
			
 
				+
			
 
				+        # Create a simple blob normalizer that converts CRLF to LF
			
 
				+        class TestBlobNormalizer:
			
 
				+            def checkout_normalize(self, blob, path):
			
 
				+                # Convert CRLF to LF during checkout
			
 
				+                new_blob = Blob()
			
 
				+                new_blob.data = blob.data.replace(b"\r\n", b"\n")
			
 
				+                return new_blob
			
 
				+
			
 
				+        # Create a tree with a file containing CRLF
			
 
				+        blob = Blob()
			
 
				+        blob.data = b"Hello\r\nWorld\r\n"
			
 
				+        self.repo.object_store.add_object(blob)
			
 
				+
			
 
				+        tree = Tree()
			
 
				+        tree[b"test.txt"] = (0o100644, blob.id)
			
 
				+        self.repo.object_store.add_object(tree)
			
 
				+
			
 
				+        # Update working tree with normalizer
			
 
				+        normalizer = TestBlobNormalizer()
			
 
				+        update_working_tree(
			
 
				+            self.repo,
			
 
				+            None,  # old_tree_id
			
 
				+            tree.id,  # new_tree_id
			
 
				+            blob_normalizer=normalizer,
			
 
				+        )
			
 
				+
			
 
				+        # Check that the file was written with LF line endings
			
 
				+        test_file = os.path.join(self.tempdir, "test.txt")
			
 
				+        with open(test_file, "rb") as f:
			
 
				+            content = f.read()
			
 
				+
			
 
				+        self.assertEqual(b"Hello\nWorld\n", content)
			
 
				+
			
 
				+        # Check that the index has the original blob SHA
			
 
				+        index = self.repo.open_index()
			
 
				+        self.assertEqual(blob.id, index[b"test.txt"].sha)
			
 
				+
			
 
				+    def test_update_working_tree_without_blob_normalizer(self):
			
 
				+        """Test update_working_tree without a blob normalizer."""
			
 
				+        from dulwich.index import update_working_tree
			
 
				+        from dulwich.objects import Blob, Tree
			
 
				+
			
 
				+        # Create a tree with a file containing CRLF
			
 
				+        blob = Blob()
			
 
				+        blob.data = b"Hello\r\nWorld\r\n"
			
 
				+        self.repo.object_store.add_object(blob)
			
 
				+
			
 
				+        tree = Tree()
			
 
				+        tree[b"test.txt"] = (0o100644, blob.id)
			
 
				+        self.repo.object_store.add_object(tree)
			
 
				+
			
 
				+        # Update working tree without normalizer
			
 
				+        update_working_tree(
			
 
				+            self.repo,
			
 
				+            None,  # old_tree_id
			
 
				+            tree.id,  # new_tree_id
			
 
				+            blob_normalizer=None,
			
 
				+        )
			
 
				+
			
 
				+        # Check that the file was written with original CRLF line endings
			
 
				+        test_file = os.path.join(self.tempdir, "test.txt")
			
 
				+        with open(test_file, "rb") as f:
			
 
				+            content = f.read()
			
 
				+
			
 
				+        self.assertEqual(b"Hello\r\nWorld\r\n", content)
			
 
				+
			
 
				+        # Check that the index has the blob SHA
			
 
				+        index = self.repo.open_index()
			
 
				+        self.assertEqual(blob.id, index[b"test.txt"].sha)