瀏覽代碼

Add support for core.preloadIndex configuration setting

This setting enables parallel stat operations when checking for unstaged
changes, improving performance on slow filesystems like NFS.

When core.preloadIndex is enabled, get_unstaged_changes() uses a thread
pool to parallelize filesystem operations. The implementation:
- Uses ThreadPoolExecutor with up to 8 workers (or CPU count if smaller)
- Falls back to serial processing if threading is unavailable
- Maintains compatibility by defaulting to serial processing

The setting is respected in both porcelain.status() and porcelain.add()
operations.
Jelmer Vernooij 1 月之前
父節點
當前提交
ebeb83a3fd
共有 5 個文件被更改,包括 193 次插入和 28 次刪除
  1. 4 0
      NEWS
  2. 88 26
      dulwich/index.py
  3. 16 2
      dulwich/porcelain.py
  4. 45 0
      tests/test_index.py
  5. 40 0
      tests/test_porcelain.py

+ 4 - 0
NEWS

@@ -2,6 +2,10 @@
 
  * Add ``reflog`` command in porcelain. (Jelmer Vernooij)
 
+ * Add support for ``core.preloadIndex`` configuration setting to enable
+   parallel stat operations when checking for unstaged changes. This improves
+   performance on slow filesystems like NFS. (Jelmer Vernooij)
+
 0.23.2	2025-07-07
 
  * Print deprecations on usage, not import.

+ 88 - 26
dulwich/index.py

@@ -1999,50 +1999,112 @@ def update_working_tree(
     index.write()
 
 
+def _check_entry_for_changes(
+    tree_path: bytes,
+    entry: Union[IndexEntry, ConflictedIndexEntry],
+    root_path: bytes,
+    filter_blob_callback: Optional[Callable] = None,
+) -> Optional[bytes]:
+    """Check a single index entry for changes.
+
+    Args:
+      tree_path: Path in the tree
+      entry: Index entry to check
+      root_path: Root filesystem path
+      filter_blob_callback: Optional callback to filter blobs
+    Returns: tree_path if changed, None otherwise
+    """
+    if isinstance(entry, ConflictedIndexEntry):
+        # Conflicted files are always unstaged
+        return tree_path
+
+    full_path = _tree_to_fs_path(root_path, tree_path)
+    try:
+        st = os.lstat(full_path)
+        if stat.S_ISDIR(st.st_mode):
+            if _has_directory_changed(tree_path, entry):
+                return tree_path
+            return None
+
+        if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
+            return None
+
+        blob = blob_from_path_and_stat(full_path, st)
+
+        if filter_blob_callback is not None:
+            blob = filter_blob_callback(blob, tree_path)
+    except FileNotFoundError:
+        # The file was removed, so we assume that counts as
+        # different from whatever file used to exist.
+        return tree_path
+    else:
+        if blob.id != entry.sha:
+            return tree_path
+    return None
+
+
 def get_unstaged_changes(
     index: Index,
     root_path: Union[str, bytes],
     filter_blob_callback: Optional[Callable] = None,
+    preload_index: bool = False,
 ) -> Generator[bytes, None, None]:
     """Walk through an index and check for differences against working tree.
 
     Args:
       index: index to check
       root_path: path in which to find files
+      filter_blob_callback: Optional callback to filter blobs
+      preload_index: If True, use parallel threads to check files (requires threading support)
     Returns: iterator over paths with unstaged changes
     """
     # For each entry in the index check the sha1 & ensure not staged
     if not isinstance(root_path, bytes):
         root_path = os.fsencode(root_path)
 
-    for tree_path, entry in index.iteritems():
-        full_path = _tree_to_fs_path(root_path, tree_path)
-        if isinstance(entry, ConflictedIndexEntry):
-            # Conflicted files are always unstaged
-            yield tree_path
-            continue
-
+    if preload_index:
+        # Use parallel processing for better performance on slow filesystems
         try:
-            st = os.lstat(full_path)
-            if stat.S_ISDIR(st.st_mode):
-                if _has_directory_changed(tree_path, entry):
-                    yield tree_path
-                continue
-
-            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
-                continue
-
-            blob = blob_from_path_and_stat(full_path, st)
-
-            if filter_blob_callback is not None:
-                blob = filter_blob_callback(blob, tree_path)
-        except FileNotFoundError:
-            # The file was removed, so we assume that counts as
-            # different from whatever file used to exist.
-            yield tree_path
+            import multiprocessing
+            from concurrent.futures import ThreadPoolExecutor
+        except ImportError:
+            # If threading is not available, fall back to serial processing
+            preload_index = False
         else:
-            if blob.id != entry.sha:
-                yield tree_path
+            # Collect all entries first
+            entries = list(index.iteritems())
+
+            # Use number of CPUs but cap at 8 threads to avoid overhead
+            num_workers = min(multiprocessing.cpu_count(), 8)
+
+            # Process entries in parallel
+            with ThreadPoolExecutor(max_workers=num_workers) as executor:
+                # Submit all tasks
+                futures = [
+                    executor.submit(
+                        _check_entry_for_changes,
+                        tree_path,
+                        entry,
+                        root_path,
+                        filter_blob_callback,
+                    )
+                    for tree_path, entry in entries
+                ]
+
+                # Yield results in submission order; result() blocks until each finishes
+                for future in futures:
+                    result = future.result()
+                    if result is not None:
+                        yield result
+
+    if not preload_index:
+        # Serial processing
+        for tree_path, entry in index.iteritems():
+            result = _check_entry_for_changes(
+                tree_path, entry, root_path, filter_blob_callback
+            )
+            if result is not None:
+                yield result
 
 
 def _tree_to_fs_path(root_path: bytes, tree_path: bytes) -> bytes:

+ 16 - 2
dulwich/porcelain.py

@@ -656,7 +656,14 @@ def add(repo: Union[str, os.PathLike, BaseRepo] = ".", paths=None):
         index = r.open_index()
         normalizer = r.get_blob_normalizer()
         filter_callback = normalizer.checkin_normalize
-        all_unstaged_paths = list(get_unstaged_changes(index, r.path, filter_callback))
+
+        # Check if core.preloadIndex is enabled
+        config = r.get_config_stack()
+        preload_index = config.get_boolean(b"core", b"preloadIndex", False)
+
+        all_unstaged_paths = list(
+            get_unstaged_changes(index, r.path, filter_callback, preload_index)
+        )
 
         if not paths:
             # When no paths specified, add all untracked and modified files from repo root
@@ -2058,7 +2065,14 @@ def status(repo=".", ignored=False, untracked_files="normal"):
         index = r.open_index()
         normalizer = r.get_blob_normalizer()
         filter_callback = normalizer.checkin_normalize
-        unstaged_changes = list(get_unstaged_changes(index, r.path, filter_callback))
+
+        # Check if core.preloadIndex is enabled
+        config = r.get_config_stack()
+        preload_index = config.get_boolean(b"core", b"preloadIndex", False)
+
+        unstaged_changes = list(
+            get_unstaged_changes(index, r.path, filter_callback, preload_index)
+        )
 
         untracked_paths = get_untracked_paths(
             r.path,

+ 45 - 0
tests/test_index.py

@@ -771,6 +771,51 @@ class GetUnstagedChangesTests(TestCase):
 
             self.assertEqual(list(changes), [b"foo1"])
 
+    def test_get_unstaged_changes_with_preload(self) -> None:
+        """Unit test for get_unstaged_changes with preload_index=True."""
+        repo_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, repo_dir)
+        with Repo.init(repo_dir) as repo:
+            # Create multiple files to test parallel processing
+            files = []
+            for i in range(10):
+                filename = f"foo{i}"
+                fullpath = os.path.join(repo_dir, filename)
+                with open(fullpath, "wb") as f:
+                    f.write(b"origstuff" + str(i).encode())
+                files.append(filename)
+
+            repo.stage(files)
+            repo.do_commit(
+                b"test status",
+                author=b"author <email>",
+                committer=b"committer <email>",
+            )
+
+            # Modify some files
+            modified_files = [b"foo1", b"foo3", b"foo5", b"foo7"]
+            for filename in modified_files:
+                fullpath = os.path.join(repo_dir, filename.decode())
+                with open(fullpath, "wb") as f:
+                    f.write(b"newstuff")
+                os.utime(fullpath, (0, 0))
+
+            # Test with preload_index=False (serial)
+            changes_serial = list(
+                get_unstaged_changes(repo.open_index(), repo_dir, preload_index=False)
+            )
+            changes_serial.sort()
+
+            # Test with preload_index=True (parallel)
+            changes_parallel = list(
+                get_unstaged_changes(repo.open_index(), repo_dir, preload_index=True)
+            )
+            changes_parallel.sort()
+
+            # Both should return the same results
+            self.assertEqual(changes_serial, changes_parallel)
+            self.assertEqual(changes_serial, sorted(modified_files))
+
     def test_get_unstaged_deleted_changes(self) -> None:
         """Unit test for get_unstaged_changes."""
         repo_dir = tempfile.mkdtemp()

+ 40 - 0
tests/test_porcelain.py

@@ -4787,6 +4787,46 @@ class StatusTests(PorcelainTestCase):
         self.assertEqual(results.staged["add"][0], filename_add.encode("ascii"))
         self.assertEqual(results.unstaged, [b"foo"])
 
+    def test_status_with_core_preloadindex(self) -> None:
+        """Test status with core.preloadIndex enabled."""
+        # Set core.preloadIndex to true
+        config = self.repo.get_config()
+        config.set(b"core", b"preloadIndex", b"true")
+        config.write_to_path()
+
+        # Create multiple files
+        files = []
+        for i in range(10):
+            filename = f"file{i}"
+            fullpath = os.path.join(self.repo.path, filename)
+            with open(fullpath, "w") as f:
+                f.write(f"content{i}")
+            files.append(fullpath)
+
+        porcelain.add(repo=self.repo.path, paths=files)
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"test preload status",
+            author=b"author <email>",
+            committer=b"committer <email>",
+        )
+
+        # Modify some files
+        modified_files = ["file1", "file3", "file5", "file7"]
+        for filename in modified_files:
+            fullpath = os.path.join(self.repo.path, filename)
+            with open(fullpath, "w") as f:
+                f.write("modified content")
+            os.utime(fullpath, (0, 0))
+
+        # Status should work correctly with preloadIndex enabled
+        results = porcelain.status(self.repo)
+
+        # Check that we detected the correct unstaged changes
+        unstaged_sorted = sorted(results.unstaged)
+        expected_sorted = sorted([f.encode("ascii") for f in modified_files])
+        self.assertEqual(unstaged_sorted, expected_sorted)
+
     def test_status_all(self) -> None:
         del_path = os.path.join(self.repo.path, "foo")
         mod_path = os.path.join(self.repo.path, "bar")