
Add sparse index support for improved performance with large repositories (#1946)

- Read and write sparse directory entries with SKIP_WORKTREE flag
- Index expansion (sparse -> full) and collapse (full -> sparse) operations
- Sparse directory extension (sdir) for compatibility signaling
- Detection methods for identifying sparse directory entries

Fixes #1797
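
Below is a minimal sketch of how the API added in this commit might be used to read a sparse index, detect sparse directory entries, and expand it back into a full index. The repository path is a placeholder; Index, is_sparse(), is_sparse_dir() and ensure_full_index() are the names introduced by this change.

import os

from dulwich.index import Index
from dulwich.repo import Repo

repo_path = "/path/to/repo"  # placeholder path, for illustration only
repo = Repo(repo_path)
idx = Index(os.path.join(repo_path, ".git", "index"))

if idx.is_sparse():
    # Sparse directory entries are collapsed tree references: directory
    # mode, the SKIP_WORKTREE extended flag, and a trailing '/' in the path.
    for path, entry in idx.items():
        if entry.is_sparse_dir(path):
            print("sparse dir:", path)

    # Expand every sparse directory entry into its constituent file entries
    # and drop the 'sdir' extension.
    idx.ensure_full_index(repo.object_store)
    idx.write()
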
Jelmer Vernooij, 3 months ago
commit 1702e77e70
7 changed files with 795 additions and 14 deletions
  1. Cargo.lock (+10 -10)
  2. Cargo.toml (+2 -2)
  3. NEWS (+7 -0)
  4. dulwich/__init__.py (+1 -1)
  5. dulwich/index.py (+282 -1)
  6. tests/compat/test_index.py (+220 -0)
  7. tests/test_index.py (+273 -0)

+ 10 - 10
Cargo.lock

@@ -87,9 +87,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383"
+checksum = "fa8e48c12afdeb26aa4be4e5c49fb5e11c3efa0878db783a960eea2b9ac6dd19"
 dependencies = [
  "indoc",
  "libc",
@@ -104,18 +104,18 @@ dependencies = [
 
 [[package]]
 name = "pyo3-build-config"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f"
+checksum = "bc1989dbf2b60852e0782c7487ebf0b4c7f43161ffe820849b56cf05f945cee1"
 dependencies = [
  "target-lexicon",
 ]
 
 [[package]]
 name = "pyo3-ffi"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105"
+checksum = "c808286da7500385148930152e54fb6883452033085bf1f857d85d4e82ca905c"
 dependencies = [
  "libc",
  "pyo3-build-config",
@@ -123,9 +123,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded"
+checksum = "83a0543c16be0d86cf0dbf2e2b636ece9fd38f20406bb43c255e0bc368095f92"
 dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
@@ -135,9 +135,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros-backend"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf"
+checksum = "2a00da2ce064dcd582448ea24a5a26fa9527e0483103019b741ebcbe632dcd29"
 dependencies = [
  "heck",
  "proc-macro2",

+ 2 - 2
Cargo.toml

@@ -3,7 +3,7 @@ members = ["crates/*"]
 resolver = "2"
 
 [workspace.dependencies]
-pyo3 = ">=0.25,<0.27"
+pyo3 = ">=0.25,<0.28"
 
 [workspace.package]
-version = "0.24.6"
+version = "0.24.7"

+ 7 - 0
NEWS

@@ -1,3 +1,10 @@
+0.24.7	UNRELEASED
+
+ * Add sparse index support for improved performance with large repositories.
+   Implements reading and writing of sparse directory entries, index expansion/
+   collapse operations, and the 'sdir' extension.
+   (Jelmer Vernooij, #1797)
+
 0.24.6	2025-10-19
 
  * Fix import failure when ``sys.stdin`` is ``None``. The ``dulwich.server``

+ 1 - 1
dulwich/__init__.py

@@ -31,7 +31,7 @@ if sys.version_info >= (3, 10):
 else:
     from typing_extensions import ParamSpec
 
-__version__ = (0, 24, 6)
+__version__ = (0, 24, 7)
 
 __all__ = ["__version__", "replace_me"]
 

+ 282 - 1
dulwich/index.py

@@ -28,7 +28,7 @@ import stat
 import struct
 import sys
 import types
-from collections.abc import Generator, Iterable, Iterator, Mapping, Sequence
+from collections.abc import Generator, Iterable, Iterator, Mapping, Sequence, Set
 from dataclasses import dataclass
 from enum import Enum
 from typing import (
@@ -94,6 +94,7 @@ REUC_EXTENSION = b"REUC"
 UNTR_EXTENSION = b"UNTR"
 EOIE_EXTENSION = b"EOIE"
 IEOT_EXTENSION = b"IEOT"
+SDIR_EXTENSION = b"sdir"  # Sparse directory extension
 
 
 def _encode_varint(value: int) -> bytes:
@@ -303,6 +304,25 @@ class SerializedIndexEntry:
         """
         return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
 
+    def is_sparse_dir(self) -> bool:
+        """Check if this entry represents a sparse directory.
+
+        A sparse directory entry is a collapsed representation of an entire
+        directory tree in a sparse index. It has:
+        - Directory mode (0o040000)
+        - SKIP_WORKTREE flag set
+        - Path ending with '/'
+        - SHA pointing to a tree object
+
+        Returns:
+          True if entry is a sparse directory entry
+        """
+        return (
+            stat.S_ISDIR(self.mode)
+            and bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
+            and self.name.endswith(b"/")
+        )
+
 
 @dataclass
 class IndexExtension:
@@ -327,6 +347,8 @@ class IndexExtension:
             return ResolveUndoExtension.from_bytes(data)
         elif signature == UNTR_EXTENSION:
             return UntrackedExtension.from_bytes(data)
+        elif signature == SDIR_EXTENSION:
+            return SparseDirExtension.from_bytes(data)
         else:
             # Unknown extension - just store raw data
             return cls(signature, data)
@@ -430,6 +452,41 @@ class UntrackedExtension(IndexExtension):
         return cls(data)
 
 
+class SparseDirExtension(IndexExtension):
+    """Sparse directory extension.
+
+    This extension indicates that the index contains sparse directory entries.
+    Tools that don't understand sparse index should avoid interacting with
+    the index when this extension is present.
+
+    The extension data is empty - its presence is the signal.
+    """
+
+    def __init__(self) -> None:
+        """Initialize SparseDirExtension."""
+        super().__init__(SDIR_EXTENSION, b"")
+
+    @classmethod
+    def from_bytes(cls, data: bytes) -> "SparseDirExtension":
+        """Parse SparseDirExtension from bytes.
+
+        Args:
+          data: Raw bytes to parse (should be empty)
+
+        Returns:
+          SparseDirExtension instance
+        """
+        return cls()
+
+    def to_bytes(self) -> bytes:
+        """Serialize SparseDirExtension to bytes.
+
+        Returns:
+          Empty bytes (extension presence is the signal)
+        """
+        return b""
+
+
 @dataclass
 class IndexEntry:
     """Represents an entry in the Git index.
@@ -532,6 +589,28 @@ class IndexEntry:
             if self.extended_flags == 0:
                 self.flags &= ~FLAG_EXTENDED
 
+    def is_sparse_dir(self, name: bytes) -> bool:
+        """Check if this entry represents a sparse directory.
+
+        A sparse directory entry is a collapsed representation of an entire
+        directory tree in a sparse index. It has:
+        - Directory mode (0o040000)
+        - SKIP_WORKTREE flag set
+        - Path ending with '/'
+        - SHA pointing to a tree object
+
+        Args:
+          name: The path name for this entry (IndexEntry doesn't store name)
+
+        Returns:
+          True if entry is a sparse directory entry
+        """
+        return (
+            stat.S_ISDIR(self.mode)
+            and bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
+            and name.endswith(b"/")
+        )
+
 
 class ConflictedIndexEntry:
     """Index entry that represents a conflict."""
@@ -1219,6 +1298,208 @@ class Index:
         """
         return commit_tree(object_store, self.iterobjects())
 
+    def is_sparse(self) -> bool:
+        """Check if this index contains sparse directory entries.
+
+        Returns:
+          True if any sparse directory extension is present
+        """
+        return any(isinstance(ext, SparseDirExtension) for ext in self._extensions)
+
+    def ensure_full_index(self, object_store: "BaseObjectStore") -> None:
+        """Expand all sparse directory entries into full file entries.
+
+        This converts a sparse index into a full index by recursively
+        expanding any sparse directory entries into their constituent files.
+
+        Args:
+          object_store: Object store to read tree objects from
+
+        Raises:
+          KeyError: If a tree object referenced by a sparse dir entry doesn't exist
+        """
+        if not self.is_sparse():
+            return
+
+        # Find all sparse directory entries
+        sparse_dirs = []
+        for path, entry in list(self._byname.items()):
+            if isinstance(entry, IndexEntry) and entry.is_sparse_dir(path):
+                sparse_dirs.append((path, entry))
+
+        # Expand each sparse directory
+        for path, entry in sparse_dirs:
+            # Remove the sparse directory entry
+            del self._byname[path]
+
+            # Get the tree object
+            tree = object_store[entry.sha]
+            if not isinstance(tree, Tree):
+                raise ValueError(f"Sparse directory {path!r} points to non-tree object")
+
+            # Recursively add all entries from the tree
+            self._expand_tree(path.rstrip(b"/"), tree, object_store, entry)
+
+        # Remove the sparse directory extension
+        self._extensions = [
+            ext for ext in self._extensions if not isinstance(ext, SparseDirExtension)
+        ]
+
+    def _expand_tree(
+        self,
+        prefix: bytes,
+        tree: Tree,
+        object_store: "BaseObjectStore",
+        template_entry: IndexEntry,
+    ) -> None:
+        """Recursively expand a tree into index entries.
+
+        Args:
+          prefix: Path prefix for entries (without trailing slash)
+          tree: Tree object to expand
+          object_store: Object store to read nested trees from
+          template_entry: Template entry to copy metadata from
+        """
+        for name, mode, sha in tree.items():
+            if prefix:
+                full_path = prefix + b"/" + name
+            else:
+                full_path = name
+
+            if stat.S_ISDIR(mode):
+                # Recursively expand subdirectories
+                subtree = object_store[sha]
+                if not isinstance(subtree, Tree):
+                    raise ValueError(
+                        f"Directory entry {full_path!r} points to non-tree object"
+                    )
+                self._expand_tree(full_path, subtree, object_store, template_entry)
+            else:
+                # Create an index entry for this file
+                # Use the template entry for metadata but with the file's sha and mode
+                new_entry = IndexEntry(
+                    ctime=template_entry.ctime,
+                    mtime=template_entry.mtime,
+                    dev=template_entry.dev,
+                    ino=template_entry.ino,
+                    mode=mode,
+                    uid=template_entry.uid,
+                    gid=template_entry.gid,
+                    size=0,  # Size is unknown from tree
+                    sha=sha,
+                    flags=0,
+                    extended_flags=0,  # Don't copy skip-worktree flag
+                )
+                self._byname[full_path] = new_entry
+
+    def convert_to_sparse(
+        self,
+        object_store: "BaseObjectStore",
+        tree_sha: bytes,
+        sparse_dirs: Set[bytes],
+    ) -> None:
+        """Convert full index entries to sparse directory entries.
+
+        This collapses directories that are entirely outside the sparse
+        checkout cone into single sparse directory entries.
+
+        Args:
+          object_store: Object store to read tree objects
+          tree_sha: SHA of the tree (usually HEAD) to base sparse dirs on
+          sparse_dirs: Set of directory paths (with trailing /) to collapse
+
+        Raises:
+          KeyError: If tree_sha or a subdirectory doesn't exist
+        """
+        if not sparse_dirs:
+            return
+
+        # Get the base tree
+        tree = object_store[tree_sha]
+        if not isinstance(tree, Tree):
+            raise ValueError(f"tree_sha {tree_sha!r} is not a tree object")
+
+        # For each sparse directory, find its tree SHA and create sparse entry
+        for dir_path in sparse_dirs:
+            dir_path_stripped = dir_path.rstrip(b"/")
+
+            # Find the tree SHA for this directory
+            subtree_sha = self._find_subtree_sha(tree, dir_path_stripped, object_store)
+            if subtree_sha is None:
+                # Directory doesn't exist in tree, skip it
+                continue
+
+            # Remove all entries under this directory
+            entries_to_remove = [
+                path
+                for path in self._byname
+                if path.startswith(dir_path) or path == dir_path_stripped
+            ]
+            for path in entries_to_remove:
+                del self._byname[path]
+
+            # Create a sparse directory entry
+            # Use minimal metadata since it's not a real file
+            sparse_entry = IndexEntry(
+                ctime=0,
+                mtime=0,
+                dev=0,
+                ino=0,
+                mode=stat.S_IFDIR,
+                uid=0,
+                gid=0,
+                size=0,
+                sha=subtree_sha,
+                flags=0,
+                extended_flags=EXTENDED_FLAG_SKIP_WORKTREE,
+            )
+            self._byname[dir_path] = sparse_entry
+
+        # Add sparse directory extension if not present
+        if not self.is_sparse():
+            self._extensions.append(SparseDirExtension())
+
+    def _find_subtree_sha(
+        self,
+        tree: Tree,
+        path: bytes,
+        object_store: "BaseObjectStore",
+    ) -> Optional[bytes]:
+        """Find the SHA of a subtree at a given path.
+
+        Args:
+          tree: Root tree object to search in
+          path: Path to the subdirectory (no trailing slash)
+          object_store: Object store to read nested trees from
+
+        Returns:
+          SHA of the subtree, or None if path doesn't exist
+        """
+        if not path:
+            return tree.id
+
+        parts = path.split(b"/")
+        current_tree = tree
+
+        for part in parts:
+            # Look for this part in the current tree
+            try:
+                mode, sha = current_tree[part]
+            except KeyError:
+                return None
+
+            if not stat.S_ISDIR(mode):
+                # Path component is a file, not a directory
+                return None
+
+            # Load the next tree
+            obj = object_store[sha]
+            if not isinstance(obj, Tree):
+                return None
+            current_tree = obj
+
+        return current_tree.id
+
 
 def commit_tree(
     object_store: ObjectContainer, blobs: Iterable[tuple[bytes, bytes, int]]
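
The reverse direction, collapsing a full index into a sparse one, goes through convert_to_sparse(). A minimal sketch against an in-memory object store, mirroring the unit test added further below and assuming the directory subdir/ lies entirely outside the sparse checkout cone:

import os
import stat
import tempfile

from dulwich.index import Index, IndexEntry
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()

blob = Blob()
blob.data = b"content"
store.add_object(blob)

# Tree layout: subdir/file.txt
subtree = Tree()
subtree[b"file.txt"] = (0o100644, blob.id)
store.add_object(subtree)

tree = Tree()
tree[b"subdir"] = (stat.S_IFDIR, subtree.id)
store.add_object(tree)

with tempfile.TemporaryDirectory() as tmpdir:
    idx = Index(os.path.join(tmpdir, "index"), read=False)
    idx[b"subdir/file.txt"] = IndexEntry(
        ctime=0, mtime=0, dev=0, ino=0, mode=0o100644,
        uid=0, gid=0, size=len(blob.data), sha=blob.id,
        extended_flags=0,
    )

    # Collapse everything under subdir/ into a single sparse directory
    # entry pointing at the subtree; this also appends the 'sdir' extension.
    idx.convert_to_sparse(store, tree.id, {b"subdir/"})

    assert idx.is_sparse()
    assert idx[b"subdir/"].sha == subtree.id
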

+ 220 - 0
tests/compat/test_index.py

@@ -962,3 +962,223 @@ class IndexV4CompatTestCase(CompatTestCase):
         self.assertIn(b"unchanged.txt", entries)
         self.assertNotIn(b"old1.txt", entries)
         self.assertNotIn(b"old2.txt", entries)
+
+
+class SparseIndexCompatTestCase(CompatTestCase):
+    """Tests for Git sparse index compatibility with C Git."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.tempdir = tempfile.mkdtemp()
+        self.addCleanup(self._cleanup)
+
+    def _cleanup(self) -> None:
+        import shutil
+
+        shutil.rmtree(self.tempdir, ignore_errors=True)
+
+    def test_read_sparse_index_created_by_git(self) -> None:
+        """Test that Dulwich can read a sparse index created by Git."""
+        # Sparse index requires Git 2.37+
+        require_git_version((2, 37, 0))
+
+        repo_path = os.path.join(self.tempdir, "test_repo")
+        os.mkdir(repo_path)
+
+        # Initialize repo
+        run_git_or_fail(["init"], cwd=repo_path)
+        run_git_or_fail(["config", "core.sparseCheckout", "true"], cwd=repo_path)
+        run_git_or_fail(
+            ["config", "index.sparse", "true"], cwd=repo_path
+        )  # Enable sparse index
+
+        # Create directory structure
+        os.makedirs(os.path.join(repo_path, "included"), exist_ok=True)
+        os.makedirs(os.path.join(repo_path, "excluded", "subdir"), exist_ok=True)
+
+        # Create files
+        with open(os.path.join(repo_path, "included", "file1.txt"), "w") as f:
+            f.write("included file 1\n")
+        with open(os.path.join(repo_path, "included", "file2.txt"), "w") as f:
+            f.write("included file 2\n")
+        with open(os.path.join(repo_path, "excluded", "file3.txt"), "w") as f:
+            f.write("excluded file 3\n")
+        with open(os.path.join(repo_path, "excluded", "subdir", "file4.txt"), "w") as f:
+            f.write("excluded file 4\n")
+        with open(os.path.join(repo_path, "root.txt"), "w") as f:
+            f.write("root file\n")
+
+        # Add and commit all files
+        run_git_or_fail(["add", "."], cwd=repo_path)
+        run_git_or_fail(["commit", "-m", "initial"], cwd=repo_path)
+
+        # Set up sparse-checkout to include only "included/" and root files
+        sparse_checkout_path = os.path.join(
+            repo_path, ".git", "info", "sparse-checkout"
+        )
+        os.makedirs(os.path.dirname(sparse_checkout_path), exist_ok=True)
+        with open(sparse_checkout_path, "w") as f:
+            f.write("/*\n")  # Include top-level files
+            f.write("!/*/\n")  # Exclude all directories
+            f.write("/included/\n")  # Re-include "included" directory
+
+        # Run sparse-checkout reapply to create sparse index
+        run_git_or_fail(["sparse-checkout", "reapply"], cwd=repo_path)
+
+        # Read the index with Dulwich
+        from dulwich.index import Index
+
+        index_path = os.path.join(repo_path, ".git", "index")
+        idx = Index(index_path)
+
+        # Git may or may not create a sparse index depending on the repo state
+        # The key test is that Dulwich can read it without errors
+        self.assertIsNotNone(idx)
+
+        # If it is sparse, verify we can handle sparse directory entries
+        if idx.is_sparse():
+            for path, entry in idx.items():
+                if entry.is_sparse_dir(path):
+                    # Verify sparse dirs are for excluded paths
+                    self.assertTrue(path.startswith(b"excluded"))
+        else:
+            # If not sparse, we should still be able to read all entries
+            self.assertGreater(len(idx), 0)
+
+    def test_write_sparse_index_readable_by_git(self) -> None:
+        """Test that Git can read a sparse index created by Dulwich."""
+        # Sparse index requires Git 2.37+
+        require_git_version((2, 37, 0))
+
+        from dulwich.index import Index, IndexEntry, SparseDirExtension
+        from dulwich.objects import Blob, Tree
+        from dulwich.repo import Repo
+
+        repo_path = os.path.join(self.tempdir, "test_repo")
+        os.mkdir(repo_path)
+
+        # Initialize repo with Git
+        run_git_or_fail(["init"], cwd=repo_path)
+        run_git_or_fail(["config", "index.sparse", "true"], cwd=repo_path)
+
+        # Create a tree structure using Dulwich
+        repo = Repo(repo_path)
+
+        # Create blobs
+        blob1 = Blob()
+        blob1.data = b"file1 content"
+        repo.object_store.add_object(blob1)
+
+        blob2 = Blob()
+        blob2.data = b"file2 content"
+        repo.object_store.add_object(blob2)
+
+        # Create subtree for sparse directory
+        subtree = Tree()
+        subtree[b"file1.txt"] = (0o100644, blob1.id)
+        subtree[b"file2.txt"] = (0o100644, blob2.id)
+        repo.object_store.add_object(subtree)
+
+        # Create root tree
+        tree = Tree()
+        tree[b"sparse_dir"] = (0o040000, subtree.id)
+        repo.object_store.add_object(tree)
+
+        # Create a commit
+        from dulwich.objects import Commit
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = commit.committer = b"Test <test@example.com>"
+        commit.author_time = commit.commit_time = 1234567890
+        commit.author_timezone = commit.commit_timezone = 0
+        commit.encoding = b"UTF-8"
+        commit.message = b"Test commit"
+        repo.object_store.add_object(commit)
+        repo.refs[b"refs/heads/master"] = commit.id
+
+        # Create sparse index with Dulwich
+        index = Index(os.path.join(repo_path, ".git", "index"), read=False)
+
+        # Add sparse directory entry
+        sparse_entry = IndexEntry(
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=0o040000,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=subtree.id,
+            extended_flags=0x4000,  # SKIP_WORKTREE
+        )
+        index[b"sparse_dir/"] = sparse_entry
+        index._extensions.append(SparseDirExtension())
+        index.write()
+
+        # Verify Git can read the index
+        output = run_git_or_fail(["ls-files", "--debug"], cwd=repo_path)
+        self.assertIn(b"sparse_dir/", output)
+
+        # Verify Git recognizes it as sparse
+        output = run_git_or_fail(["status"], cwd=repo_path)
+        # Should not crash
+        self.assertIsNotNone(output)
+
+    def test_expand_sparse_index_matches_git(self) -> None:
+        """Test that expanding a sparse index matches Git's behavior."""
+        # Sparse index requires Git 2.37+
+        require_git_version((2, 37, 0))
+
+        repo_path = os.path.join(self.tempdir, "test_repo")
+        os.mkdir(repo_path)
+
+        # Initialize repo
+        run_git_or_fail(["init"], cwd=repo_path)
+        run_git_or_fail(["config", "core.sparseCheckout", "true"], cwd=repo_path)
+        run_git_or_fail(["config", "index.sparse", "true"], cwd=repo_path)
+
+        # Create directory structure
+        os.makedirs(os.path.join(repo_path, "dir1", "subdir"), exist_ok=True)
+        with open(os.path.join(repo_path, "dir1", "file1.txt"), "w") as f:
+            f.write("file1\n")
+        with open(os.path.join(repo_path, "dir1", "subdir", "file2.txt"), "w") as f:
+            f.write("file2\n")
+
+        # Commit files
+        run_git_or_fail(["add", "."], cwd=repo_path)
+        run_git_or_fail(["commit", "-m", "initial"], cwd=repo_path)
+
+        # Set up sparse-checkout to exclude dir1
+        sparse_checkout_path = os.path.join(
+            repo_path, ".git", "info", "sparse-checkout"
+        )
+        os.makedirs(os.path.dirname(sparse_checkout_path), exist_ok=True)
+        with open(sparse_checkout_path, "w") as f:
+            f.write("/*\n")
+            f.write("!/dir1/\n")
+
+        run_git_or_fail(["sparse-checkout", "reapply"], cwd=repo_path)
+
+        # Read sparse index with Dulwich
+        from dulwich.index import Index
+        from dulwich.repo import Repo
+
+        index_path = os.path.join(repo_path, ".git", "index")
+        idx = Index(index_path)
+
+        if idx.is_sparse():
+            # Expand the index
+            repo = Repo(repo_path)
+            idx.ensure_full_index(repo.object_store)
+
+            # Should no longer be sparse
+            self.assertFalse(idx.is_sparse())
+
+            # Write it back
+            idx.write()
+
+            # Git should still be able to read it
+            output = run_git_or_fail(["status"], cwd=repo_path)
+            self.assertIsNotNone(output)
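
As the SparseDirExtension docstring above notes, the empty 'sdir' extension is the on-disk compatibility signal: a tool that does not understand sparse directory entries should back off when it sees it. A minimal sketch of that guard, with a placeholder index path:

from dulwich.index import Index

idx = Index("/path/to/repo/.git/index")  # placeholder path

# The extension carries no payload; its presence alone marks the index as
# sparse, so bail out before mutating entries we cannot interpret.
if idx.is_sparse():
    raise NotImplementedError("sparse index not supported by this tool")
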

+ 273 - 0
tests/test_index.py

@@ -3008,3 +3008,276 @@ class TestUpdateWorkingTree(TestCase):
                     parent = os.path.dirname(path)
                     # We're not creating these directories, just testing the logic doesn't fail
                     self.assertIsInstance(parent, bytes)
+
+
+class TestSparseIndex(TestCase):
+    """Tests for sparse index support."""
+
+    def test_serialized_index_entry_is_sparse_dir(self):
+        """Test SerializedIndexEntry.is_sparse_dir() method."""
+        from dulwich.index import EXTENDED_FLAG_SKIP_WORKTREE
+
+        # Regular file entry - not sparse
+        regular_entry = SerializedIndexEntry(
+            name=b"file.txt",
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=0o100644,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=b"\x00" * 20,
+            flags=0,
+            extended_flags=0,
+        )
+        self.assertFalse(regular_entry.is_sparse_dir())
+
+        # Directory mode but no skip-worktree flag - not sparse
+        dir_entry = SerializedIndexEntry(
+            name=b"dir/",
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=stat.S_IFDIR,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=b"\x00" * 20,
+            flags=0,
+            extended_flags=0,
+        )
+        self.assertFalse(dir_entry.is_sparse_dir())
+
+        # Skip-worktree flag but not directory - not sparse
+        skip_file = SerializedIndexEntry(
+            name=b"file.txt",
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=0o100644,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=b"\x00" * 20,
+            flags=0,
+            extended_flags=EXTENDED_FLAG_SKIP_WORKTREE,
+        )
+        self.assertFalse(skip_file.is_sparse_dir())
+
+        # Directory mode + skip-worktree + trailing slash - sparse!
+        sparse_dir = SerializedIndexEntry(
+            name=b"sparse_dir/",
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=stat.S_IFDIR,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=b"\x00" * 20,
+            flags=0,
+            extended_flags=EXTENDED_FLAG_SKIP_WORKTREE,
+        )
+        self.assertTrue(sparse_dir.is_sparse_dir())
+
+    def test_index_entry_is_sparse_dir(self):
+        """Test IndexEntry.is_sparse_dir() method."""
+        from dulwich.index import EXTENDED_FLAG_SKIP_WORKTREE
+
+        # Regular file - not sparse
+        regular = IndexEntry(
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=0o100644,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=b"\x00" * 20,
+            extended_flags=0,
+        )
+        self.assertFalse(regular.is_sparse_dir(b"file.txt"))
+
+        # Sparse directory entry
+        sparse = IndexEntry(
+            ctime=0,
+            mtime=0,
+            dev=0,
+            ino=0,
+            mode=stat.S_IFDIR,
+            uid=0,
+            gid=0,
+            size=0,
+            sha=b"\x00" * 20,
+            extended_flags=EXTENDED_FLAG_SKIP_WORKTREE,
+        )
+        self.assertTrue(sparse.is_sparse_dir(b"dir/"))
+        self.assertFalse(sparse.is_sparse_dir(b"dir"))  # No trailing slash
+
+    def test_sparse_dir_extension(self):
+        """Test SparseDirExtension serialization."""
+        from dulwich.index import SDIR_EXTENSION, SparseDirExtension
+
+        ext = SparseDirExtension()
+        self.assertEqual(ext.signature, SDIR_EXTENSION)
+        self.assertEqual(ext.to_bytes(), b"")
+
+        # Test round-trip
+        ext2 = SparseDirExtension.from_bytes(b"")
+        self.assertEqual(ext2.signature, SDIR_EXTENSION)
+        self.assertEqual(ext2.to_bytes(), b"")
+
+    def test_index_is_sparse(self):
+        """Test Index.is_sparse() method."""
+        from dulwich.index import SparseDirExtension
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            index_path = os.path.join(tmpdir, "index")
+            idx = Index(index_path, read=False)
+
+            # Initially not sparse
+            self.assertFalse(idx.is_sparse())
+
+            # Add sparse directory extension
+            idx._extensions.append(SparseDirExtension())
+            self.assertTrue(idx.is_sparse())
+
+    def test_index_expansion(self):
+        """Test Index.ensure_full_index() expands sparse directories."""
+        from dulwich.index import EXTENDED_FLAG_SKIP_WORKTREE, SparseDirExtension
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Blob, Tree
+
+        # Create a tree structure
+        store = MemoryObjectStore()
+
+        blob1 = Blob()
+        blob1.data = b"file1"
+        store.add_object(blob1)
+
+        blob2 = Blob()
+        blob2.data = b"file2"
+        store.add_object(blob2)
+
+        subtree = Tree()
+        subtree[b"file1.txt"] = (0o100644, blob1.id)
+        subtree[b"file2.txt"] = (0o100644, blob2.id)
+        store.add_object(subtree)
+
+        # Create an index with a sparse directory entry
+        with tempfile.TemporaryDirectory() as tmpdir:
+            index_path = os.path.join(tmpdir, "index")
+            idx = Index(index_path, read=False)
+
+            # Add sparse directory entry
+            sparse_entry = IndexEntry(
+                ctime=0,
+                mtime=0,
+                dev=0,
+                ino=0,
+                mode=stat.S_IFDIR,
+                uid=0,
+                gid=0,
+                size=0,
+                sha=subtree.id,
+                extended_flags=EXTENDED_FLAG_SKIP_WORKTREE,
+            )
+            idx[b"subdir/"] = sparse_entry
+            idx._extensions.append(SparseDirExtension())
+
+            self.assertTrue(idx.is_sparse())
+            self.assertEqual(len(idx), 1)
+
+            # Expand the index
+            idx.ensure_full_index(store)
+
+            # Should no longer be sparse
+            self.assertFalse(idx.is_sparse())
+
+            # Should have 2 entries now (the files)
+            self.assertEqual(len(idx), 2)
+            self.assertIn(b"subdir/file1.txt", idx)
+            self.assertIn(b"subdir/file2.txt", idx)
+
+            # Entries should point to the correct blobs
+            self.assertEqual(idx[b"subdir/file1.txt"].sha, blob1.id)
+            self.assertEqual(idx[b"subdir/file2.txt"].sha, blob2.id)
+
+    def test_index_collapse(self):
+        """Test Index.convert_to_sparse() collapses directories."""
+        from dulwich.object_store import MemoryObjectStore
+        from dulwich.objects import Blob, Tree
+
+        # Create a tree structure
+        store = MemoryObjectStore()
+
+        blob1 = Blob()
+        blob1.data = b"file1"
+        store.add_object(blob1)
+
+        blob2 = Blob()
+        blob2.data = b"file2"
+        store.add_object(blob2)
+
+        subtree = Tree()
+        subtree[b"file1.txt"] = (0o100644, blob1.id)
+        subtree[b"file2.txt"] = (0o100644, blob2.id)
+        store.add_object(subtree)
+
+        tree = Tree()
+        tree[b"subdir"] = (stat.S_IFDIR, subtree.id)
+        store.add_object(tree)
+
+        # Create an index with full entries
+        with tempfile.TemporaryDirectory() as tmpdir:
+            index_path = os.path.join(tmpdir, "index")
+            idx = Index(index_path, read=False)
+
+            idx[b"subdir/file1.txt"] = IndexEntry(
+                ctime=0,
+                mtime=0,
+                dev=0,
+                ino=0,
+                mode=0o100644,
+                uid=0,
+                gid=0,
+                size=5,
+                sha=blob1.id,
+                extended_flags=0,
+            )
+            idx[b"subdir/file2.txt"] = IndexEntry(
+                ctime=0,
+                mtime=0,
+                dev=0,
+                ino=0,
+                mode=0o100644,
+                uid=0,
+                gid=0,
+                size=5,
+                sha=blob2.id,
+                extended_flags=0,
+            )
+
+            self.assertEqual(len(idx), 2)
+            self.assertFalse(idx.is_sparse())
+
+            # Collapse subdir to sparse
+            idx.convert_to_sparse(store, tree.id, {b"subdir/"})
+
+            # Should now be sparse
+            self.assertTrue(idx.is_sparse())
+
+            # Should have 1 entry (the sparse dir)
+            self.assertEqual(len(idx), 1)
+            self.assertIn(b"subdir/", idx)
+
+            # Entry should be a sparse directory
+            entry = idx[b"subdir/"]
+            self.assertTrue(entry.is_sparse_dir(b"subdir/"))
+            self.assertEqual(entry.sha, subtree.id)