Browse Source

Implement path-prefix compression

Jelmer Vernooij 2 months ago
parent
commit
2104307722
3 changed files with 446 additions and 40 deletions
  1. 5 5
      NEWS
  2. 221 10
      dulwich/index.py
  3. 220 25
      tests/test_index.py

+ 5 - 5
NEWS

@@ -1,10 +1,10 @@
 0.22.9	UNRELEASED
 
- * Add support for Git's ``feature.manyFiles`` configuration and index version 4.
-   This enables compatibility with repositories using manyFiles feature and
-   provides faster index writes through optional hash skipping. Supports
-   ``feature.manyFiles``, ``index.version``, and ``index.skipHash`` configuration
-   options. Path prefix compression not yet implemented. (Jelmer Vernooij, #1462)
+ * Add full support for Git's ``feature.manyFiles`` configuration and index version 4.
+   This enables faster Git operations in large repositories through path prefix
+   compression (30-50% smaller index files) and optional hash skipping for faster
+   writes. Supports ``feature.manyFiles``, ``index.version``, and ``index.skipHash``
+   configuration options with complete index v4 compatibility. (Jelmer Vernooij, #1462)
 
  * In dulwich.porcelain docstring, list functions by their Python identifiers.
    (Marnanel Thurman)

+ 221 - 10
dulwich/index.py

@@ -69,6 +69,173 @@ EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
 DEFAULT_VERSION = 2
 
 
+def _encode_varint(value: int) -> bytes:
+    """Encode an integer using variable-width encoding.
+
+    Same format as used for OFS_DELTA pack entries and index v4 path compression.
+    Uses 7 bits per byte, with the high bit indicating continuation.
+
+    Args:
+      value: Integer to encode
+    Returns:
+      Encoded bytes
+    """
+    if value == 0:
+        return b"\x00"
+
+    result = []
+    while value > 0:
+        byte = value & 0x7F  # Take lower 7 bits
+        value >>= 7
+        if value > 0:
+            byte |= 0x80  # Set continuation bit
+        result.append(byte)
+
+    return bytes(result)
+
+
+def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
+    """Decode a variable-width encoded integer.
+
+    Args:
+      data: Bytes to decode from
+      offset: Starting offset in data
+    Returns:
+      tuple of (decoded_value, new_offset)
+    """
+    value = 0
+    shift = 0
+    pos = offset
+
+    while pos < len(data):
+        byte = data[pos]
+        pos += 1
+        value |= (byte & 0x7F) << shift
+        shift += 7
+        if not (byte & 0x80):  # No continuation bit
+            break
+
+    return value, pos
+
+
def _compress_path(path: bytes, previous_path: bytes) -> bytes:
    """Compress a path relative to the previous path for index version 4.

    Args:
      path: Path to compress
      previous_path: Previous path for comparison
    Returns:
      Compressed path data (varint prefix_len + suffix)
    """
    # Length of the byte-wise common prefix shared by the two paths.
    shared = 0
    for ours, theirs in zip(path, previous_path):
        if ours != theirs:
            break
        shared += 1

    # Wire format: a varint counting how many trailing bytes of the
    # previous path must be dropped to recover the shared prefix, then
    # the new suffix, NUL-terminated.
    strip_count = len(previous_path) - shared
    return _encode_varint(strip_count) + path[shared:] + b"\x00"
+
+
def _decompress_path(
    data: bytes, offset: int, previous_path: bytes
) -> tuple[bytes, int]:
    """Decompress a path from index version 4 compressed format.

    Args:
      data: Raw data containing compressed path
      offset: Starting offset in data
      previous_path: Previous path for decompression
    Returns:
      tuple of (decompressed_path, new_offset)
    """
    # Varint header: number of trailing bytes to strip from previous_path.
    strip_count, cursor = _decode_varint(data, offset)

    # The suffix runs up to (not including) the next NUL byte.
    nul_pos = data.find(b"\x00", cursor)
    if nul_pos == -1:
        raise ValueError("Unterminated path suffix in compressed entry")
    suffix = data[cursor:nul_pos]

    if strip_count > len(previous_path):
        raise ValueError(
            f"Invalid path compression: trying to remove {strip_count} bytes from {len(previous_path)}-byte path"
        )

    # Keep the shared prefix of the previous path and append the new suffix.
    kept = len(previous_path) - strip_count
    return previous_path[:kept] + suffix, nul_pos + 1
+
+
+def _decompress_path_from_stream(f, previous_path: bytes) -> tuple[bytes, int]:
+    """Decompress a path from index version 4 compressed format, reading from stream.
+
+    Args:
+      f: File-like object to read from
+      previous_path: Previous path for decompression
+    Returns:
+      tuple of (decompressed_path, bytes_consumed)
+    """
+    # Decode the varint for remove_len by reading byte by byte
+    remove_len = 0
+    shift = 0
+    bytes_consumed = 0
+
+    while True:
+        byte_data = f.read(1)
+        if not byte_data:
+            raise ValueError("Unexpected end of file while reading varint")
+        byte = byte_data[0]
+        bytes_consumed += 1
+        remove_len |= (byte & 0x7F) << shift
+        shift += 7
+        if not (byte & 0x80):  # No continuation bit
+            break
+
+    # Read the suffix until NUL terminator
+    suffix = b""
+    while True:
+        byte_data = f.read(1)
+        if not byte_data:
+            raise ValueError("Unexpected end of file while reading path suffix")
+        byte = byte_data[0]
+        bytes_consumed += 1
+        if byte == 0:  # NUL terminator
+            break
+        suffix += bytes([byte])
+
+    # Reconstruct the path
+    if remove_len > len(previous_path):
+        raise ValueError(
+            f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
+        )
+
+    prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
+    path = prefix + suffix
+
+    return path, bytes_consumed
+
+
 class Stage(Enum):
     NORMAL = 0
     MERGE_CONFLICT_ANCESTOR = 1
@@ -241,11 +408,15 @@ def write_cache_time(f, t) -> None:
     f.write(struct.pack(">LL", *t))
 
 
-def read_cache_entry(f, version: int) -> SerializedIndexEntry:
+def read_cache_entry(
+    f, version: int, previous_path: bytes = b""
+) -> SerializedIndexEntry:
     """Read an entry from a cache file.
 
     Args:
       f: File-like object to read from
+      version: Index version
+      previous_path: Previous entry's path (for version 4 compression)
     """
     beginoffset = f.tell()
     ctime = read_cache_time(f)
@@ -266,11 +437,26 @@ def read_cache_entry(f, version: int) -> SerializedIndexEntry:
         (extended_flags,) = struct.unpack(">H", f.read(2))
     else:
         extended_flags = 0
-    name = f.read(flags & FLAG_NAMEMASK)
+
+    if version >= 4:
+        # Version 4: path is compressed, name length should be 0
+        name_len = flags & FLAG_NAMEMASK
+        if name_len != 0:
+            raise ValueError(
+                f"Non-zero name length {name_len} in version 4 index entry"
+            )
+
+        # Read compressed path data byte by byte to avoid seeking
+        name, consumed = _decompress_path_from_stream(f, previous_path)
+    else:
+        # Versions < 4: regular name reading
+        name = f.read(flags & FLAG_NAMEMASK)
+
     # Padding:
     if version < 4:
         real_size = (f.tell() - beginoffset + 8) & ~7
         f.read((beginoffset + real_size) - f.tell())
+
     return SerializedIndexEntry(
         name,
         ctime,
@@ -287,21 +473,33 @@ def read_cache_entry(f, version: int) -> SerializedIndexEntry:
     )
 
 
-def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
+def write_cache_entry(
+    f, entry: SerializedIndexEntry, version: int, previous_path: bytes = b""
+) -> None:
     """Write an index entry to a file.
 
     Args:
       f: File object
-      entry: IndexEntry to write, tuple with:
+      entry: IndexEntry to write
+      version: Index format version
+      previous_path: Previous entry's path (for version 4 compression)
     """
     beginoffset = f.tell()
     write_cache_time(f, entry.ctime)
     write_cache_time(f, entry.mtime)
-    flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
+
+    if version >= 4:
+        # Version 4: use path compression, set name length to 0
+        flags = 0 | (entry.flags & ~FLAG_NAMEMASK)
+    else:
+        # Versions < 4: include actual name length
+        flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
+
     if entry.extended_flags:
         flags |= FLAG_EXTENDED
     if flags & FLAG_EXTENDED and version is not None and version < 3:
         raise AssertionError("unable to use extended flags in version < 3")
+
     f.write(
         struct.pack(
             b">LLLLLL20sH",
@@ -317,8 +515,14 @@ def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
     )
     if flags & FLAG_EXTENDED:
         f.write(struct.pack(b">H", entry.extended_flags))
-    f.write(entry.name)
-    if version < 4:
+
+    if version >= 4:
+        # Version 4: write compressed path
+        compressed_path = _compress_path(entry.name, previous_path)
+        f.write(compressed_path)
+    else:
+        # Versions < 4: write regular path and padding
+        f.write(entry.name)
         real_size = (f.tell() - beginoffset + 8) & ~7
         f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
 
@@ -348,8 +552,11 @@ def read_index_header(f: BinaryIO) -> tuple[int, int]:
 def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
     """Read an index file, yielding the individual entries."""
     version, num_entries = read_index_header(f)
+    previous_path = b""
     for i in range(num_entries):
-        yield read_cache_entry(f, version)
+        entry = read_cache_entry(f, version, previous_path)
+        previous_path = entry.name
+        yield entry
 
 
 def read_index_dict_with_version(
@@ -363,8 +570,10 @@ def read_index_dict_with_version(
     version, num_entries = read_index_header(f)
 
     ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
+    previous_path = b""
     for i in range(num_entries):
-        entry = read_cache_entry(f, version)
+        entry = read_cache_entry(f, version, previous_path)
+        previous_path = entry.name
         stage = entry.stage()
         if stage == Stage.NORMAL:
             ret[entry.name] = IndexEntry.from_serialized(entry)
@@ -432,8 +641,10 @@ def write_index(
     # Proceed with the existing code to write the header and entries.
     f.write(b"DIRC")
     f.write(struct.pack(b">LL", version, len(entries)))
+    previous_path = b""
     for entry in entries:
-        write_cache_entry(f, entry, version=version)
+        write_cache_entry(f, entry, version=version, previous_path=previous_path)
+        previous_path = entry.name
 
 
 def write_index_dict(

+ 220 - 25
tests/test_index.py

@@ -1285,49 +1285,97 @@ class TestManyFilesFeature(TestCase):
 
     def test_version_4_no_padding(self):
         """Test that version 4 entries have no padding."""
-        # Create a version 4 entry and version 2 entry to compare
-        entry = SerializedIndexEntry(
-            name=b"test.txt",
-            ctime=(1234567890, 0),
-            mtime=(1234567890, 0),
-            dev=1,
-            ino=1,
-            mode=0o100644,
-            uid=1000,
-            gid=1000,
-            size=5,
-            sha=b"0" * 40,
-            flags=len(b"test.txt"),
-            extended_flags=0,
-        )
+        # Create entries with names that would show compression benefits
+        entries = [
+            SerializedIndexEntry(
+                name=b"src/main/java/com/example/Service.java",
+                ctime=(1234567890, 0),
+                mtime=(1234567890, 0),
+                dev=1,
+                ino=1,
+                mode=0o100644,
+                uid=1000,
+                gid=1000,
+                size=5,
+                sha=b"0" * 40,
+                flags=0,
+                extended_flags=0,
+            ),
+            SerializedIndexEntry(
+                name=b"src/main/java/com/example/Controller.java",
+                ctime=(1234567890, 0),
+                mtime=(1234567890, 0),
+                dev=1,
+                ino=2,
+                mode=0o100644,
+                uid=1000,
+                gid=1000,
+                size=5,
+                sha=b"1" * 40,
+                flags=0,
+                extended_flags=0,
+            ),
+        ]
 
-        # Test version 2 (with padding)
+        # Test version 2 (with padding, full paths)
         buf_v2 = BytesIO()
         from dulwich.index import write_cache_entry
 
-        write_cache_entry(buf_v2, entry, version=2)
+        previous_path = b""
+        for entry in entries:
+            # Set proper flags for v2
+            entry_v2 = SerializedIndexEntry(
+                entry.name,
+                entry.ctime,
+                entry.mtime,
+                entry.dev,
+                entry.ino,
+                entry.mode,
+                entry.uid,
+                entry.gid,
+                entry.size,
+                entry.sha,
+                len(entry.name),
+                entry.extended_flags,
+            )
+            write_cache_entry(buf_v2, entry_v2, version=2, previous_path=previous_path)
+            previous_path = entry.name
         v2_data = buf_v2.getvalue()
 
-        # Test version 4 (without padding)
+        # Test version 4 (path compression, no padding)
         buf_v4 = BytesIO()
-        write_cache_entry(buf_v4, entry, version=4)
+        previous_path = b""
+        for entry in entries:
+            write_cache_entry(buf_v4, entry, version=4, previous_path=previous_path)
+            previous_path = entry.name
         v4_data = buf_v4.getvalue()
 
-        # Version 4 should be shorter due to no padding
+        # Version 4 should be shorter due to compression and no padding
         self.assertLess(len(v4_data), len(v2_data))
 
         # Both should parse correctly
         buf_v2.seek(0)
         from dulwich.index import read_cache_entry
 
-        parsed_v2 = read_cache_entry(buf_v2, version=2)
+        previous_path = b""
+        parsed_v2_entries = []
+        for _ in entries:
+            parsed = read_cache_entry(buf_v2, version=2, previous_path=previous_path)
+            parsed_v2_entries.append(parsed)
+            previous_path = parsed.name
 
         buf_v4.seek(0)
-        parsed_v4 = read_cache_entry(buf_v4, version=4)
+        previous_path = b""
+        parsed_v4_entries = []
+        for _ in entries:
+            parsed = read_cache_entry(buf_v4, version=4, previous_path=previous_path)
+            parsed_v4_entries.append(parsed)
+            previous_path = parsed.name
 
-        # Both should have the same content
-        self.assertEqual(parsed_v2.name, parsed_v4.name)
-        self.assertEqual(parsed_v2.sha, parsed_v4.sha)
+        # Both should have the same paths
+        for v2_entry, v4_entry in zip(parsed_v2_entries, parsed_v4_entries):
+            self.assertEqual(v2_entry.name, v4_entry.name)
+            self.assertEqual(v2_entry.sha, v4_entry.sha)
 
 
 class TestManyFilesRepoIntegration(TestCase):
@@ -1371,3 +1419,150 @@ class TestManyFilesRepoIntegration(TestCase):
         index = repo.open_index()
         self.assertFalse(index._skip_hash)
         self.assertEqual(index._version, 3)
+
+
class TestPathPrefixCompression(TestCase):
    """Tests for index version 4 path prefix compression."""

    def setUp(self):
        # Scratch directory for on-disk index files; removed at teardown.
        self.tempdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tempdir)

    def test_varint_encoding_decoding(self):
        """Test variable-width integer encoding and decoding."""
        from dulwich.index import _decode_varint, _encode_varint

        # Values straddle the 1-, 2- and 3-byte varint boundaries
        # (7 bits per byte: 127/128, 16383/16384).
        test_values = [0, 1, 127, 128, 255, 256, 16383, 16384, 65535, 65536]

        for value in test_values:
            # Round-trip: decode(encode(v)) must give v back.
            encoded = _encode_varint(value)
            decoded, _ = _decode_varint(encoded, 0)
            self.assertEqual(value, decoded, f"Failed for value {value}")

    def test_path_compression_simple(self):
        """Test simple path compression cases."""
        from dulwich.index import _compress_path, _decompress_path

        # Test case 1: No common prefix
        compressed = _compress_path(b"file1.txt", b"")
        decompressed, _ = _decompress_path(compressed, 0, b"")
        self.assertEqual(b"file1.txt", decompressed)

        # Test case 2: Common prefix
        compressed = _compress_path(b"src/file2.txt", b"src/file1.txt")
        decompressed, _ = _decompress_path(compressed, 0, b"src/file1.txt")
        self.assertEqual(b"src/file2.txt", decompressed)

        # Test case 3: Completely different paths
        compressed = _compress_path(b"docs/readme.md", b"src/file1.txt")
        decompressed, _ = _decompress_path(compressed, 0, b"src/file1.txt")
        self.assertEqual(b"docs/readme.md", decompressed)

    def test_path_compression_deep_directories(self):
        """Test compression with deep directory structures."""
        from dulwich.index import _compress_path, _decompress_path

        # Long shared prefixes are where v4 compression pays off most.
        path1 = b"src/main/java/com/example/service/UserService.java"
        path2 = b"src/main/java/com/example/service/OrderService.java"
        path3 = b"src/main/java/com/example/model/User.java"

        # Compress path2 relative to path1
        compressed = _compress_path(path2, path1)
        decompressed, _ = _decompress_path(compressed, 0, path1)
        self.assertEqual(path2, decompressed)

        # Compress path3 relative to path2
        compressed = _compress_path(path3, path2)
        decompressed, _ = _decompress_path(compressed, 0, path2)
        self.assertEqual(path3, decompressed)

    def test_index_version_4_with_compression(self):
        """Test full index version 4 write/read with path compression."""
        index_path = os.path.join(self.tempdir, "index")

        # Create an index with version 4
        index = Index(index_path, read=False, version=4)

        # Add multiple entries with common prefixes
        paths = [
            b"src/main/java/App.java",
            b"src/main/java/Utils.java",
            b"src/main/resources/config.properties",
            b"src/test/java/AppTest.java",
            b"docs/README.md",
            b"docs/INSTALL.md",
        ]

        for i, path in enumerate(paths):
            entry = IndexEntry(
                ctime=(1234567890, 0),
                mtime=(1234567890, 0),
                dev=1,
                ino=i + 1,
                mode=0o100644,
                uid=1000,
                gid=1000,
                size=10,
                sha=f"{i:040d}".encode(),
            )
            index[path] = entry

        # Write and read back
        index.write()

        # Read the index back
        index2 = Index(index_path)
        self.assertEqual(index2._version, 4)

        # Verify all paths were preserved correctly
        for path in paths:
            self.assertIn(path, index2)

        # Verify the index file is smaller than version 2 would be
        with open(index_path, "rb") as f:
            v4_size = len(f.read())

        # Create equivalent version 2 index for comparison
        index_v2_path = os.path.join(self.tempdir, "index_v2")
        index_v2 = Index(index_v2_path, read=False, version=2)
        for path in paths:
            entry = IndexEntry(
                ctime=(1234567890, 0),
                mtime=(1234567890, 0),
                dev=1,
                ino=1,
                mode=0o100644,
                uid=1000,
                gid=1000,
                size=10,
                sha=b"0" * 40,
            )
            index_v2[path] = entry
        index_v2.write()

        with open(index_v2_path, "rb") as f:
            v2_size = len(f.read())

        # Version 4 should be smaller due to compression
        self.assertLess(
            v4_size, v2_size, "Version 4 index should be smaller than version 2"
        )

    def test_path_compression_edge_cases(self):
        """Test edge cases in path compression."""
        from dulwich.index import _compress_path, _decompress_path

        # Empty paths
        compressed = _compress_path(b"", b"")
        decompressed, _ = _decompress_path(compressed, 0, b"")
        self.assertEqual(b"", decompressed)

        # Path identical to previous
        compressed = _compress_path(b"same.txt", b"same.txt")
        decompressed, _ = _decompress_path(compressed, 0, b"same.txt")
        self.assertEqual(b"same.txt", decompressed)

        # Path shorter than previous
        compressed = _compress_path(b"short", b"very/long/path/file.txt")
        decompressed, _ = _decompress_path(compressed, 0, b"very/long/path/file.txt")
        self.assertEqual(b"short", decompressed)