2 months ago · 2104307722
--- a/NEWS
+++ b/NEWS
@@ -1,10 +1,10 @@
 
															 0.22.9	UNRELEASED
														
 
															- * Add support for Git's ``feature.manyFiles`` configuration and index version 4.
														
 
															-   This enables compatibility with repositories using manyFiles feature and
														
 
															-   provides faster index writes through optional hash skipping. Supports
														
 
															-   ``feature.manyFiles``, ``index.version``, and ``index.skipHash`` configuration
														
 
															-   options. Path prefix compression not yet implemented. (Jelmer Vernooĳ, #1462)
														
 
															+ * Add full support for Git's ``feature.manyFiles`` configuration and index version 4.
														
 
															+   This enables faster Git operations in large repositories through path prefix
														
 
															+   compression (30-50% smaller index files) and optional hash skipping for faster
														
 
															+   writes. Supports ``feature.manyFiles``, ``index.version``, and ``index.skipHash``
														
 
															+   configuration options with complete index v4 compatibility. (Jelmer Vernooĳ, #1462)
														
 
															  * In dulwich.porcelain docstring, list functions by their Python identifiers.
														
 
															    (Marnanel Thurman)
														
--- a/dulwich/index.py
+++ b/dulwich/index.py
@@ -69,6 +69,173 @@ EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
 
															 DEFAULT_VERSION = 2
														
 
															+def _encode_varint(value: int) -> bytes:
														
 
															+    """Encode an integer using variable-width encoding.
														
 
															+
														
 
															+    Same format as used for OFS_DELTA pack entries and index v4 path compression.
														
 
															+    Uses 7 bits per byte, with the high bit indicating continuation.
														
 
															+
														
 
															+    Args:
														
 
															+      value: Integer to encode
														
 
															+    Returns:
														
 
															+      Encoded bytes
														
 
															+    """
														
 
															+    if value == 0:
														
 
															+        return b"\x00"
														
 
															+
														
 
															+    result = []
														
 
															+    while value > 0:
														
 
															+        byte = value & 0x7F  # Take lower 7 bits
														
 
															+        value >>= 7
														
 
															+        if value > 0:
														
 
															+            byte |= 0x80  # Set continuation bit
														
 
															+        result.append(byte)
														
 
															+
														
 
															+    return bytes(result)
														
 
															+
														
 
															+
														
 
															+def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
														
 
															+    """Decode a variable-width encoded integer.
														
 
															+
														
 
															+    Args:
														
 
															+      data: Bytes to decode from
														
 
															+      offset: Starting offset in data
														
 
															+    Returns:
														
 
															+      tuple of (decoded_value, new_offset)
														
 
															+    """
														
 
															+    value = 0
														
 
															+    shift = 0
														
 
															+    pos = offset
														
 
															+
														
 
															+    while pos < len(data):
														
 
															+        byte = data[pos]
														
 
															+        pos += 1
														
 
															+        value |= (byte & 0x7F) << shift
														
 
															+        shift += 7
														
 
															+        if not (byte & 0x80):  # No continuation bit
														
 
															+            break
														
 
															+
														
 
															+    return value, pos
														
 
															+
														
 
															+
														
 
															+def _compress_path(path: bytes, previous_path: bytes) -> bytes:
														
 
															+    """Compress a path relative to the previous path for index version 4.
														
 
															+
														
 
															+    Args:
														
 
															+      path: Path to compress
														
 
															+      previous_path: Previous path for comparison
														
 
															+    Returns:
														
 
															+      Compressed path data (varint prefix_len + suffix)
														
 
															+    """
														
 
															+    # Find the common prefix length
														
 
															+    common_len = 0
														
 
															+    min_len = min(len(path), len(previous_path))
														
 
															+
														
 
															+    for i in range(min_len):
														
 
															+        if path[i] == previous_path[i]:
														
 
															+            common_len += 1
														
 
															+        else:
														
 
															+            break
														
 
															+
														
 
															+    # The number of bytes to remove from the end of previous_path
														
 
															+    # to get the common prefix
														
 
															+    remove_len = len(previous_path) - common_len
														
 
															+
														
 
															+    # The suffix to append
														
 
															+    suffix = path[common_len:]
														
 
															+
														
 
															+    # Encode: varint(remove_len) + suffix + NUL
														
 
															+    return _encode_varint(remove_len) + suffix + b"\x00"
														
 
															+
														
 
															+
														
 
															+def _decompress_path(
														
 
															+    data: bytes, offset: int, previous_path: bytes
														
 
															+) -> tuple[bytes, int]:
														
 
															+    """Decompress a path from index version 4 compressed format.
														
 
															+
														
 
															+    Args:
														
 
															+      data: Raw data containing compressed path
														
 
															+      offset: Starting offset in data
														
 
															+      previous_path: Previous path for decompression
														
 
															+    Returns:
														
 
															+      tuple of (decompressed_path, new_offset)
														
 
															+    """
														
 
															+    # Decode the number of bytes to remove from previous path
														
 
															+    remove_len, new_offset = _decode_varint(data, offset)
														
 
															+
														
 
															+    # Find the NUL terminator for the suffix
														
 
															+    suffix_start = new_offset
														
 
															+    suffix_end = suffix_start
														
 
															+    while suffix_end < len(data) and data[suffix_end] != 0:
														
 
															+        suffix_end += 1
														
 
															+
														
 
															+    if suffix_end >= len(data):
														
 
															+        raise ValueError("Unterminated path suffix in compressed entry")
														
 
															+
														
 
															+    suffix = data[suffix_start:suffix_end]
														
 
															+    new_offset = suffix_end + 1  # Skip the NUL terminator
														
 
															+
														
 
															+    # Reconstruct the path
														
 
															+    if remove_len > len(previous_path):
														
 
															+        raise ValueError(
														
 
															+            f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
														
 
															+        )
														
 
															+
														
 
															+    prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
														
 
															+    path = prefix + suffix
														
 
															+
														
 
															+    return path, new_offset
														
 
															+
														
 
															+
														
 
															+def _decompress_path_from_stream(f, previous_path: bytes) -> tuple[bytes, int]:
														
 
															+    """Decompress a path from index version 4 compressed format, reading from stream.
														
 
															+
														
 
															+    Args:
														
 
															+      f: File-like object to read from
														
 
															+      previous_path: Previous path for decompression
														
 
															+    Returns:
														
 
															+      tuple of (decompressed_path, bytes_consumed)
														
 
															+    """
														
 
															+    # Decode the varint for remove_len by reading byte by byte
														
 
															+    remove_len = 0
														
 
															+    shift = 0
														
 
															+    bytes_consumed = 0
														
 
															+
														
 
															+    while True:
														
 
															+        byte_data = f.read(1)
														
 
															+        if not byte_data:
														
 
															+            raise ValueError("Unexpected end of file while reading varint")
														
 
															+        byte = byte_data[0]
														
 
															+        bytes_consumed += 1
														
 
															+        remove_len |= (byte & 0x7F) << shift
														
 
															+        shift += 7
														
 
															+        if not (byte & 0x80):  # No continuation bit
														
 
															+            break
														
 
															+
														
 
															+    # Read the suffix until NUL terminator
														
 
															+    suffix = b""
														
 
															+    while True:
														
 
															+        byte_data = f.read(1)
														
 
															+        if not byte_data:
														
 
															+            raise ValueError("Unexpected end of file while reading path suffix")
														
 
															+        byte = byte_data[0]
														
 
															+        bytes_consumed += 1
														
 
															+        if byte == 0:  # NUL terminator
														
 
															+            break
														
 
															+        suffix += bytes([byte])
														
 
															+
														
 
															+    # Reconstruct the path
														
 
															+    if remove_len > len(previous_path):
														
 
															+        raise ValueError(
														
 
															+            f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
														
 
															+        )
														
 
															+
														
 
															+    prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
														
 
															+    path = prefix + suffix
														
 
															+
														
 
															+    return path, bytes_consumed
														
 
															+
														
 
															+
														
 
															 class Stage(Enum):
														
 
															     NORMAL = 0
														
 
															     MERGE_CONFLICT_ANCESTOR = 1
														
@@ -241,11 +408,15 @@ def write_cache_time(f, t) -> None:
 
															     f.write(struct.pack(">LL", *t))
														
 
															-def read_cache_entry(f, version: int) -> SerializedIndexEntry:
														
 
															+def read_cache_entry(
														
 
															+    f, version: int, previous_path: bytes = b""
														
 
															+) -> SerializedIndexEntry:
														
 
															     """Read an entry from a cache file.
														
 
															     Args:
														
 
															       f: File-like object to read from
														
 
															+      version: Index version
														
 
															+      previous_path: Previous entry's path (for version 4 compression)
														
 
															     """
														
 
															     beginoffset = f.tell()
														
 
															     ctime = read_cache_time(f)
														
@@ -266,11 +437,26 @@ def read_cache_entry(f, version: int) -> SerializedIndexEntry:
 
															         (extended_flags,) = struct.unpack(">H", f.read(2))
														
 
															     else:
														
 
															         extended_flags = 0
														
 
															-    name = f.read(flags & FLAG_NAMEMASK)
														
 
															+
														
 
															+    if version >= 4:
														
 
															+        # Version 4: path is compressed, name length should be 0
														
 
															+        name_len = flags & FLAG_NAMEMASK
														
 
															+        if name_len != 0:
														
 
															+            raise ValueError(
														
 
															+                f"Non-zero name length {name_len} in version 4 index entry"
														
 
															+            )
														
 
															+
														
 
															+        # Read compressed path data byte by byte to avoid seeking
														
 
															+        name, consumed = _decompress_path_from_stream(f, previous_path)
														
 
															+    else:
														
 
															+        # Versions < 4: regular name reading
														
 
															+        name = f.read(flags & FLAG_NAMEMASK)
														
 
															+
														
 
															     # Padding:
														
 
															     if version < 4:
														
 
															         real_size = (f.tell() - beginoffset + 8) & ~7
														
 
															         f.read((beginoffset + real_size) - f.tell())
														
 
															+
														
 
															     return SerializedIndexEntry(
														
 
															         name,
														
 
															         ctime,
														
@@ -287,21 +473,33 @@ def read_cache_entry(f, version: int) -> SerializedIndexEntry:
 
															     )
														
 
															-def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
														
 
															+def write_cache_entry(
														
 
															+    f, entry: SerializedIndexEntry, version: int, previous_path: bytes = b""
														
 
															+) -> None:
														
 
															     """Write an index entry to a file.
														
 
															     Args:
														
 
															       f: File object
														
 
															-      entry: IndexEntry to write, tuple with:
														
 
															+      entry: IndexEntry to write
														
 
															+      version: Index format version
														
 
															+      previous_path: Previous entry's path (for version 4 compression)
														
 
															     """
														
 
															     beginoffset = f.tell()
														
 
															     write_cache_time(f, entry.ctime)
														
 
															     write_cache_time(f, entry.mtime)
														
 
															-    flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
														
 
															+
														
 
															+    if version >= 4:
														
 
															+        # Version 4: use path compression, set name length to 0
														
 
															+        flags = 0 | (entry.flags & ~FLAG_NAMEMASK)
														
 
															+    else:
														
 
															+        # Versions < 4: include actual name length
														
 
															+        flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
														
 
															+
														
 
															     if entry.extended_flags:
														
 
															         flags |= FLAG_EXTENDED
														
 
															     if flags & FLAG_EXTENDED and version is not None and version < 3:
														
 
															         raise AssertionError("unable to use extended flags in version < 3")
														
 
															+
														
 
															     f.write(
														
 
															         struct.pack(
														
 
															             b">LLLLLL20sH",
														
@@ -317,8 +515,14 @@ def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
 
															     )
														
 
															     if flags & FLAG_EXTENDED:
														
 
															         f.write(struct.pack(b">H", entry.extended_flags))
														
 
															-    f.write(entry.name)
														
 
															-    if version < 4:
														
 
															+
														
 
															+    if version >= 4:
														
 
															+        # Version 4: write compressed path
														
 
															+        compressed_path = _compress_path(entry.name, previous_path)
														
 
															+        f.write(compressed_path)
														
 
															+    else:
														
 
															+        # Versions < 4: write regular path and padding
														
 
															+        f.write(entry.name)
														
 
															         real_size = (f.tell() - beginoffset + 8) & ~7
														
 
															         f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
														
@@ -348,8 +552,11 @@ def read_index_header(f: BinaryIO) -> tuple[int, int]:
 
															 def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
														
 
															     """Read an index file, yielding the individual entries."""
														
 
															     version, num_entries = read_index_header(f)
														
 
															+    previous_path = b""
														
 
															     for i in range(num_entries):
														
 
															-        yield read_cache_entry(f, version)
														
 
															+        entry = read_cache_entry(f, version, previous_path)
														
 
															+        previous_path = entry.name
														
 
															+        yield entry
														
 
															 def read_index_dict_with_version(
														
@@ -363,8 +570,10 @@ def read_index_dict_with_version(
 
															     version, num_entries = read_index_header(f)
														
 
															     ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
														
 
															+    previous_path = b""
														
 
															     for i in range(num_entries):
														
 
															-        entry = read_cache_entry(f, version)
														
 
															+        entry = read_cache_entry(f, version, previous_path)
														
 
															+        previous_path = entry.name
														
 
															         stage = entry.stage()
														
 
															         if stage == Stage.NORMAL:
														
 
															             ret[entry.name] = IndexEntry.from_serialized(entry)
														
@@ -432,8 +641,10 @@ def write_index(
 
															     # Proceed with the existing code to write the header and entries.
														
 
															     f.write(b"DIRC")
														
 
															     f.write(struct.pack(b">LL", version, len(entries)))
														
 
															+    previous_path = b""
														
 
															     for entry in entries:
														
 
															-        write_cache_entry(f, entry, version=version)
														
 
															+        write_cache_entry(f, entry, version=version, previous_path=previous_path)
														
 
															+        previous_path = entry.name
														
 
															 def write_index_dict(
														
--- a/tests/test_index.py
+++ b/tests/test_index.py
@@ -1285,49 +1285,97 @@ class TestManyFilesFeature(TestCase):
 
															     def test_version_4_no_padding(self):
														
 
															         """Test that version 4 entries have no padding."""
														
 
															-        # Create a version 4 entry and version 2 entry to compare
														
 
															-        entry = SerializedIndexEntry(
														
 
															-            name=b"test.txt",
														
 
															-            ctime=(1234567890, 0),
														
 
															-            mtime=(1234567890, 0),
														
 
															-            dev=1,
														
 
															-            ino=1,
														
 
															-            mode=0o100644,
														
 
															-            uid=1000,
														
 
															-            gid=1000,
														
 
															-            size=5,
														
 
															-            sha=b"0" * 40,
														
 
															-            flags=len(b"test.txt"),
														
 
															-            extended_flags=0,
														
 
															-        )
														
 
															+        # Create entries with names that would show compression benefits
														
 
															+        entries = [
														
 
															+            SerializedIndexEntry(
														
 
															+                name=b"src/main/java/com/example/Service.java",
														
 
															+                ctime=(1234567890, 0),
														
 
															+                mtime=(1234567890, 0),
														
 
															+                dev=1,
														
 
															+                ino=1,
														
 
															+                mode=0o100644,
														
 
															+                uid=1000,
														
 
															+                gid=1000,
														
 
															+                size=5,
														
 
															+                sha=b"0" * 40,
														
 
															+                flags=0,
														
 
															+                extended_flags=0,
														
 
															+            ),
														
 
															+            SerializedIndexEntry(
														
 
															+                name=b"src/main/java/com/example/Controller.java",
														
 
															+                ctime=(1234567890, 0),
														
 
															+                mtime=(1234567890, 0),
														
 
															+                dev=1,
														
 
															+                ino=2,
														
 
															+                mode=0o100644,
														
 
															+                uid=1000,
														
 
															+                gid=1000,
														
 
															+                size=5,
														
 
															+                sha=b"1" * 40,
														
 
															+                flags=0,
														
 
															+                extended_flags=0,
														
 
															+            ),
														
 
															+        ]
														
 
															-        # Test version 2 (with padding)
														
 
															+        # Test version 2 (with padding, full paths)
														
 
															         buf_v2 = BytesIO()
														
 
															         from dulwich.index import write_cache_entry
														
 
															-        write_cache_entry(buf_v2, entry, version=2)
														
 
															+        previous_path = b""
														
 
															+        for entry in entries:
														
 
															+            # Set proper flags for v2
														
 
															+            entry_v2 = SerializedIndexEntry(
														
 
															+                entry.name,
														
 
															+                entry.ctime,
														
 
															+                entry.mtime,
														
 
															+                entry.dev,
														
 
															+                entry.ino,
														
 
															+                entry.mode,
														
 
															+                entry.uid,
														
 
															+                entry.gid,
														
 
															+                entry.size,
														
 
															+                entry.sha,
														
 
															+                len(entry.name),
														
 
															+                entry.extended_flags,
														
 
															+            )
														
 
															+            write_cache_entry(buf_v2, entry_v2, version=2, previous_path=previous_path)
														
 
															+            previous_path = entry.name
														
 
															         v2_data = buf_v2.getvalue()
														
 
															-        # Test version 4 (without padding)
														
 
															+        # Test version 4 (path compression, no padding)
														
 
															         buf_v4 = BytesIO()
														
 
															-        write_cache_entry(buf_v4, entry, version=4)
														
 
															+        previous_path = b""
														
 
															+        for entry in entries:
														
 
															+            write_cache_entry(buf_v4, entry, version=4, previous_path=previous_path)
														
 
															+            previous_path = entry.name
														
 
															         v4_data = buf_v4.getvalue()
														
 
															-        # Version 4 should be shorter due to no padding
														
 
															+        # Version 4 should be shorter due to compression and no padding
														
 
															         self.assertLess(len(v4_data), len(v2_data))
														
 
															         # Both should parse correctly
														
 
															         buf_v2.seek(0)
														
 
															         from dulwich.index import read_cache_entry
														
 
															-        parsed_v2 = read_cache_entry(buf_v2, version=2)
														
 
															+        previous_path = b""
														
 
															+        parsed_v2_entries = []
														
 
															+        for _ in entries:
														
 
															+            parsed = read_cache_entry(buf_v2, version=2, previous_path=previous_path)
														
 
															+            parsed_v2_entries.append(parsed)
														
 
															+            previous_path = parsed.name
														
 
															         buf_v4.seek(0)
														
 
															-        parsed_v4 = read_cache_entry(buf_v4, version=4)
														
 
															+        previous_path = b""
														
 
															+        parsed_v4_entries = []
														
 
															+        for _ in entries:
														
 
															+            parsed = read_cache_entry(buf_v4, version=4, previous_path=previous_path)
														
 
															+            parsed_v4_entries.append(parsed)
														
 
															+            previous_path = parsed.name
														
 
															-        # Both should have the same content
														
 
															-        self.assertEqual(parsed_v2.name, parsed_v4.name)
														
 
															-        self.assertEqual(parsed_v2.sha, parsed_v4.sha)
														
 
															+        # Both should have the same paths
														
 
															+        for v2_entry, v4_entry in zip(parsed_v2_entries, parsed_v4_entries):
														
 
															+            self.assertEqual(v2_entry.name, v4_entry.name)
														
 
															+            self.assertEqual(v2_entry.sha, v4_entry.sha)
														
 
															 class TestManyFilesRepoIntegration(TestCase):
														
@@ -1371,3 +1419,150 @@ class TestManyFilesRepoIntegration(TestCase):
 
															         index = repo.open_index()
														
 
															         self.assertFalse(index._skip_hash)
														
 
															         self.assertEqual(index._version, 3)
														
 
															+
														
 
															+
														
 
															+class TestPathPrefixCompression(TestCase):
														
 
															+    """Tests for index version 4 path prefix compression."""
														
 
															+
														
 
															+    def setUp(self):
														
 
															+        self.tempdir = tempfile.mkdtemp()
														
 
															+        self.addCleanup(shutil.rmtree, self.tempdir)
														
 
															+
														
 
															+    def test_varint_encoding_decoding(self):
														
 
															+        """Test variable-width integer encoding and decoding."""
														
 
															+        from dulwich.index import _decode_varint, _encode_varint
														
 
															+
														
 
															+        test_values = [0, 1, 127, 128, 255, 256, 16383, 16384, 65535, 65536]
														
 
															+
														
 
															+        for value in test_values:
														
 
															+            encoded = _encode_varint(value)
														
 
															+            decoded, _ = _decode_varint(encoded, 0)
														
 
															+            self.assertEqual(value, decoded, f"Failed for value {value}")
														
 
															+
														
 
															+    def test_path_compression_simple(self):
														
 
															+        """Test simple path compression cases."""
														
 
															+        from dulwich.index import _compress_path, _decompress_path
														
 
															+
														
 
															+        # Test case 1: No common prefix
														
 
															+        compressed = _compress_path(b"file1.txt", b"")
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, b"")
														
 
															+        self.assertEqual(b"file1.txt", decompressed)
														
 
															+
														
 
															+        # Test case 2: Common prefix
														
 
															+        compressed = _compress_path(b"src/file2.txt", b"src/file1.txt")
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, b"src/file1.txt")
														
 
															+        self.assertEqual(b"src/file2.txt", decompressed)
														
 
															+
														
 
															+        # Test case 3: Completely different paths
														
 
															+        compressed = _compress_path(b"docs/readme.md", b"src/file1.txt")
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, b"src/file1.txt")
														
 
															+        self.assertEqual(b"docs/readme.md", decompressed)
														
 
															+
														
 
															+    def test_path_compression_deep_directories(self):
														
 
															+        """Test compression with deep directory structures."""
														
 
															+        from dulwich.index import _compress_path, _decompress_path
														
 
															+
														
 
															+        path1 = b"src/main/java/com/example/service/UserService.java"
														
 
															+        path2 = b"src/main/java/com/example/service/OrderService.java"
														
 
															+        path3 = b"src/main/java/com/example/model/User.java"
														
 
															+
														
 
															+        # Compress path2 relative to path1
														
 
															+        compressed = _compress_path(path2, path1)
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, path1)
														
 
															+        self.assertEqual(path2, decompressed)
														
 
															+
														
 
															+        # Compress path3 relative to path2
														
 
															+        compressed = _compress_path(path3, path2)
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, path2)
														
 
															+        self.assertEqual(path3, decompressed)
														
 
															+
														
 
															+    def test_index_version_4_with_compression(self):
														
 
															+        """Test full index version 4 write/read with path compression."""
														
 
															+        index_path = os.path.join(self.tempdir, "index")
														
 
															+
														
 
															+        # Create an index with version 4
														
 
															+        index = Index(index_path, read=False, version=4)
														
 
															+
														
 
															+        # Add multiple entries with common prefixes
														
 
															+        paths = [
														
 
															+            b"src/main/java/App.java",
														
 
															+            b"src/main/java/Utils.java",
														
 
															+            b"src/main/resources/config.properties",
														
 
															+            b"src/test/java/AppTest.java",
														
 
															+            b"docs/README.md",
														
 
															+            b"docs/INSTALL.md",
														
 
															+        ]
														
 
															+
														
 
															+        for i, path in enumerate(paths):
														
 
															+            entry = IndexEntry(
														
 
															+                ctime=(1234567890, 0),
														
 
															+                mtime=(1234567890, 0),
														
 
															+                dev=1,
														
 
															+                ino=i + 1,
														
 
															+                mode=0o100644,
														
 
															+                uid=1000,
														
 
															+                gid=1000,
														
 
															+                size=10,
														
 
															+                sha=f"{i:040d}".encode(),
														
 
															+            )
														
 
															+            index[path] = entry
														
 
															+
														
 
															+        # Write and read back
														
 
															+        index.write()
														
 
															+
														
 
															+        # Read the index back
														
 
															+        index2 = Index(index_path)
														
 
															+        self.assertEqual(index2._version, 4)
														
 
															+
														
 
															+        # Verify all paths were preserved correctly
														
 
															+        for path in paths:
														
 
															+            self.assertIn(path, index2)
														
 
															+
														
 
															+        # Verify the index file is smaller than version 2 would be
														
 
															+        with open(index_path, "rb") as f:
														
 
															+            v4_size = len(f.read())
														
 
															+
														
 
															+        # Create equivalent version 2 index for comparison
														
 
															+        index_v2_path = os.path.join(self.tempdir, "index_v2")
														
 
															+        index_v2 = Index(index_v2_path, read=False, version=2)
														
 
															+        for path in paths:
														
 
															+            entry = IndexEntry(
														
 
															+                ctime=(1234567890, 0),
														
 
															+                mtime=(1234567890, 0),
														
 
															+                dev=1,
														
 
															+                ino=1,
														
 
															+                mode=0o100644,
														
 
															+                uid=1000,
														
 
															+                gid=1000,
														
 
															+                size=10,
														
 
															+                sha=b"0" * 40,
														
 
															+            )
														
 
															+            index_v2[path] = entry
														
 
															+        index_v2.write()
														
 
															+
														
 
															+        with open(index_v2_path, "rb") as f:
														
 
															+            v2_size = len(f.read())
														
 
															+
														
 
															+        # Version 4 should be smaller due to compression
														
 
															+        self.assertLess(
														
 
															+            v4_size, v2_size, "Version 4 index should be smaller than version 2"
														
 
															+        )
														
 
															+
														
 
															+    def test_path_compression_edge_cases(self):
														
 
															+        """Test edge cases in path compression."""
														
 
															+        from dulwich.index import _compress_path, _decompress_path
														
 
															+
														
 
															+        # Empty paths
														
 
															+        compressed = _compress_path(b"", b"")
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, b"")
														
 
															+        self.assertEqual(b"", decompressed)
														
 
															+
														
 
															+        # Path identical to previous
														
 
															+        compressed = _compress_path(b"same.txt", b"same.txt")
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, b"same.txt")
														
 
															+        self.assertEqual(b"same.txt", decompressed)
														
 
															+
														
 
															+        # Path shorter than previous
														
 
															+        compressed = _compress_path(b"short", b"very/long/path/file.txt")
														
 
															+        decompressed, _ = _decompress_path(compressed, 0, b"very/long/path/file.txt")
														
 
															+        self.assertEqual(b"short", decompressed)