Jelmer Vernooij, 1 month ago
Commit 836e22b4a1
6 changed files with 1374 additions and 0 deletions
  1. dulwich/midx.py (644 additions, 0 deletions)
  2. dulwich/object_store.py (184 additions, 0 deletions)
  3. tests/__init__.py (1 addition, 0 deletions)
  4. tests/compat/__init__.py (1 addition, 0 deletions)
  5. tests/compat/test_midx.py (267 additions, 0 deletions)
  6. tests/test_midx.py (277 additions, 0 deletions)

+ 644 - 0
dulwich/midx.py

@@ -0,0 +1,644 @@
+# midx.py -- Multi-Pack-Index (MIDX) support
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Multi-Pack-Index (MIDX) support.
+
+A multi-pack-index (MIDX) provides a single index that covers multiple pack files,
+enabling fast object lookup across all packs without opening each pack index.
+
+The MIDX file format consists of:
+- A header with signature, version, and hash algorithm
+- A chunk lookup table
+- Multiple chunks containing pack names, OID fanout, OID lookup, and object offsets
+- A trailer with checksum
+
+This module provides:
+- Reading MIDX files
+- Writing MIDX files
+- Integration with pack-based object stores
+
+Limitations:
+- Incremental MIDX chains are not yet supported (base_midx_files must be 0)
+- BTMP (bitmapped packfiles) chunk is not yet implemented
+- RIDX (reverse index) chunk is not yet implemented
+
+Note: Incremental MIDX chains were introduced in Git 2.47 as an experimental
+feature, where multiple MIDX files can be chained together. The format includes
+a base_midx_files field in the header and uses a multi-pack-index.d/ directory
+with a multi-pack-index-chain file. This feature is not yet supported by Dulwich
+as the specification is still evolving.
+"""
+
+import os
+import struct
+from collections.abc import Iterator
+from io import UnsupportedOperation
+from typing import IO, Any
+
+try:
+    import mmap
+except ImportError:
+    has_mmap = False
+else:
+    has_mmap = True
+
+from .file import GitFile, _GitFile
+from .objects import ObjectID, RawObjectID
+from .pack import SHA1Writer
+
+# MIDX signature
+MIDX_SIGNATURE = b"MIDX"
+
+# MIDX version
+MIDX_VERSION = 1
+
+# Chunk identifiers (4 bytes each)
+CHUNK_PNAM = b"PNAM"  # Packfile names
+CHUNK_OIDF = b"OIDF"  # OID fanout table
+CHUNK_OIDL = b"OIDL"  # OID lookup table
+CHUNK_OOFF = b"OOFF"  # Object offsets
+CHUNK_LOFF = b"LOFF"  # Large offsets (optional)
+CHUNK_BTMP = b"BTMP"  # Bitmapped packfiles (optional)
+CHUNK_RIDX = b"RIDX"  # Reverse index (optional)
+
+# Hash algorithm identifiers
+HASH_ALGORITHM_SHA1 = 1
+HASH_ALGORITHM_SHA256 = 2
+
+
+class MultiPackIndex:
+    """Multi-pack-index for efficient object lookup across multiple pack files."""
+
+    def __init__(
+        self,
+        filename: str | os.PathLike[str],
+        file: IO[bytes] | _GitFile | None = None,
+        contents: bytes | None = None,
+        size: int | None = None,
+    ) -> None:
+        """Initialize a MultiPackIndex.
+
+        Args:
+            filename: Path to the MIDX file
+            file: Optional file object
+            contents: Optional mmap'd contents
+            size: Optional size of the MIDX file
+        """
+        self._filename = os.fspath(filename)
+        self._file = file
+        self._size = size
+
+        # Instance variables that will be set during parsing
+        self.version: int
+        self.hash_algorithm: int
+        self.hash_size: int
+        self.chunk_count: int
+        self.base_midx_files: int
+        self.pack_count: int
+        self.pack_names: list[str]
+        self.object_count: int
+        self._chunks: dict[bytes, int]
+        self._fanout_table: list[int]
+        self._oidl_offset: int
+        self._ooff_offset: int
+        self._loff_offset: int
+
+        # Load file contents
+        if contents is None:
+            if file is None:
+                with GitFile(filename, "rb") as f:
+                    self._contents, self._size = self._load_file_contents(f, size)
+            else:
+                self._contents, self._size = self._load_file_contents(file, size)
+        else:
+            self._contents = contents
+
+        # Parse header
+        self._parse_header()
+
+        # Parse chunk lookup table
+        self._parse_chunk_table()
+
+    def _load_file_contents(
+        self, f: IO[bytes] | _GitFile, size: int | None = None
+    ) -> tuple[bytes | Any, int]:
+        """Load contents from a file, preferring mmap when possible.
+
+        Args:
+            f: File-like object to load
+            size: Expected size, or None to determine from file
+
+        Returns:
+            Tuple of (contents, size)
+        """
+        try:
+            fd = f.fileno()
+        except (UnsupportedOperation, AttributeError):
+            fd = None
+
+        # Attempt to use mmap if possible
+        if fd is not None:
+            if size is None:
+                size = os.fstat(fd).st_size
+            if has_mmap:
+                try:
+                    contents = mmap.mmap(fd, size, access=mmap.ACCESS_READ)
+                except (OSError, ValueError):
+                    # Can't mmap - perhaps a socket or invalid file descriptor
+                    pass
+                else:
+                    return contents, size
+
+        # Fall back to reading entire file into memory
+        contents_bytes = f.read()
+        size = len(contents_bytes)
+        return contents_bytes, size
+
+    def _parse_header(self) -> None:
+        """Parse the MIDX header."""
+        if len(self._contents) < 12:
+            raise ValueError("MIDX file too small")
+
+        # Check signature
+        signature = self._contents[0:4]
+        if signature != MIDX_SIGNATURE:
+            raise ValueError(f"Invalid MIDX signature: {signature!r}")
+
+        # Read version
+        self.version = self._contents[4]
+        if self.version != MIDX_VERSION:
+            raise ValueError(f"Unsupported MIDX version: {self.version}")
+
+        # Read object ID version (hash algorithm)
+        self.hash_algorithm = self._contents[5]
+        if self.hash_algorithm == HASH_ALGORITHM_SHA1:
+            self.hash_size = 20
+        elif self.hash_algorithm == HASH_ALGORITHM_SHA256:
+            self.hash_size = 32
+        else:
+            raise ValueError(f"Unknown hash algorithm: {self.hash_algorithm}")
+
+        # Read chunk count
+        self.chunk_count = self._contents[6]
+
+        # Read base MIDX files count (currently always 0)
+        self.base_midx_files = self._contents[7]
+        if self.base_midx_files != 0:
+            raise ValueError("Incremental MIDX not yet supported")
+
+        # Read pack file count
+        (self.pack_count,) = struct.unpack(">L", self._contents[8:12])
+
+    def _parse_chunk_table(self) -> None:
+        """Parse the chunk lookup table."""
+        self._chunks = {}
+
+        # Chunk table starts at offset 12
+        offset = 12
+
+        # Each chunk entry is 12 bytes (4-byte ID + 8-byte offset)
+        for _ in range(self.chunk_count + 1):  # +1 for the terminator entry
+            chunk_id = self._contents[offset : offset + 4]
+            (chunk_offset,) = struct.unpack(
+                ">Q", self._contents[offset + 4 : offset + 12]
+            )
+
+            if chunk_id == b"\x00\x00\x00\x00":
+                # Terminator entry
+                break
+
+            self._chunks[chunk_id] = chunk_offset
+            offset += 12
+
+        # Parse required chunks
+        self._parse_pnam_chunk()
+        self._parse_oidf_chunk()
+        self._parse_oidl_chunk()
+        self._parse_ooff_chunk()
+
+        # Parse optional chunks
+        if CHUNK_LOFF in self._chunks:
+            self._parse_loff_chunk()
+
+    def _parse_pnam_chunk(self) -> None:
+        """Parse the Packfile Names (PNAM) chunk."""
+        if CHUNK_PNAM not in self._chunks:
+            raise ValueError("Required PNAM chunk not found")
+
+        offset = self._chunks[CHUNK_PNAM]
+        self.pack_names = []
+
+        # Find the end of the PNAM chunk (next chunk or end of chunks section)
+        next_offset = min(
+            (o for o in self._chunks.values() if o > offset),
+            default=len(self._contents),
+        )
+
+        # Parse null-terminated pack names
+        current = offset
+        while current < next_offset:
+            # Find the next null terminator
+            null_pos = self._contents.find(b"\x00", current, next_offset)
+            if null_pos == -1:
+                break
+
+            pack_name = self._contents[current:null_pos].decode("utf-8")
+            if pack_name:  # Skip empty strings (padding)
+                self.pack_names.append(pack_name)
+            current = null_pos + 1
+
+    def _parse_oidf_chunk(self) -> None:
+        """Parse the OID Fanout (OIDF) chunk."""
+        if CHUNK_OIDF not in self._chunks:
+            raise ValueError("Required OIDF chunk not found")
+
+        offset = self._chunks[CHUNK_OIDF]
+        self._fanout_table = []
+
+        # Read 256 4-byte entries
+        for i in range(256):
+            (count,) = struct.unpack(
+                ">L", self._contents[offset + i * 4 : offset + i * 4 + 4]
+            )
+            self._fanout_table.append(count)
+
+        # Total object count is the last entry
+        self.object_count = self._fanout_table[255]
+
+    def _parse_oidl_chunk(self) -> None:
+        """Parse the OID Lookup (OIDL) chunk."""
+        if CHUNK_OIDL not in self._chunks:
+            raise ValueError("Required OIDL chunk not found")
+
+        self._oidl_offset = self._chunks[CHUNK_OIDL]
+
+    def _parse_ooff_chunk(self) -> None:
+        """Parse the Object Offsets (OOFF) chunk."""
+        if CHUNK_OOFF not in self._chunks:
+            raise ValueError("Required OOFF chunk not found")
+
+        self._ooff_offset = self._chunks[CHUNK_OOFF]
+
+    def _parse_loff_chunk(self) -> None:
+        """Parse the Large Offsets (LOFF) chunk."""
+        self._loff_offset = self._chunks[CHUNK_LOFF]
+
+    def __len__(self) -> int:
+        """Return the number of objects in this MIDX."""
+        return self.object_count
+
+    def _get_oid(self, index: int) -> RawObjectID:
+        """Get the object ID at the given index.
+
+        Args:
+            index: Index of the object
+
+        Returns:
+            Binary object ID
+        """
+        if index < 0 or index >= self.object_count:
+            raise IndexError(f"Index {index} out of range")
+
+        offset = self._oidl_offset + index * self.hash_size
+        return RawObjectID(self._contents[offset : offset + self.hash_size])
+
+    def _get_pack_info(self, index: int) -> tuple[int, int]:
+        """Get pack ID and offset for object at the given index.
+
+        Args:
+            index: Index of the object
+
+        Returns:
+            Tuple of (pack_id, offset)
+        """
+        if index < 0 or index >= self.object_count:
+            raise IndexError(f"Index {index} out of range")
+
+        # Each entry is 8 bytes (4-byte pack ID + 4-byte offset)
+        offset = self._ooff_offset + index * 8
+
+        (pack_id,) = struct.unpack(">L", self._contents[offset : offset + 4])
+        (pack_offset,) = struct.unpack(">L", self._contents[offset + 4 : offset + 8])
+
+        # Check if this is a large offset (MSB set)
+        if pack_offset & 0x80000000:
+            # Look up in LOFF chunk
+            if CHUNK_LOFF not in self._chunks:
+                raise ValueError("Large offset found but no LOFF chunk")
+
+            large_index = pack_offset & 0x7FFFFFFF
+            large_offset_pos = self._loff_offset + large_index * 8
+            (pack_offset,) = struct.unpack(
+                ">Q", self._contents[large_offset_pos : large_offset_pos + 8]
+            )
+
+        return pack_id, pack_offset
+
+    def object_offset(self, sha: ObjectID | RawObjectID) -> tuple[str, int] | None:
+        """Return the pack name and offset for the given object.
+
+        Args:
+            sha: Binary or hex-encoded SHA-1 or SHA-256 hash
+
+        Returns:
+            Tuple of (pack_name, offset) or None if not found
+        """
+        if len(sha) == self.hash_size * 2:
+            # Hex-encoded ObjectID; convert to the raw form stored in the MIDX
+            sha = RawObjectID(bytes.fromhex(sha.decode("ascii")))
+        elif len(sha) != self.hash_size:
+            raise ValueError(
+                f"SHA size mismatch: expected {self.hash_size}, got {len(sha)}"
+            )
+
+        # Use fanout table to narrow search range
+        first_byte = sha[0]
+        start_idx = 0 if first_byte == 0 else self._fanout_table[first_byte - 1]
+        end_idx = self._fanout_table[first_byte]
+
+        # Binary search within the range
+        while start_idx < end_idx:
+            mid = (start_idx + end_idx) // 2
+            mid_sha = self._get_oid(mid)
+
+            if mid_sha == sha:
+                # Found it!
+                pack_id, offset = self._get_pack_info(mid)
+                return self.pack_names[pack_id], offset
+            elif mid_sha < sha:
+                start_idx = mid + 1
+            else:
+                end_idx = mid
+
+        return None
+
+    def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
+        """Check if the given object SHA is in this MIDX.
+
+        Args:
+            sha: Binary or hex-encoded SHA hash
+
+        Returns:
+            True if the object is in this MIDX
+        """
+        return self.object_offset(sha) is not None
+
+    def iterentries(self) -> Iterator[tuple[RawObjectID, str, int]]:
+        """Iterate over all entries in this MIDX.
+
+        Yields:
+            Tuples of (sha, pack_name, offset)
+        """
+        for i in range(self.object_count):
+            sha = self._get_oid(i)
+            pack_id, offset = self._get_pack_info(i)
+            pack_name = self.pack_names[pack_id]
+            yield sha, pack_name, offset
+
+    def close(self) -> None:
+        """Close the MIDX file and release mmap resources."""
+        # Close mmap'd contents first if it's an mmap object
+        if self._contents is not None and has_mmap:
+            if isinstance(self._contents, mmap.mmap):
+                self._contents.close()
+        self._contents = None
+
+        # Close file handle
+        if self._file is not None:
+            self._file.close()
+            self._file = None
+
+
+def load_midx(path: str | os.PathLike[str]) -> MultiPackIndex:
+    """Load a multi-pack-index file by path.
+
+    Args:
+        path: Path to the MIDX file
+
+    Returns:
+        A MultiPackIndex loaded from the given path
+    """
+    with GitFile(path, "rb") as f:
+        return load_midx_file(path, f)
+
+
+def load_midx_file(
+    path: str | os.PathLike[str], f: IO[bytes] | _GitFile
+) -> MultiPackIndex:
+    """Load a multi-pack-index from a file-like object.
+
+    Args:
+        path: Path for the MIDX file
+        f: File-like object
+
+    Returns:
+        A MultiPackIndex loaded from the given file
+    """
+    return MultiPackIndex(path, file=f)
+
+
+def write_midx(
+    f: IO[bytes],
+    pack_index_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]],
+    hash_algorithm: int = HASH_ALGORITHM_SHA1,
+) -> bytes:
+    """Write a multi-pack-index file.
+
+    Args:
+        f: File-like object to write to
+        pack_index_entries: List of (pack_name, entries) tuples where entries are
+                          (sha, offset, crc32) tuples, sorted by SHA
+        hash_algorithm: Hash algorithm to use (1=SHA-1, 2=SHA-256)
+
+    Returns:
+        SHA-1 checksum of the written MIDX file
+    """
+    if hash_algorithm == HASH_ALGORITHM_SHA1:
+        hash_size = 20
+    elif hash_algorithm == HASH_ALGORITHM_SHA256:
+        hash_size = 32
+    else:
+        raise ValueError(f"Unknown hash algorithm: {hash_algorithm}")
+
+    # Wrap file in SHA1Writer to compute checksum
+    writer = SHA1Writer(f)
+
+    # Sort pack entries by pack name (required by Git)
+    pack_index_entries_sorted = sorted(pack_index_entries, key=lambda x: x[0])
+
+    # Collect all objects from all packs
+    all_objects: list[tuple[RawObjectID, int, int]] = []  # (sha, pack_id, offset)
+    pack_names: list[str] = []
+
+    for pack_id, (pack_name, entries) in enumerate(pack_index_entries_sorted):
+        pack_names.append(pack_name)
+        for sha, offset, _crc32 in entries:
+            all_objects.append((sha, pack_id, offset))
+
+    # Sort all objects by SHA
+    all_objects.sort(key=lambda x: x[0])
+
+    # Calculate offsets for chunks
+    num_packs = len(pack_names)
+    num_objects = len(all_objects)
+
+    # Header: 12 bytes
+    header_size = 12
+
+    # Chunk count: PNAM, OIDF, OIDL, OOFF, and optionally LOFF
+    # (LOFF is added just below when any offset requires it)
+    chunk_count = 4  # PNAM, OIDF, OIDL, OOFF
+
+    # Check if we need LOFF chunk (for offsets >= 2^31)
+    need_loff = any(offset >= 2**31 for _sha, _pack_id, offset in all_objects)
+    if need_loff:
+        chunk_count += 1
+
+    # Chunk table: (chunk_count + 1) * 12 bytes (including terminator)
+    chunk_table_size = (chunk_count + 1) * 12
+
+    # Calculate chunk offsets
+    current_offset = header_size + chunk_table_size
+
+    # PNAM chunk: pack names as null-terminated strings, padded to 4-byte boundary
+    pnam_data = b"".join(name.encode("utf-8") + b"\x00" for name in pack_names)
+    # Pad to 4-byte boundary
+    pnam_padding = (4 - len(pnam_data) % 4) % 4
+    pnam_data += b"\x00" * pnam_padding
+    pnam_offset = current_offset
+    current_offset += len(pnam_data)
+
+    # OIDF chunk: 256 * 4 bytes
+    oidf_offset = current_offset
+    oidf_size = 256 * 4
+    current_offset += oidf_size
+
+    # OIDL chunk: num_objects * hash_size bytes
+    oidl_offset = current_offset
+    oidl_size = num_objects * hash_size
+    current_offset += oidl_size
+
+    # OOFF chunk: num_objects * 8 bytes (4 for pack_id + 4 for offset)
+    ooff_offset = current_offset
+    ooff_size = num_objects * 8
+    current_offset += ooff_size
+
+    # LOFF chunk (if needed): variable size
+    # We'll calculate the exact size when we know how many large offsets we have
+    loff_offset = current_offset if need_loff else 0
+    large_offsets: list[int] = []
+
+    # Calculate trailer offset (where checksum starts)
+    # We need to pre-calculate large offset count for accurate trailer offset
+    if need_loff:
+        # Count large offsets
+        large_offset_count = sum(1 for _, _, offset in all_objects if offset >= 2**31)
+        loff_size = large_offset_count * 8
+        trailer_offset = current_offset + loff_size
+    else:
+        trailer_offset = current_offset
+
+    # Write header
+    writer.write(MIDX_SIGNATURE)  # 4 bytes: signature
+    writer.write(bytes([MIDX_VERSION]))  # 1 byte: version
+    writer.write(bytes([hash_algorithm]))  # 1 byte: hash algorithm
+    writer.write(bytes([chunk_count]))  # 1 byte: chunk count
+    writer.write(bytes([0]))  # 1 byte: base MIDX files (always 0)
+    writer.write(struct.pack(">L", num_packs))  # 4 bytes: pack count
+
+    # Write chunk table
+    chunk_table = [
+        (CHUNK_PNAM, pnam_offset),
+        (CHUNK_OIDF, oidf_offset),
+        (CHUNK_OIDL, oidl_offset),
+        (CHUNK_OOFF, ooff_offset),
+    ]
+    if need_loff:
+        chunk_table.append((CHUNK_LOFF, loff_offset))
+
+    for chunk_id, chunk_offset in chunk_table:
+        writer.write(chunk_id)  # 4 bytes
+        writer.write(struct.pack(">Q", chunk_offset))  # 8 bytes
+
+    # Write terminator (points to where trailer/checksum starts)
+    writer.write(b"\x00\x00\x00\x00")  # 4 bytes
+    writer.write(struct.pack(">Q", trailer_offset))  # 8 bytes
+
+    # Write PNAM chunk
+    writer.write(pnam_data)
+
+    # Write OIDF chunk (fanout table)
+    fanout: list[int] = [0] * 256
+    for sha, _pack_id, _offset in all_objects:
+        first_byte = sha[0]
+        fanout[first_byte] += 1
+
+    # Convert counts to cumulative
+    cumulative = 0
+    for i in range(256):
+        cumulative += fanout[i]
+        writer.write(struct.pack(">L", cumulative))
+
+    # Write OIDL chunk (object IDs)
+    for sha, _pack_id, _offset in all_objects:
+        writer.write(sha)
+
+    # Write OOFF chunk (pack ID and offset for each object)
+    for _sha, pack_id, offset in all_objects:
+        writer.write(struct.pack(">L", pack_id))
+
+        if offset >= 2**31:
+            # Use large offset table
+            large_offset_index = len(large_offsets)
+            large_offsets.append(offset)
+            # Set MSB to indicate large offset
+            writer.write(struct.pack(">L", 0x80000000 | large_offset_index))
+        else:
+            writer.write(struct.pack(">L", offset))
+
+    # Write LOFF chunk if needed
+    if need_loff:
+        for large_offset in large_offsets:
+            writer.write(struct.pack(">Q", large_offset))
+
+    # Write checksum
+    return writer.write_sha()
+
+
+def write_midx_file(
+    path: str | os.PathLike[str],
+    pack_index_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]],
+    hash_algorithm: int = HASH_ALGORITHM_SHA1,
+) -> bytes:
+    """Write a multi-pack-index file to disk.
+
+    Args:
+        path: Path where to write the MIDX file
+        pack_index_entries: List of (pack_name, entries) tuples where entries are
+                          (sha, offset, crc32) tuples, sorted by SHA
+        hash_algorithm: Hash algorithm to use (1=SHA-1, 2=SHA-256)
+
+    Returns:
+        SHA-1 checksum of the written MIDX file
+    """
+    with GitFile(path, "wb") as f:
+        return write_midx(f, pack_index_entries, hash_algorithm)
+
+
+# TODO: Add support for incremental MIDX chains
+# TODO: Add support for BTMP and RIDX chunks for bitmap integration
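
As a usage sketch (not part of the commit): the reading API above can be exercised against any repository that already has a multi-pack-index, e.g. after running `git multi-pack-index write`. The repository path below is a hypothetical placeholder.

import os

from dulwich.midx import load_midx

# Hypothetical path; point this at a real repository whose
# .git/objects/pack/ directory contains a multi-pack-index file.
midx_path = os.path.join("/tmp/repo", ".git", "objects", "pack", "multi-pack-index")

midx = load_midx(midx_path)
try:
    print(f"{len(midx)} objects across {midx.pack_count} packs")
    # object_offset() takes a raw (binary) SHA and returns
    # (pack_name, offset), or None when the object is not indexed.
    sha, pack_name, offset = next(midx.iterentries())
    assert midx.object_offset(sha) == (pack_name, offset)
finally:
    midx.close()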

+ 184 - 0
dulwich/object_store.py

@@ -42,6 +42,7 @@ from typing import (
 
 from .errors import NotTreeError
 from .file import GitFile, _GitFile
+from .midx import MultiPackIndex, load_midx
 from .objects import (
     S_ISGITLINK,
     ZERO_SHA,
@@ -1399,6 +1400,10 @@ class DiskObjectStore(PackBasedObjectStore):
         self._commit_graph = None
         self._use_commit_graph = True  # Default to true
 
+        # Multi-pack-index support - lazy loaded
+        self._midx: MultiPackIndex | None = None
+        self._use_midx = True  # Default to true
+
     def __repr__(self) -> str:
         """Return string representation of DiskObjectStore.
 
@@ -1485,6 +1490,9 @@ class DiskObjectStore(PackBasedObjectStore):
         # Read core.commitGraph setting
         use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
 
+        # Read core.multiPackIndex setting
+        use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)
+
         # Read core.fsyncObjectFiles setting
         fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
 
@@ -1521,6 +1529,7 @@ class DiskObjectStore(PackBasedObjectStore):
             dir_mode=dir_mode,
         )
         instance._use_commit_graph = use_commit_graph
+        instance._use_midx = use_midx
         return instance
 
     @property
@@ -2036,6 +2045,162 @@ class DiskObjectStore(PackBasedObjectStore):
                 self._commit_graph = read_commit_graph(graph_file)
         return self._commit_graph
 
+    def get_midx(self) -> MultiPackIndex | None:
+        """Get the multi-pack-index for this object store.
+
+        Returns:
+          MultiPackIndex object if available, None otherwise
+
+        Raises:
+          ValueError: If MIDX file is corrupt
+          OSError: If MIDX file cannot be read
+        """
+        if not self._use_midx:
+            return None
+
+        if self._midx is None:
+            # Look for MIDX in pack directory
+            midx_file = os.path.join(self.pack_dir, "multi-pack-index")
+            if os.path.exists(midx_file):
+                self._midx = load_midx(midx_file)
+        return self._midx
+
+    def _get_pack_by_name(self, pack_name: str) -> Pack:
+        """Get a pack by its base name.
+
+        Args:
+            pack_name: Base name of the pack (e.g., 'pack-abc123.pack' or 'pack-abc123.idx')
+
+        Returns:
+            Pack object
+
+        Raises:
+            KeyError: If pack doesn't exist
+        """
+        # Remove .pack or .idx extension if present
+        if pack_name.endswith(".pack"):
+            base_name = pack_name[:-5]
+        elif pack_name.endswith(".idx"):
+            base_name = pack_name[:-4]
+        else:
+            base_name = pack_name
+
+        # Check if already in cache
+        if base_name in self._pack_cache:
+            return self._pack_cache[base_name]
+
+        # Load the pack
+        pack_path = os.path.join(self.pack_dir, base_name)
+        if not os.path.exists(pack_path + ".pack"):
+            raise KeyError(f"Pack {pack_name} not found")
+
+        pack = Pack(
+            pack_path,
+            delta_window_size=self.pack_delta_window_size,
+            window_memory=self.pack_window_memory,
+            delta_cache_size=self.pack_delta_cache_size,
+            depth=self.pack_depth,
+            threads=self.pack_threads,
+            big_file_threshold=self.pack_big_file_threshold,
+        )
+        self._pack_cache[base_name] = pack
+        return pack
+
+    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
+        """Check if a particular object is present by SHA1 and is packed.
+
+        This checks the MIDX first if available, then falls back to checking
+        individual pack indexes.
+
+        Args:
+            sha: Binary SHA of the object
+
+        Returns:
+            True if the object is in a pack file
+        """
+        # Check MIDX first for faster lookup
+        midx = self.get_midx()
+        if midx is not None and sha in midx:
+            return True
+
+        # Fall back to checking individual packs
+        return super().contains_packed(sha)
+
+    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
+        """Obtain the raw fulltext for an object.
+
+        This uses the MIDX if available for faster lookups.
+
+        Args:
+            name: SHA for the object (20 bytes binary or 40 bytes hex)
+
+        Returns:
+            Tuple with numeric type and object contents
+
+        Raises:
+            KeyError: If object not found
+        """
+        if name == ZERO_SHA:
+            raise KeyError(name)
+
+        sha: RawObjectID
+        if len(name) == 40:
+            # name is ObjectID (hex), convert to RawObjectID
+            sha = hex_to_sha(cast(ObjectID, name))
+        elif len(name) == 20:
+            # name is already RawObjectID (binary)
+            sha = RawObjectID(name)
+        else:
+            raise AssertionError(f"Invalid object name {name!r}")
+
+        # Try MIDX first for faster lookup
+        midx = self.get_midx()
+        if midx is not None:
+            result = midx.object_offset(sha)
+            if result is not None:
+                pack_name, _offset = result
+                try:
+                    pack = self._get_pack_by_name(pack_name)
+                    return pack.get_raw(sha)
+                except (KeyError, PackFileDisappeared):
+                    # Pack disappeared or object not found, fall through to standard lookup
+                    pass
+
+        # Fall back to the standard implementation
+        return super().get_raw(name)
+
+    def write_midx(self) -> bytes:
+        """Write a multi-pack-index file for this object store.
+
+        Creates a MIDX file that indexes all pack files in the pack directory.
+
+        Returns:
+            SHA-1 checksum of the written MIDX file
+
+        Raises:
+            OSError: If the pack directory doesn't exist or MIDX can't be written
+        """
+        from .midx import write_midx_file
+
+        # Get all pack files
+        packs = self.packs
+        if not packs:
+            # No packs to index; skip writing and return a null checksum
+            return b"\x00" * 20
+
+        # Collect entries from all packs
+        pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
+
+        for pack in packs:
+            # Git stores .idx extension in MIDX, not .pack
+            pack_name = os.path.basename(pack._basename) + ".idx"
+            entries = list(pack.index.iterentries())
+            pack_entries.append((pack_name, entries))
+
+        # Write MIDX file
+        midx_path = os.path.join(self.pack_dir, "multi-pack-index")
+        return write_midx_file(midx_path, pack_entries)
+
     def write_commit_graph(
         self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
     ) -> None:
@@ -2149,6 +2314,25 @@ class DiskObjectStore(PackBasedObjectStore):
                 if time.time() - mtime > grace_period:
                     os.remove(pack_path)
 
+    def close(self) -> None:
+        """Close the object store and release resources.
+
+        This method closes all cached pack files, MIDX, and frees associated resources.
+        """
+        # Close MIDX if it's loaded
+        if self._midx is not None:
+            self._midx.close()
+            self._midx = None
+
+        # Close alternates
+        if self._alternates is not None:
+            for alt in self._alternates:
+                alt.close()
+            self._alternates = None
+
+        # Call parent class close to handle pack files
+        super().close()
+
 
 class MemoryObjectStore(PackCapableObjectStore):
     """Object store that keeps all objects in memory."""

+ 1 - 0
tests/__init__.py

@@ -162,6 +162,7 @@ def self_test_suite() -> unittest.TestSuite:
         "mbox",
         "merge",
         "merge_drivers",
+        "midx",
         "missing_obj_finder",
         "notes",
         "objects",

+ 1 - 0
tests/compat/__init__.py

@@ -34,6 +34,7 @@ def test_suite() -> unittest.TestSuite:
         "dumb",
         "index",
         "lfs",
+        "midx",
         "pack",
         "patch",
         "porcelain",

+ 267 - 0
tests/compat/test_midx.py

@@ -0,0 +1,267 @@
+# test_midx.py -- Compatibility tests for multi-pack-index functionality
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+
+"""Compatibility tests for Git multi-pack-index functionality.
+
+These tests verify that dulwich's MIDX implementation can read and interact
+with MIDX files created by C Git, and that Git can read MIDX files created
+by Dulwich.
+"""
+
+import os
+import tempfile
+
+from dulwich.midx import load_midx
+from dulwich.object_store import DiskObjectStore
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class MIDXCompatTests(CompatTestCase):
+    """Compatibility tests for multi-pack-index functionality."""
+
+    # Multi-pack-index was introduced in Git 2.21.0
+    min_git_version = (2, 21, 0)
+
+    def setUp(self):
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.repo_path = os.path.join(self.test_dir, "test-repo")
+
+        # Set up git identity to avoid committer identity errors
+        self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
+        self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
+        self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
+        self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")
+
+    def tearDown(self):
+        from .utils import rmtree_ro
+
+        rmtree_ro(self.test_dir)
+        super().tearDown()
+
+    def create_test_repo_with_packs(self):
+        """Create a test repository with multiple pack files."""
+        # Initialize repository
+        run_git_or_fail(["init"], cwd=self.test_dir)
+        os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)
+
+        work_dir = os.path.join(self.test_dir, "work")
+        os.makedirs(work_dir)
+
+        # Create .git file pointing to our repo
+        with open(os.path.join(work_dir, ".git"), "w") as f:
+            f.write(f"gitdir: {self.repo_path}\n")
+
+        # Create some commits and pack them
+        for i in range(5):
+            filename = f"file{i}.txt"
+            with open(os.path.join(work_dir, filename), "w") as f:
+                f.write(f"Content {i}\n" * 100)  # Make files bigger to ensure packing
+
+            run_git_or_fail(["add", filename], cwd=work_dir)
+            run_git_or_fail(
+                [
+                    "commit",
+                    "-m",
+                    f"Commit {i}",
+                    "--author",
+                    "Test Author <test@example.com>",
+                ],
+                cwd=work_dir,
+            )
+
+            # Create a pack file after each commit to get multiple packs
+            if i > 0:  # Skip first commit to avoid empty pack
+                run_git_or_fail(["repack", "-d"], cwd=work_dir)
+
+        return work_dir
+
+    def test_read_git_midx(self):
+        """Test that Dulwich can read a MIDX file created by Git."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Have Git create a MIDX file
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Verify Git created the MIDX file
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        self.assertTrue(
+            os.path.exists(midx_path), "Git did not create multi-pack-index file"
+        )
+
+        # Load the MIDX file with Dulwich
+        midx = load_midx(midx_path)
+        try:
+            # Verify we can read it
+            self.assertGreater(len(midx), 0, "MIDX should contain objects")
+            self.assertGreater(midx.pack_count, 0, "MIDX should reference packs")
+
+            # Verify the pack names look reasonable
+            # Git stores .idx extensions in MIDX files
+            for pack_name in midx.pack_names:
+                self.assertTrue(pack_name.startswith("pack-"))
+                self.assertTrue(pack_name.endswith(".idx"))
+        finally:
+            midx.close()
+
+    def test_git_uses_dulwich_midx(self):
+        """Test that Git can use a MIDX file created by Dulwich."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Use Dulwich to create a MIDX file
+        repo = Repo(self.repo_path)
+        try:
+            store = repo.object_store
+            self.assertIsInstance(store, DiskObjectStore)
+
+            # Write MIDX with Dulwich
+            checksum = store.write_midx()
+            self.assertEqual(20, len(checksum))
+        finally:
+            repo.close()
+
+        # Verify the file was created
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        self.assertTrue(os.path.exists(midx_path))
+
+        # Have Git verify the MIDX file (should succeed with return code 0)
+        run_git_or_fail(["multi-pack-index", "verify"], cwd=work_dir)
+
+        # Try to use the MIDX with Git commands
+        # This should work if the MIDX is valid
+        run_git_or_fail(["fsck"], cwd=work_dir)
+
+    def test_midx_object_lookup_matches_git(self):
+        """Test that object lookups through MIDX match Git's results."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Have Git create a MIDX file
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Load with Dulwich
+        repo = Repo(self.repo_path)
+        try:
+            store = repo.object_store
+
+            # Get MIDX
+            midx = store.get_midx()
+            self.assertIsNotNone(midx, "MIDX should be loaded")
+
+            # Get all objects from Git
+            result = run_git_or_fail(["rev-list", "--all", "--objects"], cwd=work_dir)
+            object_shas = [
+                line.split()[0].encode("ascii")
+                for line in result.decode("utf-8").strip().split("\n")
+                if line
+            ]
+
+            # Verify we can find these objects through the MIDX
+            found_count = 0
+            for sha_hex in object_shas:
+                # Convert hex to binary
+                sha_bin = bytes.fromhex(sha_hex.decode("ascii"))
+
+                # Check if it's in the MIDX
+                if sha_bin in midx:
+                    found_count += 1
+
+                    # Verify we can get the object location
+                    result = midx.object_offset(sha_bin)
+                    self.assertIsNotNone(result)
+                    pack_name, offset = result
+                    self.assertIsInstance(pack_name, str)
+                    self.assertIsInstance(offset, int)
+                    self.assertGreater(offset, 0)
+
+            # We should find at least some objects in the MIDX
+            self.assertGreater(
+                found_count, 0, "Should find at least some objects in MIDX"
+            )
+        finally:
+            repo.close()
+
+    def test_midx_with_multiple_packs(self):
+        """Test MIDX functionality with multiple pack files."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Create multiple pack files explicitly
+        run_git_or_fail(["repack"], cwd=work_dir)
+        run_git_or_fail(["repack"], cwd=work_dir)
+
+        # Create MIDX with Git
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Load with Dulwich
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        midx = load_midx(midx_path)
+        try:
+            # Should have multiple packs
+            # (Exact count may vary depending on Git version and repacking)
+            self.assertGreaterEqual(midx.pack_count, 1)
+
+            # Verify we can iterate over all entries
+            entries = list(midx.iterentries())
+            self.assertGreater(len(entries), 0)
+
+            # All entries should have valid structure
+            for sha, pack_name, offset in entries:
+                self.assertEqual(20, len(sha))  # SHA-1 is 20 bytes
+                self.assertIsInstance(pack_name, str)
+                # Git stores .idx extensions in MIDX files
+                self.assertTrue(pack_name.endswith(".idx"))
+                self.assertIsInstance(offset, int)
+                self.assertGreaterEqual(offset, 0)
+        finally:
+            midx.close()
+
+    def test_dulwich_object_store_with_git_midx(self):
+        """Test that DiskObjectStore can use Git-created MIDX for lookups."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Have Git create a MIDX file
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Load repo with Dulwich
+        repo = Repo(self.repo_path)
+        try:
+            # Get a commit from the repo
+            result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
+            head_sha = result.decode("utf-8").strip().encode("ascii")
+
+            # Verify we can access it through Dulwich
+            # This should use the MIDX for lookup
+            obj = repo.object_store[head_sha]
+            self.assertIsNotNone(obj)
+            self.assertEqual(b"commit", obj.type_name)
+        finally:
+            repo.close()
+
+    def test_repack_with_midx(self):
+        """Test that repacking works correctly with MIDX present."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Create MIDX with Dulwich
+        repo = Repo(self.repo_path)
+        try:
+            repo.object_store.write_midx()
+        finally:
+            repo.close()
+
+        # Verify Git can still repack
+        run_git_or_fail(["repack", "-d"], cwd=work_dir)
+
+        # The MIDX should still be readable
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        if os.path.exists(midx_path):  # Git may remove it during repack
+            midx = load_midx(midx_path)
+            try:
+                self.assertGreaterEqual(len(midx), 0)
+            finally:
+                midx.close()

+ 277 - 0
tests/test_midx.py

@@ -0,0 +1,277 @@
+# test_midx.py -- Tests for multi-pack-index
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for multi-pack-index (MIDX) functionality."""
+
+import os
+import tempfile
+from io import BytesIO
+from unittest import TestCase
+
+from dulwich.midx import (
+    HASH_ALGORITHM_SHA1,
+    MultiPackIndex,
+    write_midx,
+    write_midx_file,
+)
+
+
+class MIDXWriteTests(TestCase):
+    """Tests for writing MIDX files."""
+
+    def test_write_empty_midx(self):
+        """Test writing an empty MIDX file."""
+        f = BytesIO()
+        pack_entries = []
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+
+        # Checksum should be 20 bytes
+        self.assertEqual(20, len(checksum))
+
+        # Should be able to read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+        self.assertEqual(0, len(midx))
+        self.assertEqual(0, midx.pack_count)
+        self.assertEqual([], midx.pack_names)
+
+    def test_write_single_pack_midx(self):
+        """Test writing a MIDX file with a single pack."""
+        f = BytesIO()
+
+        # Create some fake pack entries
+        pack_entries = [
+            (
+                "pack-abc123.idx",
+                [
+                    (b"\x01" * 20, 100, 0x12345678),  # sha, offset, crc32
+                    (b"\x02" * 20, 200, 0x87654321),
+                    (b"\x03" * 20, 300, 0xABCDEF00),
+                ],
+            )
+        ]
+
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        self.assertEqual(20, len(checksum))
+
+        # Read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertEqual(3, len(midx))
+        self.assertEqual(1, midx.pack_count)
+        self.assertEqual(["pack-abc123.idx"], midx.pack_names)
+
+        # Check object lookups
+        result = midx.object_offset(b"\x01" * 20)
+        self.assertIsNotNone(result)
+        pack_name, offset = result
+        self.assertEqual("pack-abc123.idx", pack_name)
+        self.assertEqual(100, offset)
+
+        result = midx.object_offset(b"\x02" * 20)
+        self.assertIsNotNone(result)
+        pack_name, offset = result
+        self.assertEqual("pack-abc123.idx", pack_name)
+        self.assertEqual(200, offset)
+
+        result = midx.object_offset(b"\x03" * 20)
+        self.assertIsNotNone(result)
+        pack_name, offset = result
+        self.assertEqual("pack-abc123.idx", pack_name)
+        self.assertEqual(300, offset)
+
+        # Check non-existent object
+        result = midx.object_offset(b"\xff" * 20)
+        self.assertIsNone(result)
+
+    def test_write_multiple_packs_midx(self):
+        """Test writing a MIDX file with multiple packs."""
+        f = BytesIO()
+
+        pack_entries = [
+            (
+                "pack-111.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x03" * 20, 300, 0),
+                ],
+            ),
+            (
+                "pack-222.idx",
+                [
+                    (b"\x02" * 20, 50, 0),
+                    (b"\x04" * 20, 150, 0),
+                ],
+            ),
+        ]
+
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        self.assertEqual(20, len(checksum))
+
+        # Read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertEqual(4, len(midx))
+        self.assertEqual(2, midx.pack_count)
+        self.assertEqual(["pack-111.idx", "pack-222.idx"], midx.pack_names)
+
+        # Objects should be findable across packs
+        result = midx.object_offset(b"\x01" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual("pack-111.idx", result[0])
+
+        result = midx.object_offset(b"\x02" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual("pack-222.idx", result[0])
+
+    def test_write_large_offsets(self):
+        """Test writing a MIDX file with large offsets (>= 2^31)."""
+        f = BytesIO()
+
+        large_offset = 2**32  # Offset that requires LOFF chunk
+        pack_entries = [
+            (
+                "pack-large.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x02" * 20, large_offset, 0),  # Large offset
+                ],
+            )
+        ]
+
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        self.assertEqual(20, len(checksum))
+
+        # Read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertEqual(2, len(midx))
+
+        # Small offset should work
+        result = midx.object_offset(b"\x01" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual(100, result[1])
+
+        # Large offset should work
+        result = midx.object_offset(b"\x02" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual(large_offset, result[1])
+
+    def test_write_midx_file(self):
+        """Test writing a MIDX file to disk."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            midx_path = os.path.join(tmpdir, "multi-pack-index")
+
+            pack_entries = [
+                (
+                    "pack-test.idx",
+                    [
+                        (b"\xaa" * 20, 1000, 0),
+                    ],
+                )
+            ]
+
+            checksum = write_midx_file(midx_path, pack_entries, HASH_ALGORITHM_SHA1)
+            self.assertEqual(20, len(checksum))
+
+            # Verify file was created
+            self.assertTrue(os.path.exists(midx_path))
+
+            # Read it back from disk
+            with open(midx_path, "rb") as f:
+                midx = MultiPackIndex(midx_path, file=f, contents=f.read())
+
+            self.assertEqual(1, len(midx))
+            result = midx.object_offset(b"\xaa" * 20)
+            self.assertIsNotNone(result)
+            self.assertEqual("pack-test.idx", result[0])
+            self.assertEqual(1000, result[1])
+
+
+class MIDXContainsTests(TestCase):
+    """Tests for MIDX __contains__ method."""
+
+    def test_contains_object(self):
+        """Test checking if an object is in the MIDX."""
+        f = BytesIO()
+        pack_entries = [
+            (
+                "pack-test.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x02" * 20, 200, 0),
+                ],
+            )
+        ]
+
+        write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertTrue(b"\x01" * 20 in midx)
+        self.assertTrue(b"\x02" * 20 in midx)
+        self.assertFalse(b"\xff" * 20 in midx)
+
+
+class MIDXIterEntriesTests(TestCase):
+    """Tests for MIDX iterentries method."""
+
+    def test_iterentries(self):
+        """Test iterating over MIDX entries."""
+        f = BytesIO()
+        pack_entries = [
+            (
+                "pack-111.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x03" * 20, 300, 0),
+                ],
+            ),
+            (
+                "pack-222.idx",
+                [
+                    (b"\x02" * 20, 50, 0),
+                ],
+            ),
+        ]
+
+        write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        entries = list(midx.iterentries())
+        self.assertEqual(3, len(entries))
+
+        # Entries should be sorted by SHA
+        self.assertEqual(b"\x01" * 20, entries[0][0])
+        self.assertEqual("pack-111.idx", entries[0][1])
+        self.assertEqual(100, entries[0][2])
+
+        self.assertEqual(b"\x02" * 20, entries[1][0])
+        self.assertEqual("pack-222.idx", entries[1][1])
+        self.assertEqual(50, entries[1][2])
+
+        self.assertEqual(b"\x03" * 20, entries[2][0])
+        self.assertEqual("pack-111.idx", entries[2][1])
+        self.assertEqual(300, entries[2][2])