Jelmer Vernooij, 1 month ago
Commit 836e22b4a1
6 changed files with 1374 additions and 0 deletions
  1. dulwich/midx.py (644 additions, 0 deletions)
  2. dulwich/object_store.py (184 additions, 0 deletions)
  3. tests/__init__.py (1 addition, 0 deletions)
  4. tests/compat/__init__.py (1 addition, 0 deletions)
  5. tests/compat/test_midx.py (267 additions, 0 deletions)
  6. tests/test_midx.py (277 additions, 0 deletions)

+ 644 - 0
dulwich/midx.py

@@ -0,0 +1,644 @@
+# midx.py -- Multi-Pack-Index (MIDX) support
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Multi-Pack-Index (MIDX) support.
+
+A multi-pack-index (MIDX) provides a single index that covers multiple pack files,
+enabling fast object lookup across all packs without opening each pack index.
+
+The MIDX file format consists of:
+- A header with signature, version, and hash algorithm
+- A chunk lookup table
+- Multiple chunks containing pack names, OID fanout, OID lookup, and object offsets
+- A trailer with checksum
+
+This module provides:
+- Reading MIDX files
+- Writing MIDX files
+- Integration with pack-based object stores
+
+Limitations:
+- Incremental MIDX chains are not yet supported (base_midx_files must be 0)
+- BTMP (bitmapped packfiles) chunk is not yet implemented
+- RIDX (reverse index) chunk is not yet implemented
+
+Note: Incremental MIDX chains were introduced in Git 2.47 as an experimental
+feature, where multiple MIDX files can be chained together. The format includes
+a base_midx_files field in the header and uses a multi-pack-index.d/ directory
+with a multi-pack-index-chain file. This feature is not yet supported by Dulwich
+as the specification is still evolving.
+"""
+
+import os
+import struct
+from collections.abc import Iterator
+from io import UnsupportedOperation
+from typing import IO, Any
+
+try:
+    import mmap
+except ImportError:
+    has_mmap = False
+else:
+    has_mmap = True
+
+from .file import GitFile, _GitFile
+from .objects import ObjectID, RawObjectID
+from .pack import SHA1Writer
+
+# MIDX signature
+MIDX_SIGNATURE = b"MIDX"
+
+# MIDX version
+MIDX_VERSION = 1
+
+# Chunk identifiers (4 bytes each)
+CHUNK_PNAM = b"PNAM"  # Packfile names
+CHUNK_OIDF = b"OIDF"  # OID fanout table
+CHUNK_OIDL = b"OIDL"  # OID lookup table
+CHUNK_OOFF = b"OOFF"  # Object offsets
+CHUNK_LOFF = b"LOFF"  # Large offsets (optional)
+CHUNK_BTMP = b"BTMP"  # Bitmapped packfiles (optional)
+CHUNK_RIDX = b"RIDX"  # Reverse index (optional)
+
+# Hash algorithm identifiers
+HASH_ALGORITHM_SHA1 = 1
+HASH_ALGORITHM_SHA256 = 2
+
+
+class MultiPackIndex:
+    """Multi-pack-index for efficient object lookup across multiple pack files."""
+
+    def __init__(
+        self,
+        filename: str | os.PathLike[str],
+        file: IO[bytes] | _GitFile | None = None,
+        contents: bytes | None = None,
+        size: int | None = None,
+    ) -> None:
+        """Initialize a MultiPackIndex.
+
+        Args:
+            filename: Path to the MIDX file
+            file: Optional file object
+            contents: Optional mmap'd contents
+            size: Optional size of the MIDX file
+        """
+        self._filename = os.fspath(filename)
+        self._file = file
+        self._size = size
+
+        # Instance variables that will be set during parsing
+        self.version: int
+        self.hash_algorithm: int
+        self.hash_size: int
+        self.chunk_count: int
+        self.base_midx_files: int
+        self.pack_count: int
+        self.pack_names: list[str]
+        self.object_count: int
+        self._chunks: dict[bytes, int]
+        self._fanout_table: list[int]
+        self._oidl_offset: int
+        self._ooff_offset: int
+        self._loff_offset: int
+
+        # Load file contents
+        if contents is None:
+            if file is None:
+                with GitFile(filename, "rb") as f:
+                    self._contents, self._size = self._load_file_contents(f, size)
+            else:
+                self._contents, self._size = self._load_file_contents(file, size)
+        else:
+            self._contents = contents
+
+        # Parse header
+        self._parse_header()
+
+        # Parse chunk lookup table
+        self._parse_chunk_table()
+
+    def _load_file_contents(
+        self, f: IO[bytes] | _GitFile, size: int | None = None
+    ) -> tuple[bytes | Any, int]:
+        """Load contents from a file, preferring mmap when possible.
+
+        Args:
+            f: File-like object to load
+            size: Expected size, or None to determine from file
+
+        Returns:
+            Tuple of (contents, size)
+        """
+        try:
+            fd = f.fileno()
+        except (UnsupportedOperation, AttributeError):
+            fd = None
+
+        # Attempt to use mmap if possible
+        if fd is not None:
+            if size is None:
+                size = os.fstat(fd).st_size
+            if has_mmap:
+                try:
+                    contents = mmap.mmap(fd, size, access=mmap.ACCESS_READ)
+                except (OSError, ValueError):
+                    # Can't mmap - perhaps a socket or invalid file descriptor
+                    pass
+                else:
+                    return contents, size
+
+        # Fall back to reading entire file into memory
+        contents_bytes = f.read()
+        size = len(contents_bytes)
+        return contents_bytes, size
+
+    def _parse_header(self) -> None:
+        """Parse the MIDX header."""
+        if len(self._contents) < 12:
+            raise ValueError("MIDX file too small")
+
+        # Check signature
+        signature = self._contents[0:4]
+        if signature != MIDX_SIGNATURE:
+            raise ValueError(f"Invalid MIDX signature: {signature!r}")
+
+        # Read version
+        self.version = self._contents[4]
+        if self.version != MIDX_VERSION:
+            raise ValueError(f"Unsupported MIDX version: {self.version}")
+
+        # Read object ID version (hash algorithm)
+        self.hash_algorithm = self._contents[5]
+        if self.hash_algorithm == HASH_ALGORITHM_SHA1:
+            self.hash_size = 20
+        elif self.hash_algorithm == HASH_ALGORITHM_SHA256:
+            self.hash_size = 32
+        else:
+            raise ValueError(f"Unknown hash algorithm: {self.hash_algorithm}")
+
+        # Read chunk count
+        self.chunk_count = self._contents[6]
+
+        # Read base MIDX files count (currently always 0)
+        self.base_midx_files = self._contents[7]
+        if self.base_midx_files != 0:
+            raise ValueError("Incremental MIDX not yet supported")
+
+        # Read pack file count
+        (self.pack_count,) = struct.unpack(">L", self._contents[8:12])
+
+    def _parse_chunk_table(self) -> None:
+        """Parse the chunk lookup table."""
+        self._chunks = {}
+
+        # Chunk table starts at offset 12
+        offset = 12
+
+        # Each chunk entry is 12 bytes (4-byte ID + 8-byte offset)
+        for _ in range(self.chunk_count + 1):  # +1 for the terminator entry
+            chunk_id = self._contents[offset : offset + 4]
+            (chunk_offset,) = struct.unpack(
+                ">Q", self._contents[offset + 4 : offset + 12]
+            )
+
+            if chunk_id == b"\x00\x00\x00\x00":
+                # Terminator entry
+                break
+
+            self._chunks[chunk_id] = chunk_offset
+            offset += 12
+
+        # Parse required chunks
+        self._parse_pnam_chunk()
+        self._parse_oidf_chunk()
+        self._parse_oidl_chunk()
+        self._parse_ooff_chunk()
+
+        # Parse optional chunks
+        if CHUNK_LOFF in self._chunks:
+            self._parse_loff_chunk()
+
+    def _parse_pnam_chunk(self) -> None:
+        """Parse the Packfile Names (PNAM) chunk."""
+        if CHUNK_PNAM not in self._chunks:
+            raise ValueError("Required PNAM chunk not found")
+
+        offset = self._chunks[CHUNK_PNAM]
+        self.pack_names = []
+
+        # Find the end of the PNAM chunk (next chunk or end of chunks section)
+        next_offset = min(
+            (o for o in self._chunks.values() if o > offset),
+            default=len(self._contents),
+        )
+
+        # Parse null-terminated pack names
+        current = offset
+        while current < next_offset:
+            # Find the next null terminator
+            null_pos = self._contents.find(b"\x00", current, next_offset)
+            if null_pos == -1:
+                break
+
+            pack_name = self._contents[current:null_pos].decode("utf-8")
+            if pack_name:  # Skip empty strings (padding)
+                self.pack_names.append(pack_name)
+            current = null_pos + 1
+
+    def _parse_oidf_chunk(self) -> None:
+        """Parse the OID Fanout (OIDF) chunk."""
+        if CHUNK_OIDF not in self._chunks:
+            raise ValueError("Required OIDF chunk not found")
+
+        offset = self._chunks[CHUNK_OIDF]
+        self._fanout_table = []
+
+        # Read 256 4-byte entries
+        for i in range(256):
+            (count,) = struct.unpack(
+                ">L", self._contents[offset + i * 4 : offset + i * 4 + 4]
+            )
+            self._fanout_table.append(count)
+
+        # Total object count is the last entry
+        self.object_count = self._fanout_table[255]
+
+    def _parse_oidl_chunk(self) -> None:
+        """Parse the OID Lookup (OIDL) chunk."""
+        if CHUNK_OIDL not in self._chunks:
+            raise ValueError("Required OIDL chunk not found")
+
+        self._oidl_offset = self._chunks[CHUNK_OIDL]
+
+    def _parse_ooff_chunk(self) -> None:
+        """Parse the Object Offsets (OOFF) chunk."""
+        if CHUNK_OOFF not in self._chunks:
+            raise ValueError("Required OOFF chunk not found")
+
+        self._ooff_offset = self._chunks[CHUNK_OOFF]
+
+    def _parse_loff_chunk(self) -> None:
+        """Parse the Large Offsets (LOFF) chunk."""
+        self._loff_offset = self._chunks[CHUNK_LOFF]
+
+    def __len__(self) -> int:
+        """Return the number of objects in this MIDX."""
+        return self.object_count
+
+    def _get_oid(self, index: int) -> RawObjectID:
+        """Get the object ID at the given index.
+
+        Args:
+            index: Index of the object
+
+        Returns:
+            Binary object ID
+        """
+        if index < 0 or index >= self.object_count:
+            raise IndexError(f"Index {index} out of range")
+
+        offset = self._oidl_offset + index * self.hash_size
+        return RawObjectID(self._contents[offset : offset + self.hash_size])
+
+    def _get_pack_info(self, index: int) -> tuple[int, int]:
+        """Get pack ID and offset for object at the given index.
+
+        Args:
+            index: Index of the object
+
+        Returns:
+            Tuple of (pack_id, offset)
+        """
+        if index < 0 or index >= self.object_count:
+            raise IndexError(f"Index {index} out of range")
+
+        # Each entry is 8 bytes (4-byte pack ID + 4-byte offset)
+        offset = self._ooff_offset + index * 8
+
+        (pack_id,) = struct.unpack(">L", self._contents[offset : offset + 4])
+        (pack_offset,) = struct.unpack(">L", self._contents[offset + 4 : offset + 8])
+
+        # Check if this is a large offset (MSB set)
+        if pack_offset & 0x80000000:
+            # Look up in LOFF chunk
+            if CHUNK_LOFF not in self._chunks:
+                raise ValueError("Large offset found but no LOFF chunk")
+
+            large_index = pack_offset & 0x7FFFFFFF
+            large_offset_pos = self._loff_offset + large_index * 8
+            (pack_offset,) = struct.unpack(
+                ">Q", self._contents[large_offset_pos : large_offset_pos + 8]
+            )
+
+        return pack_id, pack_offset
+
+    def object_offset(self, sha: ObjectID | RawObjectID) -> tuple[str, int] | None:
+        """Return the pack name and offset for the given object.
+
+        Args:
+            sha: Binary or hex-encoded SHA-1 or SHA-256 hash
+
+        Returns:
+            Tuple of (pack_name, offset) or None if not found
+        """
+        if len(sha) == self.hash_size * 2:
+            # Hex-encoded ObjectID; convert to the raw form stored in the MIDX
+            sha = RawObjectID(bytes.fromhex(sha.decode("ascii")))
+        elif len(sha) != self.hash_size:
+            raise ValueError(
+                f"SHA size mismatch: expected {self.hash_size}, got {len(sha)}"
+            )
+
+        # Use fanout table to narrow search range
+        first_byte = sha[0]
+        start_idx = 0 if first_byte == 0 else self._fanout_table[first_byte - 1]
+        end_idx = self._fanout_table[first_byte]
+
+        # Binary search within the range
+        while start_idx < end_idx:
+            mid = (start_idx + end_idx) // 2
+            mid_sha = self._get_oid(mid)
+
+            if mid_sha == sha:
+                # Found it!
+                pack_id, offset = self._get_pack_info(mid)
+                return self.pack_names[pack_id], offset
+            elif mid_sha < sha:
+                start_idx = mid + 1
+            else:
+                end_idx = mid
+
+        return None
+
+    def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
+        """Check if the given object SHA is in this MIDX.
+
+        Args:
+            sha: Binary or hex-encoded SHA hash
+
+        Returns:
+            True if the object is in this MIDX
+        """
+        return self.object_offset(sha) is not None
+
+    def iterentries(self) -> Iterator[tuple[RawObjectID, str, int]]:
+        """Iterate over all entries in this MIDX.
+
+        Yields:
+            Tuples of (sha, pack_name, offset)
+        """
+        for i in range(self.object_count):
+            sha = self._get_oid(i)
+            pack_id, offset = self._get_pack_info(i)
+            pack_name = self.pack_names[pack_id]
+            yield sha, pack_name, offset
+
+    def close(self) -> None:
+        """Close the MIDX file and release mmap resources."""
+        # Close mmap'd contents first if it's an mmap object
+        if self._contents is not None and has_mmap:
+            if isinstance(self._contents, mmap.mmap):
+                self._contents.close()
+        self._contents = None
+
+        # Close file handle
+        if self._file is not None:
+            self._file.close()
+            self._file = None
+
+
+def load_midx(path: str | os.PathLike[str]) -> MultiPackIndex:
+    """Load a multi-pack-index file by path.
+
+    Args:
+        path: Path to the MIDX file
+
+    Returns:
+        A MultiPackIndex loaded from the given path
+    """
+    with GitFile(path, "rb") as f:
+        return load_midx_file(path, f)
+
+
+def load_midx_file(
+    path: str | os.PathLike[str], f: IO[bytes] | _GitFile
+) -> MultiPackIndex:
+    """Load a multi-pack-index from a file-like object.
+
+    Args:
+        path: Path for the MIDX file
+        f: File-like object
+
+    Returns:
+        A MultiPackIndex loaded from the given file
+    """
+    return MultiPackIndex(path, file=f)
+
+
+def write_midx(
+    f: IO[bytes],
+    pack_index_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]],
+    hash_algorithm: int = HASH_ALGORITHM_SHA1,
+) -> bytes:
+    """Write a multi-pack-index file.
+
+    Args:
+        f: File-like object to write to
+        pack_index_entries: List of (pack_name, entries) tuples where entries are
+                          (sha, offset, crc32) tuples, sorted by SHA
+        hash_algorithm: Hash algorithm to use (1=SHA-1, 2=SHA-256)
+
+    Returns:
+        SHA-1 checksum of the written MIDX file
+    """
+    if hash_algorithm == HASH_ALGORITHM_SHA1:
+        hash_size = 20
+    elif hash_algorithm == HASH_ALGORITHM_SHA256:
+        hash_size = 32
+    else:
+        raise ValueError(f"Unknown hash algorithm: {hash_algorithm}")
+
+    # Wrap file in SHA1Writer to compute checksum
+    writer = SHA1Writer(f)
+
+    # Sort pack entries by pack name (required by Git)
+    pack_index_entries_sorted = sorted(pack_index_entries, key=lambda x: x[0])
+
+    # Collect all objects from all packs
+    all_objects: list[tuple[RawObjectID, int, int]] = []  # (sha, pack_id, offset)
+    pack_names: list[str] = []
+
+    for pack_id, (pack_name, entries) in enumerate(pack_index_entries_sorted):
+        pack_names.append(pack_name)
+        for sha, offset, _crc32 in entries:
+            all_objects.append((sha, pack_id, offset))
+
+    # Sort all objects by SHA
+    all_objects.sort(key=lambda x: x[0])
+
+    # Calculate offsets for chunks
+    num_packs = len(pack_names)
+    num_objects = len(all_objects)
+
+    # Header: 12 bytes
+    header_size = 12
+
+    # Chunk count: PNAM, OIDF, OIDL, OOFF, and optionally LOFF
+    # (LOFF is added just below when any offset requires it)
+    chunk_count = 4  # PNAM, OIDF, OIDL, OOFF
+
+    # Check if we need LOFF chunk (for offsets >= 2^31)
+    need_loff = any(offset >= 2**31 for _sha, _pack_id, offset in all_objects)
+    if need_loff:
+        chunk_count += 1
+
+    # Chunk table: (chunk_count + 1) * 12 bytes (including terminator)
+    chunk_table_size = (chunk_count + 1) * 12
+
+    # Calculate chunk offsets
+    current_offset = header_size + chunk_table_size
+
+    # PNAM chunk: pack names as null-terminated strings, padded to 4-byte boundary
+    pnam_data = b"".join(name.encode("utf-8") + b"\x00" for name in pack_names)
+    # Pad to 4-byte boundary
+    pnam_padding = (4 - len(pnam_data) % 4) % 4
+    pnam_data += b"\x00" * pnam_padding
+    pnam_offset = current_offset
+    current_offset += len(pnam_data)
+
+    # OIDF chunk: 256 * 4 bytes
+    oidf_offset = current_offset
+    oidf_size = 256 * 4
+    current_offset += oidf_size
+
+    # OIDL chunk: num_objects * hash_size bytes
+    oidl_offset = current_offset
+    oidl_size = num_objects * hash_size
+    current_offset += oidl_size
+
+    # OOFF chunk: num_objects * 8 bytes (4 for pack_id + 4 for offset)
+    ooff_offset = current_offset
+    ooff_size = num_objects * 8
+    current_offset += ooff_size
+
+    # LOFF chunk (if needed): variable size
+    # We'll calculate the exact size when we know how many large offsets we have
+    loff_offset = current_offset if need_loff else 0
+    large_offsets: list[int] = []
+
+    # Calculate trailer offset (where checksum starts)
+    # We need to pre-calculate large offset count for accurate trailer offset
+    if need_loff:
+        # Count large offsets
+        large_offset_count = sum(1 for _, _, offset in all_objects if offset >= 2**31)
+        loff_size = large_offset_count * 8
+        trailer_offset = current_offset + loff_size
+    else:
+        trailer_offset = current_offset
+
+    # Write header
+    writer.write(MIDX_SIGNATURE)  # 4 bytes: signature
+    writer.write(bytes([MIDX_VERSION]))  # 1 byte: version
+    writer.write(bytes([hash_algorithm]))  # 1 byte: hash algorithm
+    writer.write(bytes([chunk_count]))  # 1 byte: chunk count
+    writer.write(bytes([0]))  # 1 byte: base MIDX files (always 0)
+    writer.write(struct.pack(">L", num_packs))  # 4 bytes: pack count
+
+    # Write chunk table
+    chunk_table = [
+        (CHUNK_PNAM, pnam_offset),
+        (CHUNK_OIDF, oidf_offset),
+        (CHUNK_OIDL, oidl_offset),
+        (CHUNK_OOFF, ooff_offset),
+    ]
+    if need_loff:
+        chunk_table.append((CHUNK_LOFF, loff_offset))
+
+    for chunk_id, chunk_offset in chunk_table:
+        writer.write(chunk_id)  # 4 bytes
+        writer.write(struct.pack(">Q", chunk_offset))  # 8 bytes
+
+    # Write terminator (points to where trailer/checksum starts)
+    writer.write(b"\x00\x00\x00\x00")  # 4 bytes
+    writer.write(struct.pack(">Q", trailer_offset))  # 8 bytes
+
+    # Write PNAM chunk
+    writer.write(pnam_data)
+
+    # Write OIDF chunk (fanout table)
+    fanout: list[int] = [0] * 256
+    for sha, _pack_id, _offset in all_objects:
+        first_byte = sha[0]
+        fanout[first_byte] += 1
+
+    # Convert counts to cumulative
+    cumulative = 0
+    for i in range(256):
+        cumulative += fanout[i]
+        writer.write(struct.pack(">L", cumulative))
+
+    # Write OIDL chunk (object IDs)
+    for sha, _pack_id, _offset in all_objects:
+        writer.write(sha)
+
+    # Write OOFF chunk (pack ID and offset for each object)
+    for _sha, pack_id, offset in all_objects:
+        writer.write(struct.pack(">L", pack_id))
+
+        if offset >= 2**31:
+            # Use large offset table
+            large_offset_index = len(large_offsets)
+            large_offsets.append(offset)
+            # Set MSB to indicate large offset
+            writer.write(struct.pack(">L", 0x80000000 | large_offset_index))
+        else:
+            writer.write(struct.pack(">L", offset))
+
+    # Write LOFF chunk if needed
+    if need_loff:
+        for large_offset in large_offsets:
+            writer.write(struct.pack(">Q", large_offset))
+
+    # Write checksum
+    return writer.write_sha()
+
+
+def write_midx_file(
+    path: str | os.PathLike[str],
+    pack_index_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]],
+    hash_algorithm: int = HASH_ALGORITHM_SHA1,
+) -> bytes:
+    """Write a multi-pack-index file to disk.
+
+    Args:
+        path: Path where to write the MIDX file
+        pack_index_entries: List of (pack_name, entries) tuples where entries are
+                          (sha, offset, crc32) tuples, sorted by SHA
+        hash_algorithm: Hash algorithm to use (1=SHA-1, 2=SHA-256)
+
+    Returns:
+        SHA-1 checksum of the written MIDX file
+    """
+    with GitFile(path, "wb") as f:
+        return write_midx(f, pack_index_entries, hash_algorithm)
+
+
+# TODO: Add support for incremental MIDX chains
+# TODO: Add support for BTMP and RIDX chunks for bitmap integration
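
As a usage sketch (not part of the commit): the reading API above can be exercised against any repository that already has a multi-pack-index, e.g. after running `git multi-pack-index write`. The repository path below is a hypothetical placeholder.

import os

from dulwich.midx import load_midx

# Hypothetical path; point this at a real repository whose
# .git/objects/pack/ directory contains a multi-pack-index file.
midx_path = os.path.join("/tmp/repo", ".git", "objects", "pack", "multi-pack-index")

midx = load_midx(midx_path)
try:
    print(f"{len(midx)} objects across {midx.pack_count} packs")
    # object_offset() takes a raw (binary) SHA and returns
    # (pack_name, offset), or None when the object is not indexed.
    sha, pack_name, offset = next(midx.iterentries())
    assert midx.object_offset(sha) == (pack_name, offset)
finally:
    midx.close()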

+ 184 - 0
dulwich/object_store.py

@@ -42,6 +42,7 @@ from typing import (
 
 from .errors import NotTreeError
 from .file import GitFile, _GitFile
+from .midx import MultiPackIndex, load_midx
 from .objects import (
     S_ISGITLINK,
     ZERO_SHA,
@@ -1399,6 +1400,10 @@ class DiskObjectStore(PackBasedObjectStore):
         self._commit_graph = None
         self._use_commit_graph = True  # Default to true
 
+        # Multi-pack-index support - lazy loaded
+        self._midx: MultiPackIndex | None = None
+        self._use_midx = True  # Default to true
+
     def __repr__(self) -> str:
         """Return string representation of DiskObjectStore.
 
@@ -1485,6 +1490,9 @@ class DiskObjectStore(PackBasedObjectStore):
         # Read core.commitGraph setting
         use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
 
+        # Read core.multiPackIndex setting
+        use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)
+
         # Read core.fsyncObjectFiles setting
         fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
 
@@ -1521,6 +1529,7 @@ class DiskObjectStore(PackBasedObjectStore):
             dir_mode=dir_mode,
         )
         instance._use_commit_graph = use_commit_graph
+        instance._use_midx = use_midx
         return instance
 
     @property
@@ -2036,6 +2045,162 @@ class DiskObjectStore(PackBasedObjectStore):
                 self._commit_graph = read_commit_graph(graph_file)
         return self._commit_graph
 
+    def get_midx(self) -> MultiPackIndex | None:
+        """Get the multi-pack-index for this object store.
+
+        Returns:
+          MultiPackIndex object if available, None otherwise
+
+        Raises:
+          ValueError: If MIDX file is corrupt
+          OSError: If MIDX file cannot be read
+        """
+        if not self._use_midx:
+            return None
+
+        if self._midx is None:
+            # Look for MIDX in pack directory
+            midx_file = os.path.join(self.pack_dir, "multi-pack-index")
+            if os.path.exists(midx_file):
+                self._midx = load_midx(midx_file)
+        return self._midx
+
+    def _get_pack_by_name(self, pack_name: str) -> Pack:
+        """Get a pack by its base name.
+
+        Args:
+            pack_name: Base name of the pack (e.g., 'pack-abc123.pack' or 'pack-abc123.idx')
+
+        Returns:
+            Pack object
+
+        Raises:
+            KeyError: If pack doesn't exist
+        """
+        # Remove .pack or .idx extension if present
+        if pack_name.endswith(".pack"):
+            base_name = pack_name[:-5]
+        elif pack_name.endswith(".idx"):
+            base_name = pack_name[:-4]
+        else:
+            base_name = pack_name
+
+        # Check if already in cache
+        if base_name in self._pack_cache:
+            return self._pack_cache[base_name]
+
+        # Load the pack
+        pack_path = os.path.join(self.pack_dir, base_name)
+        if not os.path.exists(pack_path + ".pack"):
+            raise KeyError(f"Pack {pack_name} not found")
+
+        pack = Pack(
+            pack_path,
+            delta_window_size=self.pack_delta_window_size,
+            window_memory=self.pack_window_memory,
+            delta_cache_size=self.pack_delta_cache_size,
+            depth=self.pack_depth,
+            threads=self.pack_threads,
+            big_file_threshold=self.pack_big_file_threshold,
+        )
+        self._pack_cache[base_name] = pack
+        return pack
+
+    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
+        """Check if a particular object is present by SHA1 and is packed.
+
+        This checks the MIDX first if available, then falls back to checking
+        individual pack indexes.
+
+        Args:
+            sha: Binary SHA of the object
+
+        Returns:
+            True if the object is in a pack file
+        """
+        # Check MIDX first for faster lookup
+        midx = self.get_midx()
+        if midx is not None and sha in midx:
+            return True
+
+        # Fall back to checking individual packs
+        return super().contains_packed(sha)
+
+    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
+        """Obtain the raw fulltext for an object.
+
+        This uses the MIDX if available for faster lookups.
+
+        Args:
+            name: SHA for the object (20 bytes binary or 40 bytes hex)
+
+        Returns:
+            Tuple with numeric type and object contents
+
+        Raises:
+            KeyError: If object not found
+        """
+        if name == ZERO_SHA:
+            raise KeyError(name)
+
+        sha: RawObjectID
+        if len(name) == 40:
+            # name is ObjectID (hex), convert to RawObjectID
+            sha = hex_to_sha(cast(ObjectID, name))
+        elif len(name) == 20:
+            # name is already RawObjectID (binary)
+            sha = RawObjectID(name)
+        else:
+            raise AssertionError(f"Invalid object name {name!r}")
+
+        # Try MIDX first for faster lookup
+        midx = self.get_midx()
+        if midx is not None:
+            result = midx.object_offset(sha)
+            if result is not None:
+                pack_name, _offset = result
+                try:
+                    pack = self._get_pack_by_name(pack_name)
+                    return pack.get_raw(sha)
+                except (KeyError, PackFileDisappeared):
+                    # Pack disappeared or object not found, fall through to standard lookup
+                    pass
+
+        # Fall back to the standard implementation
+        return super().get_raw(name)
+
+    def write_midx(self) -> bytes:
+        """Write a multi-pack-index file for this object store.
+
+        Creates a MIDX file that indexes all pack files in the pack directory.
+
+        Returns:
+            SHA-1 checksum of the written MIDX file
+
+        Raises:
+            OSError: If the pack directory doesn't exist or MIDX can't be written
+        """
+        from .midx import write_midx_file
+
+        # Get all pack files
+        packs = self.packs
+        if not packs:
+            # No packs to index; skip writing and return a null checksum
+            return b"\x00" * 20
+
+        # Collect entries from all packs
+        pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
+
+        for pack in packs:
+            # Git stores .idx extension in MIDX, not .pack
+            pack_name = os.path.basename(pack._basename) + ".idx"
+            entries = list(pack.index.iterentries())
+            pack_entries.append((pack_name, entries))
+
+        # Write MIDX file
+        midx_path = os.path.join(self.pack_dir, "multi-pack-index")
+        return write_midx_file(midx_path, pack_entries)
+
     def write_commit_graph(
         self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
     ) -> None:
@@ -2149,6 +2314,25 @@ class DiskObjectStore(PackBasedObjectStore):
                 if time.time() - mtime > grace_period:
                     os.remove(pack_path)
 
+    def close(self) -> None:
+        """Close the object store and release resources.
+
+        This method closes all cached pack files, MIDX, and frees associated resources.
+        """
+        # Close MIDX if it's loaded
+        if self._midx is not None:
+            self._midx.close()
+            self._midx = None
+
+        # Close alternates
+        if self._alternates is not None:
+            for alt in self._alternates:
+                alt.close()
+            self._alternates = None
+
+        # Call parent class close to handle pack files
+        super().close()
+
 
 class MemoryObjectStore(PackCapableObjectStore):
     """Object store that keeps all objects in memory."""

+ 1 - 0
tests/__init__.py

@@ -162,6 +162,7 @@ def self_test_suite() -> unittest.TestSuite:
         "mbox",
         "merge",
         "merge_drivers",
+        "midx",
         "missing_obj_finder",
         "notes",
         "objects",

+ 1 - 0
tests/compat/__init__.py

@@ -34,6 +34,7 @@ def test_suite() -> unittest.TestSuite:
         "dumb",
         "index",
         "lfs",
+        "midx",
         "pack",
         "patch",
         "porcelain",

+ 267 - 0
tests/compat/test_midx.py

@@ -0,0 +1,267 @@
+# test_midx.py -- Compatibility tests for multi-pack-index functionality
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+
+"""Compatibility tests for Git multi-pack-index functionality.
+
+These tests verify that dulwich's MIDX implementation can read and interact
+with MIDX files created by C Git, and that Git can read MIDX files created
+by Dulwich.
+"""
+
+import os
+import tempfile
+
+from dulwich.midx import load_midx
+from dulwich.object_store import DiskObjectStore
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class MIDXCompatTests(CompatTestCase):
+    """Compatibility tests for multi-pack-index functionality."""
+
+    # Multi-pack-index was introduced in Git 2.21.0
+    min_git_version = (2, 21, 0)
+
+    def setUp(self):
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.repo_path = os.path.join(self.test_dir, "test-repo")
+
+        # Set up git identity to avoid committer identity errors
+        self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
+        self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
+        self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
+        self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")
+
+    def tearDown(self):
+        from .utils import rmtree_ro
+
+        rmtree_ro(self.test_dir)
+        super().tearDown()
+
+    def create_test_repo_with_packs(self):
+        """Create a test repository with multiple pack files."""
+        # Initialize repository
+        run_git_or_fail(["init"], cwd=self.test_dir)
+        os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)
+
+        work_dir = os.path.join(self.test_dir, "work")
+        os.makedirs(work_dir)
+
+        # Create .git file pointing to our repo
+        with open(os.path.join(work_dir, ".git"), "w") as f:
+            f.write(f"gitdir: {self.repo_path}\n")
+
+        # Create some commits and pack them
+        for i in range(5):
+            filename = f"file{i}.txt"
+            with open(os.path.join(work_dir, filename), "w") as f:
+                f.write(f"Content {i}\n" * 100)  # Make files bigger to ensure packing
+
+            run_git_or_fail(["add", filename], cwd=work_dir)
+            run_git_or_fail(
+                [
+                    "commit",
+                    "-m",
+                    f"Commit {i}",
+                    "--author",
+                    "Test Author <test@example.com>",
+                ],
+                cwd=work_dir,
+            )
+
+            # Create a pack file after each commit to get multiple packs
+            if i > 0:  # Skip first commit to avoid empty pack
+                run_git_or_fail(["repack", "-d"], cwd=work_dir)
+
+        return work_dir
+
+    def test_read_git_midx(self):
+        """Test that Dulwich can read a MIDX file created by Git."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Have Git create a MIDX file
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Verify Git created the MIDX file
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        self.assertTrue(
+            os.path.exists(midx_path), "Git did not create multi-pack-index file"
+        )
+
+        # Load the MIDX file with Dulwich
+        midx = load_midx(midx_path)
+        try:
+            # Verify we can read it
+            self.assertGreater(len(midx), 0, "MIDX should contain objects")
+            self.assertGreater(midx.pack_count, 0, "MIDX should reference packs")
+
+            # Verify the pack names look reasonable
+            # Git stores .idx extensions in MIDX files
+            for pack_name in midx.pack_names:
+                self.assertTrue(pack_name.startswith("pack-"))
+                self.assertTrue(pack_name.endswith(".idx"))
+        finally:
+            midx.close()
+
+    def test_git_uses_dulwich_midx(self):
+        """Test that Git can use a MIDX file created by Dulwich."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Use Dulwich to create a MIDX file
+        repo = Repo(self.repo_path)
+        try:
+            store = repo.object_store
+            self.assertIsInstance(store, DiskObjectStore)
+
+            # Write MIDX with Dulwich
+            checksum = store.write_midx()
+            self.assertEqual(20, len(checksum))
+        finally:
+            repo.close()
+
+        # Verify the file was created
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        self.assertTrue(os.path.exists(midx_path))
+
+        # Have Git verify the MIDX file (should succeed with return code 0)
+        run_git_or_fail(["multi-pack-index", "verify"], cwd=work_dir)
+
+        # Try to use the MIDX with Git commands
+        # This should work if the MIDX is valid
+        run_git_or_fail(["fsck"], cwd=work_dir)
+
+    def test_midx_object_lookup_matches_git(self):
+        """Test that object lookups through MIDX match Git's results."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Have Git create a MIDX file
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Load with Dulwich
+        repo = Repo(self.repo_path)
+        try:
+            store = repo.object_store
+
+            # Get MIDX
+            midx = store.get_midx()
+            self.assertIsNotNone(midx, "MIDX should be loaded")
+
+            # Get all objects from Git
+            result = run_git_or_fail(["rev-list", "--all", "--objects"], cwd=work_dir)
+            object_shas = [
+                line.split()[0].encode("ascii")
+                for line in result.decode("utf-8").strip().split("\n")
+                if line
+            ]
+
+            # Verify we can find these objects through the MIDX
+            found_count = 0
+            for sha_hex in object_shas:
+                # Convert hex to binary
+                sha_bin = bytes.fromhex(sha_hex.decode("ascii"))
+
+                # Check if it's in the MIDX
+                if sha_bin in midx:
+                    found_count += 1
+
+                    # Verify we can get the object location
+                    result = midx.object_offset(sha_bin)
+                    self.assertIsNotNone(result)
+                    pack_name, offset = result
+                    self.assertIsInstance(pack_name, str)
+                    self.assertIsInstance(offset, int)
+                    self.assertGreater(offset, 0)
+
+            # We should find at least some objects in the MIDX
+            self.assertGreater(
+                found_count, 0, "Should find at least some objects in MIDX"
+            )
+        finally:
+            repo.close()
+
+    def test_midx_with_multiple_packs(self):
+        """Test MIDX functionality with multiple pack files."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Create multiple pack files explicitly
+        run_git_or_fail(["repack"], cwd=work_dir)
+        run_git_or_fail(["repack"], cwd=work_dir)
+
+        # Create MIDX with Git
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Load with Dulwich
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        midx = load_midx(midx_path)
+        try:
+            # Should have multiple packs
+            # (Exact count may vary depending on Git version and repacking)
+            self.assertGreaterEqual(midx.pack_count, 1)
+
+            # Verify we can iterate over all entries
+            entries = list(midx.iterentries())
+            self.assertGreater(len(entries), 0)
+
+            # All entries should have valid structure
+            for sha, pack_name, offset in entries:
+                self.assertEqual(20, len(sha))  # SHA-1 is 20 bytes
+                self.assertIsInstance(pack_name, str)
+                # Git stores .idx extensions in MIDX files
+                self.assertTrue(pack_name.endswith(".idx"))
+                self.assertIsInstance(offset, int)
+                self.assertGreaterEqual(offset, 0)
+        finally:
+            midx.close()
+
+    def test_dulwich_object_store_with_git_midx(self):
+        """Test that DiskObjectStore can use Git-created MIDX for lookups."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Have Git create a MIDX file
+        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
+
+        # Load repo with Dulwich
+        repo = Repo(self.repo_path)
+        try:
+            # Get a commit from the repo
+            result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
+            head_sha = result.decode("utf-8").strip().encode("ascii")
+
+            # Verify we can access it through Dulwich
+            # This should use the MIDX for lookup
+            obj = repo.object_store[head_sha]
+            self.assertIsNotNone(obj)
+            self.assertEqual(b"commit", obj.type_name)
+        finally:
+            repo.close()
+
+    def test_repack_with_midx(self):
+        """Test that repacking works correctly with MIDX present."""
+        work_dir = self.create_test_repo_with_packs()
+
+        # Create MIDX with Dulwich
+        repo = Repo(self.repo_path)
+        try:
+            repo.object_store.write_midx()
+        finally:
+            repo.close()
+
+        # Verify Git can still repack
+        run_git_or_fail(["repack", "-d"], cwd=work_dir)
+
+        # The MIDX should still be readable
+        midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
+        if os.path.exists(midx_path):  # Git may remove it during repack
+            midx = load_midx(midx_path)
+            try:
+                self.assertGreaterEqual(len(midx), 0)
+            finally:
+                midx.close()

+ 277 - 0
tests/test_midx.py

@@ -0,0 +1,277 @@
+# test_midx.py -- Tests for multi-pack-index
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for multi-pack-index (MIDX) functionality."""
+
+import os
+import tempfile
+from io import BytesIO
+from unittest import TestCase
+
+from dulwich.midx import (
+    HASH_ALGORITHM_SHA1,
+    MultiPackIndex,
+    write_midx,
+    write_midx_file,
+)
+
+
+class MIDXWriteTests(TestCase):
+    """Tests for writing MIDX files."""
+
+    def test_write_empty_midx(self):
+        """Test writing an empty MIDX file."""
+        f = BytesIO()
+        pack_entries = []
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+
+        # Checksum should be 20 bytes
+        self.assertEqual(20, len(checksum))
+
+        # Should be able to read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+        self.assertEqual(0, len(midx))
+        self.assertEqual(0, midx.pack_count)
+        self.assertEqual([], midx.pack_names)
+
+    def test_write_single_pack_midx(self):
+        """Test writing a MIDX file with a single pack."""
+        f = BytesIO()
+
+        # Create some fake pack entries
+        pack_entries = [
+            (
+                "pack-abc123.idx",
+                [
+                    (b"\x01" * 20, 100, 0x12345678),  # sha, offset, crc32
+                    (b"\x02" * 20, 200, 0x87654321),
+                    (b"\x03" * 20, 300, 0xABCDEF00),
+                ],
+            )
+        ]
+
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        self.assertEqual(20, len(checksum))
+
+        # Read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertEqual(3, len(midx))
+        self.assertEqual(1, midx.pack_count)
+        self.assertEqual(["pack-abc123.idx"], midx.pack_names)
+
+        # Check object lookups
+        result = midx.object_offset(b"\x01" * 20)
+        self.assertIsNotNone(result)
+        pack_name, offset = result
+        self.assertEqual("pack-abc123.idx", pack_name)
+        self.assertEqual(100, offset)
+
+        result = midx.object_offset(b"\x02" * 20)
+        self.assertIsNotNone(result)
+        pack_name, offset = result
+        self.assertEqual("pack-abc123.idx", pack_name)
+        self.assertEqual(200, offset)
+
+        result = midx.object_offset(b"\x03" * 20)
+        self.assertIsNotNone(result)
+        pack_name, offset = result
+        self.assertEqual("pack-abc123.idx", pack_name)
+        self.assertEqual(300, offset)
+
+        # Check non-existent object
+        result = midx.object_offset(b"\xff" * 20)
+        self.assertIsNone(result)
+
+    def test_write_multiple_packs_midx(self):
+        """Test writing a MIDX file with multiple packs."""
+        f = BytesIO()
+
+        pack_entries = [
+            (
+                "pack-111.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x03" * 20, 300, 0),
+                ],
+            ),
+            (
+                "pack-222.idx",
+                [
+                    (b"\x02" * 20, 50, 0),
+                    (b"\x04" * 20, 150, 0),
+                ],
+            ),
+        ]
+
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        self.assertEqual(20, len(checksum))
+
+        # Read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertEqual(4, len(midx))
+        self.assertEqual(2, midx.pack_count)
+        self.assertEqual(["pack-111.idx", "pack-222.idx"], midx.pack_names)
+
+        # Objects should be findable across packs
+        result = midx.object_offset(b"\x01" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual("pack-111.idx", result[0])
+
+        result = midx.object_offset(b"\x02" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual("pack-222.idx", result[0])
+
+    def test_write_large_offsets(self):
+        """Test writing a MIDX file with large offsets (>= 2^31)."""
+        f = BytesIO()
+
+        large_offset = 2**32  # Offset that requires LOFF chunk
+        pack_entries = [
+            (
+                "pack-large.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x02" * 20, large_offset, 0),  # Large offset
+                ],
+            )
+        ]
+
+        checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        self.assertEqual(20, len(checksum))
+
+        # Read it back
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertEqual(2, len(midx))
+
+        # Small offset should work
+        result = midx.object_offset(b"\x01" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual(100, result[1])
+
+        # Large offset should work
+        result = midx.object_offset(b"\x02" * 20)
+        self.assertIsNotNone(result)
+        self.assertEqual(large_offset, result[1])
+
+    def test_write_midx_file(self):
+        """Test writing a MIDX file to disk."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            midx_path = os.path.join(tmpdir, "multi-pack-index")
+
+            pack_entries = [
+                (
+                    "pack-test.idx",
+                    [
+                        (b"\xaa" * 20, 1000, 0),
+                    ],
+                )
+            ]
+
+            checksum = write_midx_file(midx_path, pack_entries, HASH_ALGORITHM_SHA1)
+            self.assertEqual(20, len(checksum))
+
+            # Verify file was created
+            self.assertTrue(os.path.exists(midx_path))
+
+            # Read it back from disk
+            with open(midx_path, "rb") as f:
+                midx = MultiPackIndex(midx_path, file=f, contents=f.read())
+
+            self.assertEqual(1, len(midx))
+            result = midx.object_offset(b"\xaa" * 20)
+            self.assertIsNotNone(result)
+            self.assertEqual("pack-test.idx", result[0])
+            self.assertEqual(1000, result[1])
+
+
+class MIDXContainsTests(TestCase):
+    """Tests for MIDX __contains__ method."""
+
+    def test_contains_object(self):
+        """Test checking if an object is in the MIDX."""
+        f = BytesIO()
+        pack_entries = [
+            (
+                "pack-test.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x02" * 20, 200, 0),
+                ],
+            )
+        ]
+
+        write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        self.assertTrue(b"\x01" * 20 in midx)
+        self.assertTrue(b"\x02" * 20 in midx)
+        self.assertFalse(b"\xff" * 20 in midx)
+
+
+class MIDXIterEntriesTests(TestCase):
+    """Tests for MIDX iterentries method."""
+
+    def test_iterentries(self):
+        """Test iterating over MIDX entries."""
+        f = BytesIO()
+        pack_entries = [
+            (
+                "pack-111.idx",
+                [
+                    (b"\x01" * 20, 100, 0),
+                    (b"\x03" * 20, 300, 0),
+                ],
+            ),
+            (
+                "pack-222.idx",
+                [
+                    (b"\x02" * 20, 50, 0),
+                ],
+            ),
+        ]
+
+        write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
+        f.seek(0)
+        midx = MultiPackIndex("test.midx", file=f, contents=f.read())
+
+        entries = list(midx.iterentries())
+        self.assertEqual(3, len(entries))
+
+        # Entries should be sorted by SHA
+        self.assertEqual(b"\x01" * 20, entries[0][0])
+        self.assertEqual("pack-111.idx", entries[0][1])
+        self.assertEqual(100, entries[0][2])
+
+        self.assertEqual(b"\x02" * 20, entries[1][0])
+        self.assertEqual("pack-222.idx", entries[1][1])
+        self.assertEqual(50, entries[1][2])
+
+        self.assertEqual(b"\x03" * 20, entries[2][0])
+        self.assertEqual("pack-111.idx", entries[2][1])
+        self.assertEqual(300, entries[2][2])