
Add multi-pack-index (MIDX) reading support

Jelmer Vernooij, 2 months ago
commit 12cfb78a07
2 files changed, 546 insertions(+), 0 deletions(-)
  1. dulwich/midx.py (+418, -0)
  2. dulwich/object_store.py (+128, -0)

dulwich/midx.py (+418, -0)

@@ -0,0 +1,418 @@
+# midx.py -- Multi-Pack-Index (MIDX) support
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Multi-Pack-Index (MIDX) support.
+
+A multi-pack-index (MIDX) provides a single index that covers multiple pack files,
+enabling fast object lookup across all packs without opening each pack index.
+
+The MIDX file format consists of:
+- A header with signature, version, and hash algorithm
+- A chunk lookup table
+- Multiple chunks containing pack names, OID fanout, OID lookup, and object offsets
+- A trailer with checksum
+
+This module currently provides:
+- Reading MIDX files
+- Integration with pack-based object stores
+
+Writing MIDX files is not yet implemented; see the TODOs at the end of the
+module.
+"""
+
+import os
+import struct
+from collections.abc import Iterator
+from typing import IO, Any
+
+from .file import GitFile, _GitFile
+
+# MIDX signature
+MIDX_SIGNATURE = b"MIDX"
+
+# MIDX version
+MIDX_VERSION = 1
+
+# Chunk identifiers (4 bytes each)
+CHUNK_PNAM = b"PNAM"  # Packfile names
+CHUNK_OIDF = b"OIDF"  # OID fanout table
+CHUNK_OIDL = b"OIDL"  # OID lookup table
+CHUNK_OOFF = b"OOFF"  # Object offsets
+CHUNK_LOFF = b"LOFF"  # Large offsets (optional)
+CHUNK_BTMP = b"BTMP"  # Bitmapped packfiles (optional)
+CHUNK_RIDX = b"RIDX"  # Reverse index (optional)
+
+# Hash algorithm identifiers
+HASH_ALGORITHM_SHA1 = 1
+HASH_ALGORITHM_SHA256 = 2
+
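+# On-disk layout of the 12-byte header parsed below (byte offsets):
+#
+#   0-3    signature, always b"MIDX"
+#   4      format version (currently 1)
+#   5      object ID version (1 = SHA-1, 2 = SHA-256)
+#   6      number of chunks
+#   7      number of base multi-pack-index files (0; incremental chains
+#          are not supported here)
+#   8-11   number of pack files, big-endian 32-bit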
+
+class MultiPackIndex:
+    """Multi-pack-index for efficient object lookup across multiple pack files."""
+
+    def __init__(
+        self,
+        filename: str | os.PathLike[str],
+        file: IO[bytes] | _GitFile | None = None,
+        contents: bytes | None = None,
+        size: int | None = None,
+    ) -> None:
+        """Initialize a MultiPackIndex.
+
+        Args:
+            filename: Path to the MIDX file
+            file: Optional file object
+            contents: Optional mmap'd contents
+            size: Optional size of the MIDX file
+        """
+        self._filename = os.fspath(filename)
+        self._file = file
+        self._size = size
+
+        # Instance variables that will be set during parsing
+        self.version: int
+        self.hash_algorithm: int
+        self.hash_size: int
+        self.chunk_count: int
+        self.base_midx_files: int
+        self.pack_count: int
+        self.pack_names: list[str]
+        self.object_count: int
+        self._chunks: dict[bytes, int]
+        self._chunks_end: int
+        self._fanout_table: list[int]
+        self._oidl_offset: int
+        self._ooff_offset: int
+        self._loff_offset: int
+
+        # Load file contents
+        if contents is None:
+            if file is None:
+                with GitFile(filename, "rb") as f:
+                    self._contents, self._size = self._load_file_contents(f, size)
+            else:
+                self._contents, self._size = self._load_file_contents(file, size)
+        else:
+            self._contents = contents
+            if self._size is None:
+                self._size = len(contents)
+
+        # Parse header
+        self._parse_header()
+
+        # Parse chunk lookup table
+        self._parse_chunk_table()
+
+    def _load_file_contents(
+        self, f: IO[bytes] | _GitFile, size: int | None = None
+    ) -> tuple[bytes | Any, int]:
+        """Load contents from a file, preferring mmap when possible.
+
+        Args:
+            f: File-like object to load
+            size: Expected size, or None to determine from file
+
+        Returns:
+            Tuple of (contents, size)
+        """
+        # Simplified version - similar to pack.py's _load_file_contents
+        if size is None:
+            f.seek(0, 2)  # SEEK_END
+            size = f.tell()
+            f.seek(0)
+
+        # For now, just read the entire file into memory
+        # TODO: Add mmap support for large files
+        contents = f.read(size)
+        return contents, size
+
+    def _parse_header(self) -> None:
+        """Parse the MIDX header."""
+        if len(self._contents) < 12:
+            raise ValueError("MIDX file too small")
+
+        # Check signature
+        signature = self._contents[0:4]
+        if signature != MIDX_SIGNATURE:
+            raise ValueError(f"Invalid MIDX signature: {signature!r}")
+
+        # Read version
+        self.version = self._contents[4]
+        if self.version != MIDX_VERSION:
+            raise ValueError(f"Unsupported MIDX version: {self.version}")
+
+        # Read object ID version (hash algorithm)
+        self.hash_algorithm = self._contents[5]
+        if self.hash_algorithm == HASH_ALGORITHM_SHA1:
+            self.hash_size = 20
+        elif self.hash_algorithm == HASH_ALGORITHM_SHA256:
+            self.hash_size = 32
+        else:
+            raise ValueError(f"Unknown hash algorithm: {self.hash_algorithm}")
+
+        # Read chunk count
+        self.chunk_count = self._contents[6]
+
+        # Read base MIDX files count (currently always 0)
+        self.base_midx_files = self._contents[7]
+        if self.base_midx_files != 0:
+            raise ValueError("Incremental MIDX not yet supported")
+
+        # Read pack file count
+        (self.pack_count,) = struct.unpack(">L", self._contents[8:12])
+
+    def _parse_chunk_table(self) -> None:
+        """Parse the chunk lookup table."""
+        self._chunks = {}
+
+        # Chunk table starts at offset 12
+        offset = 12
+
+        # Each chunk entry is 12 bytes (4-byte ID + 8-byte offset). The
+        # table is terminated by an all-zero ID whose offset marks the end
+        # of the last chunk (the start of the trailer).
+        self._chunks_end = len(self._contents)
+        for _ in range(self.chunk_count + 1):  # +1 for terminator
+            chunk_id = self._contents[offset : offset + 4]
+            (chunk_offset,) = struct.unpack(
+                ">Q", self._contents[offset + 4 : offset + 12]
+            )
+
+            if chunk_id == b"\x00\x00\x00\x00":
+                # Terminator entry: record the end of the last chunk
+                self._chunks_end = chunk_offset
+                break
+
+            self._chunks[chunk_id] = chunk_offset
+            offset += 12
+
+        # Parse required chunks
+        self._parse_pnam_chunk()
+        self._parse_oidf_chunk()
+        self._parse_oidl_chunk()
+        self._parse_ooff_chunk()
+
+        # Parse optional chunks
+        if CHUNK_LOFF in self._chunks:
+            self._parse_loff_chunk()
+
+    def _parse_pnam_chunk(self) -> None:
+        """Parse the Packfile Names (PNAM) chunk."""
+        if CHUNK_PNAM not in self._chunks:
+            raise ValueError("Required PNAM chunk not found")
+
+        offset = self._chunks[CHUNK_PNAM]
+        self.pack_names = []
+
+        # Find the end of the PNAM chunk: the offset of the next chunk, or
+        # the terminator offset if PNAM is the last chunk (using the file
+        # length here would overshoot into the trailer checksum)
+        next_offset = min(
+            (o for o in self._chunks.values() if o > offset),
+            default=self._chunks_end,
+        )
+
+        # Parse null-terminated pack names
+        current = offset
+        while current < next_offset:
+            # Find the next null terminator
+            null_pos = self._contents.find(b"\x00", current, next_offset)
+            if null_pos == -1:
+                break
+
+            pack_name = self._contents[current:null_pos].decode("utf-8")
+            if pack_name:  # Skip empty strings (padding)
+                self.pack_names.append(pack_name)
+            current = null_pos + 1
+
+    def _parse_oidf_chunk(self) -> None:
+        """Parse the OID Fanout (OIDF) chunk."""
+        if CHUNK_OIDF not in self._chunks:
+            raise ValueError("Required OIDF chunk not found")
+
+        offset = self._chunks[CHUNK_OIDF]
+        self._fanout_table = []
+
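+        # fanout[b] counts the objects whose first OID byte is <= b, so
+        # fanout[255] is the total object count and the objects whose OID
+        # starts with byte b occupy indices [fanout[b - 1], fanout[b]).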
+        # Read 256 4-byte entries
+        for i in range(256):
+            (count,) = struct.unpack(
+                ">L", self._contents[offset + i * 4 : offset + i * 4 + 4]
+            )
+            self._fanout_table.append(count)
+
+        # Total object count is the last entry
+        self.object_count = self._fanout_table[255]
+
+    def _parse_oidl_chunk(self) -> None:
+        """Parse the OID Lookup (OIDL) chunk."""
+        if CHUNK_OIDL not in self._chunks:
+            raise ValueError("Required OIDL chunk not found")
+
+        self._oidl_offset = self._chunks[CHUNK_OIDL]
+
+    def _parse_ooff_chunk(self) -> None:
+        """Parse the Object Offsets (OOFF) chunk."""
+        if CHUNK_OOFF not in self._chunks:
+            raise ValueError("Required OOFF chunk not found")
+
+        self._ooff_offset = self._chunks[CHUNK_OOFF]
+
+    def _parse_loff_chunk(self) -> None:
+        """Parse the Large Offsets (LOFF) chunk."""
+        self._loff_offset = self._chunks[CHUNK_LOFF]
+
+    def __len__(self) -> int:
+        """Return the number of objects in this MIDX."""
+        return self.object_count
+
+    def _get_oid(self, index: int) -> bytes:
+        """Get the object ID at the given index.
+
+        Args:
+            index: Index of the object
+
+        Returns:
+            Binary object ID
+        """
+        if index < 0 or index >= self.object_count:
+            raise IndexError(f"Index {index} out of range")
+
+        offset = self._oidl_offset + index * self.hash_size
+        return self._contents[offset : offset + self.hash_size]
+
+    def _get_pack_info(self, index: int) -> tuple[int, int]:
+        """Get pack ID and offset for object at the given index.
+
+        Args:
+            index: Index of the object
+
+        Returns:
+            Tuple of (pack_id, offset)
+        """
+        if index < 0 or index >= self.object_count:
+            raise IndexError(f"Index {index} out of range")
+
+        # Each entry is 8 bytes (4-byte pack ID + 4-byte offset)
+        offset = self._ooff_offset + index * 8
+
+        (pack_id,) = struct.unpack(">L", self._contents[offset : offset + 4])
+        (pack_offset,) = struct.unpack(">L", self._contents[offset + 4 : offset + 8])
+
+        # Check if this is a large offset (MSB set)
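+        # (e.g. a raw value of 0x80000002 means the real 64-bit offset is
+        # stored at index 2 of the LOFF chunk)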
+        if pack_offset & 0x80000000:
+            # Look up in LOFF chunk
+            if CHUNK_LOFF not in self._chunks:
+                raise ValueError("Large offset found but no LOFF chunk")
+
+            large_index = pack_offset & 0x7FFFFFFF
+            large_offset_pos = self._loff_offset + large_index * 8
+            (pack_offset,) = struct.unpack(
+                ">Q", self._contents[large_offset_pos : large_offset_pos + 8]
+            )
+
+        return pack_id, pack_offset
+
+    def object_offset(self, sha: bytes) -> tuple[str, int] | None:
+        """Return the pack name and offset for the given object.
+
+        Args:
+            sha: Binary SHA-1 or SHA-256 hash
+
+        Returns:
+            Tuple of (pack_name, offset) or None if not found
+        """
+        if len(sha) != self.hash_size:
+            raise ValueError(
+                f"SHA size mismatch: expected {self.hash_size}, got {len(sha)}"
+            )
+
+        # Use fanout table to narrow search range
+        first_byte = sha[0]
+        start_idx = 0 if first_byte == 0 else self._fanout_table[first_byte - 1]
+        end_idx = self._fanout_table[first_byte]
+
+        # Binary search within the range
+        while start_idx < end_idx:
+            mid = (start_idx + end_idx) // 2
+            mid_sha = self._get_oid(mid)
+
+            if mid_sha == sha:
+                # Found it!
+                pack_id, offset = self._get_pack_info(mid)
+                return self.pack_names[pack_id], offset
+            elif mid_sha < sha:
+                start_idx = mid + 1
+            else:
+                end_idx = mid
+
+        return None
+
+    def __contains__(self, sha: bytes) -> bool:
+        """Check if the given object SHA is in this MIDX.
+
+        Args:
+            sha: Binary SHA hash
+
+        Returns:
+            True if the object is in this MIDX
+        """
+        return self.object_offset(sha) is not None
+
+    def iterentries(self) -> Iterator[tuple[bytes, str, int]]:
+        """Iterate over all entries in this MIDX.
+
+        Yields:
+            Tuples of (sha, pack_name, offset)
+        """
+        for i in range(self.object_count):
+            sha = self._get_oid(i)
+            pack_id, offset = self._get_pack_info(i)
+            pack_name = self.pack_names[pack_id]
+            yield sha, pack_name, offset
+
+    def close(self) -> None:
+        """Close the MIDX file."""
+        if self._file is not None:
+            self._file.close()
+            self._file = None
+
+
+def load_midx(path: str | os.PathLike[str]) -> MultiPackIndex:
+    """Load a multi-pack-index file by path.
+
+    Args:
+        path: Path to the MIDX file
+
+    Returns:
+        A MultiPackIndex loaded from the given path
+    """
+    with GitFile(path, "rb") as f:
+        return load_midx_file(path, f)
+
+
+def load_midx_file(
+    path: str | os.PathLike[str], f: IO[bytes] | _GitFile
+) -> MultiPackIndex:
+    """Load a multi-pack-index from a file-like object.
+
+    Args:
+        path: Path for the MIDX file
+        f: File-like object
+
+    Returns:
+        A MultiPackIndex loaded from the given file
+    """
+    return MultiPackIndex(path, file=f)
+
+
+# TODO: Implement MIDX writing functionality
+# TODO: Implement integration with object_store.py
+# TODO: Add support for incremental MIDX chains
+# TODO: Add support for BTMP and RIDX chunks for bitmap integration
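
A minimal usage sketch of the reading API added above, assuming a repository
whose multi-pack-index has already been written by git (the path and object
ID below are placeholders):

    from binascii import unhexlify

    from dulwich.midx import load_midx

    midx = load_midx(".git/objects/pack/multi-pack-index")
    print(f"{len(midx)} objects across {midx.pack_count} packs")

    # Find which pack holds a given (binary) object ID
    sha = unhexlify("1234567890abcdef1234567890abcdef12345678")
    result = midx.object_offset(sha)
    if result is not None:
        pack_name, offset = result
        print(f"{pack_name} @ {offset}")

    # Or walk every indexed object
    for sha, pack_name, offset in midx.iterentries():
        ...

    midx.close()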

dulwich/object_store.py (+128, -0)

@@ -42,6 +42,7 @@ from typing import (
 
 from .errors import NotTreeError
 from .file import GitFile, _GitFile
+from .midx import MultiPackIndex, load_midx
 from .objects import (
     S_ISGITLINK,
     ZERO_SHA,
@@ -1399,6 +1400,10 @@ class DiskObjectStore(PackBasedObjectStore):
         self._commit_graph = None
         self._use_commit_graph = True  # Default to true
 
+        # Multi-pack-index support - lazy loaded
+        self._midx: MultiPackIndex | None = None
+        self._use_midx = True  # Default to true
+
     def __repr__(self) -> str:
         """Return string representation of DiskObjectStore.
 
@@ -1485,6 +1490,9 @@ class DiskObjectStore(PackBasedObjectStore):
         # Read core.commitGraph setting
         use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
 
+        # Read core.multiPackIndex setting
+        use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)
+
         # Read core.fsyncObjectFiles setting
         fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
 
@@ -1521,6 +1529,7 @@ class DiskObjectStore(PackBasedObjectStore):
             dir_mode=dir_mode,
         )
         instance._use_commit_graph = use_commit_graph
+        instance._use_midx = use_midx
         return instance
 
     @property
@@ -2036,6 +2045,125 @@ class DiskObjectStore(PackBasedObjectStore):
                 self._commit_graph = read_commit_graph(graph_file)
         return self._commit_graph
 
+    def get_midx(self) -> MultiPackIndex | None:
+        """Get the multi-pack-index for this object store.
+
+        Returns:
+          MultiPackIndex object if available, None otherwise
+
+        Raises:
+          ValueError: If MIDX file is corrupt
+          OSError: If MIDX file cannot be read
+        """
+        if not self._use_midx:
+            return None
+
+        if self._midx is None:
+            # Look for MIDX in pack directory
+            midx_file = os.path.join(self.pack_dir, "multi-pack-index")
+            if os.path.exists(midx_file):
+                self._midx = load_midx(midx_file)
+        return self._midx
+
+    def _get_pack_by_name(self, pack_name: str) -> Pack:
+        """Get a pack by its base name.
+
+        Args:
+            pack_name: Base name of the pack (e.g., 'pack-abc123.pack')
+
+        Returns:
+            Pack object
+
+        Raises:
+            KeyError: If pack doesn't exist
+        """
+        # Remove .pack extension if present
+        if pack_name.endswith(".pack"):
+            base_name = pack_name[:-5]
+        else:
+            base_name = pack_name
+
+        # Check if already in cache
+        if base_name in self._pack_cache:
+            return self._pack_cache[base_name]
+
+        # Load the pack
+        pack_path = os.path.join(self.pack_dir, base_name)
+        if not os.path.exists(pack_path + ".pack"):
+            raise KeyError(f"Pack {pack_name} not found")
+
+        pack = Pack(
+            pack_path,
+            delta_window_size=self.pack_delta_window_size,
+            window_memory=self.pack_window_memory,
+            delta_cache_size=self.pack_delta_cache_size,
+            depth=self.pack_depth,
+            threads=self.pack_threads,
+            big_file_threshold=self.pack_big_file_threshold,
+        )
+        self._pack_cache[base_name] = pack
+        return pack
+
+    def contains_packed(self, sha: bytes) -> bool:
+        """Check if a particular object is present by SHA1 and is packed.
+
+        This checks the MIDX first if available, then falls back to checking
+        individual pack indexes.
+
+        Args:
+            sha: Binary SHA of the object
+
+        Returns:
+            True if the object is in a pack file
+        """
+        # Check MIDX first for faster lookup
+        midx = self.get_midx()
+        if midx is not None and sha in midx:
+            return True
+
+        # Fall back to checking individual packs
+        return super().contains_packed(sha)
+
+    def get_raw(self, name: bytes) -> tuple[int, bytes]:
+        """Obtain the raw fulltext for an object.
+
+        This uses the MIDX if available for faster lookups.
+
+        Args:
+            name: SHA for the object (20 bytes binary or 40 bytes hex)
+
+        Returns:
+            Tuple with numeric type and object contents
+
+        Raises:
+            KeyError: If object not found
+        """
+        if name == ZERO_SHA:
+            raise KeyError(name)
+
+        if len(name) == 40:
+            sha = hex_to_sha(name)
+        elif len(name) == 20:
+            sha = name
+        else:
+            raise AssertionError(f"Invalid object name {name!r}")
+
+        # Try MIDX first for faster lookup
+        midx = self.get_midx()
+        if midx is not None:
+            result = midx.object_offset(sha)
+            if result is not None:
+                pack_name, _offset = result
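+                # The MIDX offset itself is not used here; Pack.get_raw
+                # resolves the object through the pack's own index.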
+                try:
+                    pack = self._get_pack_by_name(pack_name)
+                    return pack.get_raw(sha)
+                except (KeyError, PackFileDisappeared):
+                    # Pack disappeared or object not found, fall through to standard lookup
+                    pass
+
+        # Fall back to the standard implementation
+        return super().get_raw(name)
+
     def write_commit_graph(
         self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
     ) -> None:
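
A short sketch of the object-store integration, assuming an on-disk repository
(the path and object ID are placeholders). _use_midx defaults to True, and
DiskObjectStore.from_config additionally honors core.multiPackIndex, so
lookups consult the MIDX before falling back to per-pack indexes:

    from dulwich.object_store import DiskObjectStore

    store = DiskObjectStore("/path/to/repo/.git/objects")

    midx = store.get_midx()  # None if no multi-pack-index file exists
    if midx is not None:
        print(f"MIDX covers {len(midx)} objects in {len(midx.pack_names)} packs")

    # contains_packed() and get_raw() try the MIDX first, then fall back
    # to the standard per-pack lookup
    type_num, data = store.get_raw(b"1234567890abcdef1234567890abcdef12345678")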