Browse files

Use bitmap support (#1995)

Actually use the existing bitmap support, and add support for creating bitmap files.

Fixes #1792
Jelmer Vernooij, 1 month ago
parent
commit
258f2d3c51
9 changed files, with 1354 additions and 22 deletions
  1. NEWS (+3 -0)
  2. dulwich/bitmap.py (+493 -9)
  3. dulwich/cli.py (+7 -2)
  4. dulwich/object_store.py (+327 -10)
  5. dulwich/pack.py (+53 -0)
  6. dulwich/porcelain.py (+6 -1)
  7. tests/test_bitmap.py (+426 -0)
  8. tests/test_cli.py (+16 -0)
  9. tests/test_porcelain.py (+23 -0)

+ 3 - 0
NEWS

@@ -15,6 +15,9 @@
 
  * Drop support for Python 3.9. (Jelmer Vernooij)
 
+ * Add support for pack bitmap indexes for fast reachability queries.
+   (Jelmer Vernooij, #1792)
+
  * Add support for ``git rerere`` (reuse recorded resolution) with CLI
    subcommands and porcelain functions. Supports ``rerere.enabled`` and
    ``rerere.autoupdate`` configuration. (Jelmer Vernooij, #1786)

+ 493 - 9
dulwich/bitmap.py

@@ -30,14 +30,17 @@ for efficient storage and fast bitwise operations.
 
 import os
 import struct
-from collections.abc import Iterator
+from collections import deque
+from collections.abc import Callable, Iterable, Iterator
 from io import BytesIO
 from typing import IO, TYPE_CHECKING
 
 from .file import GitFile
+from .objects import Blob, Commit, Tag, Tree
 
 if TYPE_CHECKING:
-    from .pack import PackIndex
+    from .object_store import BaseObjectStore
+    from .pack import Pack, PackIndex
 
 # Bitmap file signature
 BITMAP_SIGNATURE = b"BITM"
@@ -51,6 +54,11 @@ BITMAP_OPT_HASH_CACHE = 0x4  # Name-hash cache
 BITMAP_OPT_LOOKUP_TABLE = 0x10  # Lookup table for random access
 BITMAP_OPT_PSEUDO_MERGES = 0x20  # Pseudo-merge bitmaps
 
+# EWAH compression constants
+MAX_LITERAL_WORDS = 0x7FFFFFFF  # Maximum literal words in EWAH format (31 bits)
+MAX_XOR_OFFSET = 160  # Maximum distance to search for XOR compression base
+DEFAULT_COMMIT_INTERVAL = 100  # Default interval for commit selection
+
 
 def _encode_ewah_words(words: list[int]) -> list[int]:
     """Encode a list of 64-bit words using EWAH run-length compression.
@@ -79,7 +87,7 @@ def _encode_ewah_words(words: list[int]) -> list[int]:
             while i < len(words) and words[i] != 0 and words[i] != 0xFFFFFFFFFFFFFFFF:
                 literals.append(words[i])
                 i += 1
-                if len(literals) >= 0x7FFFFFFF:  # Max literal count in RLW
+                if len(literals) >= MAX_LITERAL_WORDS:
                     break
 
             # Create RLW with correct bit layout:
@@ -94,7 +102,7 @@ def _encode_ewah_words(words: list[int]) -> list[int]:
             while i < len(words) and words[i] != 0 and words[i] != 0xFFFFFFFFFFFFFFFF:
                 literals.append(words[i])
                 i += 1
-                if len(literals) >= 0x7FFFFFFF:  # Max literal count
+                if len(literals) >= MAX_LITERAL_WORDS:
                     break
 
             # RLW with no run, just literals
@@ -138,7 +146,8 @@ class EWAHBitmap:
         """Decode EWAH compressed bitmap data.
 
         Args:
-            data: Compressed bitmap data (EWAH format with header + words + RLW position)
+            data: Compressed bitmap data (EWAH format with header + words +
+                RLW position)
         """
         f = BytesIO(data)
 
@@ -168,7 +177,7 @@ class EWAHBitmap:
         idx = 0
         while idx < len(words):
             # This is an RLW
-            # Bit layout: [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
+            # Bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
             rlw = words[idx]
             running_bit = rlw & 1
             running_len = (rlw >> 1) & 0xFFFFFFFF
@@ -303,6 +312,23 @@ class EWAHBitmap:
         result.bit_count = max(self.bit_count, other.bit_count)
         return result
 
+    def __sub__(self, other: "EWAHBitmap") -> "EWAHBitmap":
+        """Bitwise subtraction (set difference).
+
+        Returns bits that are in self but not in other.
+        Equivalent to: self & ~other
+
+        Args:
+            other: Bitmap to subtract
+
+        Returns:
+            New bitmap with bits in self but not in other
+        """
+        result = EWAHBitmap()
+        result.bits = self.bits - other.bits
+        result.bit_count = self.bit_count
+        return result
+
     def add(self, bit: int) -> None:
         """Set a bit.
 
@@ -327,7 +353,7 @@ class BitmapEntry:
 
         Args:
             object_pos: Position of object in pack index
-            xor_offset: XOR offset for compression (0-160)
+            xor_offset: XOR offset for compression
             flags: Entry flags
             bitmap: The EWAH bitmap data
         """
@@ -405,7 +431,7 @@ class PackBitmap:
                 # Entry not found in list, return as-is
                 return entry.bitmap
 
-            # XOR offset is how many positions back to look (max 160)
+            # XOR offset is how many positions back to look
             if current_idx >= entry.xor_offset:
                 base_sha, _base_entry = self.entries_list[
                     current_idx - entry.xor_offset
@@ -564,7 +590,7 @@ def read_bitmap_file(f: IO[bytes], pack_index: "PackIndex | None" = None) -> Pac
         entry_flags = flags_bytes[0]
 
         # Read self-describing EWAH bitmap
-        # EWAH format: bit_count (4) + word_count (4) + words (word_count * 8) + rlw_pos (4)
+        # EWAH format: bit_count (4) + word_count (4) + words + rlw_pos (4)
         bit_count_bytes = f.read(4)
         word_count_bytes = f.read(4)
 
@@ -733,3 +759,461 @@ def write_bitmap_file(f: IO[bytes], bitmap: PackBitmap) -> None:
     if bitmap.flags & BITMAP_OPT_HASH_CACHE and bitmap.name_hash_cache:
         for hash_value in bitmap.name_hash_cache:
             f.write(struct.pack(">I", hash_value))
+
+
+def _compute_name_hash(name: bytes) -> int:
+    """Compute the name hash for a tree entry.
+
+    Hash function used to populate the bitmap name-hash cache.
+
+    Args:
+        name: The name of the tree entry
+
+    Returns:
+        32-bit hash value
+    """
+    hash_value = 0
+    for byte in name:
+        hash_value = (hash_value >> 19) | (hash_value << 13)
+        hash_value += byte
+        hash_value &= 0xFFFFFFFF
+    return hash_value
+
+
+def select_bitmap_commits(
+    refs: dict[bytes, bytes],
+    object_store: "BaseObjectStore",
+    commit_interval: int = DEFAULT_COMMIT_INTERVAL,
+) -> list[bytes]:
+    """Select commits for bitmap generation.
+
+    Uses Git's strategy:
+    - All branch and tag tips
+    - Every Nth commit in history
+
+    Args:
+        refs: Dictionary of ref names to commit SHAs
+        object_store: Object store to read commits from
+        commit_interval: Include every Nth commit in history
+
+    Returns:
+        List of commit SHAs to create bitmaps for
+    """
+    selected = set()
+    seen = set()
+
+    # Start with all refs
+    ref_commits = set()
+    for ref_name, sha in refs.items():
+        try:
+            obj = object_store[sha]
+        except KeyError:
+            continue
+        else:
+            # Dereference tags to get to commits
+            while isinstance(obj, Tag):
+                obj = object_store[obj.object[1]]
+            if isinstance(obj, Commit):
+                ref_commits.add(obj.id)
+
+    # Add all ref tips
+    selected.update(ref_commits)
+
+    # Walk the commit graph and select every Nth commit
+    queue = deque(ref_commits)
+    commit_count = 0
+
+    while queue:
+        commit_sha = queue.popleft()
+        if commit_sha in seen:
+            continue
+        seen.add(commit_sha)
+
+        try:
+            obj = object_store[commit_sha]
+            if not isinstance(obj, Commit):
+                continue
+
+            commit_count += 1
+            if commit_count % commit_interval == 0:
+                selected.add(commit_sha)
+
+            # Add parents to queue
+            for parent in obj.parents:
+                if parent not in seen:
+                    queue.append(parent)
+        except KeyError:
+            continue
+
+    return sorted(selected)
+
+
+def build_reachability_bitmap(
+    commit_sha: bytes,
+    sha_to_pos: dict[bytes, int],
+    object_store: "BaseObjectStore",
+) -> EWAHBitmap:
+    """Build a reachability bitmap for a commit.
+
+    The bitmap has a bit set for each object that is reachable from the commit.
+    The bit position corresponds to the object's position in the pack index.
+
+    Args:
+        commit_sha: The commit to build a bitmap for
+        sha_to_pos: Pre-built mapping from SHA to position in pack
+        object_store: Object store to traverse objects
+
+    Returns:
+        EWAH bitmap with bits set for reachable objects
+    """
+    bitmap = EWAHBitmap()
+
+    # Traverse all objects reachable from the commit
+    seen = set()
+    queue = deque([commit_sha])
+
+    while queue:
+        sha = queue.popleft()
+        if sha in seen:
+            continue
+        seen.add(sha)
+
+        # Add this object to the bitmap if it's in the pack
+        if sha in sha_to_pos:
+            bitmap.add(sha_to_pos[sha])
+
+        # Get the object and traverse its references
+        try:
+            obj = object_store[sha]
+
+            if isinstance(obj, Commit):
+                # Add parents and tree
+                queue.append(obj.tree)
+                queue.extend(obj.parents)
+            elif hasattr(obj, "items"):
+                # Tree object - add all entries
+                for item in obj.items():
+                    queue.append(item.sha)
+        except KeyError:
+            # Object not in store, skip it
+            continue
+
+    return bitmap
+
+
+def apply_xor_compression(
+    bitmaps: list[tuple[bytes, EWAHBitmap]],
+    max_xor_offset: int = MAX_XOR_OFFSET,
+) -> list[tuple[bytes, EWAHBitmap, int]]:
+    """Apply XOR compression to bitmaps.
+
+    XOR compression stores some bitmaps as XOR differences from previous bitmaps,
+    reducing storage size when bitmaps are similar.
+
+    Args:
+        bitmaps: List of (commit_sha, bitmap) tuples
+        max_xor_offset: Maximum offset to search for XOR base
+
+    Returns:
+        List of (commit_sha, bitmap, xor_offset) tuples
+    """
+    compressed = []
+
+    for i, (sha, bitmap) in enumerate(bitmaps):
+        best_xor_offset = 0
+        best_size = len(bitmap.encode())
+        best_xor_bitmap = bitmap
+
+        # Try XORing with previous bitmaps within max_xor_offset
+        for offset in range(1, min(i + 1, max_xor_offset + 1)):
+            _prev_sha, prev_bitmap = bitmaps[i - offset]
+            xor_bitmap = bitmap ^ prev_bitmap
+            xor_size = len(xor_bitmap.encode())
+
+            # Use XOR if it reduces size
+            if xor_size < best_size:
+                best_size = xor_size
+                best_xor_offset = offset
+                best_xor_bitmap = xor_bitmap
+
+        compressed.append((sha, best_xor_bitmap, best_xor_offset))
+
+    return compressed
+
+
+def build_type_bitmaps(
+    sha_to_pos: dict[bytes, int],
+    object_store: "BaseObjectStore",
+) -> tuple[EWAHBitmap, EWAHBitmap, EWAHBitmap, EWAHBitmap]:
+    """Build type bitmaps for all objects in a pack.
+
+    Type bitmaps classify objects by type: commit, tree, blob, or tag.
+
+    Args:
+        sha_to_pos: Pre-built mapping from SHA to position in pack
+        object_store: Object store to read object types
+
+    Returns:
+        Tuple of (commit_bitmap, tree_bitmap, blob_bitmap, tag_bitmap)
+    """
+    from .objects import sha_to_hex
+
+    commit_bitmap = EWAHBitmap()
+    tree_bitmap = EWAHBitmap()
+    blob_bitmap = EWAHBitmap()
+    tag_bitmap = EWAHBitmap()
+
+    for sha, pos in sha_to_pos.items():
+        # Pack index returns binary SHA (20 bytes), but object_store expects hex SHA (40 bytes)
+        hex_sha = sha_to_hex(sha) if len(sha) == 20 else sha
+        try:
+            obj = object_store[hex_sha]
+        except KeyError:
+            # Object not in store, skip it
+            continue
+
+        obj_type = obj.type_num
+
+        if obj_type == Commit.type_num:
+            commit_bitmap.add(pos)
+        elif obj_type == Tree.type_num:
+            tree_bitmap.add(pos)
+        elif obj_type == Blob.type_num:
+            blob_bitmap.add(pos)
+        elif obj_type == Tag.type_num:
+            tag_bitmap.add(pos)
+
+    return commit_bitmap, tree_bitmap, blob_bitmap, tag_bitmap
+
+
+def build_name_hash_cache(
+    sha_to_pos: dict[bytes, int],
+    object_store: "BaseObjectStore",
+) -> list[int]:
+    """Build name-hash cache for all objects in a pack.
+
+    The name-hash cache stores a hash of the name for each object,
+    which can speed up path-based operations.
+
+    Args:
+        sha_to_pos: Pre-built mapping from SHA to position in pack
+        object_store: Object store to read objects
+
+    Returns:
+        List of 32-bit hash values, one per object in the pack
+    """
+    from .objects import sha_to_hex
+
+    # Pre-allocate list with correct size
+    num_objects = len(sha_to_pos)
+    name_hashes = [0] * num_objects
+
+    for sha, pos in sha_to_pos.items():
+        # Pack index returns binary SHA (20 bytes), but object_store expects hex SHA (40 bytes)
+        hex_sha = sha_to_hex(sha) if len(sha) == 20 else sha
+        try:
+            obj = object_store[hex_sha]
+        except KeyError:
+            # Object not in store, use zero hash
+            continue
+
+        # For tree entries, use the tree entry name
+        # For commits, use the tree SHA
+        # For other objects, use the object SHA
+        if isinstance(obj, Tree):
+            # Tree object - use the SHA as the name
+            name_hash = _compute_name_hash(sha)
+        elif isinstance(obj, Commit):
+            # Commit - use the tree SHA as the name
+            name_hash = _compute_name_hash(obj.tree)
+        else:
+            # Other objects - use the SHA as the name
+            name_hash = _compute_name_hash(sha)
+
+        name_hashes[pos] = name_hash
+
+    return name_hashes
+
+
+def generate_bitmap(
+    pack_index: "PackIndex",
+    object_store: "BaseObjectStore",
+    refs: dict[bytes, bytes],
+    pack_checksum: bytes,
+    include_hash_cache: bool = True,
+    include_lookup_table: bool = True,
+    commit_interval: int | None = None,
+    progress: Callable[[str], None] | None = None,
+) -> PackBitmap:
+    """Generate a complete bitmap for a pack.
+
+    Args:
+        pack_index: Pack index for the pack
+        object_store: Object store to read objects from
+        refs: Dictionary of ref names to commit SHAs
+        pack_checksum: SHA-1 checksum of the pack file
+        include_hash_cache: Whether to include name-hash cache
+        include_lookup_table: Whether to include lookup table
+        commit_interval: Include every Nth commit in history (None for default)
+        progress: Optional progress reporting callback
+
+    Returns:
+        Complete PackBitmap ready to write to disk
+    """
+    if commit_interval is None:
+        commit_interval = DEFAULT_COMMIT_INTERVAL
+
+    if progress:
+        progress("Building pack index mapping")
+
+    # Build mapping from SHA to position in pack index ONCE
+    # This is used by all subsequent operations and avoids repeated enumeration
+    sha_to_pos = {}
+    for pos, (sha, _offset, _crc32) in enumerate(pack_index.iterentries()):
+        sha_to_pos[sha] = pos
+
+    if progress:
+        progress("Selecting commits for bitmap")
+
+    # Select commits to create bitmaps for
+    selected_commits = select_bitmap_commits(refs, object_store, commit_interval)
+
+    if progress:
+        progress(f"Building bitmaps for {len(selected_commits)} commits")
+
+    # Build reachability bitmaps for selected commits
+    commit_bitmaps = []
+    for i, commit_sha in enumerate(selected_commits):
+        if progress and i % 10 == 0:
+            progress(f"Building bitmap {i + 1}/{len(selected_commits)}")
+
+        bitmap = build_reachability_bitmap(commit_sha, sha_to_pos, object_store)
+        commit_bitmaps.append((commit_sha, bitmap))
+
+    if progress:
+        progress("Applying XOR compression")
+
+    # Apply XOR compression
+    compressed_bitmaps = apply_xor_compression(commit_bitmaps)
+
+    if progress:
+        progress("Building type bitmaps")
+
+    # Build type bitmaps (using pre-built sha_to_pos mapping)
+    commit_type_bitmap, tree_type_bitmap, blob_type_bitmap, tag_type_bitmap = (
+        build_type_bitmaps(sha_to_pos, object_store)
+    )
+
+    # Create PackBitmap
+    flags = BITMAP_OPT_FULL_DAG
+    if include_hash_cache:
+        flags |= BITMAP_OPT_HASH_CACHE
+    if include_lookup_table:
+        flags |= BITMAP_OPT_LOOKUP_TABLE
+
+    pack_bitmap = PackBitmap(version=1, flags=flags)
+    pack_bitmap.pack_checksum = pack_checksum
+    pack_bitmap.commit_bitmap = commit_type_bitmap
+    pack_bitmap.tree_bitmap = tree_type_bitmap
+    pack_bitmap.blob_bitmap = blob_type_bitmap
+    pack_bitmap.tag_bitmap = tag_type_bitmap
+
+    # Add bitmap entries
+    for commit_sha, xor_bitmap, xor_offset in compressed_bitmaps:
+        if commit_sha not in sha_to_pos:
+            continue
+
+        entry = BitmapEntry(
+            object_pos=sha_to_pos[commit_sha],
+            xor_offset=xor_offset,
+            flags=0,
+            bitmap=xor_bitmap,
+        )
+        pack_bitmap.entries[commit_sha] = entry
+        pack_bitmap.entries_list.append((commit_sha, entry))
+
+    # Build optional name-hash cache (using pre-built sha_to_pos mapping)
+    if include_hash_cache:
+        if progress:
+            progress("Building name-hash cache")
+        pack_bitmap.name_hash_cache = build_name_hash_cache(sha_to_pos, object_store)
+
+    # Build optional lookup table
+    if include_lookup_table:
+        if progress:
+            progress("Building lookup table")
+        # The lookup table is built automatically from the entries
+        # For now, we'll leave it as None and let the write function handle it
+        # TODO: Implement lookup table generation if needed
+        pack_bitmap.lookup_table = None
+
+    if progress:
+        progress("Bitmap generation complete")
+
+    return pack_bitmap
+
+
+def find_commit_bitmaps(
+    commit_shas: set[bytes], packs: Iterable["Pack"]
+) -> dict[bytes, tuple["Pack", "PackBitmap", dict[bytes, int]]]:
+    """Find which packs have bitmaps for the given commits.
+
+    Args:
+        commit_shas: Set of commit SHAs to look for
+        packs: Iterable of Pack objects to search
+
+    Returns:
+        Dict mapping commit SHA to (pack, pack_bitmap, position) tuple
+    """
+    result = {}
+    remaining = set(commit_shas)
+
+    for pack in packs:
+        if not remaining:
+            break
+
+        pack_bitmap = pack.bitmap
+        if not pack_bitmap:
+            # No bitmap for this pack
+            continue
+
+        # Build SHA to position mapping for this pack
+        sha_to_pos = {}
+        for pos, (sha, _offset, _crc32) in enumerate(pack.index.iterentries()):
+            sha_to_pos[sha] = pos
+
+        # Check which commits have bitmaps
+        for commit_sha in list(remaining):
+            if pack_bitmap.has_commit(commit_sha):
+                if commit_sha in sha_to_pos:
+                    result[commit_sha] = (pack, pack_bitmap, sha_to_pos)
+                    remaining.remove(commit_sha)
+
+    return result
+
+
+def bitmap_to_object_shas(
+    bitmap: EWAHBitmap,
+    pack_index: "PackIndex",
+    type_filter: EWAHBitmap | None = None,
+) -> set[bytes]:
+    """Convert a bitmap to a set of object SHAs.
+
+    Args:
+        bitmap: The EWAH bitmap with set bits for objects
+        pack_index: Pack index to map positions to SHAs
+        type_filter: Optional type bitmap to filter results (e.g., commits only)
+
+    Returns:
+        Set of object SHAs
+    """
+    result = set()
+
+    for pos, (sha, _offset, _crc32) in enumerate(pack_index.iterentries()):
+        # Check if this position is in the bitmap
+        if pos in bitmap:
+            # Apply type filter if provided
+            if type_filter is None or pos in type_filter:
+                result.add(sha)
+
+    return result
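
A rough illustration of the EWAHBitmap set operations added above; this sketch only exercises the in-memory API, and the bit positions are arbitrary placeholders rather than real pack-index positions:

    from dulwich.bitmap import EWAHBitmap

    a = EWAHBitmap()
    for pos in (0, 3, 64, 65):  # arbitrary object positions in a pack index
        a.add(pos)

    b = EWAHBitmap()
    b.add(3)
    b.add(64)

    union = a | b   # objects reachable from either commit
    only_a = a - b  # set difference, via the __sub__ added above
    delta = a ^ b   # XOR delta, the form used for on-disk compression

    assert 0 in only_a and 3 not in only_a
    encoded = a.encode()  # EWAH bytes: bit_count + word_count + words + RLW position

apply_xor_compression() above applies essentially this XOR step across a list of commit bitmaps, keeping the XORed form only when its encoding is smaller than the plain one.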

+ 7 - 2
dulwich/cli.py

@@ -2830,8 +2830,13 @@ class cmd_repack(Command):
             args: Command line arguments
         """
         parser = argparse.ArgumentParser()
-        parser.parse_args(args)
-        porcelain.repack(".")
+        parser.add_argument(
+            "--write-bitmap-index",
+            action="store_true",
+            help="write a bitmap index for packs",
+        )
+        parsed_args = parser.parse_args(args)
+        porcelain.repack(".", write_bitmaps=parsed_args.write_bitmap_index)
 
 
 class cmd_reflog(Command):

+ 327 - 10
dulwich/object_store.py

@@ -82,9 +82,11 @@ from .protocol import DEPTH_INFINITE
 from .refs import PEELED_TAG_SUFFIX, Ref
 
 if TYPE_CHECKING:
+    from .bitmap import EWAHBitmap
     from .commit_graph import CommitGraph
     from .config import Config
     from .diff_tree import RenameDetector
+    from .pack import Pack
 
 
 class GraphWalker(Protocol):
@@ -359,13 +361,19 @@ class BaseObjectStore:
         """
         raise NotImplementedError(self.add_objects)
 
-    def get_reachability_provider(self) -> ObjectReachabilityProvider:
+    def get_reachability_provider(
+        self, prefer_bitmaps: bool = True
+    ) -> ObjectReachabilityProvider:
         """Get a reachability provider for this object store.
 
         Returns an ObjectReachabilityProvider that can efficiently compute
         object reachability queries. Subclasses can override this to provide
         optimized implementations (e.g., using bitmap indexes).
 
+        Args:
+            prefer_bitmaps: Whether to prefer bitmap-based reachability if
+                available.
+
         Returns:
           ObjectReachabilityProvider instance
         """
@@ -481,9 +489,12 @@ class BaseObjectStore:
 
         Args:
           shas: Iterable of object SHAs to retrieve
-          include_comp: Whether to include compressed data (ignored in base implementation)
-          allow_missing: If True, skip missing objects; if False, raise KeyError
-          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation)
+          include_comp: Whether to include compressed data (ignored in base
+            implementation)
+          allow_missing: If True, skip missing objects; if False, raise
+            KeyError
+          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
+            base implementation)
 
         Returns:
           Iterator of UnpackedObject instances
@@ -795,6 +806,39 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         self.pack_threads = pack_threads
         self.pack_big_file_threshold = pack_big_file_threshold
 
+    def get_reachability_provider(
+        self,
+        prefer_bitmaps: bool = True,
+    ) -> ObjectReachabilityProvider:
+        """Get the best reachability provider for the object store.
+
+        Args:
+          object_store: The object store to query
+          prefer_bitmaps: Whether to use bitmaps if available
+
+        Returns:
+          ObjectReachabilityProvider implementation (either bitmap-accelerated
+          or graph traversal)
+        """
+        if prefer_bitmaps:
+            # Check if any packs have bitmaps
+            has_bitmap = False
+            for pack in self.packs:
+                try:
+                    # Try to access bitmap property
+                    if pack.bitmap is not None:
+                        has_bitmap = True
+                        break
+                except FileNotFoundError:
+                    # Bitmap file doesn't exist for this pack
+                    continue
+
+            if has_bitmap:
+                return BitmapReachability(self)
+
+        # Fall back to graph traversal
+        return GraphTraversalReachability(self)
+
     def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
         """Add a new pack to this object store."""
         raise NotImplementedError(self.add_pack)
@@ -1034,6 +1078,38 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         self._update_pack_cache()
         return len(objects)
 
+    def generate_pack_bitmaps(
+        self,
+        refs: dict[bytes, bytes],
+        *,
+        commit_interval: int | None = None,
+        progress: Callable[[str], None] | None = None,
+    ) -> int:
+        """Generate bitmap indexes for all packs that don't have them.
+
+        This generates .bitmap files for packfiles, enabling fast reachability
+        queries. Equivalent to the bitmap generation part of 'git repack -b'.
+
+        Args:
+          refs: Dictionary of ref names to commit SHAs
+          commit_interval: Include every Nth commit in bitmap index (None for default)
+          progress: Optional progress reporting callback
+
+        Returns:
+          Number of packs that now have a bitmap
+        """
+        count = 0
+        for pack in self.packs:
+            pack.ensure_bitmap(
+                self, refs, commit_interval=commit_interval, progress=progress
+            )
+            count += 1
+
+        # Update cache to pick up new bitmaps
+        self._update_pack_cache()
+
+        return count
+
     def __iter__(self) -> Iterator[bytes]:
         """Iterate over the SHAs that are present in this store."""
         self._update_pack_cache()
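
A minimal sketch of driving this from a repository, mirroring the GeneratePackBitmapsTests added later in this diff; the repository path is a placeholder assumption:

    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")  # placeholder path (assumption)
    refs = repo.refs.as_dict()    # ref name -> commit SHA, as porcelain.repack passes below
    count = repo.object_store.generate_pack_bitmaps(refs, progress=print)
    print(f"bitmaps ensured for {count} pack(s)")
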
@@ -1269,6 +1345,9 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_threads: int | None = None,
         pack_big_file_threshold: int | None = None,
         fsync_object_files: bool = False,
+        pack_write_bitmaps: bool = False,
+        pack_write_bitmap_hash_cache: bool = True,
+        pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         dir_mode: int | None = None,
     ) -> None:
@@ -1286,6 +1365,9 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_threads: number of threads for pack operations
           pack_big_file_threshold: threshold for treating files as big
           fsync_object_files: whether to fsync object files for durability
+          pack_write_bitmaps: whether to write bitmap indexes for packs
+          pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
+          pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
         """
@@ -1306,6 +1388,9 @@ class DiskObjectStore(PackBasedObjectStore):
         self.pack_compression_level = pack_compression_level
         self.pack_index_version = pack_index_version
         self.fsync_object_files = fsync_object_files
+        self.pack_write_bitmaps = pack_write_bitmaps
+        self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
+        self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
         self.file_mode = file_mode
         self.dir_mode = dir_mode
 
@@ -1402,6 +1487,20 @@ class DiskObjectStore(PackBasedObjectStore):
         # Read core.fsyncObjectFiles setting
         fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
 
+        # Read bitmap settings
+        pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
+        pack_write_bitmap_hash_cache = config.get_boolean(
+            (b"pack",), b"writeBitmapHashCache", True
+        )
+        pack_write_bitmap_lookup_table = config.get_boolean(
+            (b"pack",), b"writeBitmapLookupTable", True
+        )
+        # Also check repack.writeBitmaps for backwards compatibility
+        if not pack_write_bitmaps:
+            pack_write_bitmaps = config.get_boolean(
+                (b"repack",), b"writeBitmaps", False
+            )
+
         instance = cls(
             path,
             loose_compression_level=loose_compression_level,
@@ -1414,6 +1513,9 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_threads=pack_threads,
             pack_big_file_threshold=pack_big_file_threshold,
             fsync_object_files=fsync_object_files,
+            pack_write_bitmaps=pack_write_bitmaps,
+            pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
+            pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             dir_mode=dir_mode,
         )
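
A sketch of enabling bitmap writing through configuration, using the same keys that from_config reads above; the repository path is a placeholder assumption:

    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")  # placeholder path (assumption)
    config = repo.get_config()
    config.set((b"pack",), b"writeBitmaps", b"true")          # read by from_config above
    config.set((b"pack",), b"writeBitmapHashCache", b"true")
    config.write_to_path()

With this set, DiskObjectStore.from_config() constructs the store with pack_write_bitmaps=True, so _complete_pack() (changed below) writes a .bitmap next to newly added packs whenever refs are supplied.
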
@@ -1631,6 +1733,7 @@ class DiskObjectStore(PackBasedObjectStore):
         num_objects: int,
         indexer: PackIndexer,
         progress: Callable[..., None] | None = None,
+        refs: dict[bytes, bytes] | None = None,
     ) -> Pack:
         """Move a specific file containing a pack into the pack directory.
 
@@ -1643,6 +1746,7 @@ class DiskObjectStore(PackBasedObjectStore):
           num_objects: Number of objects in the pack.
           indexer: A PackIndexer for indexing the pack.
           progress: Optional progress reporting function.
+          refs: Optional dictionary of refs for bitmap generation.
         """
         entries = []
         for i, entry in enumerate(indexer):
@@ -1698,6 +1802,40 @@ class DiskObjectStore(PackBasedObjectStore):
                 index_file, entries, pack_sha, version=self.pack_index_version
             )
 
+        # Generate bitmap if configured and refs are available
+        if self.pack_write_bitmaps and refs:
+            from .bitmap import generate_bitmap, write_bitmap
+            from .pack import load_pack_index_file
+
+            if progress:
+                progress("Generating bitmap index\r".encode("ascii"))
+
+            # Load the index we just wrote
+            with open(target_index_path, "rb") as idx_file:
+                pack_index = load_pack_index_file(
+                    os.path.basename(target_index_path), idx_file
+                )
+
+            # Generate the bitmap
+            bitmap = generate_bitmap(
+                pack_index=pack_index,
+                object_store=self,
+                refs=refs,
+                pack_checksum=pack_sha,
+                include_hash_cache=self.pack_write_bitmap_hash_cache,
+                include_lookup_table=self.pack_write_bitmap_lookup_table,
+                progress=lambda msg: progress(msg.encode("ascii"))
+                if progress and isinstance(msg, str)
+                else None,
+            )
+
+            # Write the bitmap
+            target_bitmap_path = pack_base_name + ".bitmap"
+            write_bitmap(target_bitmap_path, bitmap)
+
+            if progress:
+                progress("Bitmap index written\r".encode("ascii"))
+
         # Add the pack to the store and return it.
         final_pack = Pack(
             pack_base_name,
@@ -2351,8 +2489,11 @@ class MissingObjectFinder:
             have_commits, exclude=None, shallow=shallow
         )
         # all_missing - complete set of commits between haves and wants
-        # common - commits from all_ancestors we hit into while
-        # traversing parent hierarchy of wants
+        # common_commits - boundary commits directly encountered when traversing wants
+        # We use _collect_ancestors here because we need the exact boundary behavior:
+        # commits that are in all_ancestors and directly reachable from wants,
+        # but we don't traverse past them. This is hard to express with the
+        # reachability abstraction alone.
         missing_commits, common_commits = _collect_ancestors(
             object_store,
             want_commits,
@@ -2360,6 +2501,7 @@ class MissingObjectFinder:
             shallow=frozenset(shallow),
             get_parents=self._get_parents,
         )
+
         self.remote_has: set[bytes] = set()
         # Now, fill sha_done with commits and revisions of
         # files and directories known to be both locally
@@ -2369,8 +2511,10 @@ class MissingObjectFinder:
             self.remote_has.add(h)
             cmt = object_store[h]
             assert isinstance(cmt, Commit)
+            # Get tree objects for this commit
             tree_objects = reachability.get_tree_objects([cmt.tree])
             self.remote_has.update(tree_objects)
+
         # record tags we have as visited, too
         for t in have_tags:
             self.remote_has.add(t)
@@ -3079,9 +3223,6 @@ def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
     return unpeeled, obj
 
 
-# ObjectReachabilityProvider implementation
-
-
 class GraphTraversalReachability:
     """Naive graph traversal implementation of ObjectReachabilityProvider.
 
@@ -3118,7 +3259,6 @@ class GraphTraversalReachability:
         """
         exclude_set = frozenset(exclude) if exclude else frozenset()
         shallow_set = frozenset(shallow) if shallow else frozenset()
-
         commits, _bases = _collect_ancestors(
             self.store, heads, exclude_set, shallow_set
         )
@@ -3180,3 +3320,180 @@ class GraphTraversalReachability:
             result -= exclude_objects
 
         return result
+
+
+class BitmapReachability:
+    """Bitmap-accelerated implementation of ObjectReachabilityProvider.
+
+    This implementation uses packfile bitmap indexes where available to
+    accelerate reachability queries. Falls back to graph traversal when
+    bitmaps don't cover the requested commits.
+    """
+
+    def __init__(self, object_store: "PackBasedObjectStore") -> None:
+        """Initialize the bitmap provider.
+
+        Args:
+          object_store: Pack-based object store with bitmap support
+        """
+        self.store = object_store
+        # Fallback to graph traversal for operations not yet optimized
+        self._fallback = GraphTraversalReachability(object_store)
+
+    def _combine_commit_bitmaps(
+        self,
+        commit_shas: set[bytes],
+        exclude_shas: set[bytes] | None = None,
+    ) -> tuple["EWAHBitmap", "Pack"] | None:
+        """Combine bitmaps for multiple commits using OR, with optional exclusion.
+
+        Args:
+          commit_shas: Set of commit SHAs to combine
+          exclude_shas: Optional set of commit SHAs to exclude
+
+        Returns:
+          Tuple of (combined_bitmap, pack) or None if bitmaps unavailable
+        """
+        from .bitmap import find_commit_bitmaps
+
+        # Find bitmaps for the commits
+        commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)
+
+        # If we can't find bitmaps for all commits, return None
+        if len(commit_bitmaps) < len(commit_shas):
+            return None
+
+        # Combine bitmaps using OR
+        combined_bitmap = None
+        result_pack = None
+
+        for commit_sha in commit_shas:
+            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
+            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+            if commit_bitmap is None:
+                return None
+
+            if combined_bitmap is None:
+                combined_bitmap = commit_bitmap
+                result_pack = pack
+            elif pack == result_pack:
+                # Same pack, can OR directly
+                combined_bitmap = combined_bitmap | commit_bitmap
+            else:
+                # Different packs, can't combine
+                return None
+
+        # Handle exclusions if provided
+        if exclude_shas and result_pack and combined_bitmap:
+            exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])
+
+            if len(exclude_bitmaps) == len(exclude_shas):
+                # All excludes have bitmaps, compute exclusion
+                exclude_combined = None
+
+                for commit_sha in exclude_shas:
+                    _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
+                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+                    if exclude_bitmap is None:
+                        break
+
+                    if exclude_combined is None:
+                        exclude_combined = exclude_bitmap
+                    else:
+                        exclude_combined = exclude_combined | exclude_bitmap
+
+                # Subtract excludes using set difference
+                if exclude_combined:
+                    combined_bitmap = combined_bitmap - exclude_combined
+
+        if combined_bitmap and result_pack:
+            return (combined_bitmap, result_pack)
+        return None
+
+    def get_reachable_commits(
+        self,
+        heads: Iterable[bytes],
+        exclude: Iterable[bytes] | None = None,
+        shallow: Set[bytes] | None = None,
+    ) -> set[bytes]:
+        """Get all commits reachable from heads using bitmaps where possible.
+
+        Args:
+          heads: Starting commit SHAs
+          exclude: Commit SHAs to exclude (and their ancestors)
+          shallow: Set of shallow commit boundaries
+
+        Returns:
+          Set of commit SHAs reachable from heads but not from exclude
+        """
+        from .bitmap import bitmap_to_object_shas
+
+        # If shallow is specified, fall back to graph traversal
+        # (bitmaps don't support shallow boundaries well)
+        if shallow:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        heads_set = set(heads)
+        exclude_set = set(exclude) if exclude else None
+
+        # Try to combine bitmaps
+        result = self._combine_commit_bitmaps(heads_set, exclude_set)
+        if result is None:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        combined_bitmap, result_pack = result
+
+        # Convert bitmap to commit SHAs, filtering for commits only
+        pack_bitmap = result_pack.bitmap
+        if pack_bitmap is None:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+        commit_type_filter = pack_bitmap.commit_bitmap
+        return bitmap_to_object_shas(
+            combined_bitmap, result_pack.index, commit_type_filter
+        )
+
+    def get_tree_objects(
+        self,
+        tree_shas: Iterable[bytes],
+    ) -> set[bytes]:
+        """Get all trees and blobs reachable from the given trees.
+
+        Args:
+          tree_shas: Starting tree SHAs
+
+        Returns:
+          Set of tree and blob SHAs
+        """
+        # Tree traversal doesn't benefit much from bitmaps, use fallback
+        return self._fallback.get_tree_objects(tree_shas)
+
+    def get_reachable_objects(
+        self,
+        commits: Iterable[bytes],
+        exclude_commits: Iterable[bytes] | None = None,
+    ) -> set[bytes]:
+        """Get all objects reachable from commits using bitmaps.
+
+        Args:
+          commits: Starting commit SHAs
+          exclude_commits: Commits whose objects should be excluded
+
+        Returns:
+          Set of all object SHAs (commits, trees, blobs)
+        """
+        from .bitmap import bitmap_to_object_shas
+
+        commits_set = set(commits)
+        exclude_set = set(exclude_commits) if exclude_commits else None
+
+        # Try to combine bitmaps
+        result = self._combine_commit_bitmaps(commits_set, exclude_set)
+        if result is None:
+            return self._fallback.get_reachable_objects(commits, exclude_commits)
+
+        combined_bitmap, result_pack = result
+
+        # Convert bitmap to all object SHAs (no type filter)
+        return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)
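
A small usage sketch tying the two providers together; the repository path and ref name are placeholder assumptions, and the call pattern follows the tests added in tests/test_bitmap.py:

    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")  # placeholder path (assumption)
    store = repo.object_store

    # Returns BitmapReachability if any pack has a .bitmap file,
    # otherwise GraphTraversalReachability.
    provider = store.get_reachability_provider(prefer_bitmaps=True)

    head = repo.refs[b"refs/heads/master"]  # assumes this branch exists
    commits = provider.get_reachable_commits([head], exclude=None, shallow=None)
    objects = provider.get_reachable_objects([head], exclude_commits=None)
    print(len(commits), "commits,", len(objects), "objects reachable")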

+ 53 - 0
dulwich/pack.py

@@ -76,6 +76,7 @@ if TYPE_CHECKING:
 
     from .bitmap import PackBitmap
     from .commit_graph import CommitGraph
+    from .object_store import BaseObjectStore
 
 # For some reason the above try, except fails to set has_mmap = False for plan9
 if sys.platform == "Plan9":
@@ -3531,6 +3532,58 @@ class Pack:
             self._bitmap = read_bitmap(self._bitmap_path, pack_index=self.index)
         return self._bitmap
 
+    def ensure_bitmap(
+        self,
+        object_store: "BaseObjectStore",
+        refs: dict[bytes, bytes],
+        commit_interval: int | None = None,
+        progress: Callable[[str], None] | None = None,
+    ) -> "PackBitmap":
+        """Ensure a bitmap exists for this pack, generating one if needed.
+
+        Args:
+          object_store: Object store to read objects from
+          refs: Dictionary of ref names to commit SHAs
+          commit_interval: Include every Nth commit in bitmap index
+          progress: Optional progress reporting callback
+
+        Returns:
+          PackBitmap instance (either existing or newly generated)
+        """
+        from .bitmap import generate_bitmap, write_bitmap
+
+        # Check if bitmap already exists
+        try:
+            existing = self.bitmap
+            if existing is not None:
+                return existing
+        except FileNotFoundError:
+            pass  # No bitmap, we'll generate one
+
+        # Generate new bitmap
+        if progress:
+            progress(f"Generating bitmap for {self.name().decode('utf-8')}...\n")
+
+        pack_bitmap = generate_bitmap(
+            self.index,
+            object_store,
+            refs,
+            self.get_stored_checksum(),
+            commit_interval=commit_interval,
+            progress=progress,
+        )
+
+        # Write bitmap file
+        write_bitmap(self._bitmap_path, pack_bitmap)
+
+        if progress:
+            progress(f"Wrote {self._bitmap_path}\n")
+
+        # Update cached bitmap
+        self._bitmap = pack_bitmap
+
+        return pack_bitmap
+
     def close(self) -> None:
         """Close the pack file and index."""
         if self._data is not None:
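
A brief sketch of the per-pack entry point, following the PackEnsureBitmapTests added in this diff; the repository path is a placeholder assumption:

    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")  # placeholder path (assumption)
    store = repo.object_store
    refs = repo.refs.as_dict()

    for pack in store.packs:
        # Writes <pack>.bitmap only if one does not already exist.
        bitmap = pack.ensure_bitmap(store, refs, commit_interval=100, progress=print)
        print(pack.name(), len(bitmap.entries), "bitmapped commits")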

+ 6 - 1
dulwich/porcelain.py

@@ -4818,16 +4818,21 @@ def ls_remote(
     )
 
 
-def repack(repo: RepoPath) -> None:
+def repack(repo: RepoPath, write_bitmaps: bool = False) -> None:
     """Repack loose files in a repository.
 
     Currently this only packs loose objects.
 
     Args:
       repo: Path to the repository
+      write_bitmaps: Whether to write bitmap indexes for packs
     """
     with open_repo_closing(repo) as r:
         r.object_store.pack_loose_objects()
+        if write_bitmaps:
+            # Update pack cache to pick up newly created packs
+            r.object_store._update_pack_cache()
+            r.object_store.generate_pack_bitmaps(r.refs.as_dict())
 
 
 def pack_objects(
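
The porcelain-level equivalent of repack --write-bitmap-index, matching the new test in tests/test_porcelain.py; the repository path is a placeholder assumption:

    import os

    from dulwich import porcelain

    repo_path = "/path/to/repo"  # placeholder path (assumption)
    porcelain.repack(repo_path, write_bitmaps=True)

    pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
    print([f for f in os.listdir(pack_dir) if f.endswith(".bitmap")])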

+ 426 - 0
tests/test_bitmap.py

@@ -22,6 +22,7 @@
 """Tests for bitmap support."""
 
 import os
+import shutil
 import tempfile
 import unittest
 from io import BytesIO
@@ -39,6 +40,7 @@ from dulwich.bitmap import (
     read_bitmap_file,
     write_bitmap_file,
 )
+from dulwich.object_store import BitmapReachability, GraphTraversalReachability
 
 
 class EWAHCompressionTests(unittest.TestCase):
@@ -903,3 +905,427 @@ class BitmapConfigTests(unittest.TestCase):
         config = ConfigFile()
         config.set((b"pack",), b"useBitmapIndex", b"false")
         self.assertFalse(config.get_boolean((b"pack",), b"useBitmapIndex", True))
+
+
+class ReachabilityProviderTests(unittest.TestCase):
+    """Tests for ObjectReachabilityProvider implementations."""
+
+    def setUp(self):
+        """Set up test repository with commits."""
+        from dulwich.object_store import DiskObjectStore
+        from dulwich.objects import Blob, Commit, Tree
+
+        self.test_dir = tempfile.mkdtemp()
+        self.store = DiskObjectStore(self.test_dir)
+
+        # Create a simple commit history:
+        # commit1 -> commit2 -> commit3
+        #         \-> commit4
+
+        # Create blob and tree
+        self.blob1 = Blob.from_string(b"test content 1")
+        self.store.add_object(self.blob1)
+
+        self.blob2 = Blob.from_string(b"test content 2")
+        self.store.add_object(self.blob2)
+
+        self.tree1 = Tree()
+        self.tree1[b"file1.txt"] = (0o100644, self.blob1.id)
+        self.store.add_object(self.tree1)
+
+        self.tree2 = Tree()
+        self.tree2[b"file1.txt"] = (0o100644, self.blob1.id)
+        self.tree2[b"file2.txt"] = (0o100644, self.blob2.id)
+        self.store.add_object(self.tree2)
+
+        # Create commit1 (root)
+        self.commit1 = Commit()
+        self.commit1.tree = self.tree1.id
+        self.commit1.message = b"First commit"
+        self.commit1.author = self.commit1.committer = b"Test <test@example.com>"
+        self.commit1.author_time = self.commit1.commit_time = 1234567890
+        self.commit1.author_timezone = self.commit1.commit_timezone = 0
+        self.store.add_object(self.commit1)
+
+        # Create commit2 (child of commit1)
+        self.commit2 = Commit()
+        self.commit2.tree = self.tree1.id
+        self.commit2.parents = [self.commit1.id]
+        self.commit2.message = b"Second commit"
+        self.commit2.author = self.commit2.committer = b"Test <test@example.com>"
+        self.commit2.author_time = self.commit2.commit_time = 1234567891
+        self.commit2.author_timezone = self.commit2.commit_timezone = 0
+        self.store.add_object(self.commit2)
+
+        # Create commit3 (child of commit2)
+        self.commit3 = Commit()
+        self.commit3.tree = self.tree2.id
+        self.commit3.parents = [self.commit2.id]
+        self.commit3.message = b"Third commit"
+        self.commit3.author = self.commit3.committer = b"Test <test@example.com>"
+        self.commit3.author_time = self.commit3.commit_time = 1234567892
+        self.commit3.author_timezone = self.commit3.commit_timezone = 0
+        self.store.add_object(self.commit3)
+
+        # Create commit4 (child of commit1, creates a branch)
+        self.commit4 = Commit()
+        self.commit4.tree = self.tree2.id
+        self.commit4.parents = [self.commit1.id]
+        self.commit4.message = b"Fourth commit"
+        self.commit4.author = self.commit4.committer = b"Test <test@example.com>"
+        self.commit4.author_time = self.commit4.commit_time = 1234567893
+        self.commit4.author_timezone = self.commit4.commit_timezone = 0
+        self.store.add_object(self.commit4)
+
+    def tearDown(self):
+        """Clean up test directory."""
+        # Close store to release file handles on Windows
+        self.store.close()
+        shutil.rmtree(self.test_dir)
+
+    def test_graph_traversal_reachability_single_commit(self):
+        """Test GraphTraversalReachability with single commit."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get reachable commits from commit1
+        reachable = provider.get_reachable_commits(
+            [self.commit1.id], exclude=None, shallow=None
+        )
+
+        # Should only include commit1
+        self.assertEqual({self.commit1.id}, reachable)
+
+    def test_graph_traversal_reachability_linear_history(self):
+        """Test GraphTraversalReachability with linear history."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get reachable commits from commit3
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=None, shallow=None
+        )
+
+        # Should include commit3, commit2, and commit1
+        expected = {self.commit1.id, self.commit2.id, self.commit3.id}
+        self.assertEqual(expected, reachable)
+
+    def test_graph_traversal_reachability_with_exclusion(self):
+        """Test GraphTraversalReachability with exclusion."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get commits reachable from commit3 but not from commit1
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=[self.commit1.id], shallow=None
+        )
+
+        # Should include commit3 and commit2, but not commit1
+        expected = {self.commit2.id, self.commit3.id}
+        self.assertEqual(expected, reachable)
+
+    def test_graph_traversal_reachability_branching(self):
+        """Test GraphTraversalReachability with branching history."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get reachable commits from both commit3 and commit4
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id, self.commit4.id], exclude=None, shallow=None
+        )
+
+        # Should include all commits
+        expected = {self.commit1.id, self.commit2.id, self.commit3.id, self.commit4.id}
+        self.assertEqual(expected, reachable)
+
+    def test_graph_traversal_reachable_objects(self):
+        """Test GraphTraversalReachability.get_reachable_objects()."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get all objects reachable from commit3
+        reachable = provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=None
+        )
+
+        # Should include commit3, blob1, and blob2 (but not tree objects themselves)
+        self.assertIn(self.commit3.id, reachable)
+        self.assertIn(self.blob1.id, reachable)
+        self.assertIn(self.blob2.id, reachable)
+        # Verify at least 3 objects
+        self.assertGreaterEqual(len(reachable), 3)
+
+    def test_graph_traversal_reachable_objects_with_exclusion(self):
+        """Test GraphTraversalReachability.get_reachable_objects() with exclusion."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get objects reachable from commit3 but not from commit2
+        reachable = provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=[self.commit2.id]
+        )
+
+        # commit2 uses tree1 (which has blob1), commit3 uses tree2 (which has blob1 + blob2)
+        # So should include commit3 and blob2 (new in commit3)
+        # blob1 should be excluded because it's in tree1 (reachable from commit2)
+        self.assertIn(self.commit3.id, reachable)
+        self.assertIn(self.blob2.id, reachable)
+
+    def test_get_reachability_provider_without_bitmaps(self):
+        """Test get_reachability_provider returns GraphTraversalReachability when no bitmaps."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = self.store.get_reachability_provider()
+
+        # Should return GraphTraversalReachability when no bitmaps available
+        self.assertIsInstance(provider, GraphTraversalReachability)
+
+    def test_get_reachability_provider_prefer_bitmaps_false(self):
+        """Test get_reachability_provider with prefer_bitmaps=False."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = self.store.get_reachability_provider(prefer_bitmaps=False)
+
+        # Should return GraphTraversalReachability when prefer_bitmaps=False
+        self.assertIsInstance(provider, GraphTraversalReachability)
+
+    def test_bitmap_reachability_fallback_without_bitmaps(self):
+        """Test BitmapReachability falls back to graph traversal without bitmaps."""
+        provider = BitmapReachability(self.store)
+
+        # Without bitmaps, should fall back to graph traversal
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=None, shallow=None
+        )
+
+        # Should still work via fallback
+        expected = {self.commit1.id, self.commit2.id, self.commit3.id}
+        self.assertEqual(expected, reachable)
+
+    def test_bitmap_reachability_fallback_with_shallow(self):
+        """Test BitmapReachability falls back for shallow clones."""
+        provider = BitmapReachability(self.store)
+
+        # With shallow boundary, should fall back to graph traversal
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=None, shallow={self.commit2.id}
+        )
+
+        # Should include commit3 and commit2 (shallow boundary includes boundary commit)
+        # but not commit1 (beyond shallow boundary)
+        self.assertEqual({self.commit2.id, self.commit3.id}, reachable)
+
+    def test_reachability_provider_protocol(self):
+        """Test that both providers implement the same interface."""
+        graph_provider = GraphTraversalReachability(self.store)
+        bitmap_provider = BitmapReachability(self.store)
+
+        # Both should have the same methods
+        for method in [
+            "get_reachable_commits",
+            "get_reachable_objects",
+            "get_tree_objects",
+        ]:
+            self.assertTrue(hasattr(graph_provider, method))
+            self.assertTrue(hasattr(bitmap_provider, method))
+
+    def test_graph_traversal_vs_bitmap_consistency(self):
+        """Test that GraphTraversalReachability and BitmapReachability produce same results."""
+        graph_provider = GraphTraversalReachability(self.store)
+        bitmap_provider = BitmapReachability(self.store)  # Will use fallback
+
+        # Test get_reachable_commits
+        graph_commits = graph_provider.get_reachable_commits(
+            [self.commit3.id], exclude=[self.commit1.id], shallow=None
+        )
+        bitmap_commits = bitmap_provider.get_reachable_commits(
+            [self.commit3.id], exclude=[self.commit1.id], shallow=None
+        )
+        self.assertEqual(graph_commits, bitmap_commits)
+
+        # Test get_reachable_objects
+        graph_objects = graph_provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=None
+        )
+        bitmap_objects = bitmap_provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=None
+        )
+        self.assertEqual(graph_objects, bitmap_objects)
+
+
+class PackEnsureBitmapTests(unittest.TestCase):
+    """Tests for Pack.ensure_bitmap() method."""
+
+    def setUp(self):
+        """Set up test repository with a pack."""
+        from dulwich.object_store import DiskObjectStore
+        from dulwich.objects import Blob, Commit, Tree
+
+        self.temp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.temp_dir)
+
+        # Create pack directory
+        os.makedirs(os.path.join(self.temp_dir, "pack"))
+
+        self.store = DiskObjectStore(self.temp_dir)
+        # Close store before cleanup to release file handles on Windows
+        self.addCleanup(self.store.close)
+
+        # Create test objects
+        self.blob = Blob.from_string(b"test content")
+        self.store.add_object(self.blob)
+
+        self.tree = Tree()
+        self.tree.add(b"file.txt", 0o100644, self.blob.id)
+        self.store.add_object(self.tree)
+
+        self.commit = Commit()
+        self.commit.tree = self.tree.id
+        self.commit.author = self.commit.committer = b"Test <test@example.com>"
+        self.commit.author_time = self.commit.commit_time = 1234567890
+        self.commit.author_timezone = self.commit.commit_timezone = 0
+        self.commit.message = b"Test commit"
+        self.store.add_object(self.commit)
+
+        # Repack to create a pack
+        self.store.repack()
+        self.pack = self.store.packs[0]
+
+    def test_ensure_bitmap_creates_bitmap(self):
+        """Test that ensure_bitmap creates a bitmap file."""
+        # Initially no bitmap
+        self.assertFalse(os.path.exists(self.pack._bitmap_path))
+
+        # Ensure bitmap with commit_interval=1 to ensure our single commit is selected
+        refs = {b"refs/heads/master": self.commit.id}
+        bitmap = self.pack.ensure_bitmap(self.store, refs, commit_interval=1)
+
+        # Bitmap should now exist
+        self.assertIsNotNone(bitmap)
+        self.assertTrue(os.path.exists(self.pack._bitmap_path))
+        # Verify it's a PackBitmap instance
+        from dulwich.bitmap import PackBitmap
+
+        self.assertIsInstance(bitmap, PackBitmap)
+
+    def test_ensure_bitmap_returns_existing(self):
+        """Test that ensure_bitmap returns existing bitmap without regenerating."""
+        refs = {b"refs/heads/master": self.commit.id}
+
+        # Create bitmap with commit_interval=1
+        self.pack.ensure_bitmap(self.store, refs, commit_interval=1)
+        mtime1 = os.path.getmtime(self.pack._bitmap_path)
+
+        # Ensure again - should return existing
+        import time
+
+        time.sleep(0.01)  # Ensure time difference
+        self.pack.ensure_bitmap(self.store, refs, commit_interval=1)
+        mtime2 = os.path.getmtime(self.pack._bitmap_path)
+
+        # File should not have been regenerated
+        self.assertEqual(mtime1, mtime2)
+
+    def test_ensure_bitmap_with_custom_interval(self):
+        """Test ensure_bitmap with custom commit_interval."""
+        refs = {b"refs/heads/master": self.commit.id}
+        bitmap = self.pack.ensure_bitmap(self.store, refs, commit_interval=50)
+        self.assertIsNotNone(bitmap)
+
+
+class GeneratePackBitmapsTests(unittest.TestCase):
+    """Tests for PackBasedObjectStore.generate_pack_bitmaps()."""
+
+    def setUp(self):
+        """Set up test repository."""
+        from dulwich.object_store import DiskObjectStore
+        from dulwich.objects import Blob, Commit, Tree
+
+        self.temp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.temp_dir)
+
+        # Create pack directory
+        os.makedirs(os.path.join(self.temp_dir, "pack"))
+
+        self.store = DiskObjectStore(self.temp_dir)
+        # Close store before cleanup to release file handles on Windows
+        self.addCleanup(self.store.close)
+
+        # Create multiple commits
+        self.commits = []
+        for i in range(3):
+            blob = Blob.from_string(f"content {i}".encode())
+            self.store.add_object(blob)
+
+            tree = Tree()
+            tree.add(f"file{i}.txt".encode(), 0o100644, blob.id)
+            self.store.add_object(tree)
+
+            commit = Commit()
+            commit.tree = tree.id
+            if i > 0:
+                commit.parents = [self.commits[-1].id]
+            commit.author = commit.committer = b"Test <test@example.com>"
+            commit.author_time = commit.commit_time = 1234567890 + i
+            commit.author_timezone = commit.commit_timezone = 0
+            commit.message = f"Commit {i}".encode()
+            self.store.add_object(commit)
+            self.commits.append(commit)
+
+        # Repack to create pack
+        self.store.repack()
+
+    def test_generate_pack_bitmaps(self):
+        """Test generating bitmaps for all packs."""
+        refs = {b"refs/heads/master": self.commits[-1].id}
+
+        # Initially no bitmaps
+        for pack in self.store.packs:
+            self.assertFalse(os.path.exists(pack._bitmap_path))
+
+        # Generate bitmaps
+        count = self.store.generate_pack_bitmaps(refs)
+
+        # Should have generated bitmaps
+        self.assertEqual(count, len(self.store.packs))
+        for pack in self.store.packs:
+            self.assertTrue(os.path.exists(pack._bitmap_path))
+
+    def test_generate_pack_bitmaps_multiple_calls(self):
+        """Test that calling generate_pack_bitmaps multiple times is safe."""
+        refs = {b"refs/heads/master": self.commits[-1].id}
+
+        # Generate once
+        self.store.generate_pack_bitmaps(refs)
+        mtimes1 = [os.path.getmtime(p._bitmap_path) for p in self.store.packs]
+
+        # Generate again
+        import time
+
+        time.sleep(0.01)
+        self.store.generate_pack_bitmaps(refs)
+        mtimes2 = [os.path.getmtime(p._bitmap_path) for p in self.store.packs]
+
+        # Should not regenerate existing bitmaps
+        self.assertEqual(mtimes1, mtimes2)
+
+    def test_generate_pack_bitmaps_with_progress(self):
+        """Test generate_pack_bitmaps with progress callback."""
+        refs = {b"refs/heads/master": self.commits[-1].id}
+        messages = []
+
+        def progress(msg):
+            messages.append(msg)
+
+        self.store.generate_pack_bitmaps(refs, progress=progress)
+
+        # Should have received progress messages
+        self.assertGreater(len(messages), 0)

+ 16 - 0
tests/test_cli.py

@@ -2550,6 +2550,22 @@ class RepackCommandTest(DulwichCliTestCase):
         pack_dir = os.path.join(self.repo_path, ".git", "objects", "pack")
         self.assertTrue(any(f.endswith(".pack") for f in os.listdir(pack_dir)))
 
+    def test_repack_write_bitmap_index(self):
+        """Test repack with --write-bitmap-index flag."""
+        # Create some objects
+        for i in range(5):
+            test_file = os.path.join(self.repo_path, f"test{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"content {i}")
+            self._run_cli("add", f"test{i}.txt")
+            self._run_cli("commit", f"--message=Commit {i}")
+
+        _result, _stdout, _stderr = self._run_cli("repack", "--write-bitmap-index")
+        # Should create pack and bitmap files
+        pack_dir = os.path.join(self.repo_path, ".git", "objects", "pack")
+        self.assertTrue(any(f.endswith(".pack") for f in os.listdir(pack_dir)))
+        self.assertTrue(any(f.endswith(".bitmap") for f in os.listdir(pack_dir)))
+
 
 class ResetCommandTest(DulwichCliTestCase):
     """Tests for reset command."""

+ 23 - 0
tests/test_porcelain.py

@@ -7885,6 +7885,29 @@ class RepackTests(PorcelainTestCase):
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.repack(self.repo)
 
+    def test_write_bitmaps(self) -> None:
+        """Test that write_bitmaps generates bitmap files."""
+        # Create some content
+        handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
+        os.close(handle)
+        with open(fullpath, "w") as f:
+            f.write("test content")
+        porcelain.add(repo=self.repo.path, paths=fullpath)
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"test commit",
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+        )
+
+        # Repack with bitmaps
+        porcelain.repack(self.repo, write_bitmaps=True)
+
+        # Check that bitmap files were created
+        pack_dir = os.path.join(self.repo.path, ".git", "objects", "pack")
+        bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
+        self.assertGreater(len(bitmap_files), 0)
+
 
 class LsTreeTests(PorcelainTestCase):
     def test_empty(self) -> None: