
Use bitmap support (#1995)

Actually use bitmap support and support creating bitmap files.

Fixes #1792
Jelmer Vernooij, 1 month ago
Commit 258f2d3c51
9 changed files with 1354 additions and 22 deletions
  1. NEWS (+3, -0)
  2. dulwich/bitmap.py (+493, -9)
  3. dulwich/cli.py (+7, -2)
  4. dulwich/object_store.py (+327, -10)
  5. dulwich/pack.py (+53, -0)
  6. dulwich/porcelain.py (+6, -1)
  7. tests/test_bitmap.py (+426, -0)
  8. tests/test_cli.py (+16, -0)
  9. tests/test_porcelain.py (+23, -0)

+ 3 - 0
NEWS

@@ -15,6 +15,9 @@
 
  * Drop support for Python 3.9. (Jelmer Vernooij)
 
+ * Add support for pack bitmap indexes for fast reachability queries.
+   (Jelmer Vernooij, #1792)
+
  * Add support for ``git rerere`` (reuse recorded resolution) with CLI
    subcommands and porcelain functions. Supports ``rerere.enabled`` and
    ``rerere.autoupdate`` configuration. (Jelmer Vernooij, #1786)

+ 493 - 9
dulwich/bitmap.py

@@ -30,14 +30,17 @@ for efficient storage and fast bitwise operations.
 
 import os
 import struct
-from collections.abc import Iterator
+from collections import deque
+from collections.abc import Callable, Iterable, Iterator
 from io import BytesIO
 from typing import IO, TYPE_CHECKING
 
 from .file import GitFile
+from .objects import Blob, Commit, Tag, Tree
 
 if TYPE_CHECKING:
-    from .pack import PackIndex
+    from .object_store import BaseObjectStore
+    from .pack import Pack, PackIndex
 
 # Bitmap file signature
 BITMAP_SIGNATURE = b"BITM"
@@ -51,6 +54,11 @@ BITMAP_OPT_HASH_CACHE = 0x4  # Name-hash cache
 BITMAP_OPT_LOOKUP_TABLE = 0x10  # Lookup table for random access
 BITMAP_OPT_PSEUDO_MERGES = 0x20  # Pseudo-merge bitmaps
 
+# EWAH compression constants
+MAX_LITERAL_WORDS = 0x7FFFFFFF  # Maximum literal words in EWAH format (31 bits)
+MAX_XOR_OFFSET = 160  # Maximum distance to search for XOR compression base
+DEFAULT_COMMIT_INTERVAL = 100  # Default interval for commit selection
+
 
 def _encode_ewah_words(words: list[int]) -> list[int]:
     """Encode a list of 64-bit words using EWAH run-length compression.
@@ -79,7 +87,7 @@ def _encode_ewah_words(words: list[int]) -> list[int]:
             while i < len(words) and words[i] != 0 and words[i] != 0xFFFFFFFFFFFFFFFF:
                 literals.append(words[i])
                 i += 1
-                if len(literals) >= 0x7FFFFFFF:  # Max literal count in RLW
+                if len(literals) >= MAX_LITERAL_WORDS:
                     break
 
             # Create RLW with correct bit layout:
@@ -94,7 +102,7 @@ def _encode_ewah_words(words: list[int]) -> list[int]:
             while i < len(words) and words[i] != 0 and words[i] != 0xFFFFFFFFFFFFFFFF:
                 literals.append(words[i])
                 i += 1
-                if len(literals) >= 0x7FFFFFFF:  # Max literal count
+                if len(literals) >= MAX_LITERAL_WORDS:
                     break
 
             # RLW with no run, just literals
@@ -138,7 +146,8 @@ class EWAHBitmap:
         """Decode EWAH compressed bitmap data.
         """Decode EWAH compressed bitmap data.
 
 
         Args:
         Args:
-            data: Compressed bitmap data (EWAH format with header + words + RLW position)
+            data: Compressed bitmap data (EWAH format with header + words +
+                RLW position)
         """
         """
         f = BytesIO(data)
         f = BytesIO(data)
 
 
@@ -168,7 +177,7 @@ class EWAHBitmap:
         idx = 0
         while idx < len(words):
             # This is an RLW
-            # Bit layout: [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
+            # Bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
             rlw = words[idx]
             running_bit = rlw & 1
             running_len = (rlw >> 1) & 0xFFFFFFFF
@@ -303,6 +312,23 @@ class EWAHBitmap:
         result.bit_count = max(self.bit_count, other.bit_count)
         return result
 
+    def __sub__(self, other: "EWAHBitmap") -> "EWAHBitmap":
+        """Bitwise subtraction (set difference).
+
+        Returns bits that are in self but not in other.
+        Equivalent to: self & ~other
+
+        Args:
+            other: Bitmap to subtract
+
+        Returns:
+            New bitmap with bits in self but not in other
+        """
+        result = EWAHBitmap()
+        result.bits = self.bits - other.bits
+        result.bit_count = self.bit_count
+        return result
+
     def add(self, bit: int) -> None:
         """Set a bit.
 
@@ -327,7 +353,7 @@ class BitmapEntry:
 
         Args:
             object_pos: Position of object in pack index
-            xor_offset: XOR offset for compression (0-160)
+            xor_offset: XOR offset for compression
             flags: Entry flags
             bitmap: The EWAH bitmap data
         """
@@ -405,7 +431,7 @@ class PackBitmap:
                 # Entry not found in list, return as-is
                 return entry.bitmap
 
-            # XOR offset is how many positions back to look (max 160)
+            # XOR offset is how many positions back to look
             if current_idx >= entry.xor_offset:
                 base_sha, _base_entry = self.entries_list[
                     current_idx - entry.xor_offset
@@ -564,7 +590,7 @@ def read_bitmap_file(f: IO[bytes], pack_index: "PackIndex | None" = None) -> Pac
         entry_flags = flags_bytes[0]
 
         # Read self-describing EWAH bitmap
-        # EWAH format: bit_count (4) + word_count (4) + words (word_count * 8) + rlw_pos (4)
+        # EWAH format: bit_count (4) + word_count (4) + words + rlw_pos (4)
         bit_count_bytes = f.read(4)
         word_count_bytes = f.read(4)
 
@@ -733,3 +759,461 @@ def write_bitmap_file(f: IO[bytes], bitmap: PackBitmap) -> None:
     if bitmap.flags & BITMAP_OPT_HASH_CACHE and bitmap.name_hash_cache:
     if bitmap.flags & BITMAP_OPT_HASH_CACHE and bitmap.name_hash_cache:
         for hash_value in bitmap.name_hash_cache:
         for hash_value in bitmap.name_hash_cache:
             f.write(struct.pack(">I", hash_value))
             f.write(struct.pack(">I", hash_value))
+
+
+def _compute_name_hash(name: bytes) -> int:
+    """Compute the name hash for a tree entry.
+
+    This is the same algorithm Git uses for the name-hash cache.
+
+    Args:
+        name: The name of the tree entry
+
+    Returns:
+        32-bit hash value
+    """
+    hash_value = 0
+    for byte in name:
+        hash_value = (hash_value >> 19) | (hash_value << 13)
+        hash_value += byte
+        hash_value &= 0xFFFFFFFF
+    return hash_value
+
+
+def select_bitmap_commits(
+    refs: dict[bytes, bytes],
+    object_store: "BaseObjectStore",
+    commit_interval: int = DEFAULT_COMMIT_INTERVAL,
+) -> list[bytes]:
+    """Select commits for bitmap generation.
+
+    Uses Git's strategy:
+    - All branch and tag tips
+    - Every Nth commit in history
+
+    Args:
+        refs: Dictionary of ref names to commit SHAs
+        object_store: Object store to read commits from
+        commit_interval: Include every Nth commit in history
+
+    Returns:
+        List of commit SHAs to create bitmaps for
+    """
+    selected = set()
+    seen = set()
+
+    # Start with all refs
+    ref_commits = set()
+    for ref_name, sha in refs.items():
+        try:
+            obj = object_store[sha]
+        except KeyError:
+            continue
+        else:
+            # Dereference tags to get to commits
+            while isinstance(obj, Tag):
+                obj = object_store[obj.object[1]]
+            if isinstance(obj, Commit):
+                ref_commits.add(obj.id)
+
+    # Add all ref tips
+    selected.update(ref_commits)
+
+    # Walk the commit graph and select every Nth commit
+    queue = deque(ref_commits)
+    commit_count = 0
+
+    while queue:
+        commit_sha = queue.popleft()
+        if commit_sha in seen:
+            continue
+        seen.add(commit_sha)
+
+        try:
+            obj = object_store[commit_sha]
+            if not isinstance(obj, Commit):
+                continue
+
+            commit_count += 1
+            if commit_count % commit_interval == 0:
+                selected.add(commit_sha)
+
+            # Add parents to queue
+            for parent in obj.parents:
+                if parent not in seen:
+                    queue.append(parent)
+        except KeyError:
+            continue
+
+    return sorted(selected)
+
+
+def build_reachability_bitmap(
+    commit_sha: bytes,
+    sha_to_pos: dict[bytes, int],
+    object_store: "BaseObjectStore",
+) -> EWAHBitmap:
+    """Build a reachability bitmap for a commit.
+
+    The bitmap has a bit set for each object that is reachable from the commit.
+    The bit position corresponds to the object's position in the pack index.
+
+    Args:
+        commit_sha: The commit to build a bitmap for
+        sha_to_pos: Pre-built mapping from SHA to position in pack
+        object_store: Object store to traverse objects
+
+    Returns:
+        EWAH bitmap with bits set for reachable objects
+    """
+    bitmap = EWAHBitmap()
+
+    # Traverse all objects reachable from the commit
+    seen = set()
+    queue = deque([commit_sha])
+
+    while queue:
+        sha = queue.popleft()
+        if sha in seen:
+            continue
+        seen.add(sha)
+
+        # Add this object to the bitmap if it's in the pack
+        if sha in sha_to_pos:
+            bitmap.add(sha_to_pos[sha])
+
+        # Get the object and traverse its references
+        try:
+            obj = object_store[sha]
+
+            if isinstance(obj, Commit):
+                # Add parents and tree
+                queue.append(obj.tree)
+                queue.extend(obj.parents)
+            elif hasattr(obj, "items"):
+                # Tree object - add all entries
+                for item in obj.items():
+                    queue.append(item.sha)
+        except KeyError:
+            # Object not in store, skip it
+            continue
+
+    return bitmap
+
+
+def apply_xor_compression(
+    bitmaps: list[tuple[bytes, EWAHBitmap]],
+    max_xor_offset: int = MAX_XOR_OFFSET,
+) -> list[tuple[bytes, EWAHBitmap, int]]:
+    """Apply XOR compression to bitmaps.
+
+    XOR compression stores some bitmaps as XOR differences from previous bitmaps,
+    reducing storage size when bitmaps are similar.
+
+    Args:
+        bitmaps: List of (commit_sha, bitmap) tuples
+        max_xor_offset: Maximum offset to search for XOR base
+
+    Returns:
+        List of (commit_sha, bitmap, xor_offset) tuples
+    """
+    compressed = []
+
+    for i, (sha, bitmap) in enumerate(bitmaps):
+        best_xor_offset = 0
+        best_size = len(bitmap.encode())
+        best_xor_bitmap = bitmap
+
+        # Try XORing with previous bitmaps within max_xor_offset
+        for offset in range(1, min(i + 1, max_xor_offset + 1)):
+            _prev_sha, prev_bitmap = bitmaps[i - offset]
+            xor_bitmap = bitmap ^ prev_bitmap
+            xor_size = len(xor_bitmap.encode())
+
+            # Use XOR if it reduces size
+            if xor_size < best_size:
+                best_size = xor_size
+                best_xor_offset = offset
+                best_xor_bitmap = xor_bitmap
+
+        compressed.append((sha, best_xor_bitmap, best_xor_offset))
+
+    return compressed
+
+
+def build_type_bitmaps(
+    sha_to_pos: dict[bytes, int],
+    object_store: "BaseObjectStore",
+) -> tuple[EWAHBitmap, EWAHBitmap, EWAHBitmap, EWAHBitmap]:
+    """Build type bitmaps for all objects in a pack.
+
+    Type bitmaps classify objects by type: commit, tree, blob, or tag.
+
+    Args:
+        sha_to_pos: Pre-built mapping from SHA to position in pack
+        object_store: Object store to read object types
+
+    Returns:
+        Tuple of (commit_bitmap, tree_bitmap, blob_bitmap, tag_bitmap)
+    """
+    from .objects import sha_to_hex
+
+    commit_bitmap = EWAHBitmap()
+    tree_bitmap = EWAHBitmap()
+    blob_bitmap = EWAHBitmap()
+    tag_bitmap = EWAHBitmap()
+
+    for sha, pos in sha_to_pos.items():
+        # Pack index returns binary SHA (20 bytes), but object_store expects hex SHA (40 bytes)
+        hex_sha = sha_to_hex(sha) if len(sha) == 20 else sha
+        try:
+            obj = object_store[hex_sha]
+        except KeyError:
+            # Object not in store, skip it
+            continue
+
+        obj_type = obj.type_num
+
+        if obj_type == Commit.type_num:
+            commit_bitmap.add(pos)
+        elif obj_type == Tree.type_num:
+            tree_bitmap.add(pos)
+        elif obj_type == Blob.type_num:
+            blob_bitmap.add(pos)
+        elif obj_type == Tag.type_num:
+            tag_bitmap.add(pos)
+
+    return commit_bitmap, tree_bitmap, blob_bitmap, tag_bitmap
+
+
+def build_name_hash_cache(
+    sha_to_pos: dict[bytes, int],
+    object_store: "BaseObjectStore",
+) -> list[int]:
+    """Build name-hash cache for all objects in a pack.
+
+    The name-hash cache stores a hash of the name for each object,
+    which can speed up path-based operations.
+
+    Args:
+        sha_to_pos: Pre-built mapping from SHA to position in pack
+        object_store: Object store to read objects
+
+    Returns:
+        List of 32-bit hash values, one per object in the pack
+    """
+    from .objects import sha_to_hex
+
+    # Pre-allocate list with correct size
+    num_objects = len(sha_to_pos)
+    name_hashes = [0] * num_objects
+
+    for sha, pos in sha_to_pos.items():
+        # Pack index returns binary SHA (20 bytes), but object_store expects hex SHA (40 bytes)
+        hex_sha = sha_to_hex(sha) if len(sha) == 20 else sha
+        try:
+            obj = object_store[hex_sha]
+        except KeyError:
+            # Object not in store, use zero hash
+            continue
+
+        # For tree entries, use the tree entry name
+        # For commits, use the tree SHA
+        # For other objects, use the object SHA
+        if isinstance(obj, Tree):
+            # Tree object - use the SHA as the name
+            name_hash = _compute_name_hash(sha)
+        elif isinstance(obj, Commit):
+            # Commit - use the tree SHA as the name
+            name_hash = _compute_name_hash(obj.tree)
+        else:
+            # Other objects - use the SHA as the name
+            name_hash = _compute_name_hash(sha)
+
+        name_hashes[pos] = name_hash
+
+    return name_hashes
+
+
+def generate_bitmap(
+    pack_index: "PackIndex",
+    object_store: "BaseObjectStore",
+    refs: dict[bytes, bytes],
+    pack_checksum: bytes,
+    include_hash_cache: bool = True,
+    include_lookup_table: bool = True,
+    commit_interval: int | None = None,
+    progress: Callable[[str], None] | None = None,
+) -> PackBitmap:
+    """Generate a complete bitmap for a pack.
+
+    Args:
+        pack_index: Pack index for the pack
+        object_store: Object store to read objects from
+        refs: Dictionary of ref names to commit SHAs
+        pack_checksum: SHA-1 checksum of the pack file
+        include_hash_cache: Whether to include name-hash cache
+        include_lookup_table: Whether to include lookup table
+        commit_interval: Include every Nth commit in history (None for default)
+        progress: Optional progress reporting callback
+
+    Returns:
+        Complete PackBitmap ready to write to disk
+    """
+    if commit_interval is None:
+        commit_interval = DEFAULT_COMMIT_INTERVAL
+
+    if progress:
+        progress("Building pack index mapping")
+
+    # Build mapping from SHA to position in pack index ONCE
+    # This is used by all subsequent operations and avoids repeated enumeration
+    sha_to_pos = {}
+    for pos, (sha, _offset, _crc32) in enumerate(pack_index.iterentries()):
+        sha_to_pos[sha] = pos
+
+    if progress:
+        progress("Selecting commits for bitmap")
+
+    # Select commits to create bitmaps for
+    selected_commits = select_bitmap_commits(refs, object_store, commit_interval)
+
+    if progress:
+        progress(f"Building bitmaps for {len(selected_commits)} commits")
+
+    # Build reachability bitmaps for selected commits
+    commit_bitmaps = []
+    for i, commit_sha in enumerate(selected_commits):
+        if progress and i % 10 == 0:
+            progress(f"Building bitmap {i + 1}/{len(selected_commits)}")
+
+        bitmap = build_reachability_bitmap(commit_sha, sha_to_pos, object_store)
+        commit_bitmaps.append((commit_sha, bitmap))
+
+    if progress:
+        progress("Applying XOR compression")
+
+    # Apply XOR compression
+    compressed_bitmaps = apply_xor_compression(commit_bitmaps)
+
+    if progress:
+        progress("Building type bitmaps")
+
+    # Build type bitmaps (using pre-built sha_to_pos mapping)
+    commit_type_bitmap, tree_type_bitmap, blob_type_bitmap, tag_type_bitmap = (
+        build_type_bitmaps(sha_to_pos, object_store)
+    )
+
+    # Create PackBitmap
+    flags = BITMAP_OPT_FULL_DAG
+    if include_hash_cache:
+        flags |= BITMAP_OPT_HASH_CACHE
+    if include_lookup_table:
+        flags |= BITMAP_OPT_LOOKUP_TABLE
+
+    pack_bitmap = PackBitmap(version=1, flags=flags)
+    pack_bitmap.pack_checksum = pack_checksum
+    pack_bitmap.commit_bitmap = commit_type_bitmap
+    pack_bitmap.tree_bitmap = tree_type_bitmap
+    pack_bitmap.blob_bitmap = blob_type_bitmap
+    pack_bitmap.tag_bitmap = tag_type_bitmap
+
+    # Add bitmap entries
+    for commit_sha, xor_bitmap, xor_offset in compressed_bitmaps:
+        if commit_sha not in sha_to_pos:
+            continue
+
+        entry = BitmapEntry(
+            object_pos=sha_to_pos[commit_sha],
+            xor_offset=xor_offset,
+            flags=0,
+            bitmap=xor_bitmap,
+        )
+        pack_bitmap.entries[commit_sha] = entry
+        pack_bitmap.entries_list.append((commit_sha, entry))
+
+    # Build optional name-hash cache (using pre-built sha_to_pos mapping)
+    if include_hash_cache:
+        if progress:
+            progress("Building name-hash cache")
+        pack_bitmap.name_hash_cache = build_name_hash_cache(sha_to_pos, object_store)
+
+    # Build optional lookup table
+    if include_lookup_table:
+        if progress:
+            progress("Building lookup table")
+        # The lookup table is built automatically from the entries
+        # For now, we'll leave it as None and let the write function handle it
+        # TODO: Implement lookup table generation if needed
+        pack_bitmap.lookup_table = None
+
+    if progress:
+        progress("Bitmap generation complete")
+
+    return pack_bitmap
+
+
+def find_commit_bitmaps(
+    commit_shas: set[bytes], packs: Iterable["Pack"]
+) -> dict[bytes, tuple["Pack", "PackBitmap", dict[bytes, int]]]:
+    """Find which packs have bitmaps for the given commits.
+
+    Args:
+        commit_shas: Set of commit SHAs to look for
+        packs: Iterable of Pack objects to search
+
+    Returns:
+        Dict mapping commit SHA to (pack, pack_bitmap, position) tuple
+    """
+    result = {}
+    remaining = set(commit_shas)
+
+    for pack in packs:
+        if not remaining:
+            break
+
+        pack_bitmap = pack.bitmap
+        if not pack_bitmap:
+            # No bitmap for this pack
+            continue
+
+        # Build SHA to position mapping for this pack
+        sha_to_pos = {}
+        for pos, (sha, _offset, _crc32) in enumerate(pack.index.iterentries()):
+            sha_to_pos[sha] = pos
+
+        # Check which commits have bitmaps
+        for commit_sha in list(remaining):
+            if pack_bitmap.has_commit(commit_sha):
+                if commit_sha in sha_to_pos:
+                    result[commit_sha] = (pack, pack_bitmap, sha_to_pos)
+                    remaining.remove(commit_sha)
+
+    return result
+
+
+def bitmap_to_object_shas(
+    bitmap: EWAHBitmap,
+    pack_index: "PackIndex",
+    type_filter: EWAHBitmap | None = None,
+) -> set[bytes]:
+    """Convert a bitmap to a set of object SHAs.
+
+    Args:
+        bitmap: The EWAH bitmap with set bits for objects
+        pack_index: Pack index to map positions to SHAs
+        type_filter: Optional type bitmap to filter results (e.g., commits only)
+
+    Returns:
+        Set of object SHAs
+    """
+    result = set()
+
+    for pos, (sha, _offset, _crc32) in enumerate(pack_index.iterentries()):
+        # Check if this position is in the bitmap
+        if pos in bitmap:
+            # Apply type filter if provided
+            if type_filter is None or pos in type_filter:
+                result.add(sha)
+
+    return result
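
The EWAH helpers added above are usable on their own. As a rough, illustrative
sketch (not part of the patch; the bit positions are made-up object positions
in a hypothetical pack index), the set operations compose like this:

    from dulwich.bitmap import EWAHBitmap

    a = EWAHBitmap()
    for pos in (0, 3, 64, 1000):  # positions of reachable objects (example values)
        a.add(pos)

    b = EWAHBitmap()
    b.add(3)
    b.add(64)

    union = a | b   # objects reachable from either tip
    diff = a - b    # objects in a but not in b (used for exclusions)
    assert 1000 in diff and 3 not in diff

    # encode() yields the self-describing EWAH byte form stored in .bitmap files
    print(len(union.encode()), "bytes of compressed bitmap data")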

+ 7 - 2
dulwich/cli.py

@@ -2830,8 +2830,13 @@ class cmd_repack(Command):
             args: Command line arguments
         """
         parser = argparse.ArgumentParser()
-        parser.parse_args(args)
-        porcelain.repack(".")
+        parser.add_argument(
+            "--write-bitmap-index",
+            action="store_true",
+            help="write a bitmap index for packs",
+        )
+        parsed_args = parser.parse_args(args)
+        porcelain.repack(".", write_bitmaps=parsed_args.write_bitmap_index)
 
 
 class cmd_reflog(Command):

+ 327 - 10
dulwich/object_store.py

@@ -82,9 +82,11 @@ from .protocol import DEPTH_INFINITE
 from .refs import PEELED_TAG_SUFFIX, Ref
 
 if TYPE_CHECKING:
+    from .bitmap import EWAHBitmap
     from .commit_graph import CommitGraph
     from .config import Config
     from .diff_tree import RenameDetector
+    from .pack import Pack
 
 
 class GraphWalker(Protocol):
@@ -359,13 +361,19 @@ class BaseObjectStore:
         """
         """
         raise NotImplementedError(self.add_objects)
         raise NotImplementedError(self.add_objects)
 
 
-    def get_reachability_provider(self) -> ObjectReachabilityProvider:
+    def get_reachability_provider(
+        self, prefer_bitmap: bool = True
+    ) -> ObjectReachabilityProvider:
         """Get a reachability provider for this object store.
         """Get a reachability provider for this object store.
 
 
         Returns an ObjectReachabilityProvider that can efficiently compute
         Returns an ObjectReachabilityProvider that can efficiently compute
         object reachability queries. Subclasses can override this to provide
         object reachability queries. Subclasses can override this to provide
         optimized implementations (e.g., using bitmap indexes).
         optimized implementations (e.g., using bitmap indexes).
 
 
+        Args:
+            prefer_bitmap: Whether to prefer bitmap-based reachability if
+                available.
+
         Returns:
         Returns:
           ObjectReachabilityProvider instance
           ObjectReachabilityProvider instance
         """
         """
@@ -481,9 +489,12 @@ class BaseObjectStore:
 
         Args:
           shas: Iterable of object SHAs to retrieve
-          include_comp: Whether to include compressed data (ignored in base implementation)
-          allow_missing: If True, skip missing objects; if False, raise KeyError
-          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation)
+          include_comp: Whether to include compressed data (ignored in base
+            implementation)
+          allow_missing: If True, skip missing objects; if False, raise
+            KeyError
+          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
+            base implementation)
 
         Returns:
           Iterator of UnpackedObject instances
@@ -795,6 +806,39 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         self.pack_threads = pack_threads
         self.pack_big_file_threshold = pack_big_file_threshold
 
+    def get_reachability_provider(
+        self,
+        prefer_bitmaps: bool = True,
+    ) -> ObjectReachabilityProvider:
+        """Get the best reachability provider for the object store.
+
+        Args:
+          prefer_bitmaps: Whether to use bitmaps if available
+
+        Returns:
+          ObjectReachabilityProvider implementation (either bitmap-accelerated
+          or graph traversal)
+        """
+        if prefer_bitmaps:
+            # Check if any packs have bitmaps
+            has_bitmap = False
+            for pack in self.packs:
+                try:
+                    # Try to access bitmap property
+                    if pack.bitmap is not None:
+                        has_bitmap = True
+                        break
+                except FileNotFoundError:
+                    # Bitmap file doesn't exist for this pack
+                    continue
+
+            if has_bitmap:
+                return BitmapReachability(self)
+
+        # Fall back to graph traversal
+        return GraphTraversalReachability(self)
+
     def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
         """Add a new pack to this object store."""
         raise NotImplementedError(self.add_pack)
@@ -1034,6 +1078,38 @@ class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
         self._update_pack_cache()
         return len(objects)
 
+    def generate_pack_bitmaps(
+        self,
+        refs: dict[bytes, bytes],
+        *,
+        commit_interval: int | None = None,
+        progress: Callable[[str], None] | None = None,
+    ) -> int:
+        """Generate bitmap indexes for all packs that don't have them.
+
+        This generates .bitmap files for packfiles, enabling fast reachability
+        queries. Equivalent to the bitmap generation part of 'git repack -b'.
+
+        Args:
+          refs: Dictionary of ref names to commit SHAs
+          commit_interval: Include every Nth commit in bitmap index (None for default)
+          progress: Optional progress reporting callback
+
+        Returns:
+          Number of bitmaps generated
+        """
+        count = 0
+        for pack in self.packs:
+            pack.ensure_bitmap(
+                self, refs, commit_interval=commit_interval, progress=progress
+            )
+            count += 1
+
+        # Update cache to pick up new bitmaps
+        self._update_pack_cache()
+
+        return count
+
     def __iter__(self) -> Iterator[bytes]:
         """Iterate over the SHAs that are present in this store."""
         self._update_pack_cache()
@@ -1269,6 +1345,9 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_threads: int | None = None,
         pack_big_file_threshold: int | None = None,
         fsync_object_files: bool = False,
+        pack_write_bitmaps: bool = False,
+        pack_write_bitmap_hash_cache: bool = True,
+        pack_write_bitmap_lookup_table: bool = True,
         file_mode: int | None = None,
         dir_mode: int | None = None,
     ) -> None:
@@ -1286,6 +1365,9 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_threads: number of threads for pack operations
           pack_big_file_threshold: threshold for treating files as big
           fsync_object_files: whether to fsync object files for durability
+          pack_write_bitmaps: whether to write bitmap indexes for packs
+          pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
+          pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
           file_mode: File permission mask for shared repository
           dir_mode: Directory permission mask for shared repository
         """
@@ -1306,6 +1388,9 @@ class DiskObjectStore(PackBasedObjectStore):
         self.pack_compression_level = pack_compression_level
         self.pack_index_version = pack_index_version
         self.fsync_object_files = fsync_object_files
+        self.pack_write_bitmaps = pack_write_bitmaps
+        self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
+        self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
         self.file_mode = file_mode
         self.dir_mode = dir_mode
 
@@ -1402,6 +1487,20 @@ class DiskObjectStore(PackBasedObjectStore):
         # Read core.fsyncObjectFiles setting
         fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
 
+        # Read bitmap settings
+        pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
+        pack_write_bitmap_hash_cache = config.get_boolean(
+            (b"pack",), b"writeBitmapHashCache", True
+        )
+        pack_write_bitmap_lookup_table = config.get_boolean(
+            (b"pack",), b"writeBitmapLookupTable", True
+        )
+        # Also check repack.writeBitmaps for backwards compatibility
+        if not pack_write_bitmaps:
+            pack_write_bitmaps = config.get_boolean(
+                (b"repack",), b"writeBitmaps", False
+            )
+
         instance = cls(
             path,
             loose_compression_level=loose_compression_level,
@@ -1414,6 +1513,9 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_threads=pack_threads,
             pack_big_file_threshold=pack_big_file_threshold,
             fsync_object_files=fsync_object_files,
+            pack_write_bitmaps=pack_write_bitmaps,
+            pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
+            pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
             file_mode=file_mode,
             dir_mode=dir_mode,
         )
@@ -1631,6 +1733,7 @@ class DiskObjectStore(PackBasedObjectStore):
         num_objects: int,
         indexer: PackIndexer,
         progress: Callable[..., None] | None = None,
+        refs: dict[bytes, bytes] | None = None,
     ) -> Pack:
         """Move a specific file containing a pack into the pack directory.
 
@@ -1643,6 +1746,7 @@ class DiskObjectStore(PackBasedObjectStore):
           num_objects: Number of objects in the pack.
           indexer: A PackIndexer for indexing the pack.
           progress: Optional progress reporting function.
+          refs: Optional dictionary of refs for bitmap generation.
         """
         entries = []
         for i, entry in enumerate(indexer):
@@ -1698,6 +1802,40 @@ class DiskObjectStore(PackBasedObjectStore):
                 index_file, entries, pack_sha, version=self.pack_index_version
             )
 
+        # Generate bitmap if configured and refs are available
+        if self.pack_write_bitmaps and refs:
+            from .bitmap import generate_bitmap, write_bitmap
+            from .pack import load_pack_index_file
+
+            if progress:
+                progress("Generating bitmap index\r".encode("ascii"))
+
+            # Load the index we just wrote
+            with open(target_index_path, "rb") as idx_file:
+                pack_index = load_pack_index_file(
+                    os.path.basename(target_index_path), idx_file
+                )
+
+            # Generate the bitmap
+            bitmap = generate_bitmap(
+                pack_index=pack_index,
+                object_store=self,
+                refs=refs,
+                pack_checksum=pack_sha,
+                include_hash_cache=self.pack_write_bitmap_hash_cache,
+                include_lookup_table=self.pack_write_bitmap_lookup_table,
+                progress=lambda msg: progress(msg.encode("ascii"))
+                if progress and isinstance(msg, str)
+                else None,
+            )
+
+            # Write the bitmap
+            target_bitmap_path = pack_base_name + ".bitmap"
+            write_bitmap(target_bitmap_path, bitmap)
+
+            if progress:
+                progress("Bitmap index written\r".encode("ascii"))
+
         # Add the pack to the store and return it.
         final_pack = Pack(
             pack_base_name,
@@ -2351,8 +2489,11 @@ class MissingObjectFinder:
             have_commits, exclude=None, shallow=shallow
         )
         # all_missing - complete set of commits between haves and wants
-        # common - commits from all_ancestors we hit into while
-        # traversing parent hierarchy of wants
+        # common_commits - boundary commits directly encountered when traversing wants
+        # We use _collect_ancestors here because we need the exact boundary behavior:
+        # commits that are in all_ancestors and directly reachable from wants,
+        # but we don't traverse past them. This is hard to express with the
+        # reachability abstraction alone.
         missing_commits, common_commits = _collect_ancestors(
             object_store,
             want_commits,
@@ -2360,6 +2501,7 @@ class MissingObjectFinder:
             shallow=frozenset(shallow),
             get_parents=self._get_parents,
         )
+
         self.remote_has: set[bytes] = set()
         # Now, fill sha_done with commits and revisions of
         # files and directories known to be both locally
@@ -2369,8 +2511,10 @@ class MissingObjectFinder:
             self.remote_has.add(h)
             cmt = object_store[h]
             assert isinstance(cmt, Commit)
+            # Get tree objects for this commit
             tree_objects = reachability.get_tree_objects([cmt.tree])
             self.remote_has.update(tree_objects)
+
         # record tags we have as visited, too
         for t in have_tags:
             self.remote_has.add(t)
@@ -3079,9 +3223,6 @@ def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
     return unpeeled, obj
 
 
-# ObjectReachabilityProvider implementation
-
-
 class GraphTraversalReachability:
     """Naive graph traversal implementation of ObjectReachabilityProvider.
 
@@ -3118,7 +3259,6 @@ class GraphTraversalReachability:
         """
         """
         exclude_set = frozenset(exclude) if exclude else frozenset()
         exclude_set = frozenset(exclude) if exclude else frozenset()
         shallow_set = frozenset(shallow) if shallow else frozenset()
         shallow_set = frozenset(shallow) if shallow else frozenset()
-
         commits, _bases = _collect_ancestors(
         commits, _bases = _collect_ancestors(
             self.store, heads, exclude_set, shallow_set
             self.store, heads, exclude_set, shallow_set
         )
         )
@@ -3180,3 +3320,180 @@ class GraphTraversalReachability:
             result -= exclude_objects
 
         return result
+
+
+class BitmapReachability:
+    """Bitmap-accelerated implementation of ObjectReachabilityProvider.
+
+    This implementation uses packfile bitmap indexes where available to
+    accelerate reachability queries. Falls back to graph traversal when
+    bitmaps don't cover the requested commits.
+    """
+
+    def __init__(self, object_store: "PackBasedObjectStore") -> None:
+        """Initialize the bitmap provider.
+
+        Args:
+          object_store: Pack-based object store with bitmap support
+        """
+        self.store = object_store
+        # Fallback to graph traversal for operations not yet optimized
+        self._fallback = GraphTraversalReachability(object_store)
+
+    def _combine_commit_bitmaps(
+        self,
+        commit_shas: set[bytes],
+        exclude_shas: set[bytes] | None = None,
+    ) -> tuple["EWAHBitmap", "Pack"] | None:
+        """Combine bitmaps for multiple commits using OR, with optional exclusion.
+
+        Args:
+          commit_shas: Set of commit SHAs to combine
+          exclude_shas: Optional set of commit SHAs to exclude
+
+        Returns:
+          Tuple of (combined_bitmap, pack) or None if bitmaps unavailable
+        """
+        from .bitmap import find_commit_bitmaps
+
+        # Find bitmaps for the commits
+        commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)
+
+        # If we can't find bitmaps for all commits, return None
+        if len(commit_bitmaps) < len(commit_shas):
+            return None
+
+        # Combine bitmaps using OR
+        combined_bitmap = None
+        result_pack = None
+
+        for commit_sha in commit_shas:
+            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
+            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+            if commit_bitmap is None:
+                return None
+
+            if combined_bitmap is None:
+                combined_bitmap = commit_bitmap
+                result_pack = pack
+            elif pack == result_pack:
+                # Same pack, can OR directly
+                combined_bitmap = combined_bitmap | commit_bitmap
+            else:
+                # Different packs, can't combine
+                return None
+
+        # Handle exclusions if provided
+        if exclude_shas and result_pack and combined_bitmap:
+            exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])
+
+            if len(exclude_bitmaps) == len(exclude_shas):
+                # All excludes have bitmaps, compute exclusion
+                exclude_combined = None
+
+                for commit_sha in exclude_shas:
+                    _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
+                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+                    if exclude_bitmap is None:
+                        break
+
+                    if exclude_combined is None:
+                        exclude_combined = exclude_bitmap
+                    else:
+                        exclude_combined = exclude_combined | exclude_bitmap
+
+                # Subtract excludes using set difference
+                if exclude_combined:
+                    combined_bitmap = combined_bitmap - exclude_combined
+
+        if combined_bitmap and result_pack:
+            return (combined_bitmap, result_pack)
+        return None
+
+    def get_reachable_commits(
+        self,
+        heads: Iterable[bytes],
+        exclude: Iterable[bytes] | None = None,
+        shallow: Set[bytes] | None = None,
+    ) -> set[bytes]:
+        """Get all commits reachable from heads using bitmaps where possible.
+
+        Args:
+          heads: Starting commit SHAs
+          exclude: Commit SHAs to exclude (and their ancestors)
+          shallow: Set of shallow commit boundaries
+
+        Returns:
+          Set of commit SHAs reachable from heads but not from exclude
+        """
+        from .bitmap import bitmap_to_object_shas
+
+        # If shallow is specified, fall back to graph traversal
+        # (bitmaps don't support shallow boundaries well)
+        if shallow:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        heads_set = set(heads)
+        exclude_set = set(exclude) if exclude else None
+
+        # Try to combine bitmaps
+        result = self._combine_commit_bitmaps(heads_set, exclude_set)
+        if result is None:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        combined_bitmap, result_pack = result
+
+        # Convert bitmap to commit SHAs, filtering for commits only
+        pack_bitmap = result_pack.bitmap
+        if pack_bitmap is None:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+        commit_type_filter = pack_bitmap.commit_bitmap
+        return bitmap_to_object_shas(
+            combined_bitmap, result_pack.index, commit_type_filter
+        )
+
+    def get_tree_objects(
+        self,
+        tree_shas: Iterable[bytes],
+    ) -> set[bytes]:
+        """Get all trees and blobs reachable from the given trees.
+
+        Args:
+          tree_shas: Starting tree SHAs
+
+        Returns:
+          Set of tree and blob SHAs
+        """
+        # Tree traversal doesn't benefit much from bitmaps, use fallback
+        return self._fallback.get_tree_objects(tree_shas)
+
+    def get_reachable_objects(
+        self,
+        commits: Iterable[bytes],
+        exclude_commits: Iterable[bytes] | None = None,
+    ) -> set[bytes]:
+        """Get all objects reachable from commits using bitmaps.
+
+        Args:
+          commits: Starting commit SHAs
+          exclude_commits: Commits whose objects should be excluded
+
+        Returns:
+          Set of all object SHAs (commits, trees, blobs)
+        """
+        from .bitmap import bitmap_to_object_shas
+
+        commits_set = set(commits)
+        exclude_set = set(exclude_commits) if exclude_commits else None
+
+        # Try to combine bitmaps
+        result = self._combine_commit_bitmaps(commits_set, exclude_set)
+        if result is None:
+            return self._fallback.get_reachable_objects(commits, exclude_commits)
+
+        combined_bitmap, result_pack = result
+
+        # Convert bitmap to all object SHAs (no type filter)
+        return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)
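
As a usage sketch (assumptions: a repository at /path/to/repo with a
refs/heads/master ref; neither the path nor the ref name comes from the patch),
the provider selection added above is driven entirely through the object store:

    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")
    head = repo.refs[b"refs/heads/master"]

    # Returns BitmapReachability when a pack has a .bitmap file,
    # otherwise GraphTraversalReachability.
    provider = repo.object_store.get_reachability_provider()

    commits = provider.get_reachable_commits([head], exclude=None, shallow=None)
    objects = provider.get_reachable_objects([head], exclude_commits=None)
    print(len(commits), "commits,", len(objects), "objects reachable")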

+ 53 - 0
dulwich/pack.py

@@ -76,6 +76,7 @@ if TYPE_CHECKING:
 
     from .bitmap import PackBitmap
     from .commit_graph import CommitGraph
+    from .object_store import BaseObjectStore
 
 # For some reason the above try, except fails to set has_mmap = False for plan9
 if sys.platform == "Plan9":
@@ -3531,6 +3532,58 @@ class Pack:
             self._bitmap = read_bitmap(self._bitmap_path, pack_index=self.index)
         return self._bitmap
 
+    def ensure_bitmap(
+        self,
+        object_store: "BaseObjectStore",
+        refs: dict[bytes, bytes],
+        commit_interval: int | None = None,
+        progress: Callable[[str], None] | None = None,
+    ) -> "PackBitmap":
+        """Ensure a bitmap exists for this pack, generating one if needed.
+
+        Args:
+          object_store: Object store to read objects from
+          refs: Dictionary of ref names to commit SHAs
+          commit_interval: Include every Nth commit in bitmap index
+          progress: Optional progress reporting callback
+
+        Returns:
+          PackBitmap instance (either existing or newly generated)
+        """
+        from .bitmap import generate_bitmap, write_bitmap
+
+        # Check if bitmap already exists
+        try:
+            existing = self.bitmap
+            if existing is not None:
+                return existing
+        except FileNotFoundError:
+            pass  # No bitmap, we'll generate one
+
+        # Generate new bitmap
+        if progress:
+            progress(f"Generating bitmap for {self.name().decode('utf-8')}...\n")
+
+        pack_bitmap = generate_bitmap(
+            self.index,
+            object_store,
+            refs,
+            self.get_stored_checksum(),
+            commit_interval=commit_interval,
+            progress=progress,
+        )
+
+        # Write bitmap file
+        write_bitmap(self._bitmap_path, pack_bitmap)
+
+        if progress:
+            progress(f"Wrote {self._bitmap_path}\n")
+
+        # Update cached bitmap
+        self._bitmap = pack_bitmap
+
+        return pack_bitmap
+
     def close(self) -> None:
         """Close the pack file and index."""
         if self._data is not None:
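
A rough sketch of driving the new bitmap generation from application code (the
repository path is a placeholder; both methods shown are added by this patch):

    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")
    refs = repo.refs.as_dict()

    # Per pack, via the new Pack.ensure_bitmap() ...
    for pack in repo.object_store.packs:
        pack.ensure_bitmap(repo.object_store, refs, progress=print)

    # ... or for every pack at once, via PackBasedObjectStore.generate_pack_bitmaps()
    count = repo.object_store.generate_pack_bitmaps(refs, progress=print)
    print(f"{count} packs now have bitmap indexes")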

+ 6 - 1
dulwich/porcelain.py

@@ -4818,16 +4818,21 @@ def ls_remote(
     )
 
 
-def repack(repo: RepoPath) -> None:
+def repack(repo: RepoPath, write_bitmaps: bool = False) -> None:
     """Repack loose files in a repository.
 
     Currently this only packs loose objects.
 
     Args:
       repo: Path to the repository
+      write_bitmaps: Whether to write bitmap indexes for packs
     """
     with open_repo_closing(repo) as r:
         r.object_store.pack_loose_objects()
+        if write_bitmaps:
+            # Update pack cache to pick up newly created packs
+            r.object_store._update_pack_cache()
+            r.object_store.generate_pack_bitmaps(r.refs.as_dict())
 
 
 def pack_objects(
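
With the new keyword argument, the command-line flag and the porcelain call are
equivalent; a minimal invocation looks like this (the path is a placeholder):

    from dulwich import porcelain

    # Same effect as `dulwich repack --write-bitmap-index` handled by cmd_repack above
    porcelain.repack("/path/to/repo", write_bitmaps=True)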

+ 426 - 0
tests/test_bitmap.py

@@ -22,6 +22,7 @@
 """Tests for bitmap support."""
 """Tests for bitmap support."""
 
 
 import os
 import os
+import shutil
 import tempfile
 import tempfile
 import unittest
 import unittest
 from io import BytesIO
 from io import BytesIO
@@ -39,6 +40,7 @@ from dulwich.bitmap import (
     read_bitmap_file,
     write_bitmap_file,
 )
+from dulwich.object_store import BitmapReachability, GraphTraversalReachability
 
 
 class EWAHCompressionTests(unittest.TestCase):
@@ -903,3 +905,427 @@ class BitmapConfigTests(unittest.TestCase):
         config = ConfigFile()
         config.set((b"pack",), b"useBitmapIndex", b"false")
         self.assertFalse(config.get_boolean((b"pack",), b"useBitmapIndex", True))
+
+
+class ReachabilityProviderTests(unittest.TestCase):
+    """Tests for ObjectReachabilityProvider implementations."""
+
+    def setUp(self):
+        """Set up test repository with commits."""
+        from dulwich.object_store import DiskObjectStore
+        from dulwich.objects import Blob, Commit, Tree
+
+        self.test_dir = tempfile.mkdtemp()
+        self.store = DiskObjectStore(self.test_dir)
+
+        # Create a simple commit history:
+        # commit1 -> commit2 -> commit3
+        #         \-> commit4
+
+        # Create blob and tree
+        self.blob1 = Blob.from_string(b"test content 1")
+        self.store.add_object(self.blob1)
+
+        self.blob2 = Blob.from_string(b"test content 2")
+        self.store.add_object(self.blob2)
+
+        self.tree1 = Tree()
+        self.tree1[b"file1.txt"] = (0o100644, self.blob1.id)
+        self.store.add_object(self.tree1)
+
+        self.tree2 = Tree()
+        self.tree2[b"file1.txt"] = (0o100644, self.blob1.id)
+        self.tree2[b"file2.txt"] = (0o100644, self.blob2.id)
+        self.store.add_object(self.tree2)
+
+        # Create commit1 (root)
+        self.commit1 = Commit()
+        self.commit1.tree = self.tree1.id
+        self.commit1.message = b"First commit"
+        self.commit1.author = self.commit1.committer = b"Test <test@example.com>"
+        self.commit1.author_time = self.commit1.commit_time = 1234567890
+        self.commit1.author_timezone = self.commit1.commit_timezone = 0
+        self.store.add_object(self.commit1)
+
+        # Create commit2 (child of commit1)
+        self.commit2 = Commit()
+        self.commit2.tree = self.tree1.id
+        self.commit2.parents = [self.commit1.id]
+        self.commit2.message = b"Second commit"
+        self.commit2.author = self.commit2.committer = b"Test <test@example.com>"
+        self.commit2.author_time = self.commit2.commit_time = 1234567891
+        self.commit2.author_timezone = self.commit2.commit_timezone = 0
+        self.store.add_object(self.commit2)
+
+        # Create commit3 (child of commit2)
+        self.commit3 = Commit()
+        self.commit3.tree = self.tree2.id
+        self.commit3.parents = [self.commit2.id]
+        self.commit3.message = b"Third commit"
+        self.commit3.author = self.commit3.committer = b"Test <test@example.com>"
+        self.commit3.author_time = self.commit3.commit_time = 1234567892
+        self.commit3.author_timezone = self.commit3.commit_timezone = 0
+        self.store.add_object(self.commit3)
+
+        # Create commit4 (child of commit1, creates a branch)
+        self.commit4 = Commit()
+        self.commit4.tree = self.tree2.id
+        self.commit4.parents = [self.commit1.id]
+        self.commit4.message = b"Fourth commit"
+        self.commit4.author = self.commit4.committer = b"Test <test@example.com>"
+        self.commit4.author_time = self.commit4.commit_time = 1234567893
+        self.commit4.author_timezone = self.commit4.commit_timezone = 0
+        self.store.add_object(self.commit4)
+
+    def tearDown(self):
+        """Clean up test directory."""
+        import shutil
+
+        # Close store to release file handles on Windows
+        self.store.close()
+        shutil.rmtree(self.test_dir)
+
+    def test_graph_traversal_reachability_single_commit(self):
+        """Test GraphTraversalReachability with single commit."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get reachable commits from commit1
+        reachable = provider.get_reachable_commits(
+            [self.commit1.id], exclude=None, shallow=None
+        )
+
+        # Should only include commit1
+        self.assertEqual({self.commit1.id}, reachable)
+
+    def test_graph_traversal_reachability_linear_history(self):
+        """Test GraphTraversalReachability with linear history."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get reachable commits from commit3
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=None, shallow=None
+        )
+
+        # Should include commit3, commit2, and commit1
+        expected = {self.commit1.id, self.commit2.id, self.commit3.id}
+        self.assertEqual(expected, reachable)
+
+    def test_graph_traversal_reachability_with_exclusion(self):
+        """Test GraphTraversalReachability with exclusion."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get commits reachable from commit3 but not from commit1
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=[self.commit1.id], shallow=None
+        )
+
+        # Should include commit3 and commit2, but not commit1
+        expected = {self.commit2.id, self.commit3.id}
+        self.assertEqual(expected, reachable)
+
+    def test_graph_traversal_reachability_branching(self):
+        """Test GraphTraversalReachability with branching history."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get reachable commits from both commit3 and commit4
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id, self.commit4.id], exclude=None, shallow=None
+        )
+
+        # Should include all commits
+        expected = {self.commit1.id, self.commit2.id, self.commit3.id, self.commit4.id}
+        self.assertEqual(expected, reachable)
+
+    def test_graph_traversal_reachable_objects(self):
+        """Test GraphTraversalReachability.get_reachable_objects()."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get all objects reachable from commit3
+        reachable = provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=None
+        )
+
+        # Should include commit3, blob1, and blob2 (but not tree objects themselves)
+        self.assertIn(self.commit3.id, reachable)
+        self.assertIn(self.blob1.id, reachable)
+        self.assertIn(self.blob2.id, reachable)
+        # Verify at least 3 objects
+        self.assertGreaterEqual(len(reachable), 3)
+
+    def test_graph_traversal_reachable_objects_with_exclusion(self):
+        """Test GraphTraversalReachability.get_reachable_objects() with exclusion."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = GraphTraversalReachability(self.store)
+
+        # Get objects reachable from commit3 but not from commit2
+        reachable = provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=[self.commit2.id]
+        )
+
+        # commit2 uses tree1 (which has blob1), commit3 uses tree2 (which has blob1 + blob2)
+        # So should include commit3 and blob2 (new in commit3)
+        # blob1 should be excluded because it's in tree1 (reachable from commit2)
+        self.assertIn(self.commit3.id, reachable)
+        self.assertIn(self.blob2.id, reachable)
+
+    def test_get_reachability_provider_without_bitmaps(self):
+        """Test get_reachability_provider returns GraphTraversalReachability when no bitmaps."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = self.store.get_reachability_provider()
+
+        # Should return GraphTraversalReachability when no bitmaps available
+        self.assertIsInstance(provider, GraphTraversalReachability)
+
+    def test_get_reachability_provider_prefer_bitmaps_false(self):
+        """Test get_reachability_provider with prefer_bitmaps=False."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        provider = self.store.get_reachability_provider(prefer_bitmaps=False)
+
+        # Should return GraphTraversalReachability when prefer_bitmaps=False
+        self.assertIsInstance(provider, GraphTraversalReachability)
+
+    def test_bitmap_reachability_fallback_without_bitmaps(self):
+        """Test BitmapReachability falls back to graph traversal without bitmaps."""
+        provider = BitmapReachability(self.store)
+
+        # Without bitmaps, should fall back to graph traversal
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=None, shallow=None
+        )
+
+        # Should still work via fallback
+        expected = {self.commit1.id, self.commit2.id, self.commit3.id}
+        self.assertEqual(expected, reachable)
+
+    def test_bitmap_reachability_fallback_with_shallow(self):
+        """Test BitmapReachability falls back for shallow clones."""
+        provider = BitmapReachability(self.store)
+
+        # With shallow boundary, should fall back to graph traversal
+        reachable = provider.get_reachable_commits(
+            [self.commit3.id], exclude=None, shallow={self.commit2.id}
+        )
+
+        # Should include commit3 and commit2 (shallow boundary includes boundary commit)
+        # but not commit1 (beyond shallow boundary)
+        self.assertEqual({self.commit2.id, self.commit3.id}, reachable)
+
+    def test_reachability_provider_protocol(self):
+        """Test that both providers implement the same interface."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        graph_provider = GraphTraversalReachability(self.store)
+        bitmap_provider = BitmapReachability(self.store)
+
+        # Both should have the same methods
+        for method in [
+            "get_reachable_commits",
+            "get_reachable_objects",
+            "get_tree_objects",
+        ]:
+            self.assertTrue(hasattr(graph_provider, method))
+            self.assertTrue(hasattr(bitmap_provider, method))
+
+    def test_graph_traversal_vs_bitmap_consistency(self):
+        """Test that GraphTraversalReachability and BitmapReachability produce same results."""
+        from dulwich.object_store import GraphTraversalReachability
+
+        graph_provider = GraphTraversalReachability(self.store)
+        bitmap_provider = BitmapReachability(self.store)  # Will use fallback
+
+        # Test get_reachable_commits
+        graph_commits = graph_provider.get_reachable_commits(
+            [self.commit3.id], exclude=[self.commit1.id], shallow=None
+        )
+        bitmap_commits = bitmap_provider.get_reachable_commits(
+            [self.commit3.id], exclude=[self.commit1.id], shallow=None
+        )
+        self.assertEqual(graph_commits, bitmap_commits)
+
+        # Test get_reachable_objects
+        graph_objects = graph_provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=None
+        )
+        bitmap_objects = bitmap_provider.get_reachable_objects(
+            [self.commit3.id], exclude_commits=None
+        )
+        self.assertEqual(graph_objects, bitmap_objects)
+
+
+class PackEnsureBitmapTests(unittest.TestCase):
+    """Tests for Pack.ensure_bitmap() method."""
+
+    def setUp(self):
+        """Set up test repository with a pack."""
+        from dulwich.object_store import DiskObjectStore
+        from dulwich.objects import Blob, Commit, Tree
+
+        self.temp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.temp_dir)
+
+        # Create pack directory
+        os.makedirs(os.path.join(self.temp_dir, "pack"))
+
+        self.store = DiskObjectStore(self.temp_dir)
+        # Close store before cleanup to release file handles on Windows
+        self.addCleanup(self.store.close)
+
+        # Create test objects
+        self.blob = Blob.from_string(b"test content")
+        self.store.add_object(self.blob)
+
+        self.tree = Tree()
+        self.tree.add(b"file.txt", 0o100644, self.blob.id)
+        self.store.add_object(self.tree)
+
+        self.commit = Commit()
+        self.commit.tree = self.tree.id
+        self.commit.author = self.commit.committer = b"Test <test@example.com>"
+        self.commit.author_time = self.commit.commit_time = 1234567890
+        self.commit.author_timezone = self.commit.commit_timezone = 0
+        self.commit.message = b"Test commit"
+        self.store.add_object(self.commit)
+
+        # Repack to create a pack
+        self.store.repack()
+        self.pack = self.store.packs[0]
+
+    def test_ensure_bitmap_creates_bitmap(self):
+        """Test that ensure_bitmap creates a bitmap file."""
+        # Initially no bitmap
+        self.assertFalse(os.path.exists(self.pack._bitmap_path))
+
+        # Ensure bitmap with commit_interval=1 to ensure our single commit is selected
+        refs = {b"refs/heads/master": self.commit.id}
+        bitmap = self.pack.ensure_bitmap(self.store, refs, commit_interval=1)
+
+        # Bitmap should now exist
+        self.assertIsNotNone(bitmap)
+        self.assertTrue(os.path.exists(self.pack._bitmap_path))
+        # Verify it's a PackBitmap instance
+        from dulwich.bitmap import PackBitmap
+
+        self.assertIsInstance(bitmap, PackBitmap)
+
+    def test_ensure_bitmap_returns_existing(self):
+        """Test that ensure_bitmap returns existing bitmap without regenerating."""
+        refs = {b"refs/heads/master": self.commit.id}
+
+        # Create bitmap with commit_interval=1
+        self.pack.ensure_bitmap(self.store, refs, commit_interval=1)
+        mtime1 = os.path.getmtime(self.pack._bitmap_path)
+
+        # Ensure again - should return existing
+        import time
+
+        time.sleep(0.01)  # Ensure time difference
+        self.pack.ensure_bitmap(self.store, refs, commit_interval=1)
+        mtime2 = os.path.getmtime(self.pack._bitmap_path)
+
+        # File should not have been regenerated
+        self.assertEqual(mtime1, mtime2)
+
+    def test_ensure_bitmap_with_custom_interval(self):
+        """Test ensure_bitmap with custom commit_interval."""
+        refs = {b"refs/heads/master": self.commit.id}
+        bitmap = self.pack.ensure_bitmap(self.store, refs, commit_interval=50)
+        self.assertIsNotNone(bitmap)
+
+
+class GeneratePackBitmapsTests(unittest.TestCase):
+    """Tests for PackBasedObjectStore.generate_pack_bitmaps()."""
+
+    def setUp(self):
+        """Set up test repository."""
+        from dulwich.object_store import DiskObjectStore
+        from dulwich.objects import Blob, Commit, Tree
+
+        self.temp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.temp_dir)
+
+        # Create pack directory
+        os.makedirs(os.path.join(self.temp_dir, "pack"))
+
+        self.store = DiskObjectStore(self.temp_dir)
+        # Close store before cleanup to release file handles on Windows
+        self.addCleanup(self.store.close)
+
+        # Create multiple commits
+        self.commits = []
+        for i in range(3):
+            blob = Blob.from_string(f"content {i}".encode())
+            self.store.add_object(blob)
+
+            tree = Tree()
+            tree.add(f"file{i}.txt".encode(), 0o100644, blob.id)
+            self.store.add_object(tree)
+
+            commit = Commit()
+            commit.tree = tree.id
+            if i > 0:
+                commit.parents = [self.commits[-1].id]
+            commit.author = commit.committer = b"Test <test@example.com>"
+            commit.author_time = commit.commit_time = 1234567890 + i
+            commit.author_timezone = commit.commit_timezone = 0
+            commit.message = f"Commit {i}".encode()
+            self.store.add_object(commit)
+            self.commits.append(commit)
+
+        # Repack to create pack
+        self.store.repack()
+
+    def test_generate_pack_bitmaps(self):
+        """Test generating bitmaps for all packs."""
+        refs = {b"refs/heads/master": self.commits[-1].id}
+
+        # Initially no bitmaps
+        for pack in self.store.packs:
+            self.assertFalse(os.path.exists(pack._bitmap_path))
+
+        # Generate bitmaps
+        count = self.store.generate_pack_bitmaps(refs)
+
+        # Should have generated bitmaps
+        self.assertEqual(count, len(self.store.packs))
+        for pack in self.store.packs:
+            self.assertTrue(os.path.exists(pack._bitmap_path))
+
+    def test_generate_pack_bitmaps_multiple_calls(self):
+        """Test that calling generate_pack_bitmaps multiple times is safe."""
+        refs = {b"refs/heads/master": self.commits[-1].id}
+
+        # Generate once
+        self.store.generate_pack_bitmaps(refs)
+        mtimes1 = [os.path.getmtime(p._bitmap_path) for p in self.store.packs]
+
+        # Generate again
+        import time
+
+        time.sleep(0.01)
+        self.store.generate_pack_bitmaps(refs)
+        mtimes2 = [os.path.getmtime(p._bitmap_path) for p in self.store.packs]
+
+        # Should not regenerate existing bitmaps
+        self.assertEqual(mtimes1, mtimes2)
+
+    def test_generate_pack_bitmaps_with_progress(self):
+        """Test generate_pack_bitmaps with progress callback."""
+        refs = {b"refs/heads/master": self.commits[-1].id}
+        messages = []
+
+        def progress(msg):
+            messages.append(msg)
+
+        self.store.generate_pack_bitmaps(refs, progress=progress)
+
+        # Should have received progress messages
+        self.assertGreater(len(messages), 0)

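The object-store tests above pin down the reachability API that the bitmap support plugs into. As a rough usage sketch, mirroring the setUp of those tests (the temp-dir store, file name, and author values are placeholders; the get_reachability_provider / get_reachable_* signatures are the ones exercised above, not independently documented):

    import os
    import tempfile

    from dulwich.object_store import DiskObjectStore
    from dulwich.objects import Blob, Commit, Tree

    path = tempfile.mkdtemp()
    os.makedirs(os.path.join(path, "pack"))
    store = DiskObjectStore(path)

    # A one-commit history: blob -> tree -> commit.
    blob = Blob.from_string(b"content")
    store.add_object(blob)
    tree = Tree()
    tree.add(b"file.txt", 0o100644, blob.id)
    store.add_object(tree)
    commit = Commit()
    commit.tree = tree.id
    commit.author = commit.committer = b"Example <example@example.com>"
    commit.author_time = commit.commit_time = 1234567890
    commit.author_timezone = commit.commit_timezone = 0
    commit.message = b"example"
    store.add_object(commit)

    # GraphTraversalReachability here (no bitmaps yet); a bitmap-backed
    # provider is expected once .bitmap files exist.
    provider = store.get_reachability_provider()
    print(provider.get_reachable_commits([commit.id], exclude=None, shallow=None))
    print(provider.get_reachable_objects([commit.id], exclude_commits=None))
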
+ 16 - 0
tests/test_cli.py

@@ -2550,6 +2550,22 @@ class RepackCommandTest(DulwichCliTestCase):
         pack_dir = os.path.join(self.repo_path, ".git", "objects", "pack")
         self.assertTrue(any(f.endswith(".pack") for f in os.listdir(pack_dir)))
 
+    def test_repack_write_bitmap_index(self):
+        """Test repack with --write-bitmap-index flag."""
+        # Create some objects
+        for i in range(5):
+            test_file = os.path.join(self.repo_path, f"test{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"content {i}")
+            self._run_cli("add", f"test{i}.txt")
+            self._run_cli("commit", f"--message=Commit {i}")
+
+        _result, _stdout, _stderr = self._run_cli("repack", "--write-bitmap-index")
+        # Should create pack and bitmap files
+        pack_dir = os.path.join(self.repo_path, ".git", "objects", "pack")
+        self.assertTrue(any(f.endswith(".pack") for f in os.listdir(pack_dir)))
+        self.assertTrue(any(f.endswith(".bitmap") for f in os.listdir(pack_dir)))
+
 
 class ResetCommandTest(DulwichCliTestCase):
     """Tests for reset command."""
     """Tests for reset command."""

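Behind the new --write-bitmap-index flag sits the store- and pack-level machinery exercised in tests/test_bitmap.py above. A minimal sketch of driving it directly, continuing from the snippet after that hunk (the ref name is a placeholder, and whether the CLI routes through exactly these calls is an assumption; the signatures come from the tests):

    # Pack the loose objects so there is something to index.
    store.repack()

    refs = {b"refs/heads/master": commit.id}

    # Writes a .bitmap next to every pack that lacks one; returns how many
    # bitmaps were generated and reports progress through the callback.
    count = store.generate_pack_bitmaps(refs, progress=print)

    # Or target a single pack; commit_interval controls how many commits get
    # their own bitmap (1 forces even a one-commit history to be covered).
    bitmap = store.packs[0].ensure_bitmap(store, refs, commit_interval=1)
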
+ 23 - 0
tests/test_porcelain.py

@@ -7885,6 +7885,29 @@ class RepackTests(PorcelainTestCase):
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.repack(self.repo)
 
+    def test_write_bitmaps(self) -> None:
+        """Test that write_bitmaps generates bitmap files."""
+        # Create some content
+        handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
+        os.close(handle)
+        with open(fullpath, "w") as f:
+            f.write("test content")
+        porcelain.add(repo=self.repo.path, paths=fullpath)
+        porcelain.commit(
+            repo=self.repo.path,
+            message=b"test commit",
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+        )
+
+        # Repack with bitmaps
+        porcelain.repack(self.repo, write_bitmaps=True)
+
+        # Check that bitmap files were created
+        pack_dir = os.path.join(self.repo.path, ".git", "objects", "pack")
+        bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
+        self.assertGreater(len(bitmap_files), 0)
+
 
 class LsTreeTests(PorcelainTestCase):
     def test_empty(self) -> None:
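At the porcelain level the whole feature reduces to the call covered by the test above; outside the test suite that looks roughly like this (the repository path is a placeholder), with the repack --write-bitmap-index invocation from tests/test_cli.py as the command-line equivalent:

    import os

    from dulwich import porcelain

    path = "/path/to/repo"  # placeholder: an existing repository with some commits
    porcelain.repack(path, write_bitmaps=True)

    pack_dir = os.path.join(path, ".git", "objects", "pack")
    print([f for f in os.listdir(pack_dir) if f.endswith(".bitmap")])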