Просмотр исходного кода

Implement bitmap-accelerated reachability queries

Add actual bitmap acceleration to BitmapReachability, using bitmap indexes
to speed up object reachability computations. Falls back gracefully to graph
traversal when bitmaps don't cover the requested commits or when commits span
multiple packs.
Jelmer Vernooij 2 месяцев назад
Родитель
Коммит
f8d7db7a5f
2 изменённых файла: 216 добавлено и 8 удалено
  1. 72 2
      dulwich/bitmap.py
  2. 144 6
      dulwich/object_store.py

+ 72 - 2
dulwich/bitmap.py

@@ -31,7 +31,7 @@ for efficient storage and fast bitwise operations.
 import os
 import struct
 from collections import deque
-from collections.abc import Callable, Iterator
+from collections.abc import Callable, Iterable, Iterator
 from io import BytesIO
 from typing import IO, TYPE_CHECKING, Optional
 
@@ -40,7 +40,7 @@ from .objects import Blob, Commit, Tag, Tree
 
 if TYPE_CHECKING:
     from .object_store import BaseObjectStore
-    from .pack import PackIndex
+    from .pack import Pack, PackIndex
 
 # Bitmap file signature
 BITMAP_SIGNATURE = b"BITM"
@@ -1119,3 +1119,73 @@ def generate_bitmap(
         progress("Bitmap generation complete")
 
     return pack_bitmap
+
+
+def find_commit_bitmaps(
+    commit_shas: set[bytes], packs: Iterable[Pack]
+) -> dict[bytes, tuple]:
+    """Find which packs have bitmaps for the given commits.
+
+    Args:
+        commit_shas: Set of commit SHAs to look for (not modified)
+        packs: Iterable of Pack objects to search, in order; first match wins
+
+    Returns:
+        Dict mapping each found SHA to a (pack, pack_bitmap, sha_to_pos) tuple
+    """
+    result = {}
+    remaining = set(commit_shas)
+
+    for pack in packs:
+        if not remaining:
+            break
+
+        try:
+            pack_bitmap = pack.bitmap
+            if not pack_bitmap:
+                continue
+
+            # Map each SHA to its position in this pack's index iteration order
+            sha_to_pos = {}
+            for pos, (sha, _offset, _crc32) in enumerate(pack.index.iterentries()):
+                sha_to_pos[sha] = pos
+
+            # Iterate over a copy because matches are removed from `remaining`
+            for commit_sha in list(remaining):
+                if pack_bitmap.has_commit(commit_sha):
+                    if commit_sha in sha_to_pos:
+                        result[commit_sha] = (pack, pack_bitmap, sha_to_pos)
+                        remaining.remove(commit_sha)
+
+        except (FileNotFoundError, ValueError, AttributeError):
+            # No bitmap or corrupt, skip this pack
+            continue
+
+    return result
+
+
+def bitmap_to_object_shas(
+    bitmap: EWAHBitmap,
+    pack_index: "PackIndex",
+    type_filter: EWAHBitmap | None = None,
+) -> set[bytes]:
+    """Convert a bitmap to a set of object SHAs.
+
+    Args:
+        bitmap: The EWAH bitmap with set bits for objects
+        pack_index: Pack index; its iterentries() order supplies the positions
+        type_filter: Optional bitmap; when given, a position must be in it too
+
+    Returns:
+        Set of SHAs whose position is in bitmap (and type_filter, if given)
+    """
+    result = set()
+
+    for pos, (sha, _offset, _crc32) in enumerate(pack_index.iterentries()):
+        # Treat the entry's enumeration position as its bit position
+        if pos in bitmap:
+            # Apply type filter if provided
+            if type_filter is None or pos in type_filter:
+                result.add(sha)
+
+    return result

+ 144 - 6
dulwich/object_store.py

@@ -360,7 +360,9 @@ class BaseObjectStore:
         """
         raise NotImplementedError(self.add_objects)
 
-    def get_reachability_provider(self, prefer_bitmap: bool = True) -> ObjectReachabilityProvider:
+    def get_reachability_provider(
+        self, prefer_bitmap: bool = True
+    ) -> ObjectReachabilityProvider:
         """Get a reachability provider for this object store.
 
         Returns an ObjectReachabilityProvider that can efficiently compute
@@ -3235,7 +3237,6 @@ class GraphTraversalReachability:
         return result
 
 
-
 class BitmapReachability:
     """Bitmap-accelerated implementation of ObjectReachabilityProvider.
 
@@ -3270,8 +3271,82 @@ class BitmapReachability:
         Returns:
           Set of commit SHAs reachable from heads but not from exclude
         """
-        # TODO: Implement bitmap-accelerated version
-        # For now, fall back to graph traversal
+        from .bitmap import bitmap_to_object_shas, find_commit_bitmaps
+
+        heads_set = set(heads)
+        exclude_set = set(exclude) if exclude else set()
+
+        # If shallow is specified, fall back to graph traversal
+        # (bitmaps don't support shallow boundaries well)
+        if shallow:
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        # Try to find bitmaps for the heads
+        head_bitmaps = find_commit_bitmaps(heads_set, self.store.packs)
+
+        # If we can't find bitmaps for all heads, fall back
+        if len(head_bitmaps) < len(heads_set):
+            return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        # Combine bitmaps for all heads using OR
+        combined_bitmap = None
+        result_pack = None
+
+        for commit_sha in heads_set:
+            pack, pack_bitmap, _sha_to_pos = head_bitmaps[commit_sha]
+            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+            if commit_bitmap is None:
+                # Bitmap not found, fall back
+                return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+            if combined_bitmap is None:
+                combined_bitmap = commit_bitmap
+                result_pack = pack
+            elif pack == result_pack:
+                # Same pack, can OR directly
+                combined_bitmap = combined_bitmap | commit_bitmap
+            else:
+                # Different packs, fall back to traversal
+                return self._fallback.get_reachable_commits(heads, exclude, shallow)
+
+        # Handle exclusions if provided
+        if exclude_set and result_pack:
+            exclude_bitmaps = find_commit_bitmaps(exclude_set, [result_pack])
+
+            if len(exclude_bitmaps) == len(exclude_set):
+                # All excludes have bitmaps, compute exclusion
+                exclude_combined = None
+
+                for commit_sha in exclude_set:
+                    pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
+                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+                    if exclude_bitmap is None:
+                        break
+
+                    if exclude_combined is None:
+                        exclude_combined = exclude_bitmap
+                    else:
+                        exclude_combined = exclude_combined | exclude_bitmap
+
+                # Subtract excludes: combined & ~exclude
+                if exclude_combined:
+                    # XOR keeps the bits set in exactly one of the two bitmaps;
+                    # ANDing with combined_bitmap then drops exclude-only bits
+                    combined_bitmap = combined_bitmap & (
+                        combined_bitmap ^ exclude_combined
+                    )
+
+        # Convert bitmap to commit SHAs
+        if combined_bitmap and result_pack:
+            # Filter for commits only using the commit type bitmap
+            commit_type_filter = result_pack.bitmap.commit_bitmap
+            return bitmap_to_object_shas(
+                combined_bitmap, result_pack.index, commit_type_filter
+            )
+
+        # Fallback if anything went wrong
         return self._fallback.get_reachable_commits(heads, exclude, shallow)
 
     def get_tree_objects(
@@ -3303,6 +3378,69 @@ class BitmapReachability:
         Returns:
           Set of all object SHAs (commits, trees, blobs)
         """
-        # TODO: Implement bitmap-accelerated version
-        # For now, fall back to graph traversal
+        from .bitmap import bitmap_to_object_shas, find_commit_bitmaps
+
+        commits_set = set(commits)
+        exclude_set = set(exclude_commits) if exclude_commits else set()
+
+        # Try to find bitmaps for the commits
+        commit_bitmaps = find_commit_bitmaps(commits_set, self.store.packs)
+
+        # If we can't find bitmaps for all commits, fall back
+        if len(commit_bitmaps) < len(commits_set):
+            return self._fallback.get_reachable_objects(commits, exclude_commits)
+
+        # Combine bitmaps for all commits using OR
+        combined_bitmap = None
+        result_pack = None
+
+        for commit_sha in commits_set:
+            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
+            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+            if commit_bitmap is None:
+                # Bitmap not found, fall back
+                return self._fallback.get_reachable_objects(commits, exclude_commits)
+
+            if combined_bitmap is None:
+                combined_bitmap = commit_bitmap
+                result_pack = pack
+            elif pack == result_pack:
+                # Same pack, can OR directly
+                combined_bitmap = combined_bitmap | commit_bitmap
+            else:
+                # Different packs, fall back to traversal
+                return self._fallback.get_reachable_objects(commits, exclude_commits)
+
+        # Handle exclusions if provided
+        if exclude_set and result_pack:
+            exclude_bitmaps = find_commit_bitmaps(exclude_set, [result_pack])
+
+            if len(exclude_bitmaps) == len(exclude_set):
+                # All excludes have bitmaps, compute exclusion
+                exclude_combined = None
+
+                for commit_sha in exclude_set:
+                    pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
+                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
+
+                    if exclude_bitmap is None:
+                        break
+
+                    if exclude_combined is None:
+                        exclude_combined = exclude_bitmap
+                    else:
+                        exclude_combined = exclude_combined | exclude_bitmap
+
+                # Subtract excludes: combined & ~exclude
+                if exclude_combined:
+                    combined_bitmap = combined_bitmap & (
+                        combined_bitmap ^ exclude_combined
+                    )
+
+        # Convert bitmap to all object SHAs (no type filter)
+        if combined_bitmap and result_pack:
+            return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)
+
+        # Fallback if anything went wrong
         return self._fallback.get_reachable_objects(commits, exclude_commits)