Jelmer Vernooij 2 месяца назад
Родитель
Коммит
d085602b29
7 изменённых файлов: 1935 добавлено и 3 удалено
  1. 3 3
      Cargo.lock
  2. 737 0
      dulwich/bitmap.py
  3. 20 0
      dulwich/pack.py
  4. 1 0
      tests/__init__.py
  5. 1 0
      tests/compat/__init__.py
  6. 268 0
      tests/compat/test_bitmap.py
  7. 905 0
      tests/test_bitmap.py

+ 3 - 3
Cargo.lock

@@ -10,7 +10,7 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
 name = "diff-tree-py"
-version = "0.24.6"
+version = "0.24.7"
 dependencies = [
  "pyo3",
 ]
@@ -50,7 +50,7 @@ dependencies = [
 
 [[package]]
 name = "objects-py"
-version = "0.24.6"
+version = "0.24.7"
 dependencies = [
  "memchr",
  "pyo3",
@@ -64,7 +64,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 
 [[package]]
 name = "pack-py"
-version = "0.24.6"
+version = "0.24.7"
 dependencies = [
  "memchr",
  "pyo3",

+ 737 - 0
dulwich/bitmap.py

@@ -0,0 +1,737 @@
+# bitmap.py -- Packfile bitmap support for git
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Support for Git packfile bitmaps.
+
+Bitmaps store reachability information for packfiles, enabling faster
+object counting and enumeration operations without full graph traversal.
+
+The bitmap format uses EWAH (Enhanced Word-Aligned Hybrid) compression
+for efficient storage and fast bitwise operations.
+"""
+
+import os
+import struct
+from collections.abc import Iterator
+from io import BytesIO
+from typing import IO, TYPE_CHECKING, Optional, Union
+
+from .file import GitFile
+
+if TYPE_CHECKING:
+    from .pack import PackIndex
+
+# Bitmap file signature
+BITMAP_SIGNATURE = b"BITM"
+
+# Bitmap format version
+BITMAP_VERSION = 1
+
+# Bitmap flags
+BITMAP_OPT_FULL_DAG = 0x1  # Full closure
+BITMAP_OPT_HASH_CACHE = 0x4  # Name-hash cache
+BITMAP_OPT_LOOKUP_TABLE = 0x10  # Lookup table for random access
+BITMAP_OPT_PSEUDO_MERGES = 0x20  # Pseudo-merge bitmaps
+
+
+def _encode_ewah_words(words: list[int]) -> list[int]:
+    """Run-length compress 64-bit words into an EWAH word stream.
+
+    The input is cut into chunks. Each chunk is an optional run of identical
+    all-zero or all-one words followed by the literal (mixed) words that come
+    directly after it. A chunk is emitted as one Run Length Word (RLW)
+    followed by its literal words verbatim.
+
+    Args:
+        words: Uncompressed 64-bit words to encode
+
+    Returns:
+        Compressed words: each RLW immediately followed by its literal words
+    """
+    all_ones = 0xFFFFFFFFFFFFFFFF
+    max_literals = 0x7FFFFFFF  # the literal count field of an RLW is 31 bits
+    total = len(words)
+    out: list[int] = []
+    pos = 0
+
+    while pos < total:
+        # Optional leading run: consecutive copies of the same fill word.
+        fill = words[pos]
+        run_words = 0
+        if fill == 0 or fill == all_ones:
+            while pos < total and words[pos] == fill:
+                run_words += 1
+                pos += 1
+
+        # Literal words after the run, up to the next fill word or the cap.
+        start = pos
+        while (
+            pos < total
+            and pos - start < max_literals
+            and words[pos] != 0
+            and words[pos] != all_ones
+        ):
+            pos += 1
+        chunk = words[start:pos]
+
+        # RLW bit layout:
+        # [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
+        # When the chunk has no run, `fill` is a literal word, so the running
+        # bit and the run length are both zero.
+        fill_bit = 1 if fill == all_ones else 0
+        out.append((len(chunk) << 33) | (run_words << 1) | fill_bit)
+        out.extend(chunk)
+
+    return out
+
+
+class EWAHBitmap:
+    """Bitmap stored in EWAH (Enhanced Word-Aligned Hybrid) compressed form.
+
+    In memory the bitmap is simply the set of set bit positions; the EWAH
+    encoding is only used when reading or writing serialized data.
+
+    Serialized layout (all integers big-endian):
+    - Uncompressed bit count (4 bytes)
+    - Compressed word count (4 bytes)
+    - Compressed words (8 bytes each)
+    - Current RLW position (4 bytes)
+
+    The compressed words form chunks, each a Run Length Word (RLW) followed
+    by literal words. RLW 64-bit layout (LSB to MSB):
+    - Bit 0: running_bit - value of the repeated words (0 or 1)
+    - Bits 1-32: running_len - count of repeated words
+    - Bits 33-63: literal_words - count of literal words following this RLW
+    """
+
+    def __init__(self, data: Optional[bytes] = None) -> None:
+        """Create a bitmap, empty or decoded from serialized EWAH data.
+
+        Args:
+            data: Optional serialized EWAH data; None or empty data gives an
+                empty bitmap
+        """
+        # Positions of the bits that are set.
+        self.bits: set[int] = set()
+        # Number of bits in the uncompressed bitmap.
+        self.bit_count = 0
+
+        if data:
+            self._decode(data)
+
+    def _decode(self, data: bytes) -> None:
+        """Populate this bitmap from serialized EWAH data.
+
+        Data too short to hold the 8-byte header is ignored. If the data ends
+        before all declared words are present, only the complete words that
+        are available get decoded.
+
+        Args:
+            data: Serialized bitmap (header + compressed words + RLW position)
+        """
+        header_size = 8
+        if len(data) < header_size:
+            return
+
+        declared_bits, declared_words = struct.unpack_from(">II", data, 0)
+        self.bit_count = declared_bits
+
+        # Unpack every complete word in one go; a truncated tail is dropped.
+        available_words = (len(data) - header_size) // 8
+        word_total = min(declared_words, available_words)
+        if word_total:
+            words = struct.unpack_from(f">{word_total}Q", data, header_size)
+        else:
+            words = ()
+
+        cursor = 0  # bit position of the next word to be produced
+        idx = 0
+        while idx < word_total:
+            # Each chunk starts with an RLW:
+            # [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
+            rlw = words[idx]
+            idx += 1
+            fill_is_one = rlw & 1
+            run_bits = ((rlw >> 1) & 0xFFFFFFFF) * 64
+            literal_total = rlw >> 33
+
+            # A run of all-one words sets every bit it spans; a run of zeros
+            # only moves the cursor forward.
+            if fill_is_one:
+                self.bits.update(range(cursor, cursor + run_bits))
+            cursor += run_bits
+
+            # The slice shrinks by itself if fewer literals remain than the
+            # RLW announced.
+            literals = words[idx : idx + literal_total]
+            idx += len(literals)
+            for literal in literals:
+                remaining = literal
+                while remaining:
+                    lowest = remaining & -remaining  # isolate lowest set bit
+                    self.bits.add(cursor + lowest.bit_length() - 1)
+                    remaining ^= lowest
+                cursor += 64
+
+        # The trailing RLW position is part of the format but is not needed
+        # for decoding, so it is left unread.
+
+    def encode(self) -> bytes:
+        """Serialize the bitmap to EWAH compressed format.
+
+        The bit count written is the highest set bit position plus one.
+
+        Returns:
+            Serialized data: header, compressed words and RLW position
+        """
+        if not self.bits:
+            # Empty bitmap: bit_count=0, word_count=0, rlw_pos=0
+            return struct.pack(">III", 0, 0, 0)
+
+        bit_count = max(self.bits) + 1
+
+        # Expand the set of positions into uncompressed 64-bit words.
+        words = [0] * ((bit_count + 63) // 64)
+        for bit in self.bits:
+            word_idx, offset = divmod(bit, 64)
+            words[word_idx] |= 1 << offset
+
+        compressed = _encode_ewah_words(words)
+
+        parts = [struct.pack(">II", bit_count, len(compressed))]
+        parts.extend(struct.pack(">Q", word) for word in compressed)
+        # RLW position: the position of the last RLW is not tracked during
+        # encoding, so 0 is written.
+        parts.append(struct.pack(">I", 0))
+        return b"".join(parts)
+
+    def __contains__(self, bit: int) -> bool:
+        """Tell whether a bit is set.
+
+        Args:
+            bit: Bit position to check
+
+        Returns:
+            True if the bit is set, False otherwise
+        """
+        return bit in self.bits
+
+    def __len__(self) -> int:
+        """Count the set bits.
+
+        Returns:
+            Number of set bits (not the size of the bitmap)
+        """
+        return len(self.bits)
+
+    def _derive(self, other: "EWAHBitmap", bits: set[int]) -> "EWAHBitmap":
+        """Wrap the result of a set operation between two bitmaps.
+
+        Args:
+            other: Right-hand operand of the operation
+            bits: Set bit positions of the result
+
+        Returns:
+            New bitmap holding ``bits``, sized like the larger operand
+        """
+        derived = EWAHBitmap()
+        derived.bits = bits
+        derived.bit_count = max(self.bit_count, other.bit_count)
+        return derived
+
+    def __or__(self, other: "EWAHBitmap") -> "EWAHBitmap":
+        """Bitwise OR: bits set in either bitmap.
+
+        Args:
+            other: Bitmap to OR with
+
+        Returns:
+            New bitmap with the OR result
+        """
+        return self._derive(other, self.bits | other.bits)
+
+    def __and__(self, other: "EWAHBitmap") -> "EWAHBitmap":
+        """Bitwise AND: bits set in both bitmaps.
+
+        Args:
+            other: Bitmap to AND with
+
+        Returns:
+            New bitmap with the AND result
+        """
+        return self._derive(other, self.bits & other.bits)
+
+    def __xor__(self, other: "EWAHBitmap") -> "EWAHBitmap":
+        """Bitwise XOR: bits set in exactly one of the bitmaps.
+
+        Args:
+            other: Bitmap to XOR with
+
+        Returns:
+            New bitmap with the XOR result
+        """
+        return self._derive(other, self.bits ^ other.bits)
+
+    def add(self, bit: int) -> None:
+        """Set a bit, growing the bit count if the bit lies beyond it.
+
+        Args:
+            bit: Bit position to set
+        """
+        self.bits.add(bit)
+        self.bit_count = max(self.bit_count, bit + 1)
+
+
+class BitmapEntry:
+    """One per-commit record of a pack bitmap index."""
+
+    def __init__(
+        self,
+        object_pos: int,
+        xor_offset: int,
+        flags: int,
+        bitmap: EWAHBitmap,
+    ) -> None:
+        """Store the fields of a bitmap entry.
+
+        Args:
+            object_pos: Position of the object in the pack index
+            xor_offset: XOR offset for compression (0-160); 0 means the bitmap
+                is stored as-is
+            flags: Entry flags
+            bitmap: The EWAH bitmap data as stored, before any XOR resolution
+        """
+        self.object_pos, self.xor_offset = object_pos, xor_offset
+        self.flags, self.bitmap = flags, bitmap
+
+
+class PackBitmap:
+    """In-memory representation of a pack bitmap index.
+
+    Holds one bitmap per object type plus per-commit bitmap entries, so that
+    objects can be enumerated without walking the object graph.
+    """
+
+    def __init__(
+        self,
+        version: int = BITMAP_VERSION,
+        flags: int = BITMAP_OPT_FULL_DAG,
+    ) -> None:
+        """Create an empty pack bitmap.
+
+        Args:
+            version: Bitmap format version
+            flags: Bitmap flags (combination of the BITMAP_OPT_* constants)
+        """
+        self.version = version
+        self.flags = flags
+        self.pack_checksum: Optional[bytes] = None
+
+        # One bitmap per object type: commits, trees, blobs, tags
+        self.commit_bitmap = EWAHBitmap()
+        self.tree_bitmap = EWAHBitmap()
+        self.blob_bitmap = EWAHBitmap()
+        self.tag_bitmap = EWAHBitmap()
+
+        # Per-commit entries, keyed by commit SHA
+        self.entries: dict[bytes, BitmapEntry] = {}
+
+        # The same entries in order; XOR offsets are relative to this order
+        self.entries_list: list[tuple[bytes, BitmapEntry]] = []
+
+        # Optional lookup table for random access
+        self.lookup_table: Optional[list[tuple[int, int, int]]] = None
+
+        # Optional name-hash cache
+        self.name_hash_cache: Optional[list[int]] = None
+
+    def get_bitmap(self, commit_sha: bytes) -> Optional[EWAHBitmap]:
+        """Return the fully resolved bitmap for a commit.
+
+        An entry with a non-zero XOR offset is stored as a difference against
+        the entry that many positions earlier in ``entries_list``; the base is
+        resolved recursively and XORed back in. If the base cannot be located,
+        the stored bitmap is returned unchanged.
+
+        Args:
+            commit_sha: SHA-1 of the commit
+
+        Returns:
+            EWAH bitmap or None if the commit has no entry
+        """
+        entry = self.entries.get(commit_sha)
+        if entry is None:
+            return None
+
+        stored = entry.bitmap
+        if entry.xor_offset <= 0:
+            # Stored uncompressed, nothing to resolve.
+            return stored
+
+        # Locate this commit in the ordered list (first match wins).
+        position = None
+        for idx, (sha, _entry) in enumerate(self.entries_list):
+            if sha == commit_sha:
+                position = idx
+                break
+
+        # Either the commit is not in the list or the offset points before
+        # the start of the list: fall back to the stored bitmap.
+        if position is None or position < entry.xor_offset:
+            return stored
+
+        base_sha = self.entries_list[position - entry.xor_offset][0]
+        # The base may itself be XOR-compressed, hence the recursion.
+        base = self.get_bitmap(base_sha)
+        if base is None:
+            return stored
+        return stored ^ base
+
+    def has_commit(self, commit_sha: bytes) -> bool:
+        """Tell whether a bitmap entry exists for a commit.
+
+        Args:
+            commit_sha: SHA-1 of the commit
+
+        Returns:
+            True if a bitmap exists for this commit
+        """
+        return commit_sha in self.entries
+
+    def iter_commits(self) -> Iterator[bytes]:
+        """Iterate over the commits that have bitmaps.
+
+        Returns:
+            Iterator of commit SHAs
+        """
+        return iter(self.entries)
+
+
+def read_bitmap(
+    filename: Union[str, os.PathLike[str]],
+    pack_index: Optional["PackIndex"] = None,
+) -> PackBitmap:
+    """Load a pack bitmap index from a file on disk.
+
+    Opens the file for binary reading and hands it to ``read_bitmap_file``.
+
+    Args:
+        filename: Path to the .bitmap file
+        pack_index: Optional PackIndex to resolve object positions to SHAs
+
+    Returns:
+        Loaded PackBitmap
+
+    Raises:
+        ValueError: If file format is invalid
+    """
+    with GitFile(filename, "rb") as bitmap_file:
+        loaded = read_bitmap_file(bitmap_file, pack_index=pack_index)
+    return loaded
+
+
+def _read_ewah_blob(
+    f: IO[bytes], header_error: str, words_error: str, rlw_error: str
+) -> tuple[bytes, int]:
+    """Read the raw bytes of one serialized EWAH bitmap from a stream.
+
+    The serialization is self-describing: bit count (4 bytes), word count
+    (4 bytes), ``word_count`` 8-byte words, then a 4-byte RLW position.
+
+    Args:
+        f: File object positioned at the start of the EWAH data
+        header_error: Message for a truncated bit/word count header
+        words_error: Message for truncated compressed words
+        rlw_error: Message for a missing RLW position
+
+    Returns:
+        Tuple of (complete serialized EWAH data, compressed word count)
+
+    Raises:
+        ValueError: If the stream ends before the bitmap is complete
+    """
+    bit_count_bytes = f.read(4)
+    word_count_bytes = f.read(4)
+    if len(bit_count_bytes) < 4 or len(word_count_bytes) < 4:
+        raise ValueError(header_error)
+
+    word_count = struct.unpack(">I", word_count_bytes)[0]
+
+    words_data = f.read(word_count * 8)
+    if len(words_data) < word_count * 8:
+        raise ValueError(words_error)
+
+    rlw_pos_bytes = f.read(4)
+    if len(rlw_pos_bytes) < 4:
+        raise ValueError(rlw_error)
+
+    ewah_data = bit_count_bytes + word_count_bytes + words_data + rlw_pos_bytes
+    return ewah_data, word_count
+
+
+def read_bitmap_file(
+    f: IO[bytes], pack_index: Optional["PackIndex"] = None
+) -> PackBitmap:
+    """Read bitmap data from a file object.
+
+    Layout read, in order: signature, version, flags, entry count, pack
+    checksum, the four type bitmaps (commits, trees, blobs, tags), the bitmap
+    entries, then the optional lookup table and name-hash cache when the
+    corresponding flags are set.
+
+    Args:
+        f: File object to read from
+        pack_index: Optional PackIndex to resolve object positions to SHAs.
+            Without it, entries are keyed by their 4-byte big-endian object
+            position instead of a SHA.
+
+    Returns:
+        Loaded PackBitmap
+
+    Raises:
+        ValueError: If file format is invalid
+    """
+    # Read header
+    signature = f.read(4)
+    if signature != BITMAP_SIGNATURE:
+        raise ValueError(
+            f"Invalid bitmap signature: {signature!r}, expected {BITMAP_SIGNATURE!r}"
+        )
+
+    version_bytes = f.read(2)
+    flags_bytes = f.read(2)
+
+    if len(version_bytes) < 2 or len(flags_bytes) < 2:
+        raise ValueError("Incomplete bitmap header")
+
+    version = struct.unpack(">H", version_bytes)[0]
+    flags = struct.unpack(">H", flags_bytes)[0]
+
+    if version != BITMAP_VERSION:
+        raise ValueError(f"Unsupported bitmap version: {version}")
+
+    # Read entry count
+    entry_count_bytes = f.read(4)
+    if len(entry_count_bytes) < 4:
+        raise ValueError("Missing entry count")
+    entry_count = struct.unpack(">I", entry_count_bytes)[0]
+
+    # Read pack checksum
+    pack_checksum = f.read(20)
+    if len(pack_checksum) < 20:
+        raise ValueError("Missing pack checksum")
+
+    bitmap = PackBitmap(version=version, flags=flags)
+    bitmap.pack_checksum = pack_checksum
+
+    # Read type bitmaps (EWAH bitmaps are self-describing)
+    for i, type_bitmap in enumerate(
+        [
+            bitmap.commit_bitmap,
+            bitmap.tree_bitmap,
+            bitmap.blob_bitmap,
+            bitmap.tag_bitmap,
+        ]
+    ):
+        ewah_data, _word_count = _read_ewah_blob(
+            f,
+            f"Missing type bitmap {i} header",
+            f"Incomplete type bitmap {i} data",
+            f"Missing type bitmap {i} RLW position",
+        )
+        type_bitmap._decode(ewah_data)
+
+    # Pack index position -> SHA. Built once, on the first entry that needs
+    # it, instead of re-iterating the whole pack index for every entry.
+    sha_by_pos: Optional[list[bytes]] = None
+
+    # Read bitmap entries
+    for _ in range(entry_count):
+        # Read object position (4 bytes)
+        obj_pos_bytes = f.read(4)
+        if len(obj_pos_bytes) < 4:
+            raise ValueError("Incomplete bitmap entry")
+        obj_pos = struct.unpack(">I", obj_pos_bytes)[0]
+
+        # Read XOR offset (1 byte)
+        xor_offset_bytes = f.read(1)
+        if len(xor_offset_bytes) < 1:
+            raise ValueError("Missing XOR offset")
+        xor_offset = xor_offset_bytes[0]
+
+        # Read flags (1 byte)
+        entry_flags_bytes = f.read(1)
+        if len(entry_flags_bytes) < 1:
+            raise ValueError("Missing entry flags")
+        entry_flags = entry_flags_bytes[0]
+
+        # Read self-describing EWAH bitmap
+        bitmap_data, word_count = _read_ewah_blob(
+            f,
+            "Incomplete bitmap entry EWAH header",
+            "Incomplete bitmap entry EWAH words",
+            "Missing bitmap entry EWAH RLW position",
+        )
+
+        # Create bitmap entry; with no compressed words there is nothing to
+        # decode, so an empty bitmap is used.
+        ewah_bitmap = EWAHBitmap(bitmap_data) if word_count > 0 else EWAHBitmap()
+        entry = BitmapEntry(
+            object_pos=obj_pos,
+            xor_offset=xor_offset,
+            flags=entry_flags,
+            bitmap=ewah_bitmap,
+        )
+
+        # Resolve object position to SHA if we have a pack index
+        if pack_index is not None:
+            if sha_by_pos is None:
+                sha_by_pos = [
+                    entry_sha
+                    for entry_sha, _offset, _crc32 in pack_index.iterentries()
+                ]
+
+            # An entry whose position lies beyond the index is dropped.
+            # NOTE(review): dropping it shifts later entries in entries_list,
+            # which XOR offsets are relative to - confirm this is intended.
+            if obj_pos < len(sha_by_pos):
+                sha = sha_by_pos[obj_pos]
+                bitmap.entries[sha] = entry
+                bitmap.entries_list.append((sha, entry))
+        else:
+            # Without pack index, use position as temporary key
+            temp_key = obj_pos.to_bytes(4, byteorder="big")
+            bitmap.entries[temp_key] = entry
+            bitmap.entries_list.append((temp_key, entry))
+
+    # Read optional lookup table
+    if flags & BITMAP_OPT_LOOKUP_TABLE:
+        # Lookup table contains triplets: (commit_pos, offset, xor_row)
+        # Number of entries matches the bitmap entry count; a table that ends
+        # early is accepted and simply comes out shorter.
+        lookup_table = []
+        for _ in range(entry_count):
+            # Read commit position (4 bytes)
+            commit_pos_bytes = f.read(4)
+            if len(commit_pos_bytes) < 4:
+                break
+            commit_pos = struct.unpack(">I", commit_pos_bytes)[0]
+
+            # Read file offset (8 bytes)
+            offset_bytes = f.read(8)
+            if len(offset_bytes) < 8:
+                break
+            offset = struct.unpack(">Q", offset_bytes)[0]
+
+            # Read XOR row (4 bytes)
+            xor_row_bytes = f.read(4)
+            if len(xor_row_bytes) < 4:
+                break
+            xor_row = struct.unpack(">I", xor_row_bytes)[0]
+
+            lookup_table.append((commit_pos, offset, xor_row))
+
+        bitmap.lookup_table = lookup_table
+
+    # Read optional name-hash cache
+    if flags & BITMAP_OPT_HASH_CACHE:
+        # Name-hash cache contains one 32-bit hash per object in the pack.
+        # The object count is not known here, so everything up to EOF is read.
+        # NOTE(review): any data following the cache (e.g. a trailing file
+        # checksum, if the format has one) would be parsed as hash values -
+        # confirm against the bitmap format specification.
+        name_hash_cache = []
+        while True:
+            hash_bytes = f.read(4)
+            if len(hash_bytes) < 4:
+                break
+            hash_value = struct.unpack(">I", hash_bytes)[0]
+            name_hash_cache.append(hash_value)
+
+        if name_hash_cache:
+            bitmap.name_hash_cache = name_hash_cache
+
+    return bitmap
+
+
+def write_bitmap(
+    filename: Union[str, os.PathLike[str]],
+    bitmap: PackBitmap,
+) -> None:
+    """Save a pack bitmap index to a file on disk.
+
+    Opens the file for binary writing and hands it to ``write_bitmap_file``.
+
+    Args:
+        filename: Path to write the .bitmap file
+        bitmap: PackBitmap to write
+    """
+    with GitFile(filename, "wb") as bitmap_file:
+        write_bitmap_file(bitmap_file, bitmap)
+
+
+def write_bitmap_file(f: IO[bytes], bitmap: PackBitmap) -> None:
+    """Serialize a pack bitmap index to a file object.
+
+    Sections are written in this order: header (signature, version, flags,
+    entry count, pack checksum), the four type bitmaps, the bitmap entries,
+    then the optional lookup table and name-hash cache.
+
+    Args:
+        f: File object to write to
+        bitmap: PackBitmap to write
+    """
+    emit = f.write
+    pack_u16 = struct.Struct(">H").pack
+    pack_u32 = struct.Struct(">I").pack
+    pack_u64 = struct.Struct(">Q").pack
+
+    # Header
+    emit(BITMAP_SIGNATURE)
+    emit(pack_u16(bitmap.version))
+    emit(pack_u16(bitmap.flags))
+    emit(pack_u32(len(bitmap.entries)))
+    # A missing (or empty) pack checksum is written as 20 zero bytes.
+    emit(bitmap.pack_checksum or b"\x00" * 20)
+
+    # Type bitmaps (self-describing EWAH format, no size prefix needed)
+    for type_bitmap in (
+        bitmap.commit_bitmap,
+        bitmap.tree_bitmap,
+        bitmap.blob_bitmap,
+        bitmap.tag_bitmap,
+    ):
+        emit(type_bitmap.encode())
+
+    # Bitmap entries: object position (4 bytes), XOR offset (1 byte),
+    # flags (1 byte), then the self-describing EWAH data.
+    for entry in bitmap.entries.values():
+        emit(pack_u32(entry.object_pos))
+        emit(bytes([entry.xor_offset]))
+        emit(bytes([entry.flags]))
+        emit(entry.bitmap.encode())
+
+    # Optional lookup table: (commit_pos, offset, xor_row) triplets. Written
+    # only when the flag is set and a non-empty table is present.
+    if bitmap.flags & BITMAP_OPT_LOOKUP_TABLE and bitmap.lookup_table:
+        for commit_pos, offset, xor_row in bitmap.lookup_table:
+            emit(pack_u32(commit_pos))  # 4 bytes
+            emit(pack_u64(offset))  # 8 bytes
+            emit(pack_u32(xor_row))  # 4 bytes
+
+    # Optional name-hash cache: one 32-bit value per cached hash.
+    if bitmap.flags & BITMAP_OPT_HASH_CACHE and bitmap.name_hash_cache:
+        for hash_value in bitmap.name_hash_cache:
+            emit(pack_u32(hash_value))

+ 20 - 0
dulwich/pack.py

@@ -82,6 +82,7 @@ else:
 if TYPE_CHECKING:
     from _hashlib import HASH as HashObject
 
+    from .bitmap import PackBitmap
     from .commit_graph import CommitGraph
 
 # For some reason the above try, except fails to set has_mmap = False for plan9
@@ -3428,6 +3429,7 @@ class Pack:
 
     _data: Optional[PackData]
     _idx: Optional[PackIndex]
+    _bitmap: Optional["PackBitmap"]
 
     def __init__(
         self,
@@ -3456,8 +3458,10 @@ class Pack:
         self._basename = basename
         self._data = None
         self._idx = None
+        self._bitmap = None
         self._idx_path = self._basename + ".idx"
         self._data_path = self._basename + ".pack"
+        self._bitmap_path = self._basename + ".bitmap"
         self.delta_window_size = delta_window_size
         self.window_memory = window_memory
         self.delta_cache_size = delta_cache_size
@@ -3521,6 +3525,22 @@ class Pack:
             self._idx = self._idx_load()
         return self._idx
 
+    @property
+    def bitmap(self) -> Optional["PackBitmap"]:
+        """The bitmap being used, if available.
+
+        The bitmap is read from the ``.bitmap`` file next to the pack on first
+        access and kept for later accesses. If no such file exists, nothing is
+        stored, so the file is looked for again on the next access.
+
+        Returns:
+            PackBitmap instance or None if no bitmap exists
+
+        Raises:
+            ValueError: If bitmap file is invalid or corrupt
+        """
+        if self._bitmap is None:
+            from .bitmap import read_bitmap
+
+            # Resolve the index outside the try block so that a missing index
+            # file is not mistaken for a missing bitmap.
+            pack_index = self.index
+            try:
+                self._bitmap = read_bitmap(self._bitmap_path, pack_index=pack_index)
+            except FileNotFoundError:
+                # Having no bitmap is a normal state for a pack, not an error.
+                return None
+        return self._bitmap
+
     def close(self) -> None:
         """Close the pack file and index."""
         if self._data is not None:

+ 1 - 0
tests/__init__.py

@@ -127,6 +127,7 @@ def self_test_suite() -> unittest.TestSuite:
         "archive",
         "attrs",
         "bisect",
+        "bitmap",
         "blackbox",
         "bundle",
         "cli",

+ 1 - 0
tests/compat/__init__.py

@@ -26,6 +26,7 @@ import unittest
 
 def test_suite() -> unittest.TestSuite:
     names = [
+        "bitmap",
         "bundle",
         "check_ignore",
         "client",

+ 268 - 0
tests/compat/test_bitmap.py

@@ -0,0 +1,268 @@
+# test_bitmap.py -- Compatibility tests for git pack bitmaps.
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for git pack bitmaps."""
+
+import os
+import tempfile
+
+from dulwich.bitmap import (
+    BITMAP_OPT_FULL_DAG,
+    BITMAP_OPT_HASH_CACHE,
+    BITMAP_OPT_LOOKUP_TABLE,
+    BitmapEntry,
+    EWAHBitmap,
+    PackBitmap,
+    write_bitmap,
+)
+from dulwich.pack import Pack
+from dulwich.repo import Repo
+
+from .. import TestCase
+from .utils import remove_ro, require_git_version, rmtree_ro, run_git_or_fail
+
+
+class BitmapCompatTests(TestCase):
+    """Compatibility tests for reading git-generated bitmaps."""
+
+    def setUp(self):
+        super().setUp()
+        # Git bitmap support was added in 2.0.0
+        require_git_version((2, 0, 0))
+        self._tempdir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, self._tempdir)
+
+    def _init_repo_with_bitmap(self):
+        """Create a repo and generate a bitmap using git."""
+        repo_path = os.path.join(self._tempdir, "test-repo")
+        os.mkdir(repo_path)
+
+        # Initialize repo
+        run_git_or_fail(["init"], cwd=repo_path)
+
+        # Create some commits
+        test_file = os.path.join(repo_path, "test.txt")
+        for i in range(5):
+            with open(test_file, "w") as f:
+                f.write(f"Content {i}\n")
+            run_git_or_fail(["add", "test.txt"], cwd=repo_path)
+            run_git_or_fail(
+                ["commit", "-m", f"Commit {i}"],
+                cwd=repo_path,
+                env={"GIT_AUTHOR_NAME": "Test", "GIT_AUTHOR_EMAIL": "test@example.com"},
+            )
+
+        # Enable bitmap writing and repack
+        run_git_or_fail(
+            ["config", "pack.writeBitmaps", "true"],
+            cwd=repo_path,
+        )
+        run_git_or_fail(["repack", "-a", "-d", "-b"], cwd=repo_path)
+
+        return repo_path
+
+    def test_read_git_generated_bitmap(self):
+        """Test that Dulwich can read a bitmap generated by git."""
+        repo_path = self._init_repo_with_bitmap()
+
+        # Find the pack file with bitmap
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
+
+        # Get the pack file (basename without extension)
+        bitmap_name = bitmap_files[0]
+        pack_basename = bitmap_name.replace(".bitmap", "")
+        pack_path = os.path.join(pack_dir, pack_basename)
+
+        # Verify bitmap file exists at expected location
+        bitmap_path = pack_path + ".bitmap"
+        self.assertTrue(
+            os.path.exists(bitmap_path), f"Bitmap file not found at {bitmap_path}"
+        )
+
+        # Try to load the bitmap using Dulwich
+        with Pack(pack_path) as pack:
+            bitmap = pack.bitmap
+
+            # Basic checks
+            self.assertIsNotNone(bitmap, f"Failed to load bitmap from {pack_path}")
+            self.assertIsNotNone(bitmap.pack_checksum, "Bitmap missing pack checksum")
+
+            # Check that we have some type bitmaps
+            # At minimum, we should have some commits
+            self.assertGreater(
+                len(bitmap.commit_bitmap.bits),
+                0,
+                "Commit bitmap should not be empty",
+            )
+
+    def test_git_can_use_dulwich_repo_with_bitmap(self):
+        """Test that git can repack with bitmaps in a repo initialized by Dulwich."""
+        repo_path = os.path.join(self._tempdir, "dulwich-repo")
+
+        # Initialize the repo with Dulwich; the commits below are created with git
+        repo = Repo.init(repo_path, mkdir=True)
+        self.addCleanup(repo.close)
+
+        # Create actual commits, not just loose objects - git needs commits for bitmaps
+        test_file = os.path.join(repo_path, "test.txt")
+        for i in range(5):
+            with open(test_file, "w") as f:
+                f.write(f"Content {i}\n")
+            run_git_or_fail(["add", "test.txt"], cwd=repo_path)
+            run_git_or_fail(
+                ["commit", "-m", f"Commit {i}"],
+                cwd=repo_path,
+                env={"GIT_AUTHOR_NAME": "Test", "GIT_AUTHOR_EMAIL": "test@example.com"},
+            )
+
+        # Configure git to write bitmaps
+        run_git_or_fail(
+            ["config", "pack.writeBitmaps", "true"],
+            cwd=repo_path,
+        )
+
+        # Git should be able to repack with bitmaps
+        run_git_or_fail(["repack", "-a", "-d", "-b"], cwd=repo_path)
+
+        # Verify git created a bitmap
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        self.assertTrue(os.path.exists(pack_dir), "Pack directory should exist")
+
+        bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
+        self.assertGreater(
+            len(bitmap_files), 0, "Git should have created a bitmap file after repack"
+        )
+
+    def test_git_can_read_dulwich_bitmap(self):
+        """Test that git can read a bitmap file written by Dulwich."""
+        repo_path = os.path.join(self._tempdir, "dulwich-bitmap-repo")
+
+        # Create a repo with git and add commits
+        run_git_or_fail(["init"], cwd=None, env={"GIT_DIR": repo_path})
+
+        test_file = os.path.join(repo_path, "..", "test.txt")
+        os.makedirs(os.path.dirname(test_file), exist_ok=True)
+
+        for i in range(5):
+            with open(test_file, "w") as f:
+                f.write(f"Content {i}\n")
+            run_git_or_fail(
+                ["add", test_file],
+                cwd=os.path.dirname(repo_path),
+                env={
+                    "GIT_DIR": repo_path,
+                    "GIT_WORK_TREE": os.path.dirname(repo_path),
+                },
+            )
+            run_git_or_fail(
+                ["commit", "-m", f"Commit {i}"],
+                cwd=os.path.dirname(repo_path),
+                env={
+                    "GIT_DIR": repo_path,
+                    "GIT_WORK_TREE": os.path.dirname(repo_path),
+                    "GIT_AUTHOR_NAME": "Test",
+                    "GIT_AUTHOR_EMAIL": "test@example.com",
+                },
+            )
+
+        # Create a pack with git first
+        run_git_or_fail(["repack", "-a", "-d"], cwd=None, env={"GIT_DIR": repo_path})
+
+        # Now use Dulwich to write a bitmap for the pack
+        pack_dir = os.path.join(repo_path, "objects", "pack")
+        pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
+        self.assertGreater(len(pack_files), 0, "Should have at least one pack file")
+
+        pack_basename = pack_files[0].replace(".pack", "")
+        pack_path = os.path.join(pack_dir, pack_basename)
+
+        # Load the pack and create bitmap data, then close before writing
+        with Pack(pack_path) as pack:
+            # Create a simple bitmap for testing
+            # Git requires BITMAP_OPT_FULL_DAG flag
+            bitmap = PackBitmap(
+                flags=BITMAP_OPT_FULL_DAG
+                | BITMAP_OPT_HASH_CACHE
+                | BITMAP_OPT_LOOKUP_TABLE
+            )
+            bitmap.pack_checksum = pack.get_stored_checksum()
+
+            # Add bitmap entries for the first few index entries (not only commits)
+            for i, (sha, offset, crc) in enumerate(pack.index.iterentries()):
+                if i >= 3:  # Just add 3 entries
+                    break
+
+                ewah = EWAHBitmap()
+                # Mark bit positions 0..i as reachable
+                for j in range(i + 1):
+                    ewah.add(j)
+
+                entry = BitmapEntry(object_pos=i, xor_offset=0, flags=0, bitmap=ewah)
+                bitmap.entries[sha] = entry
+                bitmap.entries_list.append((sha, entry))
+
+            # Add name hash cache
+            bitmap.name_hash_cache = [0x12345678, 0xABCDEF00, 0xFEDCBA98]
+
+        # Write the bitmap after pack is closed to avoid file locking on Windows
+        bitmap_path = pack_path + ".bitmap"
+        remove_ro(bitmap_path)
+        write_bitmap(bitmap_path, bitmap)
+
+        # Verify git can use the repository with our bitmap
+        # This should succeed if git can read our bitmap
+        run_git_or_fail(
+            ["rev-list", "--count", "--use-bitmap-index", "HEAD"],
+            cwd=None,
+            env={"GIT_DIR": repo_path},
+        )
+
+        # Verify git count-objects works with our bitmap
+        run_git_or_fail(["count-objects", "-v"], cwd=None, env={"GIT_DIR": repo_path})
+
+    def test_bitmap_file_format_structure(self):
+        """Test that git-generated bitmap has expected structure."""
+        repo_path = self._init_repo_with_bitmap()
+
+        # Find bitmap
+        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
+        bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
+
+        bitmap_path = os.path.join(pack_dir, bitmap_files[0])
+
+        # Read the raw file to verify header
+        with open(bitmap_path, "rb") as f:
+            signature = f.read(4)
+            self.assertEqual(b"BITM", signature, "Invalid bitmap signature")
+
+            version = int.from_bytes(f.read(2), byteorder="big")
+            self.assertGreaterEqual(version, 1, "Bitmap version should be >= 1")
+
+        # Load with Dulwich and verify structure
+        bitmap_name = bitmap_files[0]
+        pack_basename = bitmap_name.replace(".bitmap", "")
+        pack_path = os.path.join(pack_dir, pack_basename)
+        with Pack(pack_path) as pack:
+            bitmap = pack.bitmap
+
+            self.assertIsNotNone(bitmap)
+            self.assertEqual(bitmap.version, version)

+ 905 - 0
tests/test_bitmap.py

@@ -0,0 +1,905 @@
+# test_bitmap.py -- Tests for bitmap support
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for bitmap support."""
+
+import os
+import tempfile
+import unittest
+from io import BytesIO
+
+from dulwich.bitmap import (
+    BITMAP_OPT_FULL_DAG,
+    BITMAP_OPT_HASH_CACHE,
+    BITMAP_OPT_LOOKUP_TABLE,
+    BITMAP_SIGNATURE,
+    BITMAP_VERSION,
+    BitmapEntry,
+    EWAHBitmap,
+    PackBitmap,
+    _encode_ewah_words,
+    read_bitmap_file,
+    write_bitmap_file,
+)
+
+
+class EWAHCompressionTests(unittest.TestCase):
+    """Tests for EWAH compression helper functions."""
+
+    def test_encode_empty_words(self):
+        """Test encoding empty word list."""
+        result = _encode_ewah_words([])
+        self.assertEqual([], result)
+
+    def test_encode_single_literal(self):
+        """Test encoding single literal word."""
+        result = _encode_ewah_words([0x123])
+        # Should be: RLW(0 run, 1 literal) + literal
+        self.assertEqual(2, len(result))
+        # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
+        # running_bit=0, running_len=0, literal_words=1
+        expected_rlw = (1 << 33) | (0 << 1) | 0
+        self.assertEqual(expected_rlw, result[0])
+        self.assertEqual(0x123, result[1])
+
+    def test_encode_zero_run(self):
+        """Test encoding run of zeros."""
+        result = _encode_ewah_words([0, 0, 0])
+        # Should be: RLW(3 zeros, 0 literals)
+        self.assertEqual(1, len(result))
+        # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
+        # running_bit=0, running_len=3, literal_words=0
+        expected_rlw = (0 << 33) | (3 << 1) | 0
+        self.assertEqual(expected_rlw, result[0])
+
+    def test_encode_ones_run(self):
+        """Test encoding run of all-ones."""
+        result = _encode_ewah_words([0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF])
+        # Should be: RLW(2 ones, 0 literals)
+        self.assertEqual(1, len(result))
+        # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
+        # running_bit=1, running_len=2, literal_words=0
+        expected_rlw = (0 << 33) | (2 << 1) | 1
+        self.assertEqual(expected_rlw, result[0])
+
+    def test_encode_run_followed_by_literals(self):
+        """Test encoding run followed by literal words."""
+        result = _encode_ewah_words([0, 0, 0x123, 0x456])
+        # Should be: RLW(2 zeros, 2 literals) + literals
+        self.assertEqual(3, len(result))
+        # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
+        # running_bit=0, running_len=2, literal_words=2
+        expected_rlw = (2 << 33) | (2 << 1) | 0
+        self.assertEqual(expected_rlw, result[0])
+        self.assertEqual(0x123, result[1])
+        self.assertEqual(0x456, result[2])
+
+    def test_encode_mixed_pattern(self):
+        """Test encoding mixed runs and literals."""
+        result = _encode_ewah_words(
+            [0, 0, 0x123, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF]
+        )
+        # Should be: RLW(2 zeros, 1 literal) + literal + RLW(2 ones, 0 literals)
+        self.assertEqual(3, len(result))
+
+
+class EWAHCompatibilityTests(unittest.TestCase):
+    """Tests for EWAH encode/decode compatibility."""
+
+    def test_encode_decode_sparse_bitmap(self):
+        """Test encoding and decoding a sparse bitmap with runs."""
+        # Create a bitmap with a pattern that benefits from run-length encoding
+        # Bits: 0, 1, 128 (has runs of zeros between set bits)
+        bitmap = EWAHBitmap()
+        bitmap.add(0)
+        bitmap.add(1)
+        bitmap.add(128)
+
+        # Encode
+        encoded = bitmap.encode()
+
+        # Decode
+        bitmap2 = EWAHBitmap(encoded)
+
+        # Verify all bits are preserved
+        self.assertEqual(len(bitmap), len(bitmap2))
+        self.assertIn(0, bitmap2)
+        self.assertIn(1, bitmap2)
+        self.assertIn(128, bitmap2)
+        self.assertNotIn(2, bitmap2)
+        self.assertNotIn(127, bitmap2)
+
+    def test_encode_decode_dense_bitmap(self):
+        """Test encoding and decoding a dense bitmap."""
+        # Create a bitmap with many consecutive bits set
+        bitmap = EWAHBitmap()
+        for i in range(64):
+            bitmap.add(i)
+
+        # Encode
+        encoded = bitmap.encode()
+
+        # Decode
+        bitmap2 = EWAHBitmap(encoded)
+
+        # Verify all bits are preserved
+        self.assertEqual(64, len(bitmap2))
+        for i in range(64):
+            self.assertIn(i, bitmap2)
+        self.assertNotIn(64, bitmap2)
+
+    def test_encode_decode_runs_of_zeros(self):
+        """Test encoding and decoding bitmap with long runs of zeros."""
+        # Create bitmap: bits 0, 200, 400 (lots of zeros in between)
+        bitmap = EWAHBitmap()
+        bitmap.add(0)
+        bitmap.add(200)
+        bitmap.add(400)
+
+        # Encode
+        encoded = bitmap.encode()
+
+        # Decode
+        bitmap2 = EWAHBitmap(encoded)
+
+        # Verify
+        self.assertEqual(3, len(bitmap2))
+        self.assertIn(0, bitmap2)
+        self.assertIn(200, bitmap2)
+        self.assertIn(400, bitmap2)
+        self.assertNotIn(1, bitmap2)
+        self.assertNotIn(199, bitmap2)
+        self.assertNotIn(201, bitmap2)
+
+    def test_encode_decode_mixed_pattern(self):
+        """Test encoding and decoding bitmap with mixed dense/sparse regions."""
+        bitmap = EWAHBitmap()
+        # Dense region: 0-63
+        for i in range(64):
+            bitmap.add(i)
+        # Sparse region: 200, 300, 400
+        bitmap.add(200)
+        bitmap.add(300)
+        bitmap.add(400)
+        # Another dense region: 500-563
+        for i in range(500, 564):
+            bitmap.add(i)
+
+        # Encode
+        encoded = bitmap.encode()
+
+        # Decode
+        bitmap2 = EWAHBitmap(encoded)
+
+        # Verify all bits preserved
+        self.assertEqual(len(bitmap), len(bitmap2))
+        for i in range(64):
+            self.assertIn(i, bitmap2)
+        self.assertIn(200, bitmap2)
+        self.assertIn(300, bitmap2)
+        self.assertIn(400, bitmap2)
+        for i in range(500, 564):
+            self.assertIn(i, bitmap2)
+        # Check some bits that shouldn't be set
+        self.assertNotIn(64, bitmap2)
+        self.assertNotIn(199, bitmap2)
+        self.assertNotIn(201, bitmap2)
+        self.assertNotIn(499, bitmap2)
+
+    def test_encode_decode_preserves_bitwise_ops(self):
+        """Test that encode/decode doesn't break bitwise operations."""
+        # Create two bitmaps
+        bitmap1 = EWAHBitmap()
+        bitmap1.add(0)
+        bitmap1.add(5)
+        bitmap1.add(100)
+
+        bitmap2 = EWAHBitmap()
+        bitmap2.add(5)
+        bitmap2.add(10)
+        bitmap2.add(100)
+
+        # Encode and decode both
+        bitmap1_decoded = EWAHBitmap(bitmap1.encode())
+        bitmap2_decoded = EWAHBitmap(bitmap2.encode())
+
+        # Perform operations on decoded bitmaps
+        or_result = bitmap1_decoded | bitmap2_decoded
+        and_result = bitmap1_decoded & bitmap2_decoded
+        xor_result = bitmap1_decoded ^ bitmap2_decoded
+
+        # Verify results
+        # OR: {0, 5, 10, 100}
+        self.assertEqual(4, len(or_result))
+        self.assertIn(0, or_result)
+        self.assertIn(5, or_result)
+        self.assertIn(10, or_result)
+        self.assertIn(100, or_result)
+
+        # AND: {5, 100}
+        self.assertEqual(2, len(and_result))
+        self.assertIn(5, and_result)
+        self.assertIn(100, and_result)
+
+        # XOR: {0, 10}
+        self.assertEqual(2, len(xor_result))
+        self.assertIn(0, xor_result)
+        self.assertIn(10, xor_result)
+
+    def test_round_trip_large_bitmap(self):
+        """Test round-trip encoding/decoding of a large bitmap."""
+        # Create a large bitmap with various patterns
+        bitmap = EWAHBitmap()
+        # Add bits at various intervals
+        for i in range(0, 10000, 7):
+            bitmap.add(i)
+
+        original_bits = set(bitmap.bits)
+
+        # Encode and decode
+        encoded = bitmap.encode()
+        decoded = EWAHBitmap(encoded)
+
+        # Verify all bits preserved
+        self.assertEqual(original_bits, decoded.bits)
+        self.assertEqual(len(bitmap), len(decoded))
+
+
+class EWAHBitmapTests(unittest.TestCase):
+    """Tests for EWAH bitmap compression."""
+
+    def test_empty_bitmap(self):
+        """Test empty bitmap."""
+        bitmap = EWAHBitmap()
+        self.assertEqual(0, len(bitmap))
+        self.assertEqual(0, bitmap.bit_count)
+
+    def test_add_bit(self):
+        """Test adding bits to bitmap."""
+        bitmap = EWAHBitmap()
+        bitmap.add(0)
+        bitmap.add(5)
+        bitmap.add(100)
+
+        self.assertEqual(3, len(bitmap))
+        self.assertEqual(101, bitmap.bit_count)
+        self.assertIn(0, bitmap)
+        self.assertIn(5, bitmap)
+        self.assertIn(100, bitmap)
+        self.assertNotIn(1, bitmap)
+        self.assertNotIn(99, bitmap)
+
+    def test_encode_decode(self):
+        """Test encoding and decoding bitmaps."""
+        bitmap = EWAHBitmap()
+        bitmap.add(0)
+        bitmap.add(1)
+        bitmap.add(64)
+        bitmap.add(128)
+
+        # Encode
+        data = bitmap.encode()
+        self.assertIsInstance(data, bytes)
+
+        # Decode
+        bitmap2 = EWAHBitmap(data)
+        self.assertEqual(len(bitmap), len(bitmap2))
+        self.assertIn(0, bitmap2)
+        self.assertIn(1, bitmap2)
+        self.assertIn(64, bitmap2)
+        self.assertIn(128, bitmap2)
+
+    def test_bitwise_or(self):
+        """Test bitwise OR operation."""
+        bitmap1 = EWAHBitmap()
+        bitmap1.add(0)
+        bitmap1.add(5)
+
+        bitmap2 = EWAHBitmap()
+        bitmap2.add(5)
+        bitmap2.add(10)
+
+        result = bitmap1 | bitmap2
+        self.assertEqual(3, len(result))
+        self.assertIn(0, result)
+        self.assertIn(5, result)
+        self.assertIn(10, result)
+
+    def test_bitwise_and(self):
+        """Test bitwise AND operation."""
+        bitmap1 = EWAHBitmap()
+        bitmap1.add(0)
+        bitmap1.add(5)
+
+        bitmap2 = EWAHBitmap()
+        bitmap2.add(5)
+        bitmap2.add(10)
+
+        result = bitmap1 & bitmap2
+        self.assertEqual(1, len(result))
+        self.assertIn(5, result)
+        self.assertNotIn(0, result)
+        self.assertNotIn(10, result)
+
+    def test_bitwise_xor(self):
+        """Test bitwise XOR operation."""
+        bitmap1 = EWAHBitmap()
+        bitmap1.add(0)
+        bitmap1.add(5)
+
+        bitmap2 = EWAHBitmap()
+        bitmap2.add(5)
+        bitmap2.add(10)
+
+        result = bitmap1 ^ bitmap2
+        self.assertEqual(2, len(result))
+        self.assertIn(0, result)
+        self.assertIn(10, result)
+        self.assertNotIn(5, result)
+
+
+class BitmapEntryTests(unittest.TestCase):
+    """Tests for bitmap entries."""
+
+    def test_create_entry(self):
+        """Test creating a bitmap entry."""
+        bitmap = EWAHBitmap()
+        bitmap.add(0)
+        bitmap.add(10)
+
+        entry = BitmapEntry(
+            object_pos=100,
+            xor_offset=0,
+            flags=0,
+            bitmap=bitmap,
+        )
+
+        self.assertEqual(100, entry.object_pos)
+        self.assertEqual(0, entry.xor_offset)
+        self.assertEqual(0, entry.flags)
+        self.assertEqual(bitmap, entry.bitmap)
+
+
+class PackBitmapTests(unittest.TestCase):
+    """Tests for pack bitmap."""
+
+    def test_create_bitmap(self):
+        """Test creating a pack bitmap."""
+        bitmap = PackBitmap()
+        self.assertEqual(BITMAP_VERSION, bitmap.version)
+        self.assertEqual(BITMAP_OPT_FULL_DAG, bitmap.flags)
+        self.assertIsNone(bitmap.pack_checksum)
+
+    def test_bitmap_with_entries(self):
+        """Test bitmap with entries."""
+        bitmap = PackBitmap()
+        commit_sha = b"\x00" * 20
+
+        ewah_bitmap = EWAHBitmap()
+        ewah_bitmap.add(0)
+        ewah_bitmap.add(5)
+
+        entry = BitmapEntry(
+            object_pos=0,
+            xor_offset=0,
+            flags=0,
+            bitmap=ewah_bitmap,
+        )
+
+        bitmap.entries[commit_sha] = entry
+
+        self.assertTrue(bitmap.has_commit(commit_sha))
+        self.assertFalse(bitmap.has_commit(b"\x01" * 20))
+
+    def test_iter_commits(self):
+        """Test iterating over commits with bitmaps."""
+        bitmap = PackBitmap()
+        commit1 = b"\x00" * 20
+        commit2 = b"\x01" * 20
+
+        ewah_bitmap = EWAHBitmap()
+        entry = BitmapEntry(0, 0, 0, ewah_bitmap)
+
+        bitmap.entries[commit1] = entry
+        bitmap.entries[commit2] = entry
+
+        commits = list(bitmap.iter_commits())
+        self.assertEqual(2, len(commits))
+        self.assertIn(commit1, commits)
+        self.assertIn(commit2, commits)
+
+
+class BitmapFileTests(unittest.TestCase):
+    """Tests for bitmap file I/O."""
+
+    def test_write_read_empty_bitmap(self):
+        """Test writing and reading an empty bitmap."""
+        bitmap = PackBitmap()
+        bitmap.pack_checksum = b"\x00" * 20
+
+        # Write to bytes
+        f = BytesIO()
+        write_bitmap_file(f, bitmap)
+
+        # Read back
+        f.seek(0)
+        bitmap2 = read_bitmap_file(f)
+
+        self.assertEqual(bitmap.version, bitmap2.version)
+        self.assertEqual(bitmap.flags, bitmap2.flags)
+        self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
+
+    def test_write_read_with_type_bitmaps(self):
+        """Test writing and reading bitmaps with type information."""
+        bitmap = PackBitmap()
+        bitmap.pack_checksum = b"\xaa" * 20
+
+        # Add some type bitmap data
+        bitmap.commit_bitmap.add(0)
+        bitmap.commit_bitmap.add(5)
+        bitmap.tree_bitmap.add(1)
+        bitmap.blob_bitmap.add(2)
+        bitmap.tag_bitmap.add(3)
+
+        # Write to bytes
+        f = BytesIO()
+        write_bitmap_file(f, bitmap)
+
+        # Read back
+        f.seek(0)
+        bitmap2 = read_bitmap_file(f)
+
+        self.assertEqual(bitmap.version, bitmap2.version)
+        self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
+
+        # Check type bitmaps
+        self.assertIn(0, bitmap2.commit_bitmap)
+        self.assertIn(5, bitmap2.commit_bitmap)
+        self.assertIn(1, bitmap2.tree_bitmap)
+        self.assertIn(2, bitmap2.blob_bitmap)
+        self.assertIn(3, bitmap2.tag_bitmap)
+
+    def test_invalid_signature(self):
+        """Test reading file with invalid signature."""
+        f = BytesIO(b"XXXX\x00\x01\x00\x00")
+        with self.assertRaises(ValueError) as cm:
+            read_bitmap_file(f)
+        self.assertIn("Invalid bitmap signature", str(cm.exception))
+
+    def test_invalid_version(self):
+        """Test reading file with invalid version."""
+        f = BytesIO(BITMAP_SIGNATURE + b"\x00\x02\x00\x01")
+        with self.assertRaises(ValueError) as cm:
+            read_bitmap_file(f)
+        self.assertIn("Unsupported bitmap version", str(cm.exception))
+
+    def test_incomplete_header(self):
+        """Test reading file with incomplete header."""
+        f = BytesIO(BITMAP_SIGNATURE + b"\x00")
+        with self.assertRaises(ValueError) as cm:
+            read_bitmap_file(f)
+        self.assertIn("Incomplete bitmap header", str(cm.exception))
+
+
+class BitmapIntegrationTests(unittest.TestCase):
+    """Integration tests for bitmap functionality."""
+
+    def test_round_trip_file(self):
+        """Test writing and reading bitmap to/from actual file."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            bitmap_path = os.path.join(tmpdir, "test.bitmap")
+
+            # Create bitmap
+            bitmap = PackBitmap()
+            bitmap.pack_checksum = b"\xff" * 20
+            bitmap.commit_bitmap.add(10)
+            bitmap.tree_bitmap.add(20)
+
+            # Write to file
+            from dulwich.bitmap import write_bitmap
+
+            write_bitmap(bitmap_path, bitmap)
+
+            # Read back
+            from dulwich.bitmap import read_bitmap
+
+            bitmap2 = read_bitmap(bitmap_path)
+
+            self.assertEqual(bitmap.version, bitmap2.version)
+            self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
+            self.assertIn(10, bitmap2.commit_bitmap)
+            self.assertIn(20, bitmap2.tree_bitmap)
+
+    def test_xor_decompression(self):
+        """Test XOR decompression of bitmap entries."""
+        bitmap = PackBitmap()
+
+        # Create base bitmap
+        base_bitmap = EWAHBitmap()
+        base_bitmap.add(0)
+        base_bitmap.add(1)
+        base_bitmap.add(2)
+
+        base_entry = BitmapEntry(
+            object_pos=0,
+            xor_offset=0,
+            flags=0,
+            bitmap=base_bitmap,
+        )
+
+        # Create XOR'd bitmap
+        # If we XOR base (bits 0,1,2) with XOR bitmap (bits 1,3),
+        # result should be (bits 0,2,3)
+        xor_bitmap = EWAHBitmap()
+        xor_bitmap.add(1)
+        xor_bitmap.add(3)
+
+        xor_entry = BitmapEntry(
+            object_pos=1,
+            xor_offset=1,  # Reference the previous entry
+            flags=0,
+            bitmap=xor_bitmap,
+        )
+
+        # Add entries
+        base_sha = b"\x00" * 20
+        xor_sha = b"\x01" * 20
+
+        bitmap.entries[base_sha] = base_entry
+        bitmap.entries_list.append((base_sha, base_entry))
+
+        bitmap.entries[xor_sha] = xor_entry
+        bitmap.entries_list.append((xor_sha, xor_entry))
+
+        # Get decompressed bitmap
+        result = bitmap.get_bitmap(xor_sha)
+
+        self.assertIsNotNone(result)
+        self.assertIn(0, result)
+        self.assertNotIn(1, result)  # XOR cancels this
+        self.assertIn(2, result)
+        self.assertIn(3, result)
+
+    def test_lookup_table_round_trip(self):
+        """Test reading and writing lookup tables."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            bitmap_path = os.path.join(tmpdir, "test.bitmap")
+
+            # Create bitmap with lookup table
+            bitmap = PackBitmap()
+            bitmap.flags = BITMAP_OPT_FULL_DAG | BITMAP_OPT_LOOKUP_TABLE
+            bitmap.pack_checksum = b"\xaa" * 20
+
+            # Add some bitmap entries (required for lookup table to be written/read)
+            for i in range(3):
+                ewah = EWAHBitmap()
+                ewah.add(i)
+                entry = BitmapEntry(
+                    object_pos=i,
+                    xor_offset=0,
+                    flags=0,
+                    bitmap=ewah,
+                )
+                sha = i.to_bytes(20, byteorder="big")
+                bitmap.entries[sha] = entry
+                bitmap.entries_list.append((sha, entry))
+
+            bitmap.lookup_table = [
+                (0, 100, 0),
+                (1, 200, 0),
+                (2, 300, 1),
+            ]
+
+            # Write to file
+            from dulwich.bitmap import write_bitmap
+
+            write_bitmap(bitmap_path, bitmap)
+
+            # Read back
+            from dulwich.bitmap import read_bitmap
+
+            bitmap2 = read_bitmap(bitmap_path)
+
+            self.assertEqual(bitmap.flags, bitmap2.flags)
+            self.assertIsNotNone(bitmap2.lookup_table)
+            self.assertEqual(3, len(bitmap2.lookup_table))
+            self.assertEqual((0, 100, 0), bitmap2.lookup_table[0])
+            self.assertEqual((1, 200, 0), bitmap2.lookup_table[1])
+            self.assertEqual((2, 300, 1), bitmap2.lookup_table[2])
+
+    def test_name_hash_cache_round_trip(self):
+        """Test reading and writing name-hash cache."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            bitmap_path = os.path.join(tmpdir, "test.bitmap")
+
+            # Create bitmap with name-hash cache
+            bitmap = PackBitmap()
+            bitmap.flags = BITMAP_OPT_FULL_DAG | BITMAP_OPT_HASH_CACHE
+            bitmap.pack_checksum = b"\xbb" * 20
+            bitmap.name_hash_cache = [0x12345678, 0x9ABCDEF0, 0xFEDCBA98]
+
+            # Write to file
+            from dulwich.bitmap import write_bitmap
+
+            write_bitmap(bitmap_path, bitmap)
+
+            # Read back
+            from dulwich.bitmap import read_bitmap
+
+            bitmap2 = read_bitmap(bitmap_path)
+
+            self.assertEqual(bitmap.flags, bitmap2.flags)
+            self.assertIsNotNone(bitmap2.name_hash_cache)
+            self.assertEqual(3, len(bitmap2.name_hash_cache))
+            self.assertEqual(0x12345678, bitmap2.name_hash_cache[0])
+            self.assertEqual(0x9ABCDEF0, bitmap2.name_hash_cache[1])
+            self.assertEqual(0xFEDCBA98, bitmap2.name_hash_cache[2])
+
+
+class BitmapErrorHandlingTests(unittest.TestCase):
+    """Tests for error handling in bitmap reading."""
+
+    def test_truncated_header(self):
+        """Test reading bitmap with truncated header."""
+        # Only 10 bytes instead of full header
+        data = b"BITM\x00\x01\x00\x00\x00\x00"
+
+        with self.assertRaises(ValueError) as ctx:
+            read_bitmap_file(BytesIO(data))
+
+        # Should raise ValueError about missing/incomplete header data
+        self.assertIsInstance(ctx.exception, ValueError)
+
+    def test_invalid_signature(self):
+        """Test reading bitmap with invalid signature."""
+        data = b"JUNK\x00\x01\x00\x00" + b"\x00" * 20
+
+        with self.assertRaises(ValueError) as ctx:
+            read_bitmap_file(BytesIO(data))
+
+        self.assertIn("signature", str(ctx.exception).lower())
+
+    def test_unsupported_version(self):
+        """Test reading bitmap with unsupported version."""
+        data = BITMAP_SIGNATURE
+        data += b"\x00\x99"  # Version 153 (unsupported)
+        data += b"\x00\x00"  # Flags
+        data += b"\x00\x00\x00\x00"  # Entry count
+        data += b"\x00" * 20  # Pack checksum
+
+        with self.assertRaises(ValueError) as ctx:
+            read_bitmap_file(BytesIO(data))
+
+        self.assertIn("version", str(ctx.exception).lower())
+
+    def test_truncated_type_bitmap(self):
+        """Test reading bitmap with truncated type bitmap data."""
+        # Valid header
+        data = BITMAP_SIGNATURE
+        data += BITMAP_VERSION.to_bytes(2, "big")
+        data += b"\x00\x00"  # Flags
+        data += b"\x00\x00\x00\x00"  # Entry count
+        data += b"\x00" * 20  # Pack checksum
+
+        # Truncated type bitmap (incomplete EWAH header)
+        data += b"\x00\x00\x00\x05"  # bit_count
+        data += b"\x00\x00"  # Incomplete word_count
+
+        with self.assertRaises(ValueError) as ctx:
+            read_bitmap_file(BytesIO(data))
+
+        self.assertIn("type bitmap", str(ctx.exception).lower())
+
+    def test_truncated_bitmap_entry(self):
+        """Test reading bitmap with truncated entry."""
+        # Valid header with 1 entry
+        data = BITMAP_SIGNATURE
+        data += BITMAP_VERSION.to_bytes(2, "big")
+        data += b"\x00\x00"  # Flags
+        data += b"\x00\x00\x00\x01"  # 1 entry
+        data += b"\x00" * 20  # Pack checksum
+
+        # Write empty type bitmaps
+        for _ in range(4):
+            empty_ewah = EWAHBitmap().encode()
+            data += empty_ewah
+
+        # Truncated entry (only object position, missing rest)
+        data += b"\x00\x00\x00\x00"  # Object position
+        # Missing: XOR offset, flags, bitmap data
+
+        with self.assertRaises(ValueError) as ctx:
+            read_bitmap_file(BytesIO(data))
+
+        # Should raise ValueError about missing data
+        self.assertIsInstance(ctx.exception, ValueError)
+
+    def test_empty_bitmap_file(self):
+        """Test reading completely empty file."""
+        with self.assertRaises(ValueError):
+            read_bitmap_file(BytesIO(b""))
+
+    def test_bitmap_with_zero_entries(self):
+        """Test valid bitmap with zero entries."""
+        bitmap = PackBitmap()
+        bitmap.pack_checksum = b"\x00" * 20
+
+        f = BytesIO()
+        write_bitmap_file(f, bitmap)
+        f.seek(0)
+
+        # Should read successfully
+        bitmap2 = read_bitmap_file(f)
+        self.assertEqual(0, len(bitmap2.entries))
+        self.assertIsNotNone(bitmap2.pack_checksum)
+
+
+class BitmapEdgeCaseTests(unittest.TestCase):
+    """Tests for edge cases in bitmap handling."""
+
+    def test_very_large_bitmap(self):
+        """Test bitmap with many bits set."""
+        bitmap = EWAHBitmap()
+        # Add 100,000 bits
+        for i in range(100000):
+            if i % 3 == 0:  # Every 3rd bit
+                bitmap.add(i)
+
+        # Should encode and decode without issues
+        encoded = bitmap.encode()
+        decoded = EWAHBitmap(encoded)
+
+        self.assertEqual(len(bitmap), len(decoded))
+        # Verify a sample of bits
+        self.assertIn(0, decoded)
+        self.assertIn(99999, decoded)
+        self.assertNotIn(1, decoded)
+        self.assertNotIn(99998, decoded)
+
+    def test_bitmap_with_large_gaps(self):
+        """Test bitmap with large gaps between set bits."""
+        bitmap = EWAHBitmap()
+        bitmap.add(0)
+        bitmap.add(100000)
+        bitmap.add(200000)
+
+        encoded = bitmap.encode()
+        decoded = EWAHBitmap(encoded)
+
+        self.assertEqual(3, len(decoded))
+        self.assertIn(0, decoded)
+        self.assertIn(100000, decoded)
+        self.assertIn(200000, decoded)
+
+    def test_bitmap_all_bits_in_word(self):
+        """Test bitmap with all 64 bits in a word set."""
+        bitmap = EWAHBitmap()
+        for i in range(64):
+            bitmap.add(i)
+
+        encoded = bitmap.encode()
+        decoded = EWAHBitmap(encoded)
+
+        self.assertEqual(64, len(decoded))
+        for i in range(64):
+            self.assertIn(i, decoded)
+
+    def test_multiple_flags_combined(self):
+        """Test bitmap with multiple flags set."""
+        bitmap = PackBitmap(
+            flags=BITMAP_OPT_FULL_DAG | BITMAP_OPT_HASH_CACHE | BITMAP_OPT_LOOKUP_TABLE
+        )
+        bitmap.pack_checksum = b"\x00" * 20
+        bitmap.lookup_table = [(0, 0, 0), (1, 100, 0)]
+        bitmap.name_hash_cache = [0x12345678, 0xABCDEF00]
+
+        # Add an entry
+        test_bitmap = EWAHBitmap()
+        test_bitmap.add(0)
+        test_bitmap.add(5)
+        entry = BitmapEntry(object_pos=0, xor_offset=0, flags=0, bitmap=test_bitmap)
+        bitmap.entries[b"\x00" * 20] = entry
+
+        # Write and read back
+        f = BytesIO()
+        write_bitmap_file(f, bitmap)
+        f.seek(0)
+
+        bitmap2 = read_bitmap_file(f)
+
+        self.assertEqual(bitmap.flags, bitmap2.flags)
+        self.assertTrue(bitmap2.flags & BITMAP_OPT_FULL_DAG)
+        self.assertTrue(bitmap2.flags & BITMAP_OPT_HASH_CACHE)
+        self.assertTrue(bitmap2.flags & BITMAP_OPT_LOOKUP_TABLE)
+        self.assertIsNotNone(bitmap2.lookup_table)
+        self.assertIsNotNone(bitmap2.name_hash_cache)
+
+
+class BitmapConfigTests(unittest.TestCase):
+    """Tests for bitmap-related configuration settings."""
+
+    def test_pack_write_bitmaps_default(self):
+        """Test pack.writeBitmaps defaults to false."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        self.assertFalse(config.get_boolean((b"pack",), b"writeBitmaps", False))
+
+    def test_pack_write_bitmaps_true(self):
+        """Test pack.writeBitmaps = true."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        config.set((b"pack",), b"writeBitmaps", b"true")
+        self.assertTrue(config.get_boolean((b"pack",), b"writeBitmaps", False))
+
+    def test_pack_write_bitmaps_false(self):
+        """Test pack.writeBitmaps = false."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        config.set((b"pack",), b"writeBitmaps", b"false")
+        self.assertFalse(config.get_boolean((b"pack",), b"writeBitmaps", False))
+
+    def test_pack_write_bitmap_hash_cache_default(self):
+        """Test pack.writeBitmapHashCache defaults to true."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        self.assertTrue(config.get_boolean((b"pack",), b"writeBitmapHashCache", True))
+
+    def test_pack_write_bitmap_hash_cache_false(self):
+        """Test pack.writeBitmapHashCache = false."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        config.set((b"pack",), b"writeBitmapHashCache", b"false")
+        self.assertFalse(config.get_boolean((b"pack",), b"writeBitmapHashCache", True))
+
+    def test_pack_write_bitmap_lookup_table_default(self):
+        """Test pack.writeBitmapLookupTable defaults to true."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        self.assertTrue(config.get_boolean((b"pack",), b"writeBitmapLookupTable", True))
+
+    def test_repack_write_bitmaps(self):
+        """Test repack.writeBitmaps configuration."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        config.set((b"repack",), b"writeBitmaps", b"true")
+        self.assertTrue(config.get_boolean((b"repack",), b"writeBitmaps", False))
+
+    def test_pack_use_bitmap_index_default(self):
+        """Test pack.useBitmapIndex defaults to true."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        self.assertTrue(config.get_boolean((b"pack",), b"useBitmapIndex", True))
+
+    def test_pack_use_bitmap_index_false(self):
+        """Test pack.useBitmapIndex = false."""
+        from dulwich.config import ConfigFile
+
+        config = ConfigFile()
+        config.set((b"pack",), b"useBitmapIndex", b"false")
+        self.assertFalse(config.get_boolean((b"pack",), b"useBitmapIndex", True))