bitmap.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737
  1. # bitmap.py -- Packfile bitmap support for git
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Support for Git packfile bitmaps.
  22. Bitmaps store reachability information for packfiles, enabling faster
  23. object counting and enumeration operations without full graph traversal.
  24. The bitmap format uses EWAH (Enhanced Word-Aligned Hybrid) compression
  25. for efficient storage and fast bitwise operations.
  26. """
  27. import os
  28. import struct
  29. from collections.abc import Iterator
  30. from io import BytesIO
  31. from typing import IO, TYPE_CHECKING
  32. from .file import GitFile
  33. if TYPE_CHECKING:
  34. from .pack import PackIndex
  35. # Bitmap file signature
  36. BITMAP_SIGNATURE = b"BITM"
  37. # Bitmap format version
  38. BITMAP_VERSION = 1
  39. # Bitmap flags
  40. BITMAP_OPT_FULL_DAG = 0x1 # Full closure
  41. BITMAP_OPT_HASH_CACHE = 0x4 # Name-hash cache
  42. BITMAP_OPT_LOOKUP_TABLE = 0x10 # Lookup table for random access
  43. BITMAP_OPT_PSEUDO_MERGES = 0x20 # Pseudo-merge bitmaps
  44. def _encode_ewah_words(words: list[int]) -> list[int]:
  45. """Encode a list of 64-bit words using EWAH run-length compression.
  46. Args:
  47. words: List of 64-bit words to encode
  48. Returns:
  49. List of compressed words (RLWs followed by literals)
  50. """
  51. compressed_words = []
  52. i = 0
  53. while i < len(words):
  54. # Check for runs of all zeros or all ones
  55. if words[i] == 0 or words[i] == 0xFFFFFFFFFFFFFFFF:
  56. # Count consecutive identical words
  57. run_value = words[i]
  58. run_length = 0
  59. while i < len(words) and words[i] == run_value:
  60. run_length += 1
  61. i += 1
  62. # Collect following literal words
  63. literals = []
  64. while i < len(words) and words[i] != 0 and words[i] != 0xFFFFFFFFFFFFFFFF:
  65. literals.append(words[i])
  66. i += 1
  67. if len(literals) >= 0x7FFFFFFF: # Max literal count in RLW
  68. break
  69. # Create RLW with correct bit layout:
  70. # [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
  71. running_bit = 1 if run_value == 0xFFFFFFFFFFFFFFFF else 0
  72. rlw = (len(literals) << 33) | (run_length << 1) | running_bit
  73. compressed_words.append(rlw)
  74. compressed_words.extend(literals)
  75. else:
  76. # Collect literal words
  77. literals = []
  78. while i < len(words) and words[i] != 0 and words[i] != 0xFFFFFFFFFFFFFFFF:
  79. literals.append(words[i])
  80. i += 1
  81. if len(literals) >= 0x7FFFFFFF: # Max literal count
  82. break
  83. # RLW with no run, just literals
  84. # [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
  85. rlw = (len(literals) << 33) | (0 << 1) | 0
  86. compressed_words.append(rlw)
  87. compressed_words.extend(literals)
  88. return compressed_words
  89. class EWAHBitmap:
  90. """EWAH (Enhanced Word-Aligned Hybrid) compressed bitmap.
  91. EWAH uses run-length encoding for efficient bitmap storage.
  92. Each bitmap consists of:
  93. - Uncompressed bit count (4 bytes)
  94. - Compressed word count (4 bytes)
  95. - Compressed words (8 bytes each)
  96. - Current RLW position (4 bytes)
  97. Each Run Length Word (RLW) 64-bit layout (LSB to MSB):
  98. - Bit 0: running_bit (1 bit) - value of repeated words (0 or 1)
  99. - Bits 1-32: running_len (32 bits) - count of repeated words
  100. - Bits 33-63: literal_words (31 bits) - count of literal words following this RLW
  101. """
  102. def __init__(self, data: bytes | None = None) -> None:
  103. """Initialize EWAH bitmap.
  104. Args:
  105. data: Optional compressed bitmap data to decode
  106. """
  107. self.bits: set[int] = set()
  108. self.bit_count = 0
  109. if data:
  110. self._decode(data)
  111. def _decode(self, data: bytes) -> None:
  112. """Decode EWAH compressed bitmap data.
  113. Args:
  114. data: Compressed bitmap data (EWAH format with header + words + RLW position)
  115. """
  116. f = BytesIO(data)
  117. # Read header
  118. bit_count_bytes = f.read(4)
  119. word_count_bytes = f.read(4)
  120. if len(bit_count_bytes) < 4 or len(word_count_bytes) < 4:
  121. return
  122. bit_count = struct.unpack(">I", bit_count_bytes)[0]
  123. word_count = struct.unpack(">I", word_count_bytes)[0]
  124. self.bit_count = bit_count
  125. current_bit = 0
  126. # Read all words first
  127. words = []
  128. for _ in range(word_count):
  129. word_bytes = f.read(8)
  130. if len(word_bytes) < 8:
  131. break
  132. word = struct.unpack(">Q", word_bytes)[0]
  133. words.append(word)
  134. # Process EWAH chunks: RLW followed by literal words
  135. idx = 0
  136. while idx < len(words):
  137. # This is an RLW
  138. # Bit layout: [literal_words(31 bits)][running_len(32 bits)][running_bit(1 bit)]
  139. rlw = words[idx]
  140. running_bit = rlw & 1
  141. running_len = (rlw >> 1) & 0xFFFFFFFF
  142. literal_words = rlw >> 33
  143. idx += 1
  144. # Process running bits
  145. if running_len > 0:
  146. if running_bit == 1:
  147. # Add all bits in the repeated section
  148. for i in range(running_len * 64):
  149. self.bits.add(current_bit + i)
  150. current_bit += running_len * 64
  151. # Process literal words
  152. for _ in range(literal_words):
  153. if idx >= len(words):
  154. break
  155. literal = words[idx]
  156. idx += 1
  157. # Extract set bits from literal word
  158. for i in range(64):
  159. if literal & (1 << i):
  160. self.bits.add(current_bit + i)
  161. current_bit += 64
  162. # Read RLW position (we don't use it currently, but it's part of the format)
  163. f.read(4)
  164. def encode(self) -> bytes:
  165. """Encode bitmap to EWAH compressed format.
  166. Returns:
  167. Compressed bitmap data including header, words, and RLW position
  168. """
  169. if not self.bits:
  170. # Empty bitmap: bit_count=0, word_count=0, rlw_pos=0
  171. return struct.pack(">III", 0, 0, 0)
  172. max_bit = max(self.bits) if self.bits else 0
  173. bit_count = max_bit + 1
  174. word_count = (bit_count + 63) // 64
  175. # Create literal words
  176. words = [0] * word_count
  177. for bit in self.bits:
  178. word_idx = bit // 64
  179. bit_idx = bit % 64
  180. words[word_idx] |= 1 << bit_idx
  181. # Compress using EWAH run-length encoding
  182. compressed_words = _encode_ewah_words(words)
  183. # Build EWAH data
  184. f = BytesIO()
  185. # Header
  186. f.write(struct.pack(">I", bit_count))
  187. f.write(struct.pack(">I", len(compressed_words)))
  188. # Write compressed words
  189. for word in compressed_words:
  190. f.write(struct.pack(">Q", word))
  191. # Write RLW position (position of last RLW in the compressed words)
  192. # For now, we'll use 0 as we don't track this during encoding
  193. # This could be improved in the future if needed
  194. f.write(struct.pack(">I", 0))
  195. return f.getvalue()
  196. def __contains__(self, bit: int) -> bool:
  197. """Check if a bit is set.
  198. Args:
  199. bit: Bit position to check
  200. Returns:
  201. True if bit is set, False otherwise
  202. """
  203. return bit in self.bits
  204. def __len__(self) -> int:
  205. """Return the number of set bits.
  206. Returns:
  207. Count of set bits
  208. """
  209. return len(self.bits)
  210. def __or__(self, other: "EWAHBitmap") -> "EWAHBitmap":
  211. """Bitwise OR operation.
  212. Args:
  213. other: Other bitmap to OR with
  214. Returns:
  215. New bitmap with OR result
  216. """
  217. result = EWAHBitmap()
  218. result.bits = self.bits | other.bits
  219. result.bit_count = max(self.bit_count, other.bit_count)
  220. return result
  221. def __and__(self, other: "EWAHBitmap") -> "EWAHBitmap":
  222. """Bitwise AND operation.
  223. Args:
  224. other: Other bitmap to AND with
  225. Returns:
  226. New bitmap with AND result
  227. """
  228. result = EWAHBitmap()
  229. result.bits = self.bits & other.bits
  230. result.bit_count = max(self.bit_count, other.bit_count)
  231. return result
  232. def __xor__(self, other: "EWAHBitmap") -> "EWAHBitmap":
  233. """Bitwise XOR operation.
  234. Args:
  235. other: Other bitmap to XOR with
  236. Returns:
  237. New bitmap with XOR result
  238. """
  239. result = EWAHBitmap()
  240. result.bits = self.bits ^ other.bits
  241. result.bit_count = max(self.bit_count, other.bit_count)
  242. return result
  243. def add(self, bit: int) -> None:
  244. """Set a bit.
  245. Args:
  246. bit: Bit position to set
  247. """
  248. self.bits.add(bit)
  249. self.bit_count = max(self.bit_count, bit + 1)
  250. class BitmapEntry:
  251. """A single bitmap entry for a commit."""
  252. def __init__(
  253. self,
  254. object_pos: int,
  255. xor_offset: int,
  256. flags: int,
  257. bitmap: EWAHBitmap,
  258. ) -> None:
  259. """Initialize a bitmap entry.
  260. Args:
  261. object_pos: Position of object in pack index
  262. xor_offset: XOR offset for compression (0-160)
  263. flags: Entry flags
  264. bitmap: The EWAH bitmap data
  265. """
  266. self.object_pos = object_pos
  267. self.xor_offset = xor_offset
  268. self.flags = flags
  269. self.bitmap = bitmap
  270. class PackBitmap:
  271. """A pack bitmap index.
  272. Bitmaps store reachability information for commits in a packfile,
  273. allowing fast object enumeration without graph traversal.
  274. """
  275. def __init__(
  276. self,
  277. version: int = BITMAP_VERSION,
  278. flags: int = BITMAP_OPT_FULL_DAG,
  279. ) -> None:
  280. """Initialize a pack bitmap.
  281. Args:
  282. version: Bitmap format version
  283. flags: Bitmap flags
  284. """
  285. self.version = version
  286. self.flags = flags
  287. self.pack_checksum: bytes | None = None
  288. # Type bitmaps for commits, trees, blobs, tags
  289. self.commit_bitmap = EWAHBitmap()
  290. self.tree_bitmap = EWAHBitmap()
  291. self.blob_bitmap = EWAHBitmap()
  292. self.tag_bitmap = EWAHBitmap()
  293. # Bitmap entries indexed by commit SHA
  294. self.entries: dict[bytes, BitmapEntry] = {}
  295. # List of entries in order (for XOR offset resolution)
  296. self.entries_list: list[tuple[bytes, BitmapEntry]] = []
  297. # Optional lookup table for random access
  298. self.lookup_table: list[tuple[int, int, int]] | None = None
  299. # Optional name-hash cache
  300. self.name_hash_cache: list[int] | None = None
  301. def get_bitmap(self, commit_sha: bytes) -> EWAHBitmap | None:
  302. """Get the bitmap for a commit.
  303. Args:
  304. commit_sha: SHA-1 of the commit
  305. Returns:
  306. EWAH bitmap or None if not found
  307. """
  308. entry = self.entries.get(commit_sha)
  309. if entry is None:
  310. return None
  311. # Decompress using XOR if needed
  312. if entry.xor_offset > 0:
  313. # Find the entry at the XOR offset
  314. # The XOR offset tells us how many entries back to look
  315. # We need to find this entry in the ordered list
  316. try:
  317. current_idx = next(
  318. i
  319. for i, (sha, _) in enumerate(self.entries_list)
  320. if sha == commit_sha
  321. )
  322. except StopIteration:
  323. # Entry not found in list, return as-is
  324. return entry.bitmap
  325. # XOR offset is how many positions back to look (max 160)
  326. if current_idx >= entry.xor_offset:
  327. base_sha, _base_entry = self.entries_list[
  328. current_idx - entry.xor_offset
  329. ]
  330. # Get the base bitmap (recursively if it also uses XOR)
  331. base_bitmap = self.get_bitmap(base_sha)
  332. if base_bitmap is not None:
  333. # XOR the current bitmap with the base
  334. return entry.bitmap ^ base_bitmap
  335. return entry.bitmap
  336. def has_commit(self, commit_sha: bytes) -> bool:
  337. """Check if a commit has a bitmap.
  338. Args:
  339. commit_sha: SHA-1 of the commit
  340. Returns:
  341. True if bitmap exists for this commit
  342. """
  343. return commit_sha in self.entries
  344. def iter_commits(self) -> Iterator[bytes]:
  345. """Iterate over all commits with bitmaps.
  346. Returns:
  347. Iterator of commit SHAs
  348. """
  349. return iter(self.entries.keys())
  350. def read_bitmap(
  351. filename: str | os.PathLike[str],
  352. pack_index: "PackIndex" | None = None,
  353. ) -> PackBitmap:
  354. """Read a bitmap index file.
  355. Args:
  356. filename: Path to the .bitmap file
  357. pack_index: Optional PackIndex to resolve object positions to SHAs
  358. Returns:
  359. Loaded PackBitmap
  360. Raises:
  361. ValueError: If file format is invalid
  362. ChecksumMismatch: If checksum verification fails
  363. """
  364. with GitFile(filename, "rb") as f:
  365. return read_bitmap_file(f, pack_index=pack_index)
  366. def read_bitmap_file(
  367. f: IO[bytes], pack_index: "PackIndex" | None = None
  368. ) -> PackBitmap:
  369. """Read bitmap data from a file object.
  370. Args:
  371. f: File object to read from
  372. pack_index: Optional PackIndex to resolve object positions to SHAs
  373. Returns:
  374. Loaded PackBitmap
  375. Raises:
  376. ValueError: If file format is invalid
  377. """
  378. # Read header
  379. signature = f.read(4)
  380. if signature != BITMAP_SIGNATURE:
  381. raise ValueError(
  382. f"Invalid bitmap signature: {signature!r}, expected {BITMAP_SIGNATURE!r}"
  383. )
  384. version_bytes = f.read(2)
  385. flags_bytes = f.read(2)
  386. if len(version_bytes) < 2 or len(flags_bytes) < 2:
  387. raise ValueError("Incomplete bitmap header")
  388. version = struct.unpack(">H", version_bytes)[0]
  389. flags = struct.unpack(">H", flags_bytes)[0]
  390. if version != BITMAP_VERSION:
  391. raise ValueError(f"Unsupported bitmap version: {version}")
  392. # Read entry count
  393. entry_count_bytes = f.read(4)
  394. if len(entry_count_bytes) < 4:
  395. raise ValueError("Missing entry count")
  396. entry_count = struct.unpack(">I", entry_count_bytes)[0]
  397. # Read pack checksum
  398. pack_checksum = f.read(20)
  399. if len(pack_checksum) < 20:
  400. raise ValueError("Missing pack checksum")
  401. bitmap = PackBitmap(version=version, flags=flags)
  402. bitmap.pack_checksum = pack_checksum
  403. # Read type bitmaps (EWAH bitmaps are self-describing)
  404. for i, type_bitmap in enumerate(
  405. [
  406. bitmap.commit_bitmap,
  407. bitmap.tree_bitmap,
  408. bitmap.blob_bitmap,
  409. bitmap.tag_bitmap,
  410. ]
  411. ):
  412. # EWAH format:
  413. # 4 bytes: bit count
  414. # 4 bytes: word count
  415. # N x 8 bytes: compressed words
  416. # 4 bytes: RLW position
  417. # Read header to determine size
  418. bit_count_bytes = f.read(4)
  419. word_count_bytes = f.read(4)
  420. if len(bit_count_bytes) < 4 or len(word_count_bytes) < 4:
  421. raise ValueError(f"Missing type bitmap {i} header")
  422. word_count = struct.unpack(">I", word_count_bytes)[0]
  423. # Read compressed words
  424. words_data = f.read(word_count * 8)
  425. if len(words_data) < word_count * 8:
  426. raise ValueError(f"Incomplete type bitmap {i} data")
  427. # Read RLW position
  428. rlw_pos_bytes = f.read(4)
  429. if len(rlw_pos_bytes) < 4:
  430. raise ValueError(f"Missing type bitmap {i} RLW position")
  431. # Reconstruct the full EWAH data to pass to _decode
  432. ewah_data = bit_count_bytes + word_count_bytes + words_data + rlw_pos_bytes
  433. type_bitmap._decode(ewah_data)
  434. # Read bitmap entries
  435. for _ in range(entry_count):
  436. # Read object position (4 bytes)
  437. obj_pos_bytes = f.read(4)
  438. if len(obj_pos_bytes) < 4:
  439. raise ValueError("Incomplete bitmap entry")
  440. obj_pos = struct.unpack(">I", obj_pos_bytes)[0]
  441. # Read XOR offset (1 byte)
  442. xor_offset_bytes = f.read(1)
  443. if len(xor_offset_bytes) < 1:
  444. raise ValueError("Missing XOR offset")
  445. xor_offset = xor_offset_bytes[0]
  446. # Read flags (1 byte)
  447. flags_bytes = f.read(1)
  448. if len(flags_bytes) < 1:
  449. raise ValueError("Missing entry flags")
  450. entry_flags = flags_bytes[0]
  451. # Read self-describing EWAH bitmap
  452. # EWAH format: bit_count (4) + word_count (4) + words (word_count * 8) + rlw_pos (4)
  453. bit_count_bytes = f.read(4)
  454. word_count_bytes = f.read(4)
  455. if len(bit_count_bytes) < 4 or len(word_count_bytes) < 4:
  456. raise ValueError("Incomplete bitmap entry EWAH header")
  457. word_count = struct.unpack(">I", word_count_bytes)[0]
  458. # Read compressed words
  459. words_data = f.read(word_count * 8)
  460. if len(words_data) < word_count * 8:
  461. raise ValueError("Incomplete bitmap entry EWAH words")
  462. # Read RLW position
  463. rlw_pos_bytes = f.read(4)
  464. if len(rlw_pos_bytes) < 4:
  465. raise ValueError("Missing bitmap entry EWAH RLW position")
  466. # Reconstruct full EWAH data
  467. bitmap_data = bit_count_bytes + word_count_bytes + words_data + rlw_pos_bytes
  468. # Create bitmap entry
  469. ewah_bitmap = EWAHBitmap(bitmap_data) if word_count > 0 else EWAHBitmap()
  470. entry = BitmapEntry(
  471. object_pos=obj_pos,
  472. xor_offset=xor_offset,
  473. flags=entry_flags,
  474. bitmap=ewah_bitmap,
  475. )
  476. # Resolve object position to SHA if we have a pack index
  477. if pack_index is not None:
  478. # Get the SHA at the given position in the sorted index
  479. sha = None
  480. for idx, (entry_sha, _offset, _crc32) in enumerate(
  481. pack_index.iterentries()
  482. ):
  483. if idx == obj_pos:
  484. sha = entry_sha
  485. break
  486. if sha is not None:
  487. bitmap.entries[sha] = entry
  488. bitmap.entries_list.append((sha, entry))
  489. else:
  490. # Without pack index, use position as temporary key
  491. temp_key = obj_pos.to_bytes(4, byteorder="big")
  492. bitmap.entries[temp_key] = entry
  493. bitmap.entries_list.append((temp_key, entry))
  494. # Read optional lookup table
  495. if flags & BITMAP_OPT_LOOKUP_TABLE:
  496. # Lookup table contains triplets: (commit_pos, offset, xor_row)
  497. # Number of entries matches the bitmap entry count
  498. lookup_table = []
  499. for _ in range(entry_count):
  500. # Read commit position (4 bytes)
  501. commit_pos_bytes = f.read(4)
  502. if len(commit_pos_bytes) < 4:
  503. break
  504. commit_pos = struct.unpack(">I", commit_pos_bytes)[0]
  505. # Read file offset (8 bytes)
  506. offset_bytes = f.read(8)
  507. if len(offset_bytes) < 8:
  508. break
  509. offset = struct.unpack(">Q", offset_bytes)[0]
  510. # Read XOR row (4 bytes)
  511. xor_row_bytes = f.read(4)
  512. if len(xor_row_bytes) < 4:
  513. break
  514. xor_row = struct.unpack(">I", xor_row_bytes)[0]
  515. lookup_table.append((commit_pos, offset, xor_row))
  516. bitmap.lookup_table = lookup_table
  517. # Read optional name-hash cache
  518. if flags & BITMAP_OPT_HASH_CACHE:
  519. # Name-hash cache contains one 32-bit hash per object in the pack
  520. # The number of hashes depends on the total number of objects
  521. # For now, we'll read what's available
  522. name_hash_cache = []
  523. while True:
  524. hash_bytes = f.read(4)
  525. if len(hash_bytes) < 4:
  526. break
  527. hash_value = struct.unpack(">I", hash_bytes)[0]
  528. name_hash_cache.append(hash_value)
  529. if name_hash_cache:
  530. bitmap.name_hash_cache = name_hash_cache
  531. return bitmap
  532. def write_bitmap(
  533. filename: str | os.PathLike[str],
  534. bitmap: PackBitmap,
  535. ) -> None:
  536. """Write a bitmap index file.
  537. Args:
  538. filename: Path to write the .bitmap file
  539. bitmap: PackBitmap to write
  540. """
  541. with GitFile(filename, "wb") as f:
  542. write_bitmap_file(f, bitmap)
  543. def write_bitmap_file(f: IO[bytes], bitmap: PackBitmap) -> None:
  544. """Write bitmap data to a file object.
  545. Args:
  546. f: File object to write to
  547. bitmap: PackBitmap to write
  548. """
  549. # Write header
  550. f.write(BITMAP_SIGNATURE)
  551. f.write(struct.pack(">H", bitmap.version))
  552. f.write(struct.pack(">H", bitmap.flags))
  553. # Write entry count
  554. f.write(struct.pack(">I", len(bitmap.entries)))
  555. # Write pack checksum
  556. if bitmap.pack_checksum:
  557. f.write(bitmap.pack_checksum)
  558. else:
  559. f.write(b"\x00" * 20)
  560. # Write type bitmaps (self-describing EWAH format, no size prefix needed)
  561. for type_bitmap in [
  562. bitmap.commit_bitmap,
  563. bitmap.tree_bitmap,
  564. bitmap.blob_bitmap,
  565. bitmap.tag_bitmap,
  566. ]:
  567. data = type_bitmap.encode()
  568. f.write(data)
  569. # Write bitmap entries
  570. for _sha, entry in bitmap.entries.items():
  571. # Write object position (4 bytes)
  572. f.write(struct.pack(">I", entry.object_pos))
  573. # Write XOR offset (1 byte)
  574. f.write(bytes([entry.xor_offset]))
  575. # Write flags (1 byte)
  576. f.write(bytes([entry.flags]))
  577. # Write compressed bitmap data (self-describing EWAH format, no size prefix)
  578. bitmap_data = entry.bitmap.encode()
  579. f.write(bitmap_data)
  580. # Write optional lookup table
  581. if bitmap.flags & BITMAP_OPT_LOOKUP_TABLE and bitmap.lookup_table:
  582. for commit_pos, offset, xor_row in bitmap.lookup_table:
  583. f.write(struct.pack(">I", commit_pos)) # 4 bytes
  584. f.write(struct.pack(">Q", offset)) # 8 bytes
  585. f.write(struct.pack(">I", xor_row)) # 4 bytes
  586. # Write optional name-hash cache
  587. if bitmap.flags & BITMAP_OPT_HASH_CACHE and bitmap.name_hash_cache:
  588. for hash_value in bitmap.name_hash_cache:
  589. f.write(struct.pack(">I", hash_value))