  1. # midx.py -- Multi-Pack-Index (MIDX) support
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Multi-Pack-Index (MIDX) support.
  22. A multi-pack-index (MIDX) provides a single index that covers multiple pack files,
  23. enabling fast object lookup across all packs without opening each pack index.
  24. The MIDX file format consists of:
  25. - A header with signature, version, and hash algorithm
  26. - A chunk lookup table
  27. - Multiple chunks containing pack names, OID fanout, OID lookup, and object offsets
  28. - A trailer with checksum
  29. This module provides:
  30. - Reading MIDX files
  31. - Writing MIDX files
  32. - Integration with pack-based object stores
  33. Limitations:
  34. - Incremental MIDX chains are not yet supported (base_midx_files must be 0)
  35. - BTMP (bitmapped packfiles) chunk is not yet implemented
  36. - RIDX (reverse index) chunk is not yet implemented
  37. Note: Incremental MIDX chains were introduced in Git 2.47 as an experimental
  38. feature, where multiple MIDX files can be chained together. The format includes
  39. a base_midx_files field in the header and uses a multi-pack-index.d/ directory
  40. with a multi-pack-index-chain file. This feature is not yet supported by Dulwich
  41. as the specification is still evolving.
  42. """
import os
import struct
from collections.abc import Iterator
from io import UnsupportedOperation
from typing import IO, Any

try:
    import mmap
except ImportError:
    # mmap is unavailable on some platforms (e.g. certain embedded builds);
    # readers fall back to loading whole files into memory.
    has_mmap = False
else:
    has_mmap = True

from .file import GitFile, _GitFile
from .objects import ObjectID, RawObjectID
from .pack import SHA1Writer

# MIDX signature (first 4 bytes of every MIDX file)
MIDX_SIGNATURE = b"MIDX"
# MIDX format version supported by this module
MIDX_VERSION = 1

# Chunk identifiers (4 bytes each)
CHUNK_PNAM = b"PNAM"  # Packfile names
CHUNK_OIDF = b"OIDF"  # OID fanout table
CHUNK_OIDL = b"OIDL"  # OID lookup table
CHUNK_OOFF = b"OOFF"  # Object offsets
CHUNK_LOFF = b"LOFF"  # Large offsets (optional)
CHUNK_BTMP = b"BTMP"  # Bitmapped packfiles (optional, not yet implemented)
CHUNK_RIDX = b"RIDX"  # Reverse index (optional, not yet implemented)

# Hash algorithm identifiers (stored in the header's object-id-version byte)
HASH_ALGORITHM_SHA1 = 1
HASH_ALGORITHM_SHA256 = 2
class MultiPackIndex:
    """Multi-pack-index for efficient object lookup across multiple pack files."""

    def __init__(
        self,
        filename: str | os.PathLike[str],
        file: IO[bytes] | _GitFile | None = None,
        contents: bytes | None = None,
        size: int | None = None,
    ) -> None:
        """Initialize a MultiPackIndex.

        Args:
          filename: Path to the MIDX file
          file: Optional file object
          contents: Optional mmap'd contents
          size: Optional size of the MIDX file
        """
        self._filename = os.fspath(filename)
        self._file = file
        self._size = size
        # Instance variables that will be set during parsing
        self.version: int
        self.hash_algorithm: int
        self.hash_size: int
        self.chunk_count: int
        self.base_midx_files: int
        self.pack_count: int
        self.pack_names: list[str]
        self.object_count: int
        self._chunks: dict[bytes, int]
        self._fanout_table: list[int]
        self._oidl_offset: int
        self._ooff_offset: int
        self._loff_offset: int
        # Load file contents: prefer caller-supplied contents, then the
        # caller-supplied file object, then open the path ourselves.
        if contents is None:
            if file is None:
                with GitFile(filename, "rb") as f:
                    self._contents, self._size = self._load_file_contents(f, size)
            else:
                self._contents, self._size = self._load_file_contents(file, size)
        else:
            # NOTE(review): when contents is given but size is None,
            # self._size stays None; parsing uses len(self._contents) instead.
            self._contents = contents
        # Parse header
        self._parse_header()
        # Parse chunk lookup table
        self._parse_chunk_table()

    def _load_file_contents(
        self, f: IO[bytes] | _GitFile, size: int | None = None
    ) -> tuple[bytes | Any, int]:
        """Load contents from a file, preferring mmap when possible.

        Args:
          f: File-like object to load
          size: Expected size, or None to determine from file

        Returns:
          Tuple of (contents, size)
        """
        try:
            fd = f.fileno()
        except (UnsupportedOperation, AttributeError):
            fd = None
        # Attempt to use mmap if possible
        if fd is not None:
            if size is None:
                size = os.fstat(fd).st_size
            if has_mmap:
                try:
                    contents = mmap.mmap(fd, size, access=mmap.ACCESS_READ)
                except (OSError, ValueError):
                    # Can't mmap - perhaps a socket or invalid file descriptor
                    pass
                else:
                    return contents, size
        # Fall back to reading entire file into memory
        contents_bytes = f.read()
        size = len(contents_bytes)
        return contents_bytes, size

    def _parse_header(self) -> None:
        """Parse the MIDX header.

        Raises:
          ValueError: If the file is truncated, has a bad signature, an
            unsupported version, an unknown hash algorithm, or uses
            incremental MIDX chains (base_midx_files != 0).
        """
        # Header is 12 bytes: 4-byte signature, 1-byte version, 1-byte hash
        # algorithm, 1-byte chunk count, 1-byte base MIDX count, 4-byte
        # pack count.
        if len(self._contents) < 12:
            raise ValueError("MIDX file too small")
        # Check signature
        signature = self._contents[0:4]
        if signature != MIDX_SIGNATURE:
            raise ValueError(f"Invalid MIDX signature: {signature!r}")
        # Read version
        self.version = self._contents[4]
        if self.version != MIDX_VERSION:
            raise ValueError(f"Unsupported MIDX version: {self.version}")
        # Read object ID version (hash algorithm)
        self.hash_algorithm = self._contents[5]
        if self.hash_algorithm == HASH_ALGORITHM_SHA1:
            self.hash_size = 20
        elif self.hash_algorithm == HASH_ALGORITHM_SHA256:
            self.hash_size = 32
        else:
            raise ValueError(f"Unknown hash algorithm: {self.hash_algorithm}")
        # Read chunk count
        self.chunk_count = self._contents[6]
        # Read base MIDX files count (currently always 0)
        self.base_midx_files = self._contents[7]
        if self.base_midx_files != 0:
            raise ValueError("Incremental MIDX not yet supported")
        # Read pack file count
        (self.pack_count,) = struct.unpack(">L", self._contents[8:12])

    def _parse_chunk_table(self) -> None:
        """Parse the chunk lookup table and then each known chunk."""
        self._chunks = {}
        # Chunk table starts at offset 12
        offset = 12
        # Each chunk entry is 12 bytes (4-byte ID + 8-byte offset)
        for i in range(self.chunk_count + 1):  # +1 for terminator
            chunk_id = self._contents[offset : offset + 4]
            (chunk_offset,) = struct.unpack(
                ">Q", self._contents[offset + 4 : offset + 12]
            )
            if chunk_id == b"\x00\x00\x00\x00":
                # Terminator entry
                break
            self._chunks[chunk_id] = chunk_offset
            offset += 12
        # Parse required chunks
        self._parse_pnam_chunk()
        self._parse_oidf_chunk()
        self._parse_oidl_chunk()
        self._parse_ooff_chunk()
        # Parse optional chunks
        if CHUNK_LOFF in self._chunks:
            self._parse_loff_chunk()

    def _parse_pnam_chunk(self) -> None:
        """Parse the Packfile Names (PNAM) chunk.

        Populates self.pack_names with the null-terminated UTF-8 names
        stored in the chunk, skipping empty strings used as padding.
        """
        if CHUNK_PNAM not in self._chunks:
            raise ValueError("Required PNAM chunk not found")
        offset = self._chunks[CHUNK_PNAM]
        self.pack_names = []
        # Find the end of the PNAM chunk (next chunk or end of chunks section)
        next_offset = min(
            (o for o in self._chunks.values() if o > offset),
            default=len(self._contents),
        )
        # Parse null-terminated pack names
        current = offset
        while current < next_offset:
            # Find the next null terminator
            null_pos = self._contents.find(b"\x00", current, next_offset)
            if null_pos == -1:
                break
            pack_name = self._contents[current:null_pos].decode("utf-8")
            if pack_name:  # Skip empty strings (padding)
                self.pack_names.append(pack_name)
            current = null_pos + 1

    def _parse_oidf_chunk(self) -> None:
        """Parse the OID Fanout (OIDF) chunk.

        The fanout table holds 256 cumulative counts, one per possible
        first byte of an object ID; entry 255 is the total object count.
        """
        if CHUNK_OIDF not in self._chunks:
            raise ValueError("Required OIDF chunk not found")
        offset = self._chunks[CHUNK_OIDF]
        self._fanout_table = []
        # Read 256 4-byte entries
        for i in range(256):
            (count,) = struct.unpack(
                ">L", self._contents[offset + i * 4 : offset + i * 4 + 4]
            )
            self._fanout_table.append(count)
        # Total object count is the last entry
        self.object_count = self._fanout_table[255]

    def _parse_oidl_chunk(self) -> None:
        """Parse the OID Lookup (OIDL) chunk (records its start offset)."""
        if CHUNK_OIDL not in self._chunks:
            raise ValueError("Required OIDL chunk not found")
        self._oidl_offset = self._chunks[CHUNK_OIDL]

    def _parse_ooff_chunk(self) -> None:
        """Parse the Object Offsets (OOFF) chunk (records its start offset)."""
        if CHUNK_OOFF not in self._chunks:
            raise ValueError("Required OOFF chunk not found")
        self._ooff_offset = self._chunks[CHUNK_OOFF]

    def _parse_loff_chunk(self) -> None:
        """Parse the Large Offsets (LOFF) chunk (records its start offset)."""
        self._loff_offset = self._chunks[CHUNK_LOFF]

    def __len__(self) -> int:
        """Return the number of objects in this MIDX."""
        return self.object_count

    def _get_oid(self, index: int) -> RawObjectID:
        """Get the object ID at the given index.

        Args:
          index: Index of the object

        Returns:
          Binary object ID

        Raises:
          IndexError: If index is out of range.
        """
        if index < 0 or index >= self.object_count:
            raise IndexError(f"Index {index} out of range")
        offset = self._oidl_offset + index * self.hash_size
        return RawObjectID(self._contents[offset : offset + self.hash_size])

    def _get_pack_info(self, index: int) -> tuple[int, int]:
        """Get pack ID and offset for object at the given index.

        Args:
          index: Index of the object

        Returns:
          Tuple of (pack_id, offset)

        Raises:
          IndexError: If index is out of range.
          ValueError: If a large offset is referenced but no LOFF chunk exists.
        """
        if index < 0 or index >= self.object_count:
            raise IndexError(f"Index {index} out of range")
        # Each entry is 8 bytes (4-byte pack ID + 4-byte offset)
        offset = self._ooff_offset + index * 8
        (pack_id,) = struct.unpack(">L", self._contents[offset : offset + 4])
        (pack_offset,) = struct.unpack(">L", self._contents[offset + 4 : offset + 8])
        # Check if this is a large offset (MSB set)
        if pack_offset & 0x80000000:
            # Look up in LOFF chunk; the low 31 bits index into the table of
            # 8-byte big-endian offsets.
            if CHUNK_LOFF not in self._chunks:
                raise ValueError("Large offset found but no LOFF chunk")
            large_index = pack_offset & 0x7FFFFFFF
            large_offset_pos = self._loff_offset + large_index * 8
            (pack_offset,) = struct.unpack(
                ">Q", self._contents[large_offset_pos : large_offset_pos + 8]
            )
        return pack_id, pack_offset

    def object_offset(self, sha: ObjectID | RawObjectID) -> tuple[str, int] | None:
        """Return the pack name and offset for the given object.

        Args:
          sha: Binary SHA-1 or SHA-256 hash (must be raw bytes of
            hash_size length; hex IDs are rejected by the length check)

        Returns:
          Tuple of (pack_name, offset) or None if not found

        Raises:
          ValueError: If the SHA length does not match this index's hash size.
        """
        if len(sha) != self.hash_size:
            raise ValueError(
                f"SHA size mismatch: expected {self.hash_size}, got {len(sha)}"
            )
        # Use fanout table to narrow search range
        first_byte = sha[0]
        start_idx = 0 if first_byte == 0 else self._fanout_table[first_byte - 1]
        end_idx = self._fanout_table[first_byte]
        # Binary search within the range
        while start_idx < end_idx:
            mid = (start_idx + end_idx) // 2
            mid_sha = self._get_oid(mid)
            if mid_sha == sha:
                # Found it!
                pack_id, offset = self._get_pack_info(mid)
                return self.pack_names[pack_id], offset
            elif mid_sha < sha:
                start_idx = mid + 1
            else:
                end_idx = mid
        return None

    def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if the given object SHA is in this MIDX.

        Args:
          sha: Binary SHA hash

        Returns:
          True if the object is in this MIDX
        """
        return self.object_offset(sha) is not None

    def iterentries(self) -> Iterator[tuple[RawObjectID, str, int]]:
        """Iterate over all entries in this MIDX.

        Yields:
          Tuples of (sha, pack_name, offset)
        """
        for i in range(self.object_count):
            sha = self._get_oid(i)
            pack_id, offset = self._get_pack_info(i)
            pack_name = self.pack_names[pack_id]
            yield sha, pack_name, offset

    def close(self) -> None:
        """Close the MIDX file and release mmap resources."""
        # Close mmap'd contents first if it's an mmap object
        if self._contents is not None and has_mmap:
            if isinstance(self._contents, mmap.mmap):
                self._contents.close()
            self._contents = None
        # Close file handle
        if self._file is not None:
            self._file.close()
            self._file = None
  344. def load_midx(path: str | os.PathLike[str]) -> MultiPackIndex:
  345. """Load a multi-pack-index file by path.
  346. Args:
  347. path: Path to the MIDX file
  348. Returns:
  349. A MultiPackIndex loaded from the given path
  350. """
  351. with GitFile(path, "rb") as f:
  352. return load_midx_file(path, f)
  353. def load_midx_file(
  354. path: str | os.PathLike[str], f: IO[bytes] | _GitFile
  355. ) -> MultiPackIndex:
  356. """Load a multi-pack-index from a file-like object.
  357. Args:
  358. path: Path for the MIDX file
  359. f: File-like object
  360. Returns:
  361. A MultiPackIndex loaded from the given file
  362. """
  363. return MultiPackIndex(path, file=f)
def write_midx(
    f: IO[bytes],
    pack_index_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]],
    hash_algorithm: int = HASH_ALGORITHM_SHA1,
) -> bytes:
    """Write a multi-pack-index file.

    Args:
      f: File-like object to write to
      pack_index_entries: List of (pack_name, entries) tuples where entries are
        (sha, offset, crc32) tuples, sorted by SHA
      hash_algorithm: Hash algorithm to use (1=SHA-1, 2=SHA-256)

    Returns:
      SHA-1 checksum of the written MIDX file

    Raises:
      ValueError: If hash_algorithm is not a known algorithm identifier.
    """
    if hash_algorithm == HASH_ALGORITHM_SHA1:
        hash_size = 20
    elif hash_algorithm == HASH_ALGORITHM_SHA256:
        hash_size = 32
    else:
        raise ValueError(f"Unknown hash algorithm: {hash_algorithm}")
    # Wrap file in SHA1Writer to compute checksum
    writer = SHA1Writer(f)
    # Sort pack entries by pack name (required by Git)
    pack_index_entries_sorted = sorted(pack_index_entries, key=lambda x: x[0])
    # Collect all objects from all packs
    all_objects: list[tuple[RawObjectID, int, int]] = []  # (sha, pack_id, offset)
    pack_names: list[str] = []
    for pack_id, (pack_name, entries) in enumerate(pack_index_entries_sorted):
        pack_names.append(pack_name)
        for sha, offset, _crc32 in entries:
            all_objects.append((sha, pack_id, offset))
    # Sort all objects by SHA
    # NOTE(review): duplicate SHAs appearing in multiple packs are not
    # deduplicated here — confirm whether callers guarantee uniqueness.
    all_objects.sort(key=lambda x: x[0])
    # Calculate offsets for chunks
    num_packs = len(pack_names)
    num_objects = len(all_objects)
    # Header: 12 bytes
    header_size = 12
    # Chunk count: PNAM, OIDF, OIDL, OOFF, and optionally LOFF
    # We'll determine if LOFF is needed later
    chunk_count = 4  # PNAM, OIDF, OIDL, OOFF
    # Check if we need LOFF chunk (for offsets >= 2^31)
    need_loff = any(offset >= 2**31 for _sha, _pack_id, offset in all_objects)
    if need_loff:
        chunk_count += 1
    # Chunk table: (chunk_count + 1) * 12 bytes (including terminator)
    chunk_table_size = (chunk_count + 1) * 12
    # Calculate chunk offsets
    current_offset = header_size + chunk_table_size
    # PNAM chunk: pack names as null-terminated strings, padded to 4-byte boundary
    pnam_data = b"".join(name.encode("utf-8") + b"\x00" for name in pack_names)
    # Pad to 4-byte boundary
    pnam_padding = (4 - len(pnam_data) % 4) % 4
    pnam_data += b"\x00" * pnam_padding
    pnam_offset = current_offset
    current_offset += len(pnam_data)
    # OIDF chunk: 256 * 4 bytes
    oidf_offset = current_offset
    oidf_size = 256 * 4
    current_offset += oidf_size
    # OIDL chunk: num_objects * hash_size bytes
    oidl_offset = current_offset
    oidl_size = num_objects * hash_size
    current_offset += oidl_size
    # OOFF chunk: num_objects * 8 bytes (4 for pack_id + 4 for offset)
    ooff_offset = current_offset
    ooff_size = num_objects * 8
    current_offset += ooff_size
    # LOFF chunk (if needed): variable size
    # We'll calculate the exact size when we know how many large offsets we have
    loff_offset = current_offset if need_loff else 0
    large_offsets: list[int] = []
    # Calculate trailer offset (where checksum starts)
    # We need to pre-calculate large offset count for accurate trailer offset
    if need_loff:
        # Count large offsets
        large_offset_count = sum(1 for _, _, offset in all_objects if offset >= 2**31)
        loff_size = large_offset_count * 8
        trailer_offset = current_offset + loff_size
    else:
        trailer_offset = current_offset
    # Write header
    writer.write(MIDX_SIGNATURE)  # 4 bytes: signature
    writer.write(bytes([MIDX_VERSION]))  # 1 byte: version
    writer.write(bytes([hash_algorithm]))  # 1 byte: hash algorithm
    writer.write(bytes([chunk_count]))  # 1 byte: chunk count
    writer.write(bytes([0]))  # 1 byte: base MIDX files (always 0)
    writer.write(struct.pack(">L", num_packs))  # 4 bytes: pack count
    # Write chunk table
    chunk_table = [
        (CHUNK_PNAM, pnam_offset),
        (CHUNK_OIDF, oidf_offset),
        (CHUNK_OIDL, oidl_offset),
        (CHUNK_OOFF, ooff_offset),
    ]
    if need_loff:
        chunk_table.append((CHUNK_LOFF, loff_offset))
    for chunk_id, chunk_offset in chunk_table:
        writer.write(chunk_id)  # 4 bytes
        writer.write(struct.pack(">Q", chunk_offset))  # 8 bytes
    # Write terminator (points to where trailer/checksum starts)
    writer.write(b"\x00\x00\x00\x00")  # 4 bytes
    writer.write(struct.pack(">Q", trailer_offset))  # 8 bytes
    # Write PNAM chunk
    writer.write(pnam_data)
    # Write OIDF chunk (fanout table)
    fanout: list[int] = [0] * 256
    for sha, _pack_id, _offset in all_objects:
        first_byte = sha[0]
        fanout[first_byte] += 1
    # Convert counts to cumulative
    cumulative = 0
    for i in range(256):
        cumulative += fanout[i]
        writer.write(struct.pack(">L", cumulative))
    # Write OIDL chunk (object IDs)
    for sha, _pack_id, _offset in all_objects:
        writer.write(sha)
    # Write OOFF chunk (pack ID and offset for each object)
    for _sha, pack_id, offset in all_objects:
        writer.write(struct.pack(">L", pack_id))
        if offset >= 2**31:
            # Use large offset table
            large_offset_index = len(large_offsets)
            large_offsets.append(offset)
            # Set MSB to indicate large offset
            writer.write(struct.pack(">L", 0x80000000 | large_offset_index))
        else:
            writer.write(struct.pack(">L", offset))
    # Write LOFF chunk if needed
    if need_loff:
        for large_offset in large_offsets:
            writer.write(struct.pack(">Q", large_offset))
    # Write checksum
    return writer.write_sha()
  499. def write_midx_file(
  500. path: str | os.PathLike[str],
  501. pack_index_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]],
  502. hash_algorithm: int = HASH_ALGORITHM_SHA1,
  503. ) -> bytes:
  504. """Write a multi-pack-index file to disk.
  505. Args:
  506. path: Path where to write the MIDX file
  507. pack_index_entries: List of (pack_name, entries) tuples where entries are
  508. (sha, offset, crc32) tuples, sorted by SHA
  509. hash_algorithm: Hash algorithm to use (1=SHA-1, 2=SHA-256)
  510. Returns:
  511. SHA-1 checksum of the written MIDX file
  512. """
  513. with GitFile(path, "wb") as f:
  514. return write_midx(f, pack_index_entries, hash_algorithm)
  515. # TODO: Add support for incremental MIDX chains
  516. # TODO: Add support for BTMP and RIDX chunks for bitmap integration