reftable.py 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419
  1. """Implementation of the reftable refs storage format.
  2. The reftable format is a binary format for storing Git refs that provides
  3. better performance and atomic operations compared to the traditional
  4. loose refs format.
  5. See: https://git-scm.com/docs/reftable
  6. """
  7. import os
  8. import random
  9. import shutil
  10. import struct
  11. import time
  12. import zlib
  13. from collections.abc import Callable, Mapping
  14. from dataclasses import dataclass
  15. from io import BytesIO
  16. from types import TracebackType
  17. from typing import BinaryIO
  18. from dulwich.objects import ObjectID
  19. from dulwich.refs import (
  20. SYMREF,
  21. RefsContainer,
  22. )
  23. def decode_varint_from_stream(stream: BinaryIO) -> int | None:
  24. """Decode a variable-length integer from a stream."""
  25. result = 0
  26. shift = 0
  27. while True:
  28. byte = stream.read(1)
  29. if not byte:
  30. return None
  31. byte_val = byte[0]
  32. result |= (byte_val & 0x7F) << shift
  33. if byte_val < 0x80:
  34. break
  35. shift += 7
  36. return result
  37. def encode_varint(value: int) -> bytes:
  38. """Encode an integer as a variable-width integer."""
  39. result = []
  40. while value >= 0x80:
  41. result.append((value & 0x7F) | 0x80)
  42. value >>= 7
  43. result.append(value)
  44. return bytes(result)
  45. def _encode_reftable_suffix_and_type(value: int) -> bytes:
  46. """Encode suffix_and_type using Git-compatible format.
  47. Git uses an additive format instead of proper LEB128:
  48. - Values < 128: Single byte (standard)
  49. - Values >= 128: Two bytes where byte1 + byte2 = value
  50. """
  51. if value < 128:
  52. return bytes([value])
  53. # Git's broken format: split into two bytes that add up to the value
  54. return bytes([0x80, value - 0x80])
def _decode_reftable_suffix_and_type(stream: BinaryIO) -> int | None:
    """Decode suffix_and_type handling both Git's broken and standard formats.

    Three encodings are disambiguated heuristically:

    1. Single byte (high bit clear) -- the common, unambiguous case.
    2. Git's additive two-byte form, where byte1 + byte2 == value.
    3. Standard LEB128, used when the second byte has its high bit set, or
       as a fallback when the additive reading looks implausible.

    Returns the decoded integer, or None if the stream is already exhausted.
    """
    pos = stream.tell()
    first_byte_data = stream.read(1)
    if not first_byte_data:
        return None
    first_byte = first_byte_data[0]
    # Single byte case - most common path
    if not (first_byte & 0x80):
        return first_byte
    # Two byte case - handle missing second byte
    second_byte_data = stream.read(1)
    if not second_byte_data:
        # Truncated input: rewind and salvage the low 7 bits of the first
        # byte. NOTE(review): this leaves the stream positioned before the
        # byte just consumed -- confirm callers expect that.
        stream.seek(pos)
        return first_byte & 0x7F
    second_byte = second_byte_data[0]
    # Multi-byte varint case - delegate to proper decoder
    if second_byte & 0x80:
        stream.seek(pos)
        return decode_varint_from_stream(stream)
    # Two-byte case: choose between Git's format and standard LEB128
    git_value = first_byte + second_byte
    git_suffix_len = git_value >> 3  # upper bits hold the suffix length
    git_value_type = git_value & 7  # low 3 bits hold the REF_VALUE_* type
    # Use Git's format if it produces reasonable values
    # (heuristic: a valid LEB128 value could in principle be misread here)
    if git_suffix_len < MAX_REASONABLE_SUFFIX_LEN and git_value_type <= 3:
        return git_value
    # Fall back to standard LEB128
    return (first_byte & 0x7F) | ((second_byte & 0x7F) << 7)
# Reftable magic bytes
REFTABLE_MAGIC = b"REFT"
# Reftable version
REFTABLE_VERSION = 1
# Constants - extracted magic numbers
EMBEDDED_FOOTER_MARKER = 0x1C  # 28 - version 2 header size
HEADER_SIZE_V1 = 24  # magic(4) + version/blocksize(4) + min(8) + max(8) update index
DEFAULT_BLOCK_SIZE = 4096  # block size written into the header
MAX_SYMREF_DEPTH = 5  # maximum symbolic-ref chain length to follow
CRC_DATA_SIZE = 64  # trailing CRC-32 covers the last 64 bytes before the checksum
FINAL_PADDING_SIZE = 36  # zero bytes written between the ref block and the CRC
# SHA1 and size constants
SHA1_BINARY_SIZE = 20  # SHA1 is 20 bytes
SHA1_HEX_SIZE = 40  # SHA1 as hex string is 40 characters
SHA1_PEELED_HEX_SIZE = 80  # Peeled ref is 80 hex characters (2 SHA1s)
# Validation limits
MAX_REASONABLE_SUFFIX_LEN = 100  # heuristic bound used when disambiguating encodings
MAX_REASONABLE_BLOCK_SIZE = 1000000  # larger "blocks" are assumed to be misparsed footers
MIN_RECORD_SIZE = 3  # minimum bytes for prefix_len + suffix_and_type + some payload
# Block types
BLOCK_TYPE_REF = b"r"
BLOCK_TYPE_OBJ = b"o"
BLOCK_TYPE_LOG = b"g"
BLOCK_TYPE_INDEX = b"i"
# Value types for ref records (matching Git's format)
REF_VALUE_DELETE = 0x0  # deletion
REF_VALUE_REF = 0x1  # one object name
REF_VALUE_PEELED = 0x2  # two object names (ref + peeled)
REF_VALUE_SYMREF = 0x3  # symbolic reference
  113. def _encode_ref_value(value_type: int, value: bytes) -> bytes:
  114. """Encode a ref value based on its type."""
  115. if value_type == REF_VALUE_DELETE:
  116. return b""
  117. elif value_type == REF_VALUE_REF:
  118. # Convert hex string to binary if needed
  119. if len(value) == SHA1_HEX_SIZE:
  120. return bytes.fromhex(value.decode())
  121. else:
  122. return value
  123. elif value_type == REF_VALUE_PEELED:
  124. # Convert hex strings to binary if needed
  125. if len(value) == SHA1_PEELED_HEX_SIZE:
  126. return bytes.fromhex(value.decode())
  127. else:
  128. return value
  129. elif value_type == REF_VALUE_SYMREF:
  130. # varint(target_len) + target
  131. return encode_varint(len(value)) + value
  132. else:
  133. raise ValueError(f"Unknown ref value type: {value_type}")
  134. def _decode_ref_value(stream: BinaryIO, value_type: int) -> bytes:
  135. """Decode a ref value from stream based on its type."""
  136. if value_type == REF_VALUE_DELETE:
  137. return b""
  138. elif value_type == REF_VALUE_REF:
  139. value = stream.read(SHA1_BINARY_SIZE)
  140. if len(value) != SHA1_BINARY_SIZE:
  141. raise ValueError("Unexpected end of stream while reading ref value")
  142. # Convert to hex string for interface compatibility
  143. return value.hex().encode()
  144. elif value_type == REF_VALUE_PEELED:
  145. value = stream.read(SHA1_BINARY_SIZE * 2)
  146. if len(value) != SHA1_BINARY_SIZE * 2:
  147. raise ValueError("Unexpected end of stream while reading peeled value")
  148. # Convert to hex string for interface compatibility
  149. return value.hex().encode()
  150. elif value_type == REF_VALUE_SYMREF:
  151. target_len = decode_varint_from_stream(stream)
  152. if target_len is None:
  153. raise ValueError("Unexpected end of stream while reading target length")
  154. value = stream.read(target_len)
  155. if len(value) != target_len:
  156. raise ValueError("Unexpected end of stream while reading symref target")
  157. return value
  158. else:
  159. raise ValueError(f"Unknown ref value type: {value_type}")
  160. @dataclass
  161. class RefValue:
  162. """A reference value with its type."""
  163. value_type: int
  164. value: bytes
  165. @property
  166. def is_symbolic(self) -> bool:
  167. """Check if this is a symbolic reference."""
  168. return self.value_type == REF_VALUE_SYMREF
  169. @property
  170. def is_deletion(self) -> bool:
  171. """Check if this is a deletion."""
  172. return self.value_type == REF_VALUE_DELETE
  173. @property
  174. def is_peeled(self) -> bool:
  175. """Check if this is a peeled reference."""
  176. return self.value_type == REF_VALUE_PEELED
  177. def get_sha(self) -> bytes | None:
  178. """Get the SHA1 value (for regular or peeled refs)."""
  179. if self.value_type == REF_VALUE_REF:
  180. return self.value
  181. elif self.value_type == REF_VALUE_PEELED:
  182. return self.value[:SHA1_HEX_SIZE]
  183. return None
  184. class RefUpdate:
  185. """A reference update operation."""
  186. def __init__(self, name: bytes, value_type: int, value: bytes):
  187. """Initialize RefUpdate.
  188. Args:
  189. name: Reference name
  190. value_type: Type of reference value
  191. value: Reference value
  192. """
  193. self.name = name
  194. self.value_type = value_type
  195. self.value = value
class RefRecord:
    """A reference record in a reftable."""

    def __init__(
        self, refname: bytes, value_type: int, value: bytes, update_index: int = 0
    ):
        """Initialize RefRecord.

        Args:
          refname: Reference name
          value_type: Type of reference value (a REF_VALUE_* constant)
          value: Reference value
          update_index: Update index (transaction counter) for the reference
        """
        self.refname = refname
        self.value_type = value_type
        self.value = value
        self.update_index = update_index

    def encode(self, prefix: bytes = b"", min_update_index: int = 0) -> bytes:
        """Encode the ref record with prefix compression.

        Args:
          prefix: refname of the preceding record; shared leading bytes are
            elided and only their length is stored.
          min_update_index: table-wide minimum; the record stores the delta
            from this value instead of the absolute update index.

        Returns:
          The serialized record bytes.
        """
        # Length of the leading bytes shared with the previous refname.
        common_prefix_len = 0
        for i in range(min(len(prefix), len(self.refname))):
            if prefix[i] != self.refname[i]:
                break
            common_prefix_len += 1
        suffix = self.refname[common_prefix_len:]
        # Encode according to Git format:
        # varint(prefix_length)
        # varint((suffix_length << 3) | value_type)
        # suffix
        # varint(update_index_delta)
        # value?
        result = encode_varint(common_prefix_len)
        result += _encode_reftable_suffix_and_type((len(suffix) << 3) | self.value_type)
        result += suffix
        # Calculate update_index_delta
        update_index_delta = self.update_index - min_update_index
        result += encode_varint(update_index_delta)
        # Encode value based on type
        result += _encode_ref_value(self.value_type, self.value)
        return result

    @classmethod
    def decode(
        cls, stream: BinaryIO, prefix: bytes = b"", min_update_index: int = 0
    ) -> tuple["RefRecord", bytes]:
        """Decode a ref record from a stream. Returns (record, full_refname).

        Args:
          stream: positioned at the start of an encoded record.
          prefix: full refname of the preceding record, used to reconstruct
            the prefix-compressed leading bytes.
          min_update_index: table-wide minimum added to the stored delta.

        Raises:
          ValueError: if the stream ends before the record is complete.
        """
        # Read prefix length
        prefix_len = decode_varint_from_stream(stream)
        if prefix_len is None:
            raise ValueError("Unexpected end of stream while reading prefix length")
        # Read combined suffix_length and value_type
        suffix_and_type = _decode_reftable_suffix_and_type(stream)
        if suffix_and_type is None:
            raise ValueError("Unexpected end of stream while reading suffix_and_type")
        suffix_len = suffix_and_type >> 3
        value_type = suffix_and_type & 0x7
        # Read suffix
        suffix = stream.read(suffix_len)
        if len(suffix) != suffix_len:
            raise ValueError("Unexpected end of stream while reading suffix")
        refname = prefix[:prefix_len] + suffix
        # Read update_index_delta
        update_index_delta = decode_varint_from_stream(stream)
        if update_index_delta is None:
            raise ValueError(
                "Unexpected end of stream while reading update_index_delta"
            )
        update_index = min_update_index + update_index_delta
        # Read value based on type
        value = _decode_ref_value(stream, value_type)
        return cls(refname, value_type, value, update_index), refname
  265. class LogBlock:
  266. """A block containing reflog records - simplified for empty blocks only."""
  267. def encode(self) -> bytes:
  268. """Encode empty log block for Git compatibility."""
  269. # Git expects compressed empty data for new repos
  270. return zlib.compress(b"")
class RefBlock:
    """A block containing reference records."""

    def __init__(self) -> None:
        """Initialize RefBlock."""
        # Records are held unsorted; encode() sorts them by refname.
        self.refs: list[RefRecord] = []

    def add_ref(
        self, refname: bytes, value_type: int, value: bytes, update_index: int = 1
    ) -> None:
        """Add a reference to the block.

        Args:
          refname: Reference name
          value_type: REF_VALUE_* constant
          value: Reference value payload
          update_index: Update index for the record
        """
        self.refs.append(RefRecord(refname, value_type, value, update_index))

    def _create_embedded_footer(
        self, ref_offsets: list[int], header_data: bytes
    ) -> bytes:
        """Create the embedded footer pattern Git uses.

        Args:
          ref_offsets: byte offsets of the second and subsequent records
            within the encoded record data (see encode()).
          header_data: the file header; a copy of its first 24 bytes is
            embedded in the footer.

        Returns:
          The footer bytes to append after the record data.
        """
        result = BytesIO()
        result.write(b"\x00\x00")  # padding
        if len(self.refs) == 1:
            # Single ref pattern
            result.write(bytes([EMBEDDED_FOOTER_MARKER]))
            result.write(b"\x00\x01")
        else:
            # Multiple refs pattern
            result.write(bytes([EMBEDDED_FOOTER_MARKER]))
            result.write(b"\x00\x00")  # first restart offset (always 0)
            # Special offset calculation: second_ref_offset + EMBEDDED_FOOTER_MARKER
            if len(ref_offsets) > 0:
                special_offset = ref_offsets[0] + EMBEDDED_FOOTER_MARKER
                # NOTE(review): pack("B", ...) raises if special_offset
                # exceeds 255 -- presumably blocks stay small; confirm.
                result.write(struct.pack("B", special_offset))
            else:
                result.write(b"\x00")
            result.write(b"\x00")  # padding byte
            result.write(struct.pack("B", 2))  # restart count (Git always uses 2)
        # Add header copy
        if header_data and len(header_data) >= HEADER_SIZE_V1:
            result.write(header_data[:HEADER_SIZE_V1])
        else:
            # Create a minimal header copy
            result.write(REFTABLE_MAGIC)
            result.write(b"\x01\x00\x10\x00")  # version 1, block size 4096
            result.write(b"\x00" * 16)  # min/max update indices
        # Final padding
        result.write(b"\x00\x00\x00\x00")
        return result.getvalue()

    def encode(
        self,
        min_update_index: int = 0,
        embed_footer: bool = False,
        header_data: bytes = b"",
    ) -> bytes:
        """Encode the ref block with restart points for Git compatibility.

        Args:
          min_update_index: base value for update-index delta encoding.
          embed_footer: unused here; the embedded footer is always written.
          header_data: header bytes copied into the embedded footer.

        Returns:
          The encoded block bytes (sorted records followed by footer).
        """
        # Sort refs by name
        self.refs.sort(key=lambda r: r.refname)
        # Encode refs with prefix compression
        ref_data = BytesIO()
        prefix = b""
        ref_offsets = []
        for i, ref in enumerate(self.refs):
            # Record offset for restart points
            if i > 0:  # Git records offsets starting from second ref
                ref_offsets.append(ref_data.tell())
            encoded = ref.encode(prefix, min_update_index)
            ref_data.write(encoded)
            prefix = ref.refname
        record_data = ref_data.getvalue()
        # Git uses embedded footer format for ref blocks
        result = BytesIO()
        result.write(record_data)
        result.write(self._create_embedded_footer(ref_offsets, header_data))
        return result.getvalue()

    @classmethod
    def _find_ref_data_end(cls, data: bytes) -> int:
        """Find where ref records end in the data.

        Git embeds a footer at the end of ref blocks that contains offset
        information. This footer is marked by a special pattern (0x00 0x00 0x1c)
        where 0x1c (28) represents the version 2 header size.

        NOTE(review): this is a heuristic -- record payloads that happen to
        contain the marker bytes would truncate parsing early; confirm.

        Args:
          data: The raw block data to search

        Returns:
          The position where ref records end and footer begins
        """
        # Look for embedded footer marker pattern
        marker_pattern = b"\x00\x00" + bytes([EMBEDDED_FOOTER_MARKER])
        marker_pos = data.find(marker_pattern)
        if marker_pos > 0:
            return marker_pos
        # Fallback: use most of the data, leaving room for footer
        return max(0, len(data) - 50)

    @classmethod
    def decode(cls, data: bytes, min_update_index: int = 0) -> "RefBlock":
        """Decode a ref block from bytes - simplified.

        Args:
          data: raw block bytes (records plus embedded footer).
          min_update_index: base added to each record's stored delta.

        Returns:
          A RefBlock holding the successfully parsed records; parsing stops
          silently when padding or undecodable data is reached.
        """
        block = cls()
        if not data:
            return block
        # Find where ref data ends (before embedded footer)
        ref_data_end = cls._find_ref_data_end(data)
        record_data = data[:ref_data_end]
        stream = BytesIO(record_data)
        prefix = b""
        while stream.tell() < len(record_data):
            # Save position to check for progress
            pos_before = stream.tell()
            # Check if we have enough data for a minimal record
            remaining = len(record_data) - stream.tell()
            if (
                remaining < MIN_RECORD_SIZE
            ):  # Need at least prefix_len + suffix_and_type + some data
                break
            try:
                ref, prefix = RefRecord.decode(stream, prefix, min_update_index)
                # Skip metadata records (empty refnames are Git's internal index records)
                if ref.refname:  # Only add non-empty refnames
                    block.refs.append(ref)
            except ValueError:
                # If we can't decode a record, we might have hit padding or invalid data
                # Stop parsing rather than raising an error
                break
            # Ensure we made progress to avoid infinite loops
            if stream.tell() == pos_before:
                break
        return block
class ReftableWriter:
    """Writer for reftable files.

    Collects ref additions/deletions in memory, then write() serializes a
    header, a single ref block (with Git's embedded footer), final zero
    padding, and a trailing CRC-32 over the last 64 bytes.
    """

    def __init__(
        self,
        f: BinaryIO,
        auto_create_head: bool = True,
        is_batch_operation: bool = False,
    ):
        """Initialize ReftableWriter.

        Args:
          f: Binary file object to write to
          auto_create_head: Whether to automatically create HEAD reference
          is_batch_operation: Whether this is a batch operation (all refs
            then share the same update index)
        """
        self.f = f
        # refname -> (value_type, value); latest assignment wins.
        self.refs: dict[bytes, tuple[int, bytes]] = {}
        self.refs_order: list[bytes] = []  # Track insertion order for update indices
        self._written_data: list[bytes] = []  # Track written data for CRC calculation
        self.min_update_index = 1
        self.max_update_index = 1
        self.auto_create_head = auto_create_head
        self.is_batch_operation = (
            is_batch_operation  # Track if this is a batch operation
        )

    def add_ref(self, refname: bytes, sha: bytes) -> None:
        """Add a direct reference pointing at *sha*."""
        self.refs[refname] = (REF_VALUE_REF, sha)
        if refname not in self.refs_order:
            self.refs_order.append(refname)
        self._maybe_auto_create_head()

    def add_symbolic_ref(self, refname: bytes, target: bytes) -> None:
        """Add a symbolic reference pointing at *target*."""
        self.refs[refname] = (REF_VALUE_SYMREF, target)
        if refname not in self.refs_order:
            # HEAD should always be first (like git does)
            if refname == b"HEAD":
                self.refs_order.insert(0, refname)
            else:
                self.refs_order.append(refname)
        else:
            # Update existing ref (e.g., if HEAD was auto-created and now explicitly set)
            pass

    def delete_ref(self, refname: bytes) -> None:
        """Mark a reference as deleted."""
        self.refs[refname] = (REF_VALUE_DELETE, b"")
        if refname not in self.refs_order:
            self.refs_order.append(refname)

    def _maybe_auto_create_head(self) -> None:
        """Auto-create HEAD -> refs/heads/master if needed (Git compatibility)."""
        if self.auto_create_head and b"HEAD" not in self.refs:
            # Git always creates HEAD -> refs/heads/master by default
            self.refs[b"HEAD"] = (REF_VALUE_SYMREF, b"refs/heads/master")
            self.refs_order.insert(0, b"HEAD")

    def write(self) -> None:
        """Write the reftable to the file."""
        # Skip recalculation if max_update_index was already set higher than default
        # This preserves Git's behavior for symbolic-ref operations
        # Also skip for batch operations where min == max by design
        if (
            not self.is_batch_operation
            and self.max_update_index <= self.min_update_index
            and self.refs_order
        ):
            # Calculate max_update_index based on actual number of refs
            self.max_update_index = self.min_update_index + len(self.refs_order) - 1
        # Write header
        self._write_header()
        # Write ref blocks (includes embedded footer)
        self._write_ref_blocks()
        # Git embeds the footer within the ref block for small files,
        # so we only need to add final padding and CRC
        self._write_final_padding()

    def _write_header(self) -> None:
        """Write the reftable header (magic, version/blocksize, min/max index)."""
        # Magic bytes
        header_data = REFTABLE_MAGIC
        # Version + block size (4 bytes total, big-endian network order)
        # Format: uint8(version) + uint24(block_size)
        version_and_blocksize = (REFTABLE_VERSION << 24) | (
            DEFAULT_BLOCK_SIZE & 0xFFFFFF
        )
        header_data += struct.pack(">I", version_and_blocksize)
        # Min/max update index (timestamps) (big-endian network order)
        # Git uses increasing sequence numbers for update indices
        header_data += struct.pack(">Q", self.min_update_index)  # min_update_index
        header_data += struct.pack(">Q", self.max_update_index)  # max_update_index
        # Store header for footer
        self.header_data = header_data
        self.f.write(header_data)
        self._written_data.append(header_data)

    def _get_ref_update_indices(self) -> dict[bytes, int]:
        """Get update indices for all refs based on operation type.

        In batch operations, all refs get the same update index (timestamp).
        In non-batch operations, refs are assigned sequential indices starting
        from min_update_index.

        Returns:
          dict[bytes, int]: Mapping of ref names to their update indices
        """
        if self.is_batch_operation:
            # All refs get same index in batch
            return {name: self.min_update_index for name in self.refs_order}
        elif hasattr(self, "_ref_update_indices"):
            # Use provided indices (set externally, e.g. by the container)
            return self._ref_update_indices  # type: ignore[no-any-return]
        elif len(self.refs_order) == 1 and self.refs_order[0] == b"HEAD":
            # Special case for single HEAD symbolic ref
            value_type, _ = self.refs[b"HEAD"]
            if value_type == REF_VALUE_SYMREF:
                return {b"HEAD": self.max_update_index}
            else:
                return {b"HEAD": self.min_update_index}
        else:
            # Sequential indices
            indices = {}
            for i, name in enumerate(self.refs_order):
                indices[name] = self.min_update_index + i
            return indices

    def _write_ref_blocks(self) -> None:
        """Write reference blocks (block type byte, 3-byte length, data)."""
        # Only write block if we have refs
        if not self.refs:
            return
        # Write refs in insertion order to preserve update indices like Git
        block = RefBlock()
        # Get update indices for all refs
        update_indices = self._get_ref_update_indices()
        # Add refs to block with their update indices
        for refname in self.refs_order:
            value_type, value = self.refs[refname]
            update_index = update_indices[refname]
            block.add_ref(refname, value_type, value, update_index)
        # Generate block data (may use embedded footer for small blocks)
        block_data = block.encode(
            min_update_index=self.min_update_index, header_data=self.header_data
        )
        # Write block type
        block_header = BLOCK_TYPE_REF
        self.f.write(block_header)
        self._written_data.append(block_header)
        # Write block length (3 bytes, big-endian network order)
        length_bytes = len(block_data).to_bytes(3, "big")
        self.f.write(length_bytes)
        self._written_data.append(length_bytes)
        # Write block data
        self.f.write(block_data)
        self._written_data.append(block_data)

    def _write_final_padding(self) -> None:
        """Write final padding and CRC for Git compatibility."""
        # Git writes exactly 40 bytes after the ref block (which includes embedded footer)
        # This is 36 bytes of zeros followed by 4-byte CRC
        padding = b"\x00" * FINAL_PADDING_SIZE
        self.f.write(padding)
        self._written_data.append(padding)
        # Calculate CRC over the last 64 bytes before CRC position
        # Collect all data written so far
        all_data = b"".join(self._written_data)
        # CRC is calculated over the 64 bytes before the CRC itself
        if len(all_data) >= CRC_DATA_SIZE:
            crc_data = all_data[-CRC_DATA_SIZE:]
        else:
            # Pad with zeros if file is too small
            crc_data = all_data + b"\x00" * (CRC_DATA_SIZE - len(all_data))
        crc = zlib.crc32(crc_data) & 0xFFFFFFFF
        self.f.write(struct.pack(">I", crc))
  555. class ReftableReader:
  556. """Reader for reftable files."""
  557. def __init__(self, f: BinaryIO):
  558. """Initialize ReftableReader.
  559. Args:
  560. f: Binary file object to read from
  561. """
  562. self.f = f
  563. self._read_header()
  564. self.refs: dict[bytes, tuple[int, bytes]] = {}
  565. self._read_blocks()
  566. def _read_header(self) -> None:
  567. """Read and validate the reftable header."""
  568. # Read magic bytes
  569. magic = self.f.read(4)
  570. if magic != REFTABLE_MAGIC:
  571. raise ValueError(f"Invalid reftable magic: {magic!r}")
  572. # Read version + block size (4 bytes total, big-endian network order)
  573. # Format: uint8(version) + uint24(block_size)
  574. version_and_blocksize = struct.unpack(">I", self.f.read(4))[0]
  575. version = (version_and_blocksize >> 24) & 0xFF # First byte
  576. block_size = version_and_blocksize & 0xFFFFFF # Last 3 bytes
  577. if version != REFTABLE_VERSION:
  578. raise ValueError(f"Unsupported reftable version: {version}")
  579. self.block_size = block_size
  580. # Read min/max update index (big-endian network order)
  581. self.min_update_index = struct.unpack(">Q", self.f.read(8))[0]
  582. self.max_update_index = struct.unpack(">Q", self.f.read(8))[0]
  583. def _read_blocks(self) -> None:
  584. """Read all blocks from the reftable."""
  585. while True:
  586. # Read block type
  587. block_type = self.f.read(1)
  588. if not block_type:
  589. break
  590. # Read block length (3 bytes, big-endian network order)
  591. length_bytes = self.f.read(3)
  592. if len(length_bytes) < 3:
  593. break
  594. # Convert 3-byte big-endian to int
  595. block_length = int.from_bytes(length_bytes, "big")
  596. # Read block data
  597. block_data = self.f.read(block_length)
  598. if block_type == BLOCK_TYPE_REF:
  599. self._process_ref_block(block_data)
  600. # TODO: Handle other block types
  601. # Stop if we encounter footer header copy
  602. if (
  603. block_type == b"R" and block_length > MAX_REASONABLE_BLOCK_SIZE
  604. ): # Likely parsing footer as block
  605. break
  606. def _process_ref_block(self, data: bytes) -> None:
  607. """Process a reference block."""
  608. block = RefBlock.decode(data, min_update_index=self.min_update_index)
  609. for ref in block.refs:
  610. # Store all refs including deletion records - deletion handling is done at container level
  611. self.refs[ref.refname] = (ref.value_type, ref.value)
  612. def get_ref(self, refname: bytes) -> tuple[int, bytes] | None:
  613. """Get a reference by name."""
  614. return self.refs.get(refname)
  615. def all_refs(self) -> dict[bytes, tuple[int, bytes]]:
  616. """Get all references."""
  617. return self.refs.copy()
  618. class _ReftableBatchContext:
  619. """Context manager for batching reftable updates."""
  620. def __init__(self, refs_container: "ReftableRefsContainer") -> None:
  621. self.refs_container = refs_container
  622. def __enter__(self) -> "_ReftableBatchContext":
  623. self.refs_container._batch_mode = True
  624. return self
  625. def __exit__(
  626. self,
  627. exc_type: type[BaseException] | None,
  628. exc_val: BaseException | None,
  629. exc_tb: TracebackType | None,
  630. ) -> None:
  631. self.refs_container._batch_mode = False
  632. if exc_type is None: # Only flush if no exception occurred
  633. self.refs_container._flush_pending_updates()
  634. class ReftableRefsContainer(RefsContainer):
  635. """A refs container backed by the reftable format."""
  636. def __init__(
  637. self,
  638. path: str | bytes,
  639. logger: Callable[
  640. [bytes, bytes, bytes, bytes | None, int | None, int | None, bytes], None
  641. ]
  642. | None = None,
  643. ) -> None:
  644. """Initialize a reftable refs container.
  645. Args:
  646. path: Path to the reftable directory
  647. logger: Optional logger for reflog
  648. """
  649. super().__init__(logger=logger)
  650. # Normalize path to string
  651. if isinstance(path, bytes):
  652. self.path = os.fsdecode(path)
  653. else:
  654. self.path = path
  655. self.reftable_dir = os.path.join(self.path, "reftable")
  656. if not os.path.exists(self.reftable_dir):
  657. os.makedirs(self.reftable_dir)
  658. self._pending_updates: list[RefUpdate] = [] # Buffer for batching ref updates
  659. self._ref_update_indices: dict[
  660. bytes, int
  661. ] = {} # Track chronological update index for each ref
  662. self._batch_mode = False # Track whether we're in batch mode
  663. # Create refs/heads marker file for Git compatibility
  664. self._ensure_refs_heads_marker()
  665. def _ensure_refs_heads_marker(self) -> None:
  666. """Ensure refs/heads marker file exists for Git compatibility.
  667. Git expects a refs/heads file (not directory) to exist when using
  668. reftables. This file contains a marker message indicating that the
  669. repository uses the reftable format instead of loose refs.
  670. """
  671. refs_dir = os.path.join(self.path, "refs")
  672. if not os.path.exists(refs_dir):
  673. os.makedirs(refs_dir)
  674. refs_heads_path = os.path.join(refs_dir, "heads")
  675. # If refs/heads is a directory, remove it
  676. if os.path.isdir(refs_heads_path):
  677. shutil.rmtree(refs_heads_path)
  678. # Create marker file if it doesn't exist
  679. if not os.path.exists(refs_heads_path):
  680. with open(refs_heads_path, "wb") as f:
  681. f.write(b"this repository uses the reftable format\n")
  682. def _read_table_file(self, table_file: str) -> BinaryIO:
  683. """Context manager helper to open and read a reftable file."""
  684. return open(table_file, "rb")
def _load_ref_update_indices(self) -> None:
    """Load the update indices for all refs from existing reftable files.

    Populates ``self._ref_update_indices`` by scanning every table from
    tables.list and reading the per-ref update index out of the first
    ref block of each file.  Because the dict is assigned per ref name,
    a later table's entry overwrites an earlier one.
    """
    for table_file in self._get_table_files():
        with self._read_table_file(table_file) as f:
            # Read header to get min_update_index
            f.seek(0)
            reader = ReftableReader(f)
            # Read the ref block to get individual update indices
            f.seek(HEADER_SIZE_V1)  # Skip header
            block_type = f.read(1)
            # NOTE(review): only the first block after the header is
            # examined; refs in any additional blocks of a multi-block
            # table would be skipped — confirm tables written here are
            # single-block.
            if block_type == BLOCK_TYPE_REF:
                length_bytes = f.read(3)
                # Block length is a 24-bit big-endian integer.
                block_length = (
                    (length_bytes[0] << 16)
                    | (length_bytes[1] << 8)
                    | length_bytes[2]
                )
                block_data = f.read(block_length)
                # Parse the block to get refs with their update indices
                block = RefBlock.decode(
                    block_data, min_update_index=reader.min_update_index
                )
                for ref in block.refs:
                    # Store the update index for each ref
                    # This preserves the chronological order
                    self._ref_update_indices[ref.refname] = ref.update_index
  711. def _get_next_update_index(self) -> int:
  712. """Get the next available update index across all tables."""
  713. # Find the highest update index used so far by reading all tables
  714. max_index = 0
  715. table_files = self._get_table_files()
  716. if not table_files:
  717. # No existing tables, but Git starts with HEAD at index 1
  718. # Our first operations start at index 2
  719. return 2
  720. for table_file in table_files:
  721. with self._read_table_file(table_file) as f:
  722. reader = ReftableReader(f)
  723. max_index = max(max_index, reader.max_update_index)
  724. return max_index + 1
  725. def _get_table_files(self) -> list[str]:
  726. """Get sorted list of reftable files from tables.list."""
  727. if not os.path.exists(self.reftable_dir):
  728. return []
  729. tables_list_path = os.path.join(self.reftable_dir, "tables.list")
  730. if not os.path.exists(tables_list_path):
  731. return []
  732. files = []
  733. with open(tables_list_path, "rb") as f:
  734. for line in f:
  735. table_name = line.decode().strip()
  736. if table_name:
  737. files.append(os.path.join(self.reftable_dir, table_name))
  738. return files
  739. def _read_all_tables(self) -> dict[bytes, tuple[int, bytes]]:
  740. """Read all reftable files and merge results."""
  741. # First, read all tables and sort them by min_update_index
  742. table_data = []
  743. for table_file in self._get_table_files():
  744. with self._read_table_file(table_file) as f:
  745. reader = ReftableReader(f)
  746. table_data.append(
  747. (reader.min_update_index, table_file, reader.all_refs())
  748. )
  749. # Sort by min_update_index to ensure chronological order
  750. table_data.sort(key=lambda x: x[0])
  751. # Merge results in chronological order
  752. all_refs: dict[bytes, tuple[int, bytes]] = {}
  753. for min_update_index, table_file, refs in table_data:
  754. # Apply updates from this table
  755. for refname, (value_type, value) in refs.items():
  756. if value_type == REF_VALUE_DELETE:
  757. # Remove ref if it exists
  758. all_refs.pop(refname, None)
  759. else:
  760. # Add/update ref
  761. all_refs[refname] = (value_type, value)
  762. return all_refs
  763. def allkeys(self) -> set[bytes]:
  764. """Return set of all ref names."""
  765. refs = self._read_all_tables()
  766. result = set(refs.keys())
  767. # For symbolic refs, also include their targets as implicit refs
  768. for refname, (value_type, value) in refs.items():
  769. if value_type == REF_VALUE_SYMREF:
  770. # Add the target ref as an implicit ref
  771. target = value
  772. result.add(target)
  773. return result
  774. def follow(self, name: bytes) -> tuple[list[bytes], bytes]:
  775. """Follow a reference name.
  776. Returns: a tuple of (refnames, sha), where refnames are the names of
  777. references in the chain
  778. """
  779. refnames = []
  780. current = name
  781. refs = self._read_all_tables()
  782. for _ in range(MAX_SYMREF_DEPTH):
  783. refnames.append(current)
  784. ref_data = refs.get(current)
  785. if ref_data is None:
  786. raise KeyError(current)
  787. value_type, value = ref_data
  788. if value_type == REF_VALUE_REF:
  789. return refnames, value
  790. if value_type == REF_VALUE_PEELED:
  791. return refnames, value[:SHA1_HEX_SIZE] # First SHA1 hex chars
  792. if value_type == REF_VALUE_SYMREF:
  793. current = value
  794. continue
  795. # Unknown value type
  796. raise ValueError(f"Unknown ref value type: {value_type}")
  797. # Too many levels of indirection
  798. raise ValueError(f"Too many levels of symbolic ref indirection for {name!r}")
  799. def __getitem__(self, name: bytes) -> ObjectID:
  800. """Get the SHA1 for a reference name.
  801. This method follows all symbolic references.
  802. """
  803. _, sha = self.follow(name)
  804. if sha is None:
  805. raise KeyError(name)
  806. return sha
  807. def read_loose_ref(self, name: bytes) -> bytes:
  808. """Read a reference value without following symbolic refs.
  809. Args:
  810. name: the refname to read
  811. Returns: The contents of the ref file.
  812. For symbolic refs, returns b"ref: <target>"
  813. Raises:
  814. KeyError: if the ref does not exist
  815. """
  816. refs = self._read_all_tables()
  817. ref_data = refs.get(name)
  818. if ref_data is None:
  819. raise KeyError(name)
  820. value_type, value = ref_data
  821. if value_type == REF_VALUE_REF:
  822. return value
  823. elif value_type == REF_VALUE_SYMREF:
  824. # Return in Git format: "ref: <target>"
  825. return SYMREF + value
  826. elif value_type == REF_VALUE_PEELED:
  827. # Return the first SHA (not the peeled one)
  828. return value[:SHA1_HEX_SIZE] # First SHA1 hex chars
  829. raise ValueError(f"Unknown ref value type: {value_type}")
  830. def get_packed_refs(self) -> dict[bytes, bytes]:
  831. """Get packed refs. Reftable doesn't distinguish packed/loose."""
  832. refs = self._read_all_tables()
  833. result = {}
  834. for name, (value_type, value) in refs.items():
  835. if value_type == REF_VALUE_REF:
  836. result[name] = value
  837. elif value_type == REF_VALUE_PEELED:
  838. result[name] = value[:SHA1_HEX_SIZE] # First SHA1 hex chars
  839. return result
  840. def get_peeled(self, name: bytes) -> bytes | None:
  841. """Return the cached peeled value of a ref, if available.
  842. Args:
  843. name: Name of the ref to peel
  844. Returns: The peeled value of the ref. If the ref is known not point to
  845. a tag, this will be the SHA the ref refers to. If the ref may point
  846. to a tag, but no cached information is available, None is returned.
  847. """
  848. refs = self._read_all_tables()
  849. ref_data = refs.get(name)
  850. if ref_data is None:
  851. return None
  852. value_type, value = ref_data
  853. if value_type == REF_VALUE_PEELED:
  854. # Return the peeled SHA (second 40 hex chars)
  855. return value[40:80]
  856. elif value_type == REF_VALUE_REF:
  857. # Known not to be peeled
  858. return value
  859. else:
  860. # Symbolic ref or other - no peeled info
  861. return None
  862. def _generate_table_path(
  863. self,
  864. min_update_index: int | None = None,
  865. max_update_index: int | None = None,
  866. ) -> str:
  867. """Generate a new reftable file path."""
  868. if min_update_index is None or max_update_index is None:
  869. timestamp = int(time.time() * 1000000)
  870. min_idx = max_idx = timestamp
  871. else:
  872. min_idx = min_update_index
  873. max_idx = max_update_index
  874. hash_part = random.randint(0, 0xFFFFFFFF)
  875. table_name = f"0x{min_idx:016x}-0x{max_idx:016x}-{hash_part:08x}.ref"
  876. return os.path.join(self.reftable_dir, table_name)
  877. def add_packed_refs(self, new_refs: Mapping[bytes, bytes | None]) -> None:
  878. """Add packed refs. Creates a new reftable file with all refs consolidated."""
  879. if not new_refs:
  880. return
  881. self._write_batch_updates(new_refs)
  882. def _write_batch_updates(self, updates: Mapping[bytes, bytes | None]) -> None:
  883. """Write multiple ref updates to a single reftable file."""
  884. if not updates:
  885. return
  886. table_path = self._generate_table_path()
  887. with open(table_path, "wb") as f:
  888. writer = ReftableWriter(f)
  889. for refname, sha in updates.items():
  890. if sha is not None:
  891. writer.add_ref(refname, sha)
  892. else:
  893. writer.delete_ref(refname)
  894. writer.write()
  895. self._update_tables_list()
  896. def set_if_equals(
  897. self,
  898. name: bytes,
  899. old_ref: bytes | None,
  900. new_ref: bytes | None,
  901. committer: bytes | None = None,
  902. timestamp: int | None = None,
  903. timezone: int | None = None,
  904. message: bytes | None = None,
  905. ) -> bool:
  906. """Atomically set a ref if it currently equals old_ref."""
  907. # For now, implement a simple non-atomic version
  908. # TODO: Implement proper atomic compare-and-swap
  909. try:
  910. current = self.read_loose_ref(name)
  911. except KeyError:
  912. current = None
  913. if current != old_ref:
  914. return False
  915. if new_ref is None:
  916. # Delete ref
  917. self._write_ref_update(name, REF_VALUE_DELETE, b"")
  918. else:
  919. # Update ref
  920. self._write_ref_update(name, REF_VALUE_REF, new_ref)
  921. return True
  922. def add_if_new(
  923. self,
  924. name: bytes,
  925. ref: bytes,
  926. committer: bytes | None = None,
  927. timestamp: int | None = None,
  928. timezone: int | None = None,
  929. message: bytes | None = None,
  930. ) -> bool:
  931. """Add a ref only if it doesn't exist."""
  932. try:
  933. self.read_loose_ref(name)
  934. return False # Ref exists
  935. except KeyError:
  936. pass # Ref doesn't exist, continue
  937. self._write_ref_update(name, REF_VALUE_REF, ref)
  938. return True
  939. def remove_if_equals(
  940. self,
  941. name: bytes,
  942. old_ref: bytes | None,
  943. committer: bytes | None = None,
  944. timestamp: int | None = None,
  945. timezone: int | None = None,
  946. message: bytes | None = None,
  947. ) -> bool:
  948. """Remove a ref if it equals old_ref."""
  949. return self.set_if_equals(
  950. name,
  951. old_ref,
  952. None,
  953. committer=committer,
  954. timestamp=timestamp,
  955. timezone=timezone,
  956. message=message,
  957. )
  958. def set_symbolic_ref(
  959. self,
  960. name: bytes,
  961. other: bytes,
  962. committer: bytes | None = None,
  963. timestamp: int | None = None,
  964. timezone: int | None = None,
  965. message: bytes | None = None,
  966. ) -> None:
  967. """Set a symbolic reference."""
  968. self._write_ref_update(name, REF_VALUE_SYMREF, other)
  969. def _write_ref_update(self, name: bytes, value_type: int, value: bytes) -> None:
  970. """Write a single ref update immediately to its own reftable file."""
  971. # Check if we're in batch mode - if so, buffer for later
  972. if getattr(self, "_batch_mode", False):
  973. # Buffer the update for later batching using RefUpdate objects
  974. self._pending_updates.append(RefUpdate(name, value_type, value))
  975. return
  976. # Write immediately like Git does - one file per update
  977. self._write_single_ref_update(name, value_type, value)
  978. def _write_single_ref_update(
  979. self, name: bytes, value_type: int, value: bytes
  980. ) -> None:
  981. """Write a single ref update to its own reftable file like Git does."""
  982. table_path = self._generate_table_path()
  983. next_update_index = self._get_next_update_index()
  984. with open(table_path, "wb") as f:
  985. writer = ReftableWriter(f, auto_create_head=False)
  986. writer.min_update_index = next_update_index
  987. # Git uses max_update_index = min + 1 for single ref updates
  988. writer.max_update_index = next_update_index + 1
  989. # Don't auto-create HEAD - let it be set explicitly
  990. # Add the requested ref
  991. if value_type == REF_VALUE_REF:
  992. writer.add_ref(name, value)
  993. elif value_type == REF_VALUE_SYMREF:
  994. writer.add_symbolic_ref(name, value)
  995. elif value_type == REF_VALUE_DELETE:
  996. writer.delete_ref(name)
  997. writer.write()
  998. self._update_tables_list()
def _flush_pending_updates(self) -> None:
    """Flush pending ref updates like Git does - consolidate all refs.

    Reads the complete current ref state, applies every buffered update
    on top of it with a single shared update index, writes the result as
    one consolidated table file, removes superseded ``.ref`` files,
    rewrites tables.list, and clears the pending buffer.  No-op when
    nothing is buffered.
    """
    if not self._pending_updates:
        return
    # First, load the current update indices for all refs
    if not self._ref_update_indices:
        self._load_ref_update_indices()
    # Read all existing refs to create complete consolidated view
    all_refs = self._read_all_tables()
    # Process pending updates
    head_update, other_updates = self._process_pending_updates()
    # Get next update index - all refs in batch get the SAME index
    batch_update_index = self._get_next_update_index()
    # Apply updates to get final state
    self._apply_batch_updates(
        all_refs, other_updates, head_update, batch_update_index
    )
    # Write consolidated batch file
    created_files = (
        self._write_batch_file(all_refs, batch_update_index) if all_refs else []
    )
    # Update tables list with new files (don't compact, keep separate)
    if created_files:
        # Remove old files and update tables.list
        tables_list_path = os.path.join(self.reftable_dir, "tables.list")
        # Remove old .ref files
        for name in os.listdir(self.reftable_dir):
            if name.endswith(".ref") and name not in created_files:
                os.remove(os.path.join(self.reftable_dir, name))
        # Write new tables.list with separate files
        with open(tables_list_path, "wb") as f:
            for filename in sorted(created_files):  # Sort for deterministic order
                f.write((filename + "\n").encode())
    # NOTE(review): the buffer is cleared even when all_refs ended up
    # empty and nothing was written above — in that case old tables and
    # tables.list are left untouched, so a batch that deletes every ref
    # may silently be lost.  Confirm whether that case can occur.
    self._pending_updates.clear()
  1033. def _process_pending_updates(
  1034. self,
  1035. ) -> tuple[tuple[bytes, int, bytes] | None, list[tuple[bytes, int, bytes]]]:
  1036. """Process pending updates and return (head_update, other_updates)."""
  1037. head_update = None
  1038. other_updates = []
  1039. # Process all RefUpdate objects
  1040. for update in self._pending_updates:
  1041. if update.name == b"HEAD":
  1042. if update.value_type == REF_VALUE_SYMREF:
  1043. # Resolve symref chain like Git does (only for HEAD)
  1044. resolved_target = self._resolve_symref_chain(update.value)
  1045. head_update = (update.name, REF_VALUE_SYMREF, resolved_target)
  1046. else:
  1047. head_update = (update.name, update.value_type, update.value)
  1048. else:
  1049. # Regular ref, symref, or deletion
  1050. other_updates.append((update.name, update.value_type, update.value))
  1051. return head_update, other_updates
  1052. def _resolve_symref_chain(self, target: bytes) -> bytes:
  1053. """Resolve a symbolic reference chain to its final target.
  1054. Git limits symref chains to 5 levels to prevent infinite loops.
  1055. This method follows the chain until it finds a non-symbolic ref
  1056. or hits the depth limit.
  1057. Args:
  1058. target: The initial target of a symbolic reference
  1059. Returns:
  1060. The final target after resolving all symbolic references
  1061. """
  1062. visited = set()
  1063. current = target
  1064. # Follow the chain up to 5 levels deep (Git's limit)
  1065. for _ in range(MAX_SYMREF_DEPTH):
  1066. if current in visited:
  1067. # Circular reference, return current
  1068. break
  1069. visited.add(current)
  1070. # Check if current target is also a symref in pending updates
  1071. found_symref = False
  1072. for update in self._pending_updates:
  1073. if update.name == current and update.value_type == REF_VALUE_SYMREF:
  1074. current = update.value
  1075. found_symref = True
  1076. break
  1077. if not found_symref:
  1078. # Not a symref, this is the final target
  1079. break
  1080. return current
  1081. def _apply_batch_updates(
  1082. self,
  1083. all_refs: dict[bytes, tuple[int, bytes]],
  1084. other_updates: list[tuple[bytes, int, bytes]],
  1085. head_update: tuple[bytes, int, bytes] | None,
  1086. batch_update_index: int,
  1087. ) -> None:
  1088. """Apply batch updates to the refs dict and update indices."""
  1089. # Process all updates and assign the SAME update index to all refs in batch
  1090. for name, value_type, value in other_updates:
  1091. if value_type == REF_VALUE_DELETE:
  1092. all_refs.pop(name, None)
  1093. # Deletion still gets recorded with the batch index
  1094. self._ref_update_indices[name] = batch_update_index
  1095. else:
  1096. all_refs[name] = (value_type, value)
  1097. # All refs in batch get the same update index
  1098. self._ref_update_indices[name] = batch_update_index
  1099. # Process HEAD update with same batch index
  1100. if head_update:
  1101. name, value_type, value = head_update
  1102. if value_type == REF_VALUE_DELETE:
  1103. all_refs.pop(name, None)
  1104. self._ref_update_indices[name] = batch_update_index
  1105. else:
  1106. all_refs[name] = (value_type, value)
  1107. self._ref_update_indices[name] = batch_update_index
def _write_batch_file(
    self, all_refs: dict[bytes, tuple[int, bytes]], batch_update_index: int
) -> list[str]:
    """Write all refs to a single batch file and return created filenames.

    Args:
      all_refs: Final consolidated state, refname -> (value_type, value)
      batch_update_index: Update index shared by every ref in the batch
    Returns: List containing the basename of the single table written.
    """
    # All refs in batch have same update index
    table_path = self._generate_table_path(batch_update_index, batch_update_index)
    with open(table_path, "wb") as f:
        writer = ReftableWriter(f, auto_create_head=False, is_batch_operation=True)
        writer.min_update_index = batch_update_index
        writer.max_update_index = batch_update_index
        # Add all refs in sorted order
        writer.refs_order = sorted(all_refs.keys())
        # Git typically puts HEAD first if present
        if b"HEAD" in writer.refs_order:
            writer.refs_order.remove(b"HEAD")
            writer.refs_order.insert(0, b"HEAD")
        # Populate the writer's ref table directly (instead of add_ref
        # calls) so the pre-computed ordering above is preserved.
        for refname in writer.refs_order:
            value_type, value = all_refs[refname]
            writer.refs[refname] = (value_type, value)
        # Pass the update indices to the writer
        writer._ref_update_indices = {  # type: ignore[attr-defined]
            name: batch_update_index for name in all_refs.keys()
        }
        writer.write()
    return [os.path.basename(table_path)]
  1133. def batch_update(self) -> "_ReftableBatchContext":
  1134. """Context manager for batching multiple ref updates into a single reftable."""
  1135. return _ReftableBatchContext(self)
  1136. def remove_packed_ref(self, name: bytes) -> None:
  1137. """Remove a packed ref. Creates a deletion record."""
  1138. self._write_ref_update(name, REF_VALUE_DELETE, b"")
  1139. def _compact_tables_list(self, new_table_name: str) -> None:
  1140. """Compact tables list to single file like Git does."""
  1141. tables_list_path = os.path.join(self.reftable_dir, "tables.list")
  1142. # Remove old .ref files (Git's compaction behavior)
  1143. for name in os.listdir(self.reftable_dir):
  1144. if name.endswith(".ref") and name != new_table_name:
  1145. os.remove(os.path.join(self.reftable_dir, name))
  1146. # Write new tables.list with just the consolidated file
  1147. with open(tables_list_path, "wb") as f:
  1148. f.write((new_table_name + "\n").encode())
  1149. def _update_tables_list(self) -> None:
  1150. """Update the tables.list file with current table files."""
  1151. tables_list_path = os.path.join(self.reftable_dir, "tables.list")
  1152. # Get all .ref files in the directory
  1153. ref_files = []
  1154. for name in os.listdir(self.reftable_dir):
  1155. if name.endswith(".ref"):
  1156. ref_files.append(name)
  1157. # Sort by name (which includes timestamp)
  1158. ref_files.sort()
  1159. # Write to tables.list
  1160. with open(tables_list_path, "wb") as f:
  1161. for name in ref_files:
  1162. f.write((name + "\n").encode())