"""Implementation of the reftable refs storage format.

The reftable format is a binary format for storing Git refs that provides
better performance and atomic operations compared to the traditional
loose refs format.

See: https://git-scm.com/docs/reftable
"""
  7. import os
  8. import random
  9. import shutil
  10. import struct
  11. import time
  12. import zlib
  13. from collections.abc import Mapping
  14. from dataclasses import dataclass
  15. from io import BytesIO
  16. from types import TracebackType
  17. from typing import BinaryIO, Callable, Optional, Union
  18. from dulwich.objects import ObjectID
  19. from dulwich.refs import (
  20. SYMREF,
  21. RefsContainer,
  22. )
  23. def decode_varint_from_stream(stream: BinaryIO) -> Optional[int]:
  24. """Decode a variable-length integer from a stream."""
  25. result = 0
  26. shift = 0
  27. while True:
  28. byte = stream.read(1)
  29. if not byte:
  30. return None
  31. byte_val = byte[0]
  32. result |= (byte_val & 0x7F) << shift
  33. if byte_val < 0x80:
  34. break
  35. shift += 7
  36. return result
  37. def encode_varint(value: int) -> bytes:
  38. """Encode an integer as a variable-width integer."""
  39. result = []
  40. while value >= 0x80:
  41. result.append((value & 0x7F) | 0x80)
  42. value >>= 7
  43. result.append(value)
  44. return bytes(result)
  45. def _encode_reftable_suffix_and_type(value: int) -> bytes:
  46. """Encode suffix_and_type using Git-compatible format.
  47. Git uses an additive format instead of proper LEB128:
  48. - Values < 128: Single byte (standard)
  49. - Values >= 128: Two bytes where byte1 + byte2 = value
  50. """
  51. if value < 128:
  52. return bytes([value])
  53. # Git's broken format: split into two bytes that add up to the value
  54. return bytes([0x80, value - 0x80])
def _decode_reftable_suffix_and_type(stream: BinaryIO) -> Optional[int]:
    """Decode suffix_and_type handling both Git's broken and standard formats.

    Args:
        stream: Seekable binary stream positioned at the field.

    Returns:
        The decoded suffix_and_type value, or None if the stream is
        already exhausted.
    """
    # Remember the field start so we can rewind and re-decode with a
    # different strategy when the first interpretation does not apply.
    pos = stream.tell()
    first_byte_data = stream.read(1)
    if not first_byte_data:
        return None
    first_byte = first_byte_data[0]
    # Single byte case - most common path
    if not (first_byte & 0x80):
        return first_byte
    # Two byte case - handle missing second byte
    second_byte_data = stream.read(1)
    if not second_byte_data:
        # Truncated input: rewind and return the low bits of the lone byte.
        # NOTE(review): the rewind leaves the stream *before* the byte we
        # just consumed — confirm callers expect that position.
        stream.seek(pos)
        return first_byte & 0x7F
    second_byte = second_byte_data[0]
    # Multi-byte varint case - delegate to proper decoder
    if second_byte & 0x80:
        stream.seek(pos)
        return decode_varint_from_stream(stream)
    # Two-byte case: choose between Git's format and standard LEB128
    git_value = first_byte + second_byte
    git_suffix_len = git_value >> 3
    git_value_type = git_value & 7
    # Use Git's format if it produces reasonable values
    # (heuristic: suffix length under MAX_REASONABLE_SUFFIX_LEN and a
    # value type within the 0..3 range defined for ref records).
    if git_suffix_len < MAX_REASONABLE_SUFFIX_LEN and git_value_type <= 3:
        return git_value
    # Fall back to standard LEB128
    return (first_byte & 0x7F) | ((second_byte & 0x7F) << 7)
# Reftable magic bytes
REFTABLE_MAGIC = b"REFT"
# Reftable version
REFTABLE_VERSION = 1
# Constants - extracted magic numbers
EMBEDDED_FOOTER_MARKER = 0x1C  # 28 - version 2 header size
HEADER_SIZE_V1 = 24  # magic(4) + version/blocksize(4) + min/max update index(16)
DEFAULT_BLOCK_SIZE = 4096
MAX_SYMREF_DEPTH = 5  # maximum symbolic-ref chain length to follow
CRC_DATA_SIZE = 64  # trailing CRC32 covers the final 64 bytes before it
FINAL_PADDING_SIZE = 36  # zero padding written before the 4-byte CRC
# SHA1 and size constants
SHA1_BINARY_SIZE = 20  # SHA1 is 20 bytes
SHA1_HEX_SIZE = 40  # SHA1 as hex string is 40 characters
SHA1_PEELED_HEX_SIZE = 80  # Peeled ref is 80 hex characters (2 SHA1s)
# Validation limits
MAX_REASONABLE_SUFFIX_LEN = 100  # heuristic bound used when decoding suffix_and_type
MAX_REASONABLE_BLOCK_SIZE = 1000000  # blocks larger than this are treated as footer noise
MIN_RECORD_SIZE = 3  # prefix_len + suffix_and_type + at least one data byte
# Block types
BLOCK_TYPE_REF = b"r"
BLOCK_TYPE_OBJ = b"o"
BLOCK_TYPE_LOG = b"g"
BLOCK_TYPE_INDEX = b"i"
# Value types for ref records (matching Git's format)
REF_VALUE_DELETE = 0x0  # deletion
REF_VALUE_REF = 0x1  # one object name
REF_VALUE_PEELED = 0x2  # two object names (ref + peeled)
REF_VALUE_SYMREF = 0x3  # symbolic reference
  113. def _encode_ref_value(value_type: int, value: bytes) -> bytes:
  114. """Encode a ref value based on its type."""
  115. if value_type == REF_VALUE_DELETE:
  116. return b""
  117. elif value_type == REF_VALUE_REF:
  118. # Convert hex string to binary if needed
  119. if len(value) == SHA1_HEX_SIZE:
  120. return bytes.fromhex(value.decode())
  121. else:
  122. return value
  123. elif value_type == REF_VALUE_PEELED:
  124. # Convert hex strings to binary if needed
  125. if len(value) == SHA1_PEELED_HEX_SIZE:
  126. return bytes.fromhex(value.decode())
  127. else:
  128. return value
  129. elif value_type == REF_VALUE_SYMREF:
  130. # varint(target_len) + target
  131. return encode_varint(len(value)) + value
  132. else:
  133. raise ValueError(f"Unknown ref value type: {value_type}")
  134. def _decode_ref_value(stream: BinaryIO, value_type: int) -> bytes:
  135. """Decode a ref value from stream based on its type."""
  136. if value_type == REF_VALUE_DELETE:
  137. return b""
  138. elif value_type == REF_VALUE_REF:
  139. value = stream.read(SHA1_BINARY_SIZE)
  140. if len(value) != SHA1_BINARY_SIZE:
  141. raise ValueError("Unexpected end of stream while reading ref value")
  142. # Convert to hex string for interface compatibility
  143. return value.hex().encode()
  144. elif value_type == REF_VALUE_PEELED:
  145. value = stream.read(SHA1_BINARY_SIZE * 2)
  146. if len(value) != SHA1_BINARY_SIZE * 2:
  147. raise ValueError("Unexpected end of stream while reading peeled value")
  148. # Convert to hex string for interface compatibility
  149. return value.hex().encode()
  150. elif value_type == REF_VALUE_SYMREF:
  151. target_len = decode_varint_from_stream(stream)
  152. if target_len is None:
  153. raise ValueError("Unexpected end of stream while reading target length")
  154. value = stream.read(target_len)
  155. if len(value) != target_len:
  156. raise ValueError("Unexpected end of stream while reading symref target")
  157. return value
  158. else:
  159. raise ValueError(f"Unknown ref value type: {value_type}")
  160. @dataclass
  161. class RefValue:
  162. """A reference value with its type."""
  163. value_type: int
  164. value: bytes
  165. @property
  166. def is_symbolic(self) -> bool:
  167. """Check if this is a symbolic reference."""
  168. return self.value_type == REF_VALUE_SYMREF
  169. @property
  170. def is_deletion(self) -> bool:
  171. """Check if this is a deletion."""
  172. return self.value_type == REF_VALUE_DELETE
  173. @property
  174. def is_peeled(self) -> bool:
  175. """Check if this is a peeled reference."""
  176. return self.value_type == REF_VALUE_PEELED
  177. def get_sha(self) -> Optional[bytes]:
  178. """Get the SHA1 value (for regular or peeled refs)."""
  179. if self.value_type == REF_VALUE_REF:
  180. return self.value
  181. elif self.value_type == REF_VALUE_PEELED:
  182. return self.value[:SHA1_HEX_SIZE]
  183. return None
  184. class RefUpdate:
  185. """A reference update operation."""
  186. def __init__(self, name: bytes, value_type: int, value: bytes):
  187. """Initialize RefUpdate.
  188. Args:
  189. name: Reference name
  190. value_type: Type of reference value
  191. value: Reference value
  192. """
  193. self.name = name
  194. self.value_type = value_type
  195. self.value = value
  196. class RefRecord:
  197. """A reference record in a reftable."""
  198. def __init__(
  199. self, refname: bytes, value_type: int, value: bytes, update_index: int = 0
  200. ):
  201. """Initialize RefRecord.
  202. Args:
  203. refname: Reference name
  204. value_type: Type of reference value
  205. value: Reference value
  206. update_index: Update index for the reference
  207. """
  208. self.refname = refname
  209. self.value_type = value_type
  210. self.value = value
  211. self.update_index = update_index
  212. def encode(self, prefix: bytes = b"", min_update_index: int = 0) -> bytes:
  213. """Encode the ref record with prefix compression."""
  214. common_prefix_len = 0
  215. for i in range(min(len(prefix), len(self.refname))):
  216. if prefix[i] != self.refname[i]:
  217. break
  218. common_prefix_len += 1
  219. suffix = self.refname[common_prefix_len:]
  220. # Encode according to Git format:
  221. # varint(prefix_length)
  222. # varint((suffix_length << 3) | value_type)
  223. # suffix
  224. # varint(update_index_delta)
  225. # value?
  226. result = encode_varint(common_prefix_len)
  227. result += _encode_reftable_suffix_and_type((len(suffix) << 3) | self.value_type)
  228. result += suffix
  229. # Calculate update_index_delta
  230. update_index_delta = self.update_index - min_update_index
  231. result += encode_varint(update_index_delta)
  232. # Encode value based on type
  233. result += _encode_ref_value(self.value_type, self.value)
  234. return result
  235. @classmethod
  236. def decode(
  237. cls, stream: BinaryIO, prefix: bytes = b"", min_update_index: int = 0
  238. ) -> tuple["RefRecord", bytes]:
  239. """Decode a ref record from a stream. Returns (record, full_refname)."""
  240. # Read prefix length
  241. prefix_len = decode_varint_from_stream(stream)
  242. if prefix_len is None:
  243. raise ValueError("Unexpected end of stream while reading prefix length")
  244. # Read combined suffix_length and value_type
  245. suffix_and_type = _decode_reftable_suffix_and_type(stream)
  246. if suffix_and_type is None:
  247. raise ValueError("Unexpected end of stream while reading suffix_and_type")
  248. suffix_len = suffix_and_type >> 3
  249. value_type = suffix_and_type & 0x7
  250. # Read suffix
  251. suffix = stream.read(suffix_len)
  252. if len(suffix) != suffix_len:
  253. raise ValueError("Unexpected end of stream while reading suffix")
  254. refname = prefix[:prefix_len] + suffix
  255. # Read update_index_delta
  256. update_index_delta = decode_varint_from_stream(stream)
  257. if update_index_delta is None:
  258. raise ValueError(
  259. "Unexpected end of stream while reading update_index_delta"
  260. )
  261. update_index = min_update_index + update_index_delta
  262. # Read value based on type
  263. value = _decode_ref_value(stream, value_type)
  264. return cls(refname, value_type, value, update_index), refname
  265. class LogBlock:
  266. """A block containing reflog records - simplified for empty blocks only."""
  267. def encode(self) -> bytes:
  268. """Encode empty log block for Git compatibility."""
  269. # Git expects compressed empty data for new repos
  270. return zlib.compress(b"")
class RefBlock:
    """A block containing reference records."""

    def __init__(self) -> None:
        """Initialize RefBlock."""
        self.refs: list[RefRecord] = []

    def add_ref(
        self, refname: bytes, value_type: int, value: bytes, update_index: int = 1
    ) -> None:
        """Add a reference to the block."""
        self.refs.append(RefRecord(refname, value_type, value, update_index))

    def _create_embedded_footer(
        self, ref_offsets: list[int], header_data: bytes
    ) -> bytes:
        """Create the embedded footer pattern Git uses.

        Args:
            ref_offsets: Record offsets (second ref onwards) collected
                during encoding, used for restart-point bookkeeping.
            header_data: The file header; Git embeds a copy in the footer.

        Returns:
            The encoded footer bytes.
        """
        result = BytesIO()
        result.write(b"\x00\x00")  # padding
        if len(self.refs) == 1:
            # Single ref pattern
            result.write(bytes([EMBEDDED_FOOTER_MARKER]))
            result.write(b"\x00\x01")
        else:
            # Multiple refs pattern
            result.write(bytes([EMBEDDED_FOOTER_MARKER]))
            result.write(b"\x00\x00")  # first restart offset (always 0)
            # Special offset calculation: second_ref_offset + EMBEDDED_FOOTER_MARKER
            if len(ref_offsets) > 0:
                special_offset = ref_offsets[0] + EMBEDDED_FOOTER_MARKER
                # NOTE(review): struct.pack("B", ...) raises if the offset
                # exceeds 255 — confirm blocks stay small enough in practice.
                result.write(struct.pack("B", special_offset))
            else:
                result.write(b"\x00")
            result.write(b"\x00")  # padding byte
            result.write(struct.pack("B", 2))  # restart count (Git always uses 2)
        # Add header copy
        if header_data and len(header_data) >= HEADER_SIZE_V1:
            result.write(header_data[:HEADER_SIZE_V1])
        else:
            # Create a minimal header copy
            result.write(REFTABLE_MAGIC)
            result.write(b"\x01\x00\x10\x00")  # version 1, block size 4096
            result.write(b"\x00" * 16)  # min/max update indices
        # Final padding
        result.write(b"\x00\x00\x00\x00")
        return result.getvalue()

    def encode(
        self,
        min_update_index: int = 0,
        embed_footer: bool = False,
        header_data: bytes = b"",
    ) -> bytes:
        """Encode the ref block with restart points for Git compatibility.

        Args:
            min_update_index: Base index; per-record indices are stored
                as deltas from this value.
            embed_footer: Currently unused; the footer is always embedded.
            header_data: File header to copy into the embedded footer.

        Returns:
            The encoded block bytes (records followed by footer).
        """
        # Sort refs by name
        self.refs.sort(key=lambda r: r.refname)
        # Encode refs with prefix compression
        ref_data = BytesIO()
        prefix = b""
        ref_offsets = []
        for i, ref in enumerate(self.refs):
            # Record offset for restart points
            if i > 0:  # Git records offsets starting from second ref
                ref_offsets.append(ref_data.tell())
            encoded = ref.encode(prefix, min_update_index)
            ref_data.write(encoded)
            prefix = ref.refname
        record_data = ref_data.getvalue()
        # Git uses embedded footer format for ref blocks
        result = BytesIO()
        result.write(record_data)
        result.write(self._create_embedded_footer(ref_offsets, header_data))
        return result.getvalue()

    @classmethod
    def _find_ref_data_end(cls, data: bytes) -> int:
        """Find where ref records end in the data.

        Git embeds a footer at the end of ref blocks that contains offset
        information. This footer is marked by a special pattern (0x00 0x00 0x1c)
        where 0x1c (28) represents the version 2 header size.

        Args:
            data: The raw block data to search

        Returns:
            The position where ref records end and footer begins
        """
        # Look for embedded footer marker pattern
        marker_pattern = b"\x00\x00" + bytes([EMBEDDED_FOOTER_MARKER])
        marker_pos = data.find(marker_pattern)
        if marker_pos > 0:
            return marker_pos
        # Fallback: use most of the data, leaving room for footer
        return max(0, len(data) - 50)

    @classmethod
    def decode(cls, data: bytes, min_update_index: int = 0) -> "RefBlock":
        """Decode a ref block from bytes - simplified.

        Args:
            data: Raw block bytes (records plus embedded footer).
            min_update_index: Base index used to reconstruct absolute
                per-record update indices.

        Returns:
            A RefBlock holding the successfully decoded records.
        """
        block = cls()
        if not data:
            return block
        # Find where ref data ends (before embedded footer)
        ref_data_end = cls._find_ref_data_end(data)
        record_data = data[:ref_data_end]
        stream = BytesIO(record_data)
        prefix = b""
        while stream.tell() < len(record_data):
            # Save position to check for progress
            pos_before = stream.tell()
            # Check if we have enough data for a minimal record
            remaining = len(record_data) - stream.tell()
            if (
                remaining < MIN_RECORD_SIZE
            ):  # Need at least prefix_len + suffix_and_type + some data
                break
            try:
                ref, prefix = RefRecord.decode(stream, prefix, min_update_index)
                # Skip metadata records (empty refnames are Git's internal index records)
                if ref.refname:  # Only add non-empty refnames
                    block.refs.append(ref)
            except ValueError:
                # If we can't decode a record, we might have hit padding or invalid data
                # Stop parsing rather than raising an error
                break
            # Ensure we made progress to avoid infinite loops
            if stream.tell() == pos_before:
                break
        return block
class ReftableWriter:
    """Writer for reftable files."""

    def __init__(
        self,
        f: BinaryIO,
        auto_create_head: bool = True,
        is_batch_operation: bool = False,
    ):
        """Initialize ReftableWriter.

        Args:
            f: Binary file object to write to
            auto_create_head: Whether to automatically create HEAD reference
            is_batch_operation: Whether this is a batch operation
        """
        self.f = f
        # Map of refname -> (value_type, value)
        self.refs: dict[bytes, tuple[int, bytes]] = {}
        self.refs_order: list[bytes] = []  # Track insertion order for update indices
        self._written_data: list[bytes] = []  # Track written data for CRC calculation
        self.min_update_index = 1
        self.max_update_index = 1
        self.auto_create_head = auto_create_head
        self.is_batch_operation = (
            is_batch_operation  # Track if this is a batch operation
        )

    def add_ref(self, refname: bytes, sha: bytes) -> None:
        """Add a direct reference."""
        self.refs[refname] = (REF_VALUE_REF, sha)
        if refname not in self.refs_order:
            self.refs_order.append(refname)
        self._maybe_auto_create_head()

    def add_symbolic_ref(self, refname: bytes, target: bytes) -> None:
        """Add a symbolic reference."""
        self.refs[refname] = (REF_VALUE_SYMREF, target)
        if refname not in self.refs_order:
            # HEAD should always be first (like git does)
            if refname == b"HEAD":
                self.refs_order.insert(0, refname)
            else:
                self.refs_order.append(refname)
        else:
            # Update existing ref (e.g., if HEAD was auto-created and now explicitly set)
            pass

    def delete_ref(self, refname: bytes) -> None:
        """Mark a reference as deleted."""
        self.refs[refname] = (REF_VALUE_DELETE, b"")
        if refname not in self.refs_order:
            self.refs_order.append(refname)

    def _maybe_auto_create_head(self) -> None:
        """Auto-create HEAD -> refs/heads/master if needed (Git compatibility)."""
        if self.auto_create_head and b"HEAD" not in self.refs:
            # Git always creates HEAD -> refs/heads/master by default
            self.refs[b"HEAD"] = (REF_VALUE_SYMREF, b"refs/heads/master")
            self.refs_order.insert(0, b"HEAD")

    def write(self) -> None:
        """Write the reftable to the file."""
        # Skip recalculation if max_update_index was already set higher than default
        # This preserves Git's behavior for symbolic-ref operations
        # Also skip for batch operations where min == max by design
        if (
            not self.is_batch_operation
            and self.max_update_index <= self.min_update_index
            and self.refs_order
        ):
            # Calculate max_update_index based on actual number of refs
            self.max_update_index = self.min_update_index + len(self.refs_order) - 1
        # Write header
        self._write_header()
        # Write ref blocks (includes embedded footer)
        self._write_ref_blocks()
        # Git embeds the footer within the ref block for small files,
        # so we only need to add final padding and CRC
        self._write_final_padding()

    def _write_header(self) -> None:
        """Write the reftable header."""
        # Magic bytes
        header_data = REFTABLE_MAGIC
        # Version + block size (4 bytes total, big-endian network order)
        # Format: uint8(version) + uint24(block_size)
        version_and_blocksize = (REFTABLE_VERSION << 24) | (
            DEFAULT_BLOCK_SIZE & 0xFFFFFF
        )
        header_data += struct.pack(">I", version_and_blocksize)
        # Min/max update index (timestamps) (big-endian network order)
        # Git uses increasing sequence numbers for update indices
        header_data += struct.pack(">Q", self.min_update_index)  # min_update_index
        header_data += struct.pack(">Q", self.max_update_index)  # max_update_index
        # Store header for footer
        self.header_data = header_data
        self.f.write(header_data)
        self._written_data.append(header_data)

    def _get_ref_update_indices(self) -> dict[bytes, int]:
        """Get update indices for all refs based on operation type.

        In batch operations, all refs get the same update index (timestamp).
        In non-batch operations, refs are assigned sequential indices starting
        from min_update_index.

        Returns:
            dict[bytes, int]: Mapping of ref names to their update indices
        """
        if self.is_batch_operation:
            # All refs get same index in batch
            return {name: self.min_update_index for name in self.refs_order}
        elif hasattr(self, "_ref_update_indices"):
            # Use provided indices
            return self._ref_update_indices  # type: ignore[no-any-return]
        elif len(self.refs_order) == 1 and self.refs_order[0] == b"HEAD":
            # Special case for single HEAD symbolic ref
            value_type, _ = self.refs[b"HEAD"]
            if value_type == REF_VALUE_SYMREF:
                return {b"HEAD": self.max_update_index}
            else:
                return {b"HEAD": self.min_update_index}
        else:
            # Sequential indices
            indices = {}
            for i, name in enumerate(self.refs_order):
                indices[name] = self.min_update_index + i
            return indices

    def _write_ref_blocks(self) -> None:
        """Write reference blocks."""
        # Only write block if we have refs
        if not self.refs:
            return
        # Write refs in insertion order to preserve update indices like Git
        block = RefBlock()
        # Get update indices for all refs
        update_indices = self._get_ref_update_indices()
        # Add refs to block with their update indices
        for refname in self.refs_order:
            value_type, value = self.refs[refname]
            update_index = update_indices[refname]
            block.add_ref(refname, value_type, value, update_index)
        # Generate block data (may use embedded footer for small blocks)
        block_data = block.encode(
            min_update_index=self.min_update_index, header_data=self.header_data
        )
        # Write block type
        block_header = BLOCK_TYPE_REF
        self.f.write(block_header)
        self._written_data.append(block_header)
        # Write block length (3 bytes, big-endian network order)
        length_bytes = len(block_data).to_bytes(3, "big")
        self.f.write(length_bytes)
        self._written_data.append(length_bytes)
        # Write block data
        self.f.write(block_data)
        self._written_data.append(block_data)

    def _write_final_padding(self) -> None:
        """Write final padding and CRC for Git compatibility."""
        # Git writes exactly 40 bytes after the ref block (which includes embedded footer)
        # This is 36 bytes of zeros followed by 4-byte CRC
        padding = b"\x00" * FINAL_PADDING_SIZE
        self.f.write(padding)
        self._written_data.append(padding)
        # Calculate CRC over the last 64 bytes before CRC position
        # Collect all data written so far
        all_data = b"".join(self._written_data)
        # CRC is calculated over the 64 bytes before the CRC itself
        if len(all_data) >= CRC_DATA_SIZE:
            crc_data = all_data[-CRC_DATA_SIZE:]
        else:
            # Pad with zeros if file is too small
            crc_data = all_data + b"\x00" * (CRC_DATA_SIZE - len(all_data))
        crc = zlib.crc32(crc_data) & 0xFFFFFFFF
        self.f.write(struct.pack(">I", crc))
class ReftableReader:
    """Reader for reftable files."""

    def __init__(self, f: BinaryIO):
        """Initialize ReftableReader.

        Args:
            f: Binary file object to read from
        """
        self.f = f
        self._read_header()
        # Map of refname -> (value_type, value), populated by _read_blocks.
        self.refs: dict[bytes, tuple[int, bytes]] = {}
        self._read_blocks()

    def _read_header(self) -> None:
        """Read and validate the reftable header.

        Raises:
            ValueError: If the magic bytes or version are not recognized.
        """
        # Read magic bytes
        magic = self.f.read(4)
        if magic != REFTABLE_MAGIC:
            raise ValueError(f"Invalid reftable magic: {magic!r}")
        # Read version + block size (4 bytes total, big-endian network order)
        # Format: uint8(version) + uint24(block_size)
        version_and_blocksize = struct.unpack(">I", self.f.read(4))[0]
        version = (version_and_blocksize >> 24) & 0xFF  # First byte
        block_size = version_and_blocksize & 0xFFFFFF  # Last 3 bytes
        if version != REFTABLE_VERSION:
            raise ValueError(f"Unsupported reftable version: {version}")
        self.block_size = block_size
        # Read min/max update index (big-endian network order)
        self.min_update_index = struct.unpack(">Q", self.f.read(8))[0]
        self.max_update_index = struct.unpack(">Q", self.f.read(8))[0]

    def _read_blocks(self) -> None:
        """Read all blocks from the reftable."""
        while True:
            # Read block type
            block_type = self.f.read(1)
            if not block_type:
                break
            # Read block length (3 bytes, big-endian network order)
            length_bytes = self.f.read(3)
            if len(length_bytes) < 3:
                break
            # Convert 3-byte big-endian to int
            block_length = int.from_bytes(length_bytes, "big")
            # Read block data
            block_data = self.f.read(block_length)
            if block_type == BLOCK_TYPE_REF:
                self._process_ref_block(block_data)
            # TODO: Handle other block types
            # Stop if we encounter footer header copy
            if (
                block_type == b"R" and block_length > MAX_REASONABLE_BLOCK_SIZE
            ):  # Likely parsing footer as block
                break

    def _process_ref_block(self, data: bytes) -> None:
        """Process a reference block."""
        block = RefBlock.decode(data, min_update_index=self.min_update_index)
        for ref in block.refs:
            # Store all refs including deletion records - deletion handling is done at container level
            self.refs[ref.refname] = (ref.value_type, ref.value)

    def get_ref(self, refname: bytes) -> Optional[tuple[int, bytes]]:
        """Get a reference by name."""
        return self.refs.get(refname)

    def all_refs(self) -> dict[bytes, tuple[int, bytes]]:
        """Get all references."""
        return self.refs.copy()
  618. class _ReftableBatchContext:
  619. """Context manager for batching reftable updates."""
  620. def __init__(self, refs_container: "ReftableRefsContainer") -> None:
  621. self.refs_container = refs_container
  622. def __enter__(self) -> "_ReftableBatchContext":
  623. self.refs_container._batch_mode = True
  624. return self
  625. def __exit__(
  626. self,
  627. exc_type: Optional[type[BaseException]],
  628. exc_val: Optional[BaseException],
  629. exc_tb: Optional[TracebackType],
  630. ) -> None:
  631. self.refs_container._batch_mode = False
  632. if exc_type is None: # Only flush if no exception occurred
  633. self.refs_container._flush_pending_updates()
  634. class ReftableRefsContainer(RefsContainer):
  635. """A refs container backed by the reftable format."""
  636. def __init__(
  637. self,
  638. path: Union[str, bytes],
  639. logger: Optional[
  640. Callable[
  641. [
  642. bytes,
  643. bytes,
  644. bytes,
  645. Optional[bytes],
  646. Optional[int],
  647. Optional[int],
  648. bytes,
  649. ],
  650. None,
  651. ]
  652. ] = None,
  653. ) -> None:
  654. """Initialize a reftable refs container.
  655. Args:
  656. path: Path to the reftable directory
  657. logger: Optional logger for reflog
  658. """
  659. super().__init__(logger=logger)
  660. # Normalize path to string
  661. if isinstance(path, bytes):
  662. self.path = os.fsdecode(path)
  663. else:
  664. self.path = path
  665. self.reftable_dir = os.path.join(self.path, "reftable")
  666. if not os.path.exists(self.reftable_dir):
  667. os.makedirs(self.reftable_dir)
  668. self._pending_updates: list[RefUpdate] = [] # Buffer for batching ref updates
  669. self._ref_update_indices: dict[
  670. bytes, int
  671. ] = {} # Track chronological update index for each ref
  672. self._batch_mode = False # Track whether we're in batch mode
  673. # Create refs/heads marker file for Git compatibility
  674. self._ensure_refs_heads_marker()
  675. def _ensure_refs_heads_marker(self) -> None:
  676. """Ensure refs/heads marker file exists for Git compatibility.
  677. Git expects a refs/heads file (not directory) to exist when using
  678. reftables. This file contains a marker message indicating that the
  679. repository uses the reftable format instead of loose refs.
  680. """
  681. refs_dir = os.path.join(self.path, "refs")
  682. if not os.path.exists(refs_dir):
  683. os.makedirs(refs_dir)
  684. refs_heads_path = os.path.join(refs_dir, "heads")
  685. # If refs/heads is a directory, remove it
  686. if os.path.isdir(refs_heads_path):
  687. shutil.rmtree(refs_heads_path)
  688. # Create marker file if it doesn't exist
  689. if not os.path.exists(refs_heads_path):
  690. with open(refs_heads_path, "wb") as f:
  691. f.write(b"this repository uses the reftable format\n")
  692. def _read_table_file(self, table_file: str) -> BinaryIO:
  693. """Context manager helper to open and read a reftable file."""
  694. return open(table_file, "rb")
def _load_ref_update_indices(self) -> None:
    """Load the update indices for all refs from existing reftable files.

    Populates ``self._ref_update_indices`` by scanning every table named
    in ``tables.list``. Only the first block of each file is inspected.
    NOTE(review): assumes the first block after the header is a ref
    block — files whose first block has another type are silently
    skipped; confirm this matches the writer's layout.
    """
    for table_file in self._get_table_files():
        with self._read_table_file(table_file) as f:
            # The reader parses the file header, giving us the table's
            # min_update_index (needed to decode per-ref indices).
            f.seek(0)
            reader = ReftableReader(f)
            # Position at the first block, immediately past the header.
            f.seek(HEADER_SIZE_V1)  # Skip header
            block_type = f.read(1)
            if block_type == BLOCK_TYPE_REF:
                # Block length is a 24-bit big-endian integer.
                length_bytes = f.read(3)
                block_length = (
                    (length_bytes[0] << 16)
                    | (length_bytes[1] << 8)
                    | length_bytes[2]
                )
                block_data = f.read(block_length)
                # Decode refs together with their per-ref update indices.
                block = RefBlock.decode(
                    block_data, min_update_index=reader.min_update_index
                )
                for ref in block.refs:
                    # Remember each ref's update index so later batch
                    # writes can preserve chronological ordering.
                    self._ref_update_indices[ref.refname] = ref.update_index
  721. def _get_next_update_index(self) -> int:
  722. """Get the next available update index across all tables."""
  723. # Find the highest update index used so far by reading all tables
  724. max_index = 0
  725. table_files = self._get_table_files()
  726. if not table_files:
  727. # No existing tables, but Git starts with HEAD at index 1
  728. # Our first operations start at index 2
  729. return 2
  730. for table_file in table_files:
  731. with self._read_table_file(table_file) as f:
  732. reader = ReftableReader(f)
  733. max_index = max(max_index, reader.max_update_index)
  734. return max_index + 1
  735. def _get_table_files(self) -> list[str]:
  736. """Get sorted list of reftable files from tables.list."""
  737. if not os.path.exists(self.reftable_dir):
  738. return []
  739. tables_list_path = os.path.join(self.reftable_dir, "tables.list")
  740. if not os.path.exists(tables_list_path):
  741. return []
  742. files = []
  743. with open(tables_list_path, "rb") as f:
  744. for line in f:
  745. table_name = line.decode().strip()
  746. if table_name:
  747. files.append(os.path.join(self.reftable_dir, table_name))
  748. return files
  749. def _read_all_tables(self) -> dict[bytes, tuple[int, bytes]]:
  750. """Read all reftable files and merge results."""
  751. # First, read all tables and sort them by min_update_index
  752. table_data = []
  753. for table_file in self._get_table_files():
  754. with self._read_table_file(table_file) as f:
  755. reader = ReftableReader(f)
  756. table_data.append(
  757. (reader.min_update_index, table_file, reader.all_refs())
  758. )
  759. # Sort by min_update_index to ensure chronological order
  760. table_data.sort(key=lambda x: x[0])
  761. # Merge results in chronological order
  762. all_refs: dict[bytes, tuple[int, bytes]] = {}
  763. for min_update_index, table_file, refs in table_data:
  764. # Apply updates from this table
  765. for refname, (value_type, value) in refs.items():
  766. if value_type == REF_VALUE_DELETE:
  767. # Remove ref if it exists
  768. all_refs.pop(refname, None)
  769. else:
  770. # Add/update ref
  771. all_refs[refname] = (value_type, value)
  772. return all_refs
  773. def allkeys(self) -> set[bytes]:
  774. """Return set of all ref names."""
  775. refs = self._read_all_tables()
  776. result = set(refs.keys())
  777. # For symbolic refs, also include their targets as implicit refs
  778. for refname, (value_type, value) in refs.items():
  779. if value_type == REF_VALUE_SYMREF:
  780. # Add the target ref as an implicit ref
  781. target = value
  782. result.add(target)
  783. return result
  784. def follow(self, name: bytes) -> tuple[list[bytes], bytes]:
  785. """Follow a reference name.
  786. Returns: a tuple of (refnames, sha), where refnames are the names of
  787. references in the chain
  788. """
  789. refnames = []
  790. current = name
  791. refs = self._read_all_tables()
  792. for _ in range(MAX_SYMREF_DEPTH):
  793. refnames.append(current)
  794. ref_data = refs.get(current)
  795. if ref_data is None:
  796. raise KeyError(current)
  797. value_type, value = ref_data
  798. if value_type == REF_VALUE_REF:
  799. return refnames, value
  800. if value_type == REF_VALUE_PEELED:
  801. return refnames, value[:SHA1_HEX_SIZE] # First SHA1 hex chars
  802. if value_type == REF_VALUE_SYMREF:
  803. current = value
  804. continue
  805. # Unknown value type
  806. raise ValueError(f"Unknown ref value type: {value_type}")
  807. # Too many levels of indirection
  808. raise ValueError(f"Too many levels of symbolic ref indirection for {name!r}")
  809. def __getitem__(self, name: bytes) -> ObjectID:
  810. """Get the SHA1 for a reference name.
  811. This method follows all symbolic references.
  812. """
  813. _, sha = self.follow(name)
  814. if sha is None:
  815. raise KeyError(name)
  816. return sha
  817. def read_loose_ref(self, name: bytes) -> bytes:
  818. """Read a reference value without following symbolic refs.
  819. Args:
  820. name: the refname to read
  821. Returns: The contents of the ref file.
  822. For symbolic refs, returns b"ref: <target>"
  823. Raises:
  824. KeyError: if the ref does not exist
  825. """
  826. refs = self._read_all_tables()
  827. ref_data = refs.get(name)
  828. if ref_data is None:
  829. raise KeyError(name)
  830. value_type, value = ref_data
  831. if value_type == REF_VALUE_REF:
  832. return value
  833. elif value_type == REF_VALUE_SYMREF:
  834. # Return in Git format: "ref: <target>"
  835. return SYMREF + value
  836. elif value_type == REF_VALUE_PEELED:
  837. # Return the first SHA (not the peeled one)
  838. return value[:SHA1_HEX_SIZE] # First SHA1 hex chars
  839. raise ValueError(f"Unknown ref value type: {value_type}")
  840. def get_packed_refs(self) -> dict[bytes, bytes]:
  841. """Get packed refs. Reftable doesn't distinguish packed/loose."""
  842. refs = self._read_all_tables()
  843. result = {}
  844. for name, (value_type, value) in refs.items():
  845. if value_type == REF_VALUE_REF:
  846. result[name] = value
  847. elif value_type == REF_VALUE_PEELED:
  848. result[name] = value[:SHA1_HEX_SIZE] # First SHA1 hex chars
  849. return result
  850. def get_peeled(self, name: bytes) -> Optional[bytes]:
  851. """Return the cached peeled value of a ref, if available.
  852. Args:
  853. name: Name of the ref to peel
  854. Returns: The peeled value of the ref. If the ref is known not point to
  855. a tag, this will be the SHA the ref refers to. If the ref may point
  856. to a tag, but no cached information is available, None is returned.
  857. """
  858. refs = self._read_all_tables()
  859. ref_data = refs.get(name)
  860. if ref_data is None:
  861. return None
  862. value_type, value = ref_data
  863. if value_type == REF_VALUE_PEELED:
  864. # Return the peeled SHA (second 40 hex chars)
  865. return value[40:80]
  866. elif value_type == REF_VALUE_REF:
  867. # Known not to be peeled
  868. return value
  869. else:
  870. # Symbolic ref or other - no peeled info
  871. return None
  872. def _generate_table_path(
  873. self,
  874. min_update_index: Optional[int] = None,
  875. max_update_index: Optional[int] = None,
  876. ) -> str:
  877. """Generate a new reftable file path."""
  878. if min_update_index is None or max_update_index is None:
  879. timestamp = int(time.time() * 1000000)
  880. min_idx = max_idx = timestamp
  881. else:
  882. min_idx = min_update_index
  883. max_idx = max_update_index
  884. hash_part = random.randint(0, 0xFFFFFFFF)
  885. table_name = f"0x{min_idx:016x}-0x{max_idx:016x}-{hash_part:08x}.ref"
  886. return os.path.join(self.reftable_dir, table_name)
  887. def add_packed_refs(self, new_refs: Mapping[bytes, Optional[bytes]]) -> None:
  888. """Add packed refs. Creates a new reftable file with all refs consolidated."""
  889. if not new_refs:
  890. return
  891. self._write_batch_updates(new_refs)
  892. def _write_batch_updates(self, updates: Mapping[bytes, Optional[bytes]]) -> None:
  893. """Write multiple ref updates to a single reftable file."""
  894. if not updates:
  895. return
  896. table_path = self._generate_table_path()
  897. with open(table_path, "wb") as f:
  898. writer = ReftableWriter(f)
  899. for refname, sha in updates.items():
  900. if sha is not None:
  901. writer.add_ref(refname, sha)
  902. else:
  903. writer.delete_ref(refname)
  904. writer.write()
  905. self._update_tables_list()
  906. def set_if_equals(
  907. self,
  908. name: bytes,
  909. old_ref: Optional[bytes],
  910. new_ref: Optional[bytes],
  911. committer: Optional[bytes] = None,
  912. timestamp: Optional[int] = None,
  913. timezone: Optional[int] = None,
  914. message: Optional[bytes] = None,
  915. ) -> bool:
  916. """Atomically set a ref if it currently equals old_ref."""
  917. # For now, implement a simple non-atomic version
  918. # TODO: Implement proper atomic compare-and-swap
  919. try:
  920. current = self.read_loose_ref(name)
  921. except KeyError:
  922. current = None
  923. if current != old_ref:
  924. return False
  925. if new_ref is None:
  926. # Delete ref
  927. self._write_ref_update(name, REF_VALUE_DELETE, b"")
  928. else:
  929. # Update ref
  930. self._write_ref_update(name, REF_VALUE_REF, new_ref)
  931. return True
  932. def add_if_new(
  933. self,
  934. name: bytes,
  935. ref: bytes,
  936. committer: Optional[bytes] = None,
  937. timestamp: Optional[int] = None,
  938. timezone: Optional[int] = None,
  939. message: Optional[bytes] = None,
  940. ) -> bool:
  941. """Add a ref only if it doesn't exist."""
  942. try:
  943. self.read_loose_ref(name)
  944. return False # Ref exists
  945. except KeyError:
  946. pass # Ref doesn't exist, continue
  947. self._write_ref_update(name, REF_VALUE_REF, ref)
  948. return True
  949. def remove_if_equals(
  950. self,
  951. name: bytes,
  952. old_ref: Optional[bytes],
  953. committer: Optional[bytes] = None,
  954. timestamp: Optional[int] = None,
  955. timezone: Optional[int] = None,
  956. message: Optional[bytes] = None,
  957. ) -> bool:
  958. """Remove a ref if it equals old_ref."""
  959. return self.set_if_equals(
  960. name,
  961. old_ref,
  962. None,
  963. committer=committer,
  964. timestamp=timestamp,
  965. timezone=timezone,
  966. message=message,
  967. )
  968. def set_symbolic_ref(
  969. self,
  970. name: bytes,
  971. other: bytes,
  972. committer: Optional[bytes] = None,
  973. timestamp: Optional[int] = None,
  974. timezone: Optional[int] = None,
  975. message: Optional[bytes] = None,
  976. ) -> None:
  977. """Set a symbolic reference."""
  978. self._write_ref_update(name, REF_VALUE_SYMREF, other)
  979. def _write_ref_update(self, name: bytes, value_type: int, value: bytes) -> None:
  980. """Write a single ref update immediately to its own reftable file."""
  981. # Check if we're in batch mode - if so, buffer for later
  982. if getattr(self, "_batch_mode", False):
  983. # Buffer the update for later batching using RefUpdate objects
  984. self._pending_updates.append(RefUpdate(name, value_type, value))
  985. return
  986. # Write immediately like Git does - one file per update
  987. self._write_single_ref_update(name, value_type, value)
  988. def _write_single_ref_update(
  989. self, name: bytes, value_type: int, value: bytes
  990. ) -> None:
  991. """Write a single ref update to its own reftable file like Git does."""
  992. table_path = self._generate_table_path()
  993. next_update_index = self._get_next_update_index()
  994. with open(table_path, "wb") as f:
  995. writer = ReftableWriter(f, auto_create_head=False)
  996. writer.min_update_index = next_update_index
  997. # Git uses max_update_index = min + 1 for single ref updates
  998. writer.max_update_index = next_update_index + 1
  999. # Don't auto-create HEAD - let it be set explicitly
  1000. # Add the requested ref
  1001. if value_type == REF_VALUE_REF:
  1002. writer.add_ref(name, value)
  1003. elif value_type == REF_VALUE_SYMREF:
  1004. writer.add_symbolic_ref(name, value)
  1005. elif value_type == REF_VALUE_DELETE:
  1006. writer.delete_ref(name)
  1007. writer.write()
  1008. self._update_tables_list()
def _flush_pending_updates(self) -> None:
    """Flush pending ref updates like Git does - consolidate all refs.

    Called when a batch_update() context exits cleanly. Merges the
    buffered updates with the current on-disk state and rewrites the
    reftable directory as a single consolidated table, removing the
    per-update .ref files it replaces.
    """
    if not self._pending_updates:
        return
    # First, load the current update indices for all refs
    if not self._ref_update_indices:
        self._load_ref_update_indices()
    # Read all existing refs to create complete consolidated view
    all_refs = self._read_all_tables()
    # Split buffered updates into the HEAD update and the rest
    head_update, other_updates = self._process_pending_updates()
    # Get next update index - all refs in batch get the SAME index
    batch_update_index = self._get_next_update_index()
    # Apply updates (in place) to get the final consolidated state
    self._apply_batch_updates(
        all_refs, other_updates, head_update, batch_update_index
    )
    # Write consolidated batch file (skipped when everything was deleted)
    created_files = (
        self._write_batch_file(all_refs, batch_update_index) if all_refs else []
    )
    # Update tables list with new files (don't compact, keep separate)
    if created_files:
        # Remove old files and update tables.list
        tables_list_path = os.path.join(self.reftable_dir, "tables.list")
        # Remove old .ref files superseded by the consolidated table
        for name in os.listdir(self.reftable_dir):
            if name.endswith(".ref") and name not in created_files:
                os.remove(os.path.join(self.reftable_dir, name))
        # Write new tables.list with separate files
        with open(tables_list_path, "wb") as f:
            for filename in sorted(created_files):  # Sort for deterministic order
                f.write((filename + "\n").encode())
    self._pending_updates.clear()
  1043. def _process_pending_updates(
  1044. self,
  1045. ) -> tuple[Optional[tuple[bytes, int, bytes]], list[tuple[bytes, int, bytes]]]:
  1046. """Process pending updates and return (head_update, other_updates)."""
  1047. head_update = None
  1048. other_updates = []
  1049. # Process all RefUpdate objects
  1050. for update in self._pending_updates:
  1051. if update.name == b"HEAD":
  1052. if update.value_type == REF_VALUE_SYMREF:
  1053. # Resolve symref chain like Git does (only for HEAD)
  1054. resolved_target = self._resolve_symref_chain(update.value)
  1055. head_update = (update.name, REF_VALUE_SYMREF, resolved_target)
  1056. else:
  1057. head_update = (update.name, update.value_type, update.value)
  1058. else:
  1059. # Regular ref, symref, or deletion
  1060. other_updates.append((update.name, update.value_type, update.value))
  1061. return head_update, other_updates
  1062. def _resolve_symref_chain(self, target: bytes) -> bytes:
  1063. """Resolve a symbolic reference chain to its final target.
  1064. Git limits symref chains to 5 levels to prevent infinite loops.
  1065. This method follows the chain until it finds a non-symbolic ref
  1066. or hits the depth limit.
  1067. Args:
  1068. target: The initial target of a symbolic reference
  1069. Returns:
  1070. The final target after resolving all symbolic references
  1071. """
  1072. visited = set()
  1073. current = target
  1074. # Follow the chain up to 5 levels deep (Git's limit)
  1075. for _ in range(MAX_SYMREF_DEPTH):
  1076. if current in visited:
  1077. # Circular reference, return current
  1078. break
  1079. visited.add(current)
  1080. # Check if current target is also a symref in pending updates
  1081. found_symref = False
  1082. for update in self._pending_updates:
  1083. if update.name == current and update.value_type == REF_VALUE_SYMREF:
  1084. current = update.value
  1085. found_symref = True
  1086. break
  1087. if not found_symref:
  1088. # Not a symref, this is the final target
  1089. break
  1090. return current
  1091. def _apply_batch_updates(
  1092. self,
  1093. all_refs: dict[bytes, tuple[int, bytes]],
  1094. other_updates: list[tuple[bytes, int, bytes]],
  1095. head_update: Optional[tuple[bytes, int, bytes]],
  1096. batch_update_index: int,
  1097. ) -> None:
  1098. """Apply batch updates to the refs dict and update indices."""
  1099. # Process all updates and assign the SAME update index to all refs in batch
  1100. for name, value_type, value in other_updates:
  1101. if value_type == REF_VALUE_DELETE:
  1102. all_refs.pop(name, None)
  1103. # Deletion still gets recorded with the batch index
  1104. self._ref_update_indices[name] = batch_update_index
  1105. else:
  1106. all_refs[name] = (value_type, value)
  1107. # All refs in batch get the same update index
  1108. self._ref_update_indices[name] = batch_update_index
  1109. # Process HEAD update with same batch index
  1110. if head_update:
  1111. name, value_type, value = head_update
  1112. if value_type == REF_VALUE_DELETE:
  1113. all_refs.pop(name, None)
  1114. self._ref_update_indices[name] = batch_update_index
  1115. else:
  1116. all_refs[name] = (value_type, value)
  1117. self._ref_update_indices[name] = batch_update_index
def _write_batch_file(
    self, all_refs: dict[bytes, tuple[int, bytes]], batch_update_index: int
) -> list[str]:
    """Write all refs to a single batch file and return created filenames.

    Every ref shares *batch_update_index* as both the table's min and
    max update index. The writer's internal ordering is overridden so
    HEAD comes first (matching Git's output), with the remaining refs
    in sorted order.
    """
    # All refs in batch have same update index
    table_path = self._generate_table_path(batch_update_index, batch_update_index)
    with open(table_path, "wb") as f:
        writer = ReftableWriter(f, auto_create_head=False, is_batch_operation=True)
        writer.min_update_index = batch_update_index
        writer.max_update_index = batch_update_index
        # Add all refs in sorted order
        writer.refs_order = sorted(all_refs.keys())
        # Git typically puts HEAD first if present
        if b"HEAD" in writer.refs_order:
            writer.refs_order.remove(b"HEAD")
            writer.refs_order.insert(0, b"HEAD")
        for refname in writer.refs_order:
            value_type, value = all_refs[refname]
            writer.refs[refname] = (value_type, value)
        # Pass the per-ref update indices through to the writer; this
        # reaches into the writer's internals deliberately.
        writer._ref_update_indices = {  # type: ignore[attr-defined]
            name: batch_update_index for name in all_refs.keys()
        }
        writer.write()
    return [os.path.basename(table_path)]
  1143. def batch_update(self) -> "_ReftableBatchContext":
  1144. """Context manager for batching multiple ref updates into a single reftable."""
  1145. return _ReftableBatchContext(self)
  1146. def remove_packed_ref(self, name: bytes) -> None:
  1147. """Remove a packed ref. Creates a deletion record."""
  1148. self._write_ref_update(name, REF_VALUE_DELETE, b"")
  1149. def _compact_tables_list(self, new_table_name: str) -> None:
  1150. """Compact tables list to single file like Git does."""
  1151. tables_list_path = os.path.join(self.reftable_dir, "tables.list")
  1152. # Remove old .ref files (Git's compaction behavior)
  1153. for name in os.listdir(self.reftable_dir):
  1154. if name.endswith(".ref") and name != new_table_name:
  1155. os.remove(os.path.join(self.reftable_dir, name))
  1156. # Write new tables.list with just the consolidated file
  1157. with open(tables_list_path, "wb") as f:
  1158. f.write((new_table_name + "\n").encode())
  1159. def _update_tables_list(self) -> None:
  1160. """Update the tables.list file with current table files."""
  1161. tables_list_path = os.path.join(self.reftable_dir, "tables.list")
  1162. # Get all .ref files in the directory
  1163. ref_files = []
  1164. for name in os.listdir(self.reftable_dir):
  1165. if name.endswith(".ref"):
  1166. ref_files.append(name)
  1167. # Sort by name (which includes timestamp)
  1168. ref_files.sort()
  1169. # Write to tables.list
  1170. with open(tables_list_path, "wb") as f:
  1171. for name in ref_files:
  1172. f.write((name + "\n").encode())