index.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793
# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  21. """Parser for the git index file format."""
  22. import os
  23. import stat
  24. import struct
  25. import sys
  26. import types
  27. from collections.abc import Generator, Iterable, Iterator
  28. from dataclasses import dataclass
  29. from enum import Enum
  30. from typing import (
  31. TYPE_CHECKING,
  32. Any,
  33. BinaryIO,
  34. Callable,
  35. Optional,
  36. Union,
  37. cast,
  38. )
  39. if TYPE_CHECKING:
  40. from .file import _GitFile
  41. from .repo import BaseRepo
  42. from .file import GitFile
  43. from .object_store import iter_tree_contents
  44. from .objects import (
  45. S_IFGITLINK,
  46. S_ISGITLINK,
  47. Blob,
  48. ObjectID,
  49. Tree,
  50. hex_to_sha,
  51. sha_to_hex,
  52. )
  53. from .pack import ObjectContainer, SHA1Reader, SHA1Writer
  54. # 2-bit stage (during merge)
  55. FLAG_STAGEMASK = 0x3000
  56. FLAG_STAGESHIFT = 12
  57. FLAG_NAMEMASK = 0x0FFF
  58. # assume-valid
  59. FLAG_VALID = 0x8000
  60. # extended flag (must be zero in version 2)
  61. FLAG_EXTENDED = 0x4000
  62. # used by sparse checkout
  63. EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
  64. # used by "git add -N"
  65. EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
  66. DEFAULT_VERSION = 2
  67. # Index extension signatures
  68. TREE_EXTENSION = b"TREE"
  69. REUC_EXTENSION = b"REUC"
  70. UNTR_EXTENSION = b"UNTR"
  71. EOIE_EXTENSION = b"EOIE"
  72. IEOT_EXTENSION = b"IEOT"
  73. def _encode_varint(value: int) -> bytes:
  74. """Encode an integer using variable-width encoding.
  75. Same format as used for OFS_DELTA pack entries and index v4 path compression.
  76. Uses 7 bits per byte, with the high bit indicating continuation.
  77. Args:
  78. value: Integer to encode
  79. Returns:
  80. Encoded bytes
  81. """
  82. if value == 0:
  83. return b"\x00"
  84. result = []
  85. while value > 0:
  86. byte = value & 0x7F # Take lower 7 bits
  87. value >>= 7
  88. if value > 0:
  89. byte |= 0x80 # Set continuation bit
  90. result.append(byte)
  91. return bytes(result)
  92. def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
  93. """Decode a variable-width encoded integer.
  94. Args:
  95. data: Bytes to decode from
  96. offset: Starting offset in data
  97. Returns:
  98. tuple of (decoded_value, new_offset)
  99. """
  100. value = 0
  101. shift = 0
  102. pos = offset
  103. while pos < len(data):
  104. byte = data[pos]
  105. pos += 1
  106. value |= (byte & 0x7F) << shift
  107. shift += 7
  108. if not (byte & 0x80): # No continuation bit
  109. break
  110. return value, pos
  111. def _compress_path(path: bytes, previous_path: bytes) -> bytes:
  112. """Compress a path relative to the previous path for index version 4.
  113. Args:
  114. path: Path to compress
  115. previous_path: Previous path for comparison
  116. Returns:
  117. Compressed path data (varint prefix_len + suffix)
  118. """
  119. # Find the common prefix length
  120. common_len = 0
  121. min_len = min(len(path), len(previous_path))
  122. for i in range(min_len):
  123. if path[i] == previous_path[i]:
  124. common_len += 1
  125. else:
  126. break
  127. # The number of bytes to remove from the end of previous_path
  128. # to get the common prefix
  129. remove_len = len(previous_path) - common_len
  130. # The suffix to append
  131. suffix = path[common_len:]
  132. # Encode: varint(remove_len) + suffix + NUL
  133. return _encode_varint(remove_len) + suffix + b"\x00"
  134. def _decompress_path(
  135. data: bytes, offset: int, previous_path: bytes
  136. ) -> tuple[bytes, int]:
  137. """Decompress a path from index version 4 compressed format.
  138. Args:
  139. data: Raw data containing compressed path
  140. offset: Starting offset in data
  141. previous_path: Previous path for decompression
  142. Returns:
  143. tuple of (decompressed_path, new_offset)
  144. """
  145. # Decode the number of bytes to remove from previous path
  146. remove_len, new_offset = _decode_varint(data, offset)
  147. # Find the NUL terminator for the suffix
  148. suffix_start = new_offset
  149. suffix_end = suffix_start
  150. while suffix_end < len(data) and data[suffix_end] != 0:
  151. suffix_end += 1
  152. if suffix_end >= len(data):
  153. raise ValueError("Unterminated path suffix in compressed entry")
  154. suffix = data[suffix_start:suffix_end]
  155. new_offset = suffix_end + 1 # Skip the NUL terminator
  156. # Reconstruct the path
  157. if remove_len > len(previous_path):
  158. raise ValueError(
  159. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  160. )
  161. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  162. path = prefix + suffix
  163. return path, new_offset
  164. def _decompress_path_from_stream(
  165. f: BinaryIO, previous_path: bytes
  166. ) -> tuple[bytes, int]:
  167. """Decompress a path from index version 4 compressed format, reading from stream.
  168. Args:
  169. f: File-like object to read from
  170. previous_path: Previous path for decompression
  171. Returns:
  172. tuple of (decompressed_path, bytes_consumed)
  173. """
  174. # Decode the varint for remove_len by reading byte by byte
  175. remove_len = 0
  176. shift = 0
  177. bytes_consumed = 0
  178. while True:
  179. byte_data = f.read(1)
  180. if not byte_data:
  181. raise ValueError("Unexpected end of file while reading varint")
  182. byte = byte_data[0]
  183. bytes_consumed += 1
  184. remove_len |= (byte & 0x7F) << shift
  185. shift += 7
  186. if not (byte & 0x80): # No continuation bit
  187. break
  188. # Read the suffix until NUL terminator
  189. suffix = b""
  190. while True:
  191. byte_data = f.read(1)
  192. if not byte_data:
  193. raise ValueError("Unexpected end of file while reading path suffix")
  194. byte = byte_data[0]
  195. bytes_consumed += 1
  196. if byte == 0: # NUL terminator
  197. break
  198. suffix += bytes([byte])
  199. # Reconstruct the path
  200. if remove_len > len(previous_path):
  201. raise ValueError(
  202. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  203. )
  204. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  205. path = prefix + suffix
  206. return path, bytes_consumed
  207. class Stage(Enum):
  208. NORMAL = 0
  209. MERGE_CONFLICT_ANCESTOR = 1
  210. MERGE_CONFLICT_THIS = 2
  211. MERGE_CONFLICT_OTHER = 3
  212. @dataclass
  213. class SerializedIndexEntry:
  214. name: bytes
  215. ctime: Union[int, float, tuple[int, int]]
  216. mtime: Union[int, float, tuple[int, int]]
  217. dev: int
  218. ino: int
  219. mode: int
  220. uid: int
  221. gid: int
  222. size: int
  223. sha: bytes
  224. flags: int
  225. extended_flags: int
  226. def stage(self) -> Stage:
  227. return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
  228. @dataclass
  229. class IndexExtension:
  230. """Base class for index extensions."""
  231. signature: bytes
  232. data: bytes
  233. @classmethod
  234. def from_raw(cls, signature: bytes, data: bytes) -> "IndexExtension":
  235. """Create an extension from raw data.
  236. Args:
  237. signature: 4-byte extension signature
  238. data: Extension data
  239. Returns:
  240. Parsed extension object
  241. """
  242. if signature == TREE_EXTENSION:
  243. return TreeExtension.from_bytes(data)
  244. elif signature == REUC_EXTENSION:
  245. return ResolveUndoExtension.from_bytes(data)
  246. elif signature == UNTR_EXTENSION:
  247. return UntrackedExtension.from_bytes(data)
  248. else:
  249. # Unknown extension - just store raw data
  250. return cls(signature, data)
  251. def to_bytes(self) -> bytes:
  252. """Serialize extension to bytes."""
  253. return self.data
  254. class TreeExtension(IndexExtension):
  255. """Tree cache extension."""
  256. def __init__(self, entries: list[tuple[bytes, bytes, int]]) -> None:
  257. self.entries = entries
  258. super().__init__(TREE_EXTENSION, b"")
  259. @classmethod
  260. def from_bytes(cls, data: bytes) -> "TreeExtension":
  261. # TODO: Implement tree cache parsing
  262. return cls([])
  263. def to_bytes(self) -> bytes:
  264. # TODO: Implement tree cache serialization
  265. return b""
  266. class ResolveUndoExtension(IndexExtension):
  267. """Resolve undo extension for recording merge conflicts."""
  268. def __init__(self, entries: list[tuple[bytes, list[tuple[int, bytes]]]]) -> None:
  269. self.entries = entries
  270. super().__init__(REUC_EXTENSION, b"")
  271. @classmethod
  272. def from_bytes(cls, data: bytes) -> "ResolveUndoExtension":
  273. # TODO: Implement resolve undo parsing
  274. return cls([])
  275. def to_bytes(self) -> bytes:
  276. # TODO: Implement resolve undo serialization
  277. return b""
  278. class UntrackedExtension(IndexExtension):
  279. """Untracked cache extension."""
  280. def __init__(self, data: bytes) -> None:
  281. super().__init__(UNTR_EXTENSION, data)
  282. @classmethod
  283. def from_bytes(cls, data: bytes) -> "UntrackedExtension":
  284. return cls(data)
  285. @dataclass
  286. class IndexEntry:
  287. ctime: Union[int, float, tuple[int, int]]
  288. mtime: Union[int, float, tuple[int, int]]
  289. dev: int
  290. ino: int
  291. mode: int
  292. uid: int
  293. gid: int
  294. size: int
  295. sha: bytes
  296. flags: int = 0
  297. extended_flags: int = 0
  298. @classmethod
  299. def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
  300. return cls(
  301. ctime=serialized.ctime,
  302. mtime=serialized.mtime,
  303. dev=serialized.dev,
  304. ino=serialized.ino,
  305. mode=serialized.mode,
  306. uid=serialized.uid,
  307. gid=serialized.gid,
  308. size=serialized.size,
  309. sha=serialized.sha,
  310. flags=serialized.flags,
  311. extended_flags=serialized.extended_flags,
  312. )
  313. def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
  314. # Clear out any existing stage bits, then set them from the Stage.
  315. new_flags = self.flags & ~FLAG_STAGEMASK
  316. new_flags |= stage.value << FLAG_STAGESHIFT
  317. return SerializedIndexEntry(
  318. name=name,
  319. ctime=self.ctime,
  320. mtime=self.mtime,
  321. dev=self.dev,
  322. ino=self.ino,
  323. mode=self.mode,
  324. uid=self.uid,
  325. gid=self.gid,
  326. size=self.size,
  327. sha=self.sha,
  328. flags=new_flags,
  329. extended_flags=self.extended_flags,
  330. )
  331. def stage(self) -> Stage:
  332. return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
  333. @property
  334. def skip_worktree(self) -> bool:
  335. """Return True if the skip-worktree bit is set in extended_flags."""
  336. return bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
  337. def set_skip_worktree(self, skip: bool = True) -> None:
  338. """Helper method to set or clear the skip-worktree bit in extended_flags.
  339. Also sets FLAG_EXTENDED in self.flags if needed.
  340. """
  341. if skip:
  342. # Turn on the skip-worktree bit
  343. self.extended_flags |= EXTENDED_FLAG_SKIP_WORKTREE
  344. # Also ensure the main 'extended' bit is set in flags
  345. self.flags |= FLAG_EXTENDED
  346. else:
  347. # Turn off the skip-worktree bit
  348. self.extended_flags &= ~EXTENDED_FLAG_SKIP_WORKTREE
  349. # Optionally unset the main extended bit if no extended flags remain
  350. if self.extended_flags == 0:
  351. self.flags &= ~FLAG_EXTENDED
  352. class ConflictedIndexEntry:
  353. """Index entry that represents a conflict."""
  354. ancestor: Optional[IndexEntry]
  355. this: Optional[IndexEntry]
  356. other: Optional[IndexEntry]
  357. def __init__(
  358. self,
  359. ancestor: Optional[IndexEntry] = None,
  360. this: Optional[IndexEntry] = None,
  361. other: Optional[IndexEntry] = None,
  362. ) -> None:
  363. self.ancestor = ancestor
  364. self.this = this
  365. self.other = other
  366. class UnmergedEntries(Exception):
  367. """Unmerged entries exist in the index."""
  368. def pathsplit(path: bytes) -> tuple[bytes, bytes]:
  369. """Split a /-delimited path into a directory part and a basename.
  370. Args:
  371. path: The path to split.
  372. Returns:
  373. Tuple with directory name and basename
  374. """
  375. try:
  376. (dirname, basename) = path.rsplit(b"/", 1)
  377. except ValueError:
  378. return (b"", path)
  379. else:
  380. return (dirname, basename)
  381. def pathjoin(*args: bytes) -> bytes:
  382. """Join a /-delimited path."""
  383. return b"/".join([p for p in args if p])
  384. def read_cache_time(f: BinaryIO) -> tuple[int, int]:
  385. """Read a cache time.
  386. Args:
  387. f: File-like object to read from
  388. Returns:
  389. Tuple with seconds and nanoseconds
  390. """
  391. return struct.unpack(">LL", f.read(8))
  392. def write_cache_time(f: BinaryIO, t: Union[int, float, tuple[int, int]]) -> None:
  393. """Write a cache time.
  394. Args:
  395. f: File-like object to write to
  396. t: Time to write (as int, float or tuple with secs and nsecs)
  397. """
  398. if isinstance(t, int):
  399. t = (t, 0)
  400. elif isinstance(t, float):
  401. (secs, nsecs) = divmod(t, 1.0)
  402. t = (int(secs), int(nsecs * 1000000000))
  403. elif not isinstance(t, tuple):
  404. raise TypeError(t)
  405. f.write(struct.pack(">LL", *t))
def read_cache_entry(
    f: BinaryIO, version: int, previous_path: bytes = b""
) -> SerializedIndexEntry:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index version
      previous_path: Previous entry's path (for version 4 compression)
    Returns:
      A SerializedIndexEntry; the name-length bits are stripped from flags
      since the name is carried explicitly.
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    # Fixed-size portion: six 32-bit fields, the 20-byte SHA, 16-bit flags.
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError("extended flag set in index with version < 3")
        # Extra 16-bit extended-flags word follows (version >= 3 only).
        (extended_flags,) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    if version >= 4:
        # Version 4: paths are always compressed (name_len should be 0)
        name, consumed = _decompress_path_from_stream(f, previous_path)
    else:
        # Versions < 4: regular name reading; the low flag bits hold the length.
        name = f.read(flags & FLAG_NAMEMASK)
    # Padding: entries are NUL-padded to a multiple of 8 bytes
    # (the +8 accounts for the NUL-terminated name storage).
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return SerializedIndexEntry(
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        flags & ~FLAG_NAMEMASK,  # drop name-length bits; name is explicit above
        extended_flags,
    )
def write_cache_entry(
    f: BinaryIO, entry: SerializedIndexEntry, version: int, previous_path: bytes = b""
) -> None:
    """Write an index entry to a file.

    Args:
      f: File object
      entry: IndexEntry to write
      version: Index format version
      previous_path: Previous entry's path (for version 4 compression)
    Raises:
      AssertionError: if extended flags are needed but version < 3
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    if version >= 4:
        # Version 4: use compression but set name_len to actual filename length
        # This matches how C Git implements index v4 flags
        compressed_path = _compress_path(entry.name, previous_path)
        flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
    else:
        # Versions < 4: include actual name length
        flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
    if entry.extended_flags:
        # Mark that an extended-flags word follows the fixed fields.
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError("unable to use extended flags in version < 3")
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            entry.dev & 0xFFFFFFFF,  # truncated to 32 bits as the format requires
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    if version >= 4:
        # Version 4: always write compressed path
        f.write(compressed_path)
    else:
        # Versions < 4: write regular path and padding
        f.write(entry.name)
        # NUL-pad to a multiple of 8 bytes (the +8 covers the
        # NUL-terminated name storage).
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
  506. class UnsupportedIndexFormat(Exception):
  507. """An unsupported index format was encountered."""
  508. def __init__(self, version: int) -> None:
  509. self.index_format_version = version
  510. def read_index_header(f: BinaryIO) -> tuple[int, int]:
  511. """Read an index header from a file.
  512. Returns:
  513. tuple of (version, num_entries)
  514. """
  515. header = f.read(4)
  516. if header != b"DIRC":
  517. raise AssertionError(f"Invalid index file header: {header!r}")
  518. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  519. if version not in (1, 2, 3, 4):
  520. raise UnsupportedIndexFormat(version)
  521. return version, num_entries
  522. def write_index_extension(f: BinaryIO, extension: IndexExtension) -> None:
  523. """Write an index extension.
  524. Args:
  525. f: File-like object to write to
  526. extension: Extension to write
  527. """
  528. data = extension.to_bytes()
  529. f.write(extension.signature)
  530. f.write(struct.pack(">I", len(data)))
  531. f.write(data)
  532. def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
  533. """Read an index file, yielding the individual entries."""
  534. version, num_entries = read_index_header(f)
  535. previous_path = b""
  536. for i in range(num_entries):
  537. entry = read_cache_entry(f, version, previous_path)
  538. previous_path = entry.name
  539. yield entry
def read_index_dict_with_version(
    f: BinaryIO,
) -> tuple[dict[bytes, Union[IndexEntry, ConflictedIndexEntry]], int]:
    """Read an index file and return it as a dictionary along with the version.

    Conflicted paths collapse into a single ConflictedIndexEntry holding
    the individual merge stages.

    Returns:
      tuple of (entries_dict, version)
    """
    version, num_entries = read_index_header(f)
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    previous_path = b""
    for i in range(num_entries):
        entry = read_cache_entry(f, version, previous_path)
        previous_path = entry.name
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate each stage into one entry.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret, version
def read_index_dict(
    f: BinaryIO,
) -> dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]:
    """Read an index file and return it as a dictionary.

    Keys are paths; a conflicted path maps to a single ConflictedIndexEntry
    that carries its individual merge stages.

    Args:
      f: File object to read from.
    """
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    for entry in read_index(f):
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate each stage into one entry.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret
def write_index(
    f: BinaryIO, entries: list[SerializedIndexEntry], version: Optional[int] = None
) -> None:
    """Write an index file.

    Args:
      f: File-like object to write to
      entries: List of entries to write, in on-disk order
      version: Version number to write (None means DEFAULT_VERSION; bumped
        to 3 automatically when any entry uses extended flags)
    """
    if version is None:
        version = DEFAULT_VERSION
    # STEP 1: check if any extended_flags are set
    uses_extended_flags = any(e.extended_flags != 0 for e in entries)
    if uses_extended_flags and version < 3:
        # Force or bump the version to 3
        version = 3
    # The rest is unchanged, but you might insert a final check:
    if version < 3:
        # Double-check no extended flags appear
        for e in entries:
            if e.extended_flags != 0:
                raise AssertionError("Attempt to use extended flags in index < v3")
    # Proceed with the existing code to write the header and entries.
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    # Each entry's path may be delta-compressed against the previous one (v4).
    previous_path = b""
    for entry in entries:
        write_cache_entry(f, entry, version=version, previous_path=previous_path)
        previous_path = entry.name
def write_index_dict(
    f: BinaryIO,
    entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary.

    Entries are written sorted by path; a conflicted entry expands into
    its ancestor/this/other stages, in that order.
    """
    entries_list = []
    for key in sorted(entries):
        value = entries[key]
        if isinstance(value, ConflictedIndexEntry):
            if value.ancestor is not None:
                entries_list.append(
                    value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)
                )
            if value.this is not None:
                entries_list.append(
                    value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)
                )
            if value.other is not None:
                entries_list.append(
                    value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)
                )
        else:
            entries_list.append(value.serialize(key, Stage.NORMAL))
    write_index(f, entries_list, version=version)
  648. def cleanup_mode(mode: int) -> int:
  649. """Cleanup a mode value.
  650. This will return a mode that can be stored in a tree object.
  651. Args:
  652. mode: Mode to clean up.
  653. Returns:
  654. mode
  655. """
  656. if stat.S_ISLNK(mode):
  657. return stat.S_IFLNK
  658. elif stat.S_ISDIR(mode):
  659. return stat.S_IFDIR
  660. elif S_ISGITLINK(mode):
  661. return S_IFGITLINK
  662. ret = stat.S_IFREG | 0o644
  663. if mode & 0o100:
  664. ret |= 0o111
  665. return ret
  666. class Index:
  667. """A Git Index file."""
  668. _byname: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]
    def __init__(
        self,
        filename: Union[bytes, str, os.PathLike],
        read: bool = True,
        skip_hash: bool = False,
        version: Optional[int] = None,
    ) -> None:
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
          skip_hash: Whether to skip SHA1 hash when writing (for manyfiles feature)
          version: Index format version to use (None = auto-detect from file or use default)
        """
        self._filename = os.fspath(filename)
        # TODO(jelmer): Store the version returned by read_index
        self._version = version
        self._skip_hash = skip_hash
        # Start from an empty entry map; read() then populates it if requested.
        self.clear()
        if read:
            self.read()
  690. @property
  691. def path(self) -> Union[bytes, str]:
  692. return self._filename
  693. def __repr__(self) -> str:
  694. return f"{self.__class__.__name__}({self._filename!r})"
  695. def write(self) -> None:
  696. """Write current contents of index to disk."""
  697. from typing import BinaryIO, cast
  698. f = GitFile(self._filename, "wb")
  699. try:
  700. if self._skip_hash:
  701. # When skipHash is enabled, write the index without computing SHA1
  702. write_index_dict(cast(BinaryIO, f), self._byname, version=self._version)
  703. # Write 20 zero bytes instead of SHA1
  704. f.write(b"\x00" * 20)
  705. f.close()
  706. else:
  707. sha1_writer = SHA1Writer(cast(BinaryIO, f))
  708. write_index_dict(
  709. cast(BinaryIO, sha1_writer), self._byname, version=self._version
  710. )
  711. sha1_writer.close()
  712. except:
  713. f.close()
  714. raise
  715. def read(self) -> None:
  716. """Read current contents of index from disk."""
  717. if not os.path.exists(self._filename):
  718. return
  719. f = GitFile(self._filename, "rb")
  720. try:
  721. sha1_reader = SHA1Reader(f)
  722. entries, version = read_index_dict_with_version(cast(BinaryIO, sha1_reader))
  723. self._version = version
  724. self.update(entries)
  725. # Read any remaining data before the SHA
  726. remaining = os.path.getsize(self._filename) - sha1_reader.tell() - 20
  727. if remaining > 0:
  728. sha1_reader.read(remaining)
  729. sha1_reader.check_sha(allow_empty=True)
  730. finally:
  731. f.close()
  732. def __len__(self) -> int:
  733. """Number of entries in this index file."""
  734. return len(self._byname)
  735. def __getitem__(self, key: bytes) -> Union[IndexEntry, ConflictedIndexEntry]:
  736. """Retrieve entry by relative path and stage.
  737. Returns: Either a IndexEntry or a ConflictedIndexEntry
  738. Raises KeyError: if the entry does not exist
  739. """
  740. return self._byname[key]
  741. def __iter__(self) -> Iterator[bytes]:
  742. """Iterate over the paths and stages in this index."""
  743. return iter(self._byname)
  744. def __contains__(self, key: bytes) -> bool:
  745. return key in self._byname
  746. def get_sha1(self, path: bytes) -> bytes:
  747. """Return the (git object) SHA1 for the object at a path."""
  748. value = self[path]
  749. if isinstance(value, ConflictedIndexEntry):
  750. raise UnmergedEntries
  751. return value.sha
  752. def get_mode(self, path: bytes) -> int:
  753. """Return the POSIX file mode for the object at a path."""
  754. value = self[path]
  755. if isinstance(value, ConflictedIndexEntry):
  756. raise UnmergedEntries
  757. return value.mode
  758. def iterobjects(self) -> Iterable[tuple[bytes, bytes, int]]:
  759. """Iterate over path, sha, mode tuples for use with commit_tree."""
  760. for path in self:
  761. entry = self[path]
  762. if isinstance(entry, ConflictedIndexEntry):
  763. raise UnmergedEntries
  764. yield path, entry.sha, cleanup_mode(entry.mode)
  765. def has_conflicts(self) -> bool:
  766. for value in self._byname.values():
  767. if isinstance(value, ConflictedIndexEntry):
  768. return True
  769. return False
  770. def clear(self) -> None:
  771. """Remove all contents from this index."""
  772. self._byname = {}
  773. def __setitem__(
  774. self, name: bytes, value: Union[IndexEntry, ConflictedIndexEntry]
  775. ) -> None:
  776. assert isinstance(name, bytes)
  777. self._byname[name] = value
  778. def __delitem__(self, name: bytes) -> None:
  779. del self._byname[name]
  780. def iteritems(
  781. self,
  782. ) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
  783. return iter(self._byname.items())
  784. def items(self) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
  785. return iter(self._byname.items())
  786. def update(
  787. self, entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]
  788. ) -> None:
  789. for key, value in entries.items():
  790. self[key] = value
  791. def paths(self) -> Generator[bytes, None, None]:
  792. yield from self._byname.keys()
  793. def changes_from_tree(
  794. self,
  795. object_store: ObjectContainer,
  796. tree: ObjectID,
  797. want_unchanged: bool = False,
  798. ) -> Generator[
  799. tuple[
  800. tuple[Optional[bytes], Optional[bytes]],
  801. tuple[Optional[int], Optional[int]],
  802. tuple[Optional[bytes], Optional[bytes]],
  803. ],
  804. None,
  805. None,
  806. ]:
  807. """Find the differences between the contents of this index and a tree.
  808. Args:
  809. object_store: Object store to use for retrieving tree contents
  810. tree: SHA1 of the root tree
  811. want_unchanged: Whether unchanged files should be reported
  812. Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
  813. newmode), (oldsha, newsha)
  814. """
  815. def lookup_entry(path: bytes) -> tuple[bytes, int]:
  816. entry = self[path]
  817. if hasattr(entry, "sha") and hasattr(entry, "mode"):
  818. return entry.sha, cleanup_mode(entry.mode)
  819. else:
  820. # Handle ConflictedIndexEntry case
  821. return b"", 0
  822. yield from changes_from_tree(
  823. self.paths(),
  824. lookup_entry,
  825. object_store,
  826. tree,
  827. want_unchanged=want_unchanged,
  828. )
  829. def commit(self, object_store: ObjectContainer) -> bytes:
  830. """Create a new tree from an index.
  831. Args:
  832. object_store: Object store to save the tree in
  833. Returns:
  834. Root tree SHA
  835. """
  836. return commit_tree(object_store, self.iterobjects())
  837. def commit_tree(
  838. object_store: ObjectContainer, blobs: Iterable[tuple[bytes, bytes, int]]
  839. ) -> bytes:
  840. """Commit a new tree.
  841. Args:
  842. object_store: Object store to add trees to
  843. blobs: Iterable over blob path, sha, mode entries
  844. Returns:
  845. SHA1 of the created tree.
  846. """
  847. trees: dict[bytes, Any] = {b"": {}}
  848. def add_tree(path: bytes) -> dict[bytes, Any]:
  849. if path in trees:
  850. return trees[path]
  851. dirname, basename = pathsplit(path)
  852. t = add_tree(dirname)
  853. assert isinstance(basename, bytes)
  854. newtree: dict[bytes, Any] = {}
  855. t[basename] = newtree
  856. trees[path] = newtree
  857. return newtree
  858. for path, sha, mode in blobs:
  859. tree_path, basename = pathsplit(path)
  860. tree = add_tree(tree_path)
  861. tree[basename] = (mode, sha)
  862. def build_tree(path: bytes) -> bytes:
  863. tree = Tree()
  864. for basename, entry in trees[path].items():
  865. if isinstance(entry, dict):
  866. mode = stat.S_IFDIR
  867. sha = build_tree(pathjoin(path, basename))
  868. else:
  869. (mode, sha) = entry
  870. tree.add(basename, mode, sha)
  871. object_store.add_object(tree)
  872. return tree.id
  873. return build_tree(b"")
  874. def commit_index(object_store: ObjectContainer, index: Index) -> bytes:
  875. """Create a new tree from an index.
  876. Args:
  877. object_store: Object store to save the tree in
  878. index: Index file
  879. Note: This function is deprecated, use index.commit() instead.
  880. Returns: Root tree sha.
  881. """
  882. return commit_tree(object_store, index.iterobjects())
  883. def changes_from_tree(
  884. names: Iterable[bytes],
  885. lookup_entry: Callable[[bytes], tuple[bytes, int]],
  886. object_store: ObjectContainer,
  887. tree: Optional[bytes],
  888. want_unchanged: bool = False,
  889. ) -> Iterable[
  890. tuple[
  891. tuple[Optional[bytes], Optional[bytes]],
  892. tuple[Optional[int], Optional[int]],
  893. tuple[Optional[bytes], Optional[bytes]],
  894. ]
  895. ]:
  896. """Find the differences between the contents of a tree and
  897. a working copy.
  898. Args:
  899. names: Iterable of names in the working copy
  900. lookup_entry: Function to lookup an entry in the working copy
  901. object_store: Object store to use for retrieving tree contents
  902. tree: SHA1 of the root tree, or None for an empty tree
  903. want_unchanged: Whether unchanged files should be reported
  904. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  905. (oldsha, newsha)
  906. """
  907. # TODO(jelmer): Support a include_trees option
  908. other_names = set(names)
  909. if tree is not None:
  910. for name, mode, sha in iter_tree_contents(object_store, tree):
  911. try:
  912. (other_sha, other_mode) = lookup_entry(name)
  913. except KeyError:
  914. # Was removed
  915. yield ((name, None), (mode, None), (sha, None))
  916. else:
  917. other_names.remove(name)
  918. if want_unchanged or other_sha != sha or other_mode != mode:
  919. yield ((name, name), (mode, other_mode), (sha, other_sha))
  920. # Mention added files
  921. for name in other_names:
  922. try:
  923. (other_sha, other_mode) = lookup_entry(name)
  924. except KeyError:
  925. pass
  926. else:
  927. yield ((None, name), (None, other_mode), (None, other_sha))
  928. def index_entry_from_stat(
  929. stat_val: os.stat_result,
  930. hex_sha: bytes,
  931. mode: Optional[int] = None,
  932. ) -> IndexEntry:
  933. """Create a new index entry from a stat value.
  934. Args:
  935. stat_val: POSIX stat_result instance
  936. hex_sha: Hex sha of the object
  937. """
  938. if mode is None:
  939. mode = cleanup_mode(stat_val.st_mode)
  940. return IndexEntry(
  941. ctime=stat_val.st_ctime,
  942. mtime=stat_val.st_mtime,
  943. dev=stat_val.st_dev,
  944. ino=stat_val.st_ino,
  945. mode=mode,
  946. uid=stat_val.st_uid,
  947. gid=stat_val.st_gid,
  948. size=stat_val.st_size,
  949. sha=hex_sha,
  950. flags=0,
  951. extended_flags=0,
  952. )
if sys.platform == "win32":
    # On Windows, creating symlinks either requires administrator privileges
    # or developer mode. Raise a more helpful error when we're unable to
    # create symlinks
    # https://github.com/jelmer/dulwich/issues/1005
    class WindowsSymlinkPermissionError(PermissionError):
        # Wraps PermissionError with a hint about enabling developer mode.
        def __init__(self, errno: int, msg: str, filename: Optional[str]) -> None:
            # Call PermissionError's *parent* initializer so the custom
            # message is stored rather than PermissionError's default one.
            super(PermissionError, self).__init__(
                errno,
                f"Unable to create symlink; do you have developer mode enabled? {msg}",
                filename,
            )

    def symlink(
        src: Union[str, bytes],
        dst: Union[str, bytes],
        target_is_directory: bool = False,
        *,
        dir_fd: Optional[int] = None,
    ) -> None:
        # Drop-in replacement for os.symlink that re-raises PermissionError
        # as the more descriptive WindowsSymlinkPermissionError.
        try:
            return os.symlink(
                src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
            )
        except PermissionError as e:
            raise WindowsSymlinkPermissionError(
                e.errno or 0, e.strerror or "", e.filename
            ) from e
else:
    # On other platforms os.symlink works as-is.
    symlink = os.symlink
  982. def build_file_from_blob(
  983. blob: Blob,
  984. mode: int,
  985. target_path: bytes,
  986. *,
  987. honor_filemode: bool = True,
  988. tree_encoding: str = "utf-8",
  989. symlink_fn: Optional[Callable] = None,
  990. ) -> os.stat_result:
  991. """Build a file or symlink on disk based on a Git object.
  992. Args:
  993. blob: The git object
  994. mode: File mode
  995. target_path: Path to write to
  996. honor_filemode: An optional flag to honor core.filemode setting in
  997. config file, default is core.filemode=True, change executable bit
  998. symlink_fn: Function to use for creating symlinks
  999. Returns: stat object for the file
  1000. """
  1001. try:
  1002. oldstat = os.lstat(target_path)
  1003. except FileNotFoundError:
  1004. oldstat = None
  1005. contents = blob.as_raw_string()
  1006. if stat.S_ISLNK(mode):
  1007. if oldstat:
  1008. os.unlink(target_path)
  1009. if sys.platform == "win32":
  1010. # os.readlink on Python3 on Windows requires a unicode string.
  1011. contents_str = contents.decode(tree_encoding)
  1012. target_path_str = target_path.decode(tree_encoding)
  1013. (symlink_fn or symlink)(contents_str, target_path_str)
  1014. else:
  1015. (symlink_fn or symlink)(contents, target_path)
  1016. else:
  1017. if oldstat is not None and oldstat.st_size == len(contents):
  1018. with open(target_path, "rb") as f:
  1019. if f.read() == contents:
  1020. return oldstat
  1021. with open(target_path, "wb") as f:
  1022. # Write out file
  1023. f.write(contents)
  1024. if honor_filemode:
  1025. os.chmod(target_path, mode)
  1026. return os.lstat(target_path)
  1027. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  1028. def validate_path_element_default(element: bytes) -> bool:
  1029. return element.lower() not in INVALID_DOTNAMES
  1030. def validate_path_element_ntfs(element: bytes) -> bool:
  1031. stripped = element.rstrip(b". ").lower()
  1032. if stripped in INVALID_DOTNAMES:
  1033. return False
  1034. if stripped == b"git~1":
  1035. return False
  1036. return True
  1037. def validate_path(
  1038. path: bytes,
  1039. element_validator: Callable[[bytes], bool] = validate_path_element_default,
  1040. ) -> bool:
  1041. """Default path validator that just checks for .git/."""
  1042. parts = path.split(b"/")
  1043. for p in parts:
  1044. if not element_validator(p):
  1045. return False
  1046. else:
  1047. return True
def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: ObjectContainer,
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
    symlink_fn: Optional[Callable] = None,
) -> None:
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.
      symlink_fn: Function to use for creating symlinks

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for entry in iter_tree_contents(object_store, tree_id):
        # Silently skip paths that fail validation (e.g. contain ".git").
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)
        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))
        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            # Submodule: only create the directory; its contents are not
            # checked out here.
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            assert isinstance(obj, Blob)
            st = build_file_from_blob(
                obj,
                entry.mode,
                full_path,
                honor_filemode=honor_filemode,
                symlink_fn=symlink_fn,
            )
        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        # default to a stage 0 index entry (normal)
        # when reading from the filesystem
        index[entry.path] = index_entry_from_stat(st, entry.sha)
    index.write()
  1117. def blob_from_path_and_mode(
  1118. fs_path: bytes, mode: int, tree_encoding: str = "utf-8"
  1119. ) -> Blob:
  1120. """Create a blob from a path and a stat object.
  1121. Args:
  1122. fs_path: Full file system path to file
  1123. mode: File mode
  1124. Returns: A `Blob` object
  1125. """
  1126. assert isinstance(fs_path, bytes)
  1127. blob = Blob()
  1128. if stat.S_ISLNK(mode):
  1129. if sys.platform == "win32":
  1130. # os.readlink on Python3 on Windows requires a unicode string.
  1131. blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
  1132. else:
  1133. blob.data = os.readlink(fs_path)
  1134. else:
  1135. with open(fs_path, "rb") as f:
  1136. blob.data = f.read()
  1137. return blob
  1138. def blob_from_path_and_stat(
  1139. fs_path: bytes, st: os.stat_result, tree_encoding: str = "utf-8"
  1140. ) -> Blob:
  1141. """Create a blob from a path and a stat object.
  1142. Args:
  1143. fs_path: Full file system path to file
  1144. st: A stat object
  1145. Returns: A `Blob` object
  1146. """
  1147. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
  1148. def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
  1149. """Read the head commit of a submodule.
  1150. Args:
  1151. path: path to the submodule
  1152. Returns: HEAD sha, None if not a valid head/repository
  1153. """
  1154. from .errors import NotGitRepository
  1155. from .repo import Repo
  1156. # Repo currently expects a "str", so decode if necessary.
  1157. # TODO(jelmer): Perhaps move this into Repo() ?
  1158. if not isinstance(path, str):
  1159. path = os.fsdecode(path)
  1160. try:
  1161. repo = Repo(path)
  1162. except NotGitRepository:
  1163. return None
  1164. try:
  1165. return repo.head()
  1166. except KeyError:
  1167. return None
  1168. def _has_directory_changed(tree_path: bytes, entry: IndexEntry) -> bool:
  1169. """Check if a directory has changed after getting an error.
  1170. When handling an error trying to create a blob from a path, call this
  1171. function. It will check if the path is a directory. If it's a directory
  1172. and a submodule, check the submodule head to see if it's has changed. If
  1173. not, consider the file as changed as Git tracked a file and not a
  1174. directory.
  1175. Return true if the given path should be considered as changed and False
  1176. otherwise or if the path is not a directory.
  1177. """
  1178. # This is actually a directory
  1179. if os.path.exists(os.path.join(tree_path, b".git")):
  1180. # Submodule
  1181. head = read_submodule_head(tree_path)
  1182. if entry.sha != head:
  1183. return True
  1184. else:
  1185. # The file was changed to a directory, so consider it removed.
  1186. return True
  1187. return False
  1188. def update_working_tree(
  1189. repo: "BaseRepo",
  1190. old_tree_id: Optional[bytes],
  1191. new_tree_id: bytes,
  1192. honor_filemode: bool = True,
  1193. validate_path_element: Optional[Callable[[bytes], bool]] = None,
  1194. symlink_fn: Optional[Callable] = None,
  1195. force_remove_untracked: bool = False,
  1196. ) -> None:
  1197. """Update the working tree and index to match a new tree.
  1198. This function handles:
  1199. - Adding new files
  1200. - Updating modified files
  1201. - Removing deleted files
  1202. - Cleaning up empty directories
  1203. Args:
  1204. repo: Repository object
  1205. old_tree_id: SHA of the tree before the update
  1206. new_tree_id: SHA of the tree to update to
  1207. honor_filemode: An optional flag to honor core.filemode setting
  1208. validate_path_element: Function to validate path elements to check out
  1209. symlink_fn: Function to use for creating symlinks
  1210. force_remove_untracked: If True, remove files that exist in working
  1211. directory but not in target tree, even if old_tree_id is None
  1212. """
  1213. import os
  1214. # Set default validate_path_element if not provided
  1215. if validate_path_element is None:
  1216. validate_path_element = validate_path_element_default
  1217. # Get the trees
  1218. old_tree = repo[old_tree_id] if old_tree_id else None
  1219. repo[new_tree_id]
  1220. # Open the index
  1221. index = repo.open_index()
  1222. # Track which paths we've dealt with
  1223. handled_paths = set()
  1224. # Get repo path as string for comparisons
  1225. if not hasattr(repo, "path"):
  1226. raise ValueError("Repository must have a path attribute")
  1227. repo_path_str = repo.path if isinstance(repo.path, str) else repo.path.decode()
  1228. # First, update/add all files in the new tree
  1229. for entry in iter_tree_contents(repo.object_store, new_tree_id):
  1230. handled_paths.add(entry.path)
  1231. # Skip .git directory
  1232. if entry.path.startswith(b".git"):
  1233. continue
  1234. # Validate path element
  1235. if not validate_path(entry.path, validate_path_element):
  1236. continue
  1237. # Build full path
  1238. full_path = os.path.join(repo_path_str, entry.path.decode())
  1239. # Get the blob
  1240. blob_obj = repo.object_store[entry.sha]
  1241. if not isinstance(blob_obj, Blob):
  1242. raise ValueError(f"Object {entry.sha!r} is not a blob")
  1243. # Ensure parent directory exists
  1244. parent_dir = os.path.dirname(full_path)
  1245. if parent_dir and not os.path.exists(parent_dir):
  1246. os.makedirs(parent_dir)
  1247. # Write the file
  1248. st = build_file_from_blob(
  1249. blob_obj,
  1250. entry.mode,
  1251. full_path.encode(),
  1252. honor_filemode=honor_filemode,
  1253. symlink_fn=symlink_fn,
  1254. )
  1255. # Update index
  1256. index[entry.path] = index_entry_from_stat(st, entry.sha)
  1257. # Remove files that existed in old tree but not in new tree
  1258. if old_tree:
  1259. for entry in iter_tree_contents(repo.object_store, old_tree_id):
  1260. if entry.path not in handled_paths:
  1261. # Skip .git directory
  1262. if entry.path.startswith(b".git"):
  1263. continue
  1264. # File was deleted
  1265. full_path = os.path.join(repo_path_str, entry.path.decode())
  1266. # Remove from working tree
  1267. if os.path.exists(full_path):
  1268. os.remove(full_path)
  1269. # Remove from index
  1270. if entry.path in index:
  1271. del index[entry.path]
  1272. # Clean up empty directories
  1273. dir_path = os.path.dirname(full_path)
  1274. while (
  1275. dir_path and dir_path != repo_path_str and os.path.exists(dir_path)
  1276. ):
  1277. try:
  1278. if not os.listdir(dir_path):
  1279. os.rmdir(dir_path)
  1280. dir_path = os.path.dirname(dir_path)
  1281. else:
  1282. break
  1283. except OSError:
  1284. break
  1285. # If force_remove_untracked is True, remove any files in working directory
  1286. # that are not in the target tree (useful for reset --hard)
  1287. if force_remove_untracked:
  1288. # Walk through all files in the working directory
  1289. for root, dirs, files in os.walk(repo_path_str):
  1290. # Skip .git directory
  1291. if ".git" in dirs:
  1292. dirs.remove(".git")
  1293. for file in files:
  1294. full_path = os.path.join(root, file)
  1295. # Get relative path from repo root
  1296. rel_path = os.path.relpath(full_path, repo_path_str)
  1297. # Normalize to use forward slashes like Git does internally
  1298. rel_path = rel_path.replace(os.sep, "/")
  1299. rel_path_bytes = rel_path.encode()
  1300. # If this file is not in the target tree, remove it
  1301. if rel_path_bytes not in handled_paths:
  1302. os.remove(full_path)
  1303. # Remove from index if present
  1304. if rel_path_bytes in index:
  1305. del index[rel_path_bytes]
  1306. # Clean up empty directories
  1307. for root, dirs, files in os.walk(repo_path_str, topdown=False):
  1308. if ".git" in root:
  1309. continue
  1310. if root != repo_path_str and not files and not dirs:
  1311. try:
  1312. os.rmdir(root)
  1313. except OSError:
  1314. pass
  1315. # Write the updated index
  1316. index.write()
def get_unstaged_changes(
    index: Index,
    root_path: Union[str, bytes],
    filter_blob_callback: Optional[Callable] = None,
) -> Generator[bytes, None, None]:
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
      filter_blob_callback: optional callable applied to each blob before
        comparison; receives (blob, tree_path) and returns the blob to use
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        if isinstance(entry, ConflictedIndexEntry):
            # Conflicted files are always unstaged
            yield tree_path
            continue
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                # Path is now a directory: changed unless it is a submodule
                # whose HEAD still matches the recorded sha.
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue
            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                # Special files (sockets, FIFOs, ...) are skipped entirely.
                continue
            blob = blob_from_path_and_stat(full_path, st)
            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            # Changed iff the recomputed blob id differs from the index sha.
            if blob.id != entry.sha:
                yield tree_path
  1355. os_sep_bytes = os.sep.encode("ascii")
  1356. def _tree_to_fs_path(root_path: bytes, tree_path: bytes) -> bytes:
  1357. """Convert a git tree path to a file system path.
  1358. Args:
  1359. root_path: Root filesystem path
  1360. tree_path: Git tree path as bytes
  1361. Returns: File system path.
  1362. """
  1363. assert isinstance(tree_path, bytes)
  1364. if os_sep_bytes != b"/":
  1365. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  1366. else:
  1367. sep_corrected_path = tree_path
  1368. return os.path.join(root_path, sep_corrected_path)
  1369. def _fs_to_tree_path(fs_path: Union[str, bytes]) -> bytes:
  1370. """Convert a file system path to a git tree path.
  1371. Args:
  1372. fs_path: File system path.
  1373. Returns: Git tree path as bytes
  1374. """
  1375. if not isinstance(fs_path, bytes):
  1376. fs_path_bytes = os.fsencode(fs_path)
  1377. else:
  1378. fs_path_bytes = fs_path
  1379. if os_sep_bytes != b"/":
  1380. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  1381. else:
  1382. tree_path = fs_path_bytes
  1383. return tree_path
  1384. def index_entry_from_directory(st: os.stat_result, path: bytes) -> Optional[IndexEntry]:
  1385. if os.path.exists(os.path.join(path, b".git")):
  1386. head = read_submodule_head(path)
  1387. if head is None:
  1388. return None
  1389. return index_entry_from_stat(st, head, mode=S_IFGITLINK)
  1390. return None
  1391. def index_entry_from_path(
  1392. path: bytes, object_store: Optional[ObjectContainer] = None
  1393. ) -> Optional[IndexEntry]:
  1394. """Create an index from a filesystem path.
  1395. This returns an index value for files, symlinks
  1396. and tree references. for directories and
  1397. non-existent files it returns None
  1398. Args:
  1399. path: Path to create an index entry for
  1400. object_store: Optional object store to
  1401. save new blobs in
  1402. Returns: An index entry; None for directories
  1403. """
  1404. assert isinstance(path, bytes)
  1405. st = os.lstat(path)
  1406. if stat.S_ISDIR(st.st_mode):
  1407. return index_entry_from_directory(st, path)
  1408. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  1409. blob = blob_from_path_and_stat(path, st)
  1410. if object_store is not None:
  1411. object_store.add_object(blob)
  1412. return index_entry_from_stat(st, blob.id)
  1413. return None
  1414. def iter_fresh_entries(
  1415. paths: Iterable[bytes],
  1416. root_path: bytes,
  1417. object_store: Optional[ObjectContainer] = None,
  1418. ) -> Iterator[tuple[bytes, Optional[IndexEntry]]]:
  1419. """Iterate over current versions of index entries on disk.
  1420. Args:
  1421. paths: Paths to iterate over
  1422. root_path: Root path to access from
  1423. object_store: Optional store to save new blobs in
  1424. Returns: Iterator over path, index_entry
  1425. """
  1426. for path in paths:
  1427. p = _tree_to_fs_path(root_path, path)
  1428. try:
  1429. entry = index_entry_from_path(p, object_store=object_store)
  1430. except (FileNotFoundError, IsADirectoryError):
  1431. entry = None
  1432. yield path, entry
  1433. def iter_fresh_objects(
  1434. paths: Iterable[bytes],
  1435. root_path: bytes,
  1436. include_deleted: bool = False,
  1437. object_store: Optional[ObjectContainer] = None,
  1438. ) -> Iterator[tuple[bytes, Optional[bytes], Optional[int]]]:
  1439. """Iterate over versions of objects on disk referenced by index.
  1440. Args:
  1441. root_path: Root path to access from
  1442. include_deleted: Include deleted entries with sha and
  1443. mode set to None
  1444. object_store: Optional object store to report new items to
  1445. Returns: Iterator over path, sha, mode
  1446. """
  1447. for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
  1448. if entry is None:
  1449. if include_deleted:
  1450. yield path, None, None
  1451. else:
  1452. yield path, entry.sha, cleanup_mode(entry.mode)
  1453. def refresh_index(index: Index, root_path: bytes) -> None:
  1454. """Refresh the contents of an index.
  1455. This is the equivalent to running 'git commit -a'.
  1456. Args:
  1457. index: Index to update
  1458. root_path: Root filesystem path
  1459. """
  1460. for path, entry in iter_fresh_entries(index, root_path):
  1461. if entry:
  1462. index[path] = entry
class locked_index:
    """Lock the index while making modifications.

    Works as a context manager.
    """

    # Lock-file handle held for the duration of the context.
    _file: "_GitFile"

    def __init__(self, path: Union[bytes, str]) -> None:
        self._path = path

    def __enter__(self) -> Index:
        # Opening with GitFile in "wb" mode acquires the lock; writes are
        # buffered until the lock file is committed on successful exit.
        self._file = GitFile(self._path, "wb")
        self._index = Index(self._path)
        return self._index

    def __exit__(
        self,
        exc_type: Optional[type],
        exc_value: Optional[BaseException],
        traceback: Optional[types.TracebackType],
    ) -> None:
        if exc_type is not None:
            # The with-body raised: discard changes and release the lock.
            self._file.abort()
            return
        try:
            from typing import BinaryIO, cast

            f = SHA1Writer(cast(BinaryIO, self._file))
            write_index_dict(cast(BinaryIO, f), self._index._byname)
        except BaseException:
            # NOTE(review): a failure while writing aborts the lock file but
            # is then swallowed (no re-raise) — confirm this is intentional.
            self._file.abort()
        else:
            f.close()