index.py 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Parser for the git index file format."""
  22. import errno
  23. import os
  24. import shutil
  25. import stat
  26. import struct
  27. import sys
  28. import types
  29. from collections.abc import Generator, Iterable, Iterator
  30. from dataclasses import dataclass
  31. from enum import Enum
  32. from typing import (
  33. TYPE_CHECKING,
  34. Any,
  35. BinaryIO,
  36. Callable,
  37. Optional,
  38. Union,
  39. cast,
  40. )
  41. if TYPE_CHECKING:
  42. from .diff_tree import TreeChange
  43. from .file import _GitFile
  44. from .line_ending import BlobNormalizer
  45. from .repo import Repo
  46. from .file import GitFile
  47. from .object_store import iter_tree_contents
  48. from .objects import (
  49. S_IFGITLINK,
  50. S_ISGITLINK,
  51. Blob,
  52. ObjectID,
  53. Tree,
  54. hex_to_sha,
  55. sha_to_hex,
  56. )
  57. from .pack import ObjectContainer, SHA1Reader, SHA1Writer
# 2-bit stage (during merge)
FLAG_STAGEMASK = 0x3000
FLAG_STAGESHIFT = 12
# Lower 12 bits of the flags word hold the path length (0xFFF means "long").
FLAG_NAMEMASK = 0x0FFF

# assume-valid
FLAG_VALID = 0x8000
# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

# Index format version written when the caller does not request one.
DEFAULT_VERSION = 2

# Index extension signatures
TREE_EXTENSION = b"TREE"
REUC_EXTENSION = b"REUC"
UNTR_EXTENSION = b"UNTR"
EOIE_EXTENSION = b"EOIE"
IEOT_EXTENSION = b"IEOT"
  77. def _encode_varint(value: int) -> bytes:
  78. """Encode an integer using variable-width encoding.
  79. Same format as used for OFS_DELTA pack entries and index v4 path compression.
  80. Uses 7 bits per byte, with the high bit indicating continuation.
  81. Args:
  82. value: Integer to encode
  83. Returns:
  84. Encoded bytes
  85. """
  86. if value == 0:
  87. return b"\x00"
  88. result = []
  89. while value > 0:
  90. byte = value & 0x7F # Take lower 7 bits
  91. value >>= 7
  92. if value > 0:
  93. byte |= 0x80 # Set continuation bit
  94. result.append(byte)
  95. return bytes(result)
  96. def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
  97. """Decode a variable-width encoded integer.
  98. Args:
  99. data: Bytes to decode from
  100. offset: Starting offset in data
  101. Returns:
  102. tuple of (decoded_value, new_offset)
  103. """
  104. value = 0
  105. shift = 0
  106. pos = offset
  107. while pos < len(data):
  108. byte = data[pos]
  109. pos += 1
  110. value |= (byte & 0x7F) << shift
  111. shift += 7
  112. if not (byte & 0x80): # No continuation bit
  113. break
  114. return value, pos
  115. def _compress_path(path: bytes, previous_path: bytes) -> bytes:
  116. """Compress a path relative to the previous path for index version 4.
  117. Args:
  118. path: Path to compress
  119. previous_path: Previous path for comparison
  120. Returns:
  121. Compressed path data (varint prefix_len + suffix)
  122. """
  123. # Find the common prefix length
  124. common_len = 0
  125. min_len = min(len(path), len(previous_path))
  126. for i in range(min_len):
  127. if path[i] == previous_path[i]:
  128. common_len += 1
  129. else:
  130. break
  131. # The number of bytes to remove from the end of previous_path
  132. # to get the common prefix
  133. remove_len = len(previous_path) - common_len
  134. # The suffix to append
  135. suffix = path[common_len:]
  136. # Encode: varint(remove_len) + suffix + NUL
  137. return _encode_varint(remove_len) + suffix + b"\x00"
  138. def _decompress_path(
  139. data: bytes, offset: int, previous_path: bytes
  140. ) -> tuple[bytes, int]:
  141. """Decompress a path from index version 4 compressed format.
  142. Args:
  143. data: Raw data containing compressed path
  144. offset: Starting offset in data
  145. previous_path: Previous path for decompression
  146. Returns:
  147. tuple of (decompressed_path, new_offset)
  148. """
  149. # Decode the number of bytes to remove from previous path
  150. remove_len, new_offset = _decode_varint(data, offset)
  151. # Find the NUL terminator for the suffix
  152. suffix_start = new_offset
  153. suffix_end = suffix_start
  154. while suffix_end < len(data) and data[suffix_end] != 0:
  155. suffix_end += 1
  156. if suffix_end >= len(data):
  157. raise ValueError("Unterminated path suffix in compressed entry")
  158. suffix = data[suffix_start:suffix_end]
  159. new_offset = suffix_end + 1 # Skip the NUL terminator
  160. # Reconstruct the path
  161. if remove_len > len(previous_path):
  162. raise ValueError(
  163. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  164. )
  165. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  166. path = prefix + suffix
  167. return path, new_offset
  168. def _decompress_path_from_stream(
  169. f: BinaryIO, previous_path: bytes
  170. ) -> tuple[bytes, int]:
  171. """Decompress a path from index version 4 compressed format, reading from stream.
  172. Args:
  173. f: File-like object to read from
  174. previous_path: Previous path for decompression
  175. Returns:
  176. tuple of (decompressed_path, bytes_consumed)
  177. """
  178. # Decode the varint for remove_len by reading byte by byte
  179. remove_len = 0
  180. shift = 0
  181. bytes_consumed = 0
  182. while True:
  183. byte_data = f.read(1)
  184. if not byte_data:
  185. raise ValueError("Unexpected end of file while reading varint")
  186. byte = byte_data[0]
  187. bytes_consumed += 1
  188. remove_len |= (byte & 0x7F) << shift
  189. shift += 7
  190. if not (byte & 0x80): # No continuation bit
  191. break
  192. # Read the suffix until NUL terminator
  193. suffix = b""
  194. while True:
  195. byte_data = f.read(1)
  196. if not byte_data:
  197. raise ValueError("Unexpected end of file while reading path suffix")
  198. byte = byte_data[0]
  199. bytes_consumed += 1
  200. if byte == 0: # NUL terminator
  201. break
  202. suffix += bytes([byte])
  203. # Reconstruct the path
  204. if remove_len > len(previous_path):
  205. raise ValueError(
  206. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  207. )
  208. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  209. path = prefix + suffix
  210. return path, bytes_consumed
class Stage(Enum):
    """Merge stage of an index entry.

    Stage 0 is a normally merged entry; stages 1-3 are the three sides
    of an unresolved merge conflict.
    """

    NORMAL = 0
    MERGE_CONFLICT_ANCESTOR = 1
    MERGE_CONFLICT_THIS = 2
    MERGE_CONFLICT_OTHER = 3
@dataclass
class SerializedIndexEntry:
    """A single index entry in on-disk form, including its path name.

    ctime/mtime are ints, floats, or (seconds, nanoseconds) tuples —
    the same shapes write_cache_time accepts.
    """

    name: bytes
    ctime: Union[int, float, tuple[int, int]]
    mtime: Union[int, float, tuple[int, int]]
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    size: int
    sha: bytes
    flags: int
    extended_flags: int

    def stage(self) -> Stage:
        """Return the merge stage encoded in the flags word."""
        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
@dataclass
class IndexExtension:
    """Base class for index extensions."""

    signature: bytes  # 4-byte extension signature
    data: bytes  # raw extension payload

    @classmethod
    def from_raw(cls, signature: bytes, data: bytes) -> "IndexExtension":
        """Create an extension from raw data.

        Args:
          signature: 4-byte extension signature
          data: Extension data
        Returns:
          Parsed extension object
        """
        # Dispatch to a known subclass when the signature is recognized.
        if signature == TREE_EXTENSION:
            return TreeExtension.from_bytes(data)
        elif signature == REUC_EXTENSION:
            return ResolveUndoExtension.from_bytes(data)
        elif signature == UNTR_EXTENSION:
            return UntrackedExtension.from_bytes(data)
        else:
            # Unknown extension - just store raw data
            return cls(signature, data)

    def to_bytes(self) -> bytes:
        """Serialize extension to bytes."""
        return self.data
class TreeExtension(IndexExtension):
    """Tree cache extension."""

    def __init__(self, entries: list[tuple[bytes, bytes, int]]) -> None:
        # entries: presumably (path, sha, entry_count) tuples — parsing is
        # not yet implemented, so verify against the TREE format when it is.
        self.entries = entries
        super().__init__(TREE_EXTENSION, b"")

    @classmethod
    def from_bytes(cls, data: bytes) -> "TreeExtension":
        # TODO: Implement tree cache parsing
        return cls([])

    def to_bytes(self) -> bytes:
        # TODO: Implement tree cache serialization
        return b""
class ResolveUndoExtension(IndexExtension):
    """Resolve undo extension for recording merge conflicts."""

    def __init__(self, entries: list[tuple[bytes, list[tuple[int, bytes]]]]) -> None:
        # entries: presumably (path, [(mode, sha), ...]) pairs — parsing is
        # not yet implemented; confirm against the REUC format when it is.
        self.entries = entries
        super().__init__(REUC_EXTENSION, b"")

    @classmethod
    def from_bytes(cls, data: bytes) -> "ResolveUndoExtension":
        # TODO: Implement resolve undo parsing
        return cls([])

    def to_bytes(self) -> bytes:
        # TODO: Implement resolve undo serialization
        return b""
class UntrackedExtension(IndexExtension):
    """Untracked cache extension.

    Stores the raw payload without interpreting it.
    """

    def __init__(self, data: bytes) -> None:
        super().__init__(UNTR_EXTENSION, data)

    @classmethod
    def from_bytes(cls, data: bytes) -> "UntrackedExtension":
        return cls(data)
@dataclass
class IndexEntry:
    """In-memory representation of an index entry, without its path.

    The path is supplied externally (it is the key in the index dict)
    and re-attached by serialize().
    """

    ctime: Union[int, float, tuple[int, int]]
    mtime: Union[int, float, tuple[int, int]]
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    size: int
    sha: bytes
    flags: int = 0
    extended_flags: int = 0

    @classmethod
    def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
        """Build an IndexEntry from its on-disk form (the name is dropped)."""
        return cls(
            ctime=serialized.ctime,
            mtime=serialized.mtime,
            dev=serialized.dev,
            ino=serialized.ino,
            mode=serialized.mode,
            uid=serialized.uid,
            gid=serialized.gid,
            size=serialized.size,
            sha=serialized.sha,
            flags=serialized.flags,
            extended_flags=serialized.extended_flags,
        )

    def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
        """Return the on-disk form of this entry for *name* at *stage*."""
        # Clear out any existing stage bits, then set them from the Stage.
        new_flags = self.flags & ~FLAG_STAGEMASK
        new_flags |= stage.value << FLAG_STAGESHIFT
        return SerializedIndexEntry(
            name=name,
            ctime=self.ctime,
            mtime=self.mtime,
            dev=self.dev,
            ino=self.ino,
            mode=self.mode,
            uid=self.uid,
            gid=self.gid,
            size=self.size,
            sha=self.sha,
            flags=new_flags,
            extended_flags=self.extended_flags,
        )

    def stage(self) -> Stage:
        """Return the merge stage encoded in the flags word."""
        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)

    @property
    def skip_worktree(self) -> bool:
        """Return True if the skip-worktree bit is set in extended_flags."""
        return bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)

    def set_skip_worktree(self, skip: bool = True) -> None:
        """Helper method to set or clear the skip-worktree bit in extended_flags.
        Also sets FLAG_EXTENDED in self.flags if needed.
        """
        if skip:
            # Turn on the skip-worktree bit
            self.extended_flags |= EXTENDED_FLAG_SKIP_WORKTREE
            # Also ensure the main 'extended' bit is set in flags
            self.flags |= FLAG_EXTENDED
        else:
            # Turn off the skip-worktree bit
            self.extended_flags &= ~EXTENDED_FLAG_SKIP_WORKTREE
            # Optionally unset the main extended bit if no extended flags remain
            if self.extended_flags == 0:
                self.flags &= ~FLAG_EXTENDED
class ConflictedIndexEntry:
    """Index entry that represents a conflict.

    Holds up to three stage entries: the common ancestor (stage 1),
    "this" side (stage 2) and "other" side (stage 3); any of them may
    be absent.
    """

    ancestor: Optional[IndexEntry]
    this: Optional[IndexEntry]
    other: Optional[IndexEntry]

    def __init__(
        self,
        ancestor: Optional[IndexEntry] = None,
        this: Optional[IndexEntry] = None,
        other: Optional[IndexEntry] = None,
    ) -> None:
        self.ancestor = ancestor
        self.this = this
        self.other = other
class UnmergedEntries(Exception):
    """Unmerged entries exist in the index."""
  372. def pathsplit(path: bytes) -> tuple[bytes, bytes]:
  373. """Split a /-delimited path into a directory part and a basename.
  374. Args:
  375. path: The path to split.
  376. Returns:
  377. Tuple with directory name and basename
  378. """
  379. try:
  380. (dirname, basename) = path.rsplit(b"/", 1)
  381. except ValueError:
  382. return (b"", path)
  383. else:
  384. return (dirname, basename)
  385. def pathjoin(*args: bytes) -> bytes:
  386. """Join a /-delimited path."""
  387. return b"/".join([p for p in args if p])
  388. def read_cache_time(f: BinaryIO) -> tuple[int, int]:
  389. """Read a cache time.
  390. Args:
  391. f: File-like object to read from
  392. Returns:
  393. Tuple with seconds and nanoseconds
  394. """
  395. return struct.unpack(">LL", f.read(8))
  396. def write_cache_time(f: BinaryIO, t: Union[int, float, tuple[int, int]]) -> None:
  397. """Write a cache time.
  398. Args:
  399. f: File-like object to write to
  400. t: Time to write (as int, float or tuple with secs and nsecs)
  401. """
  402. if isinstance(t, int):
  403. t = (t, 0)
  404. elif isinstance(t, float):
  405. (secs, nsecs) = divmod(t, 1.0)
  406. t = (int(secs), int(nsecs * 1000000000))
  407. elif not isinstance(t, tuple):
  408. raise TypeError(t)
  409. f.write(struct.pack(">LL", *t))
def read_cache_entry(
    f: BinaryIO, version: int, previous_path: bytes = b""
) -> SerializedIndexEntry:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index version
      previous_path: Previous entry's path (for version 4 compression)
    Returns:
      A SerializedIndexEntry with the on-disk fields of the entry
    Raises:
      AssertionError: if the extended flag is set but version < 3
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    # Fixed-width stat data: six 32-bit fields, 20-byte SHA, 16-bit flags.
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError("extended flag set in index with version < 3")
        # An extra 16-bit word of extended flags follows in v3+.
        (extended_flags,) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    if version >= 4:
        # Version 4: paths are always compressed (name_len should be 0)
        name, consumed = _decompress_path_from_stream(f, previous_path)
    else:
        # Versions < 4: regular name reading
        name = f.read(flags & FLAG_NAMEMASK)
    # Padding:
    if version < 4:
        # Entries are NUL-padded so each one occupies a multiple of
        # 8 bytes (the +8 accounts for at least one terminating NUL).
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return SerializedIndexEntry(
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        # Drop the name-length bits; keep stage/valid/extended bits only.
        flags & ~FLAG_NAMEMASK,
        extended_flags,
    )
def write_cache_entry(
    f: BinaryIO, entry: SerializedIndexEntry, version: int, previous_path: bytes = b""
) -> None:
    """Write an index entry to a file.

    Args:
      f: File object
      entry: IndexEntry to write
      version: Index format version
      previous_path: Previous entry's path (for version 4 compression)
    Raises:
      AssertionError: if extended flags are needed but version < 3
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    if version >= 4:
        # Version 4: use compression but set name_len to actual filename length
        # This matches how C Git implements index v4 flags
        compressed_path = _compress_path(entry.name, previous_path)
        flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
    else:
        # Versions < 4: include actual name length
        flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
    if entry.extended_flags:
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError("unable to use extended flags in version < 3")
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            # dev/ino are truncated to 32 bits for the on-disk format.
            entry.dev & 0xFFFFFFFF,
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    if version >= 4:
        # Version 4: always write compressed path
        f.write(compressed_path)
    else:
        # Versions < 4: write regular path and padding
        f.write(entry.name)
        # Pad the entry with NULs to a multiple of 8 bytes from its start.
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
class UnsupportedIndexFormat(Exception):
    """An unsupported index format was encountered."""

    def __init__(self, version: int) -> None:
        # The version number found in the index header.
        self.index_format_version = version
  514. def read_index_header(f: BinaryIO) -> tuple[int, int]:
  515. """Read an index header from a file.
  516. Returns:
  517. tuple of (version, num_entries)
  518. """
  519. header = f.read(4)
  520. if header != b"DIRC":
  521. raise AssertionError(f"Invalid index file header: {header!r}")
  522. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  523. if version not in (1, 2, 3, 4):
  524. raise UnsupportedIndexFormat(version)
  525. return version, num_entries
  526. def write_index_extension(f: BinaryIO, extension: IndexExtension) -> None:
  527. """Write an index extension.
  528. Args:
  529. f: File-like object to write to
  530. extension: Extension to write
  531. """
  532. data = extension.to_bytes()
  533. f.write(extension.signature)
  534. f.write(struct.pack(">I", len(data)))
  535. f.write(data)
  536. def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
  537. """Read an index file, yielding the individual entries."""
  538. version, num_entries = read_index_header(f)
  539. previous_path = b""
  540. for i in range(num_entries):
  541. entry = read_cache_entry(f, version, previous_path)
  542. previous_path = entry.name
  543. yield entry
def read_index_dict_with_version(
    f: BinaryIO,
) -> tuple[
    dict[bytes, Union[IndexEntry, ConflictedIndexEntry]], int, list[IndexExtension]
]:
    """Read an index file and return it as a dictionary along with the version.

    Returns:
      tuple of (entries_dict, version, extensions)
    """
    version, num_entries = read_index_header(f)
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    previous_path = b""
    for i in range(num_entries):
        entry = read_cache_entry(f, version, previous_path)
        previous_path = entry.name
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate each stage into one
            # ConflictedIndexEntry keyed by the path.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    # Read extensions
    extensions = []
    while True:
        # Check if we're at the end (20 bytes before EOF for SHA checksum)
        current_pos = f.tell()
        f.seek(0, 2)  # EOF
        eof_pos = f.tell()
        f.seek(current_pos)
        if current_pos >= eof_pos - 20:
            break
        # Try to read extension signature
        signature = f.read(4)
        if len(signature) < 4:
            break
        # Check if it's a valid extension signature (4 uppercase letters)
        if not all(65 <= b <= 90 for b in signature):
            # Not an extension, seek back
            f.seek(-4, 1)
            break
        # Read extension size
        size_data = f.read(4)
        if len(size_data) < 4:
            break
        size = struct.unpack(">I", size_data)[0]
        # Read extension data
        data = f.read(size)
        if len(data) < size:
            break
        extension = IndexExtension.from_raw(signature, data)
        extensions.append(extension)
    return ret, version, extensions
def read_index_dict(
    f: BinaryIO,
) -> dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]:
    """Read an index file and return it as a dictionary.

    Keys are paths; a path with conflicting merge stages maps to a
    single ConflictedIndexEntry holding its per-stage entries.

    Args:
      f: File object to read from.
    """
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    for entry in read_index(f):
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate each stage into one
            # ConflictedIndexEntry keyed by the path.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret
  628. def write_index(
  629. f: BinaryIO,
  630. entries: list[SerializedIndexEntry],
  631. version: Optional[int] = None,
  632. extensions: Optional[list[IndexExtension]] = None,
  633. ) -> None:
  634. """Write an index file.
  635. Args:
  636. f: File-like object to write to
  637. version: Version number to write
  638. entries: Iterable over the entries to write
  639. extensions: Optional list of extensions to write
  640. """
  641. if version is None:
  642. version = DEFAULT_VERSION
  643. # STEP 1: check if any extended_flags are set
  644. uses_extended_flags = any(e.extended_flags != 0 for e in entries)
  645. if uses_extended_flags and version < 3:
  646. # Force or bump the version to 3
  647. version = 3
  648. # The rest is unchanged, but you might insert a final check:
  649. if version < 3:
  650. # Double-check no extended flags appear
  651. for e in entries:
  652. if e.extended_flags != 0:
  653. raise AssertionError("Attempt to use extended flags in index < v3")
  654. # Proceed with the existing code to write the header and entries.
  655. f.write(b"DIRC")
  656. f.write(struct.pack(b">LL", version, len(entries)))
  657. previous_path = b""
  658. for entry in entries:
  659. write_cache_entry(f, entry, version=version, previous_path=previous_path)
  660. previous_path = entry.name
  661. # Write extensions
  662. if extensions:
  663. for extension in extensions:
  664. write_index_extension(f, extension)
  665. def write_index_dict(
  666. f: BinaryIO,
  667. entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]],
  668. version: Optional[int] = None,
  669. extensions: Optional[list[IndexExtension]] = None,
  670. ) -> None:
  671. """Write an index file based on the contents of a dictionary.
  672. being careful to sort by path and then by stage.
  673. """
  674. entries_list = []
  675. for key in sorted(entries):
  676. value = entries[key]
  677. if isinstance(value, ConflictedIndexEntry):
  678. if value.ancestor is not None:
  679. entries_list.append(
  680. value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)
  681. )
  682. if value.this is not None:
  683. entries_list.append(
  684. value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)
  685. )
  686. if value.other is not None:
  687. entries_list.append(
  688. value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)
  689. )
  690. else:
  691. entries_list.append(value.serialize(key, Stage.NORMAL))
  692. write_index(f, entries_list, version=version, extensions=extensions)
  693. def cleanup_mode(mode: int) -> int:
  694. """Cleanup a mode value.
  695. This will return a mode that can be stored in a tree object.
  696. Args:
  697. mode: Mode to clean up.
  698. Returns:
  699. mode
  700. """
  701. if stat.S_ISLNK(mode):
  702. return stat.S_IFLNK
  703. elif stat.S_ISDIR(mode):
  704. return stat.S_IFDIR
  705. elif S_ISGITLINK(mode):
  706. return S_IFGITLINK
  707. ret = stat.S_IFREG | 0o644
  708. if mode & 0o100:
  709. ret |= 0o111
  710. return ret
class Index:
    """A Git Index file."""

    # Single backing store: path -> IndexEntry, or ConflictedIndexEntry
    # when the path has unmerged stages.
    _byname: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]

    def __init__(
        self,
        filename: Union[bytes, str, os.PathLike],
        read: bool = True,
        skip_hash: bool = False,
        version: Optional[int] = None,
    ) -> None:
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
          skip_hash: Whether to skip SHA1 hash when writing (for manyfiles feature)
          version: Index format version to use (None = auto-detect from file or use default)
        """
        self._filename = os.fspath(filename)
        # TODO(jelmer): Store the version returned by read_index
        self._version = version
        self._skip_hash = skip_hash
        self._extensions: list[IndexExtension] = []
        self.clear()
        if read:
            self.read()

    @property
    def path(self) -> Union[bytes, str]:
        """Path to the index file on disk."""
        return self._filename

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._filename!r})"

    def write(self) -> None:
        """Write current contents of index to disk."""
        from typing import BinaryIO, cast

        f = GitFile(self._filename, "wb")
        try:
            # Filter out extensions with no meaningful data
            meaningful_extensions = []
            for ext in self._extensions:
                # Skip extensions that have empty data
                ext_data = ext.to_bytes()
                if ext_data:
                    meaningful_extensions.append(ext)
            if self._skip_hash:
                # When skipHash is enabled, write the index without computing SHA1
                write_index_dict(
                    cast(BinaryIO, f),
                    self._byname,
                    version=self._version,
                    extensions=meaningful_extensions,
                )
                # Write 20 zero bytes instead of SHA1
                f.write(b"\x00" * 20)
                f.close()
            else:
                sha1_writer = SHA1Writer(cast(BinaryIO, f))
                write_index_dict(
                    cast(BinaryIO, sha1_writer),
                    self._byname,
                    version=self._version,
                    extensions=meaningful_extensions,
                )
                # NOTE(review): sha1_writer.close() is expected to append the
                # trailing checksum and close f — confirm against SHA1Writer.
                sha1_writer.close()
        except:
            # Ensure the lockfile-style GitFile is released on any failure.
            f.close()
            raise

    def read(self) -> None:
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            # A missing index file is treated as an empty index.
            return
        f = GitFile(self._filename, "rb")
        try:
            sha1_reader = SHA1Reader(f)
            entries, version, extensions = read_index_dict_with_version(
                cast(BinaryIO, sha1_reader)
            )
            self._version = version
            self._extensions = extensions
            self.update(entries)
            # Extensions have already been read by read_index_dict_with_version
            sha1_reader.check_sha(allow_empty=True)
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, key: bytes) -> Union[IndexEntry, ConflictedIndexEntry]:
        """Retrieve entry by relative path and stage.

        Returns: Either a IndexEntry or a ConflictedIndexEntry
        Raises KeyError: if the entry does not exist
        """
        return self._byname[key]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths and stages in this index."""
        return iter(self._byname)

    def __contains__(self, key: bytes) -> bool:
        """Check whether a path has an entry in this index."""
        return key in self._byname

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.mode

    def iterobjects(self) -> Iterable[tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            if isinstance(entry, ConflictedIndexEntry):
                raise UnmergedEntries
            yield path, entry.sha, cleanup_mode(entry.mode)

    def has_conflicts(self) -> bool:
        """Return True if any path in this index is conflicted."""
        for value in self._byname.values():
            if isinstance(value, ConflictedIndexEntry):
                return True
        return False

    def clear(self) -> None:
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(
        self, name: bytes, value: Union[IndexEntry, ConflictedIndexEntry]
    ) -> None:
        """Set the entry for a path; the path must be bytes."""
        assert isinstance(name, bytes)
        self._byname[name] = value

    def __delitem__(self, name: bytes) -> None:
        """Remove the entry for a path.

        Raises KeyError: if the entry does not exist
        """
        del self._byname[name]

    def iteritems(
        self,
    ) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        """Iterate over (path, entry) pairs (legacy alias for items())."""
        return iter(self._byname.items())

    def items(self) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        """Iterate over (path, entry) pairs."""
        return iter(self._byname.items())

    def update(
        self, entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]
    ) -> None:
        """Add or replace entries from a path -> entry mapping."""
        for key, value in entries.items():
            self[key] = value

    def paths(self) -> Generator[bytes, None, None]:
        """Yield every path present in this index."""
        yield from self._byname.keys()

    def changes_from_tree(
        self,
        object_store: ObjectContainer,
        tree: ObjectID,
        want_unchanged: bool = False,
    ) -> Generator[
        tuple[
            tuple[Optional[bytes], Optional[bytes]],
            tuple[Optional[int], Optional[int]],
            tuple[Optional[bytes], Optional[bytes]],
        ],
        None,
        None,
    ]:
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path: bytes) -> tuple[bytes, int]:
            # Adapter for the module-level changes_from_tree helper.
            entry = self[path]
            if hasattr(entry, "sha") and hasattr(entry, "mode"):
                return entry.sha, cleanup_mode(entry.mode)
            else:
                # Handle ConflictedIndexEntry case
                return b"", 0

        yield from changes_from_tree(
            self.paths(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        )

    def commit(self, object_store: ObjectContainer) -> bytes:
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
  898. def commit_tree(
  899. object_store: ObjectContainer, blobs: Iterable[tuple[bytes, bytes, int]]
  900. ) -> bytes:
  901. """Commit a new tree.
  902. Args:
  903. object_store: Object store to add trees to
  904. blobs: Iterable over blob path, sha, mode entries
  905. Returns:
  906. SHA1 of the created tree.
  907. """
  908. trees: dict[bytes, Any] = {b"": {}}
  909. def add_tree(path: bytes) -> dict[bytes, Any]:
  910. if path in trees:
  911. return trees[path]
  912. dirname, basename = pathsplit(path)
  913. t = add_tree(dirname)
  914. assert isinstance(basename, bytes)
  915. newtree: dict[bytes, Any] = {}
  916. t[basename] = newtree
  917. trees[path] = newtree
  918. return newtree
  919. for path, sha, mode in blobs:
  920. tree_path, basename = pathsplit(path)
  921. tree = add_tree(tree_path)
  922. tree[basename] = (mode, sha)
  923. def build_tree(path: bytes) -> bytes:
  924. tree = Tree()
  925. for basename, entry in trees[path].items():
  926. if isinstance(entry, dict):
  927. mode = stat.S_IFDIR
  928. sha = build_tree(pathjoin(path, basename))
  929. else:
  930. (mode, sha) = entry
  931. tree.add(basename, mode, sha)
  932. object_store.add_object(tree)
  933. return tree.id
  934. return build_tree(b"")
  935. def commit_index(object_store: ObjectContainer, index: Index) -> bytes:
  936. """Create a new tree from an index.
  937. Args:
  938. object_store: Object store to save the tree in
  939. index: Index file
  940. Note: This function is deprecated, use index.commit() instead.
  941. Returns: Root tree sha.
  942. """
  943. return commit_tree(object_store, index.iterobjects())
  944. def changes_from_tree(
  945. names: Iterable[bytes],
  946. lookup_entry: Callable[[bytes], tuple[bytes, int]],
  947. object_store: ObjectContainer,
  948. tree: Optional[bytes],
  949. want_unchanged: bool = False,
  950. ) -> Iterable[
  951. tuple[
  952. tuple[Optional[bytes], Optional[bytes]],
  953. tuple[Optional[int], Optional[int]],
  954. tuple[Optional[bytes], Optional[bytes]],
  955. ]
  956. ]:
  957. """Find the differences between the contents of a tree and
  958. a working copy.
  959. Args:
  960. names: Iterable of names in the working copy
  961. lookup_entry: Function to lookup an entry in the working copy
  962. object_store: Object store to use for retrieving tree contents
  963. tree: SHA1 of the root tree, or None for an empty tree
  964. want_unchanged: Whether unchanged files should be reported
  965. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  966. (oldsha, newsha)
  967. """
  968. # TODO(jelmer): Support a include_trees option
  969. other_names = set(names)
  970. if tree is not None:
  971. for name, mode, sha in iter_tree_contents(object_store, tree):
  972. try:
  973. (other_sha, other_mode) = lookup_entry(name)
  974. except KeyError:
  975. # Was removed
  976. yield ((name, None), (mode, None), (sha, None))
  977. else:
  978. other_names.remove(name)
  979. if want_unchanged or other_sha != sha or other_mode != mode:
  980. yield ((name, name), (mode, other_mode), (sha, other_sha))
  981. # Mention added files
  982. for name in other_names:
  983. try:
  984. (other_sha, other_mode) = lookup_entry(name)
  985. except KeyError:
  986. pass
  987. else:
  988. yield ((None, name), (None, other_mode), (None, other_sha))
def index_entry_from_stat(
    stat_val: os.stat_result,
    hex_sha: bytes,
    mode: Optional[int] = None,
) -> IndexEntry:
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
      mode: Optional file mode; when None it is derived from
        ``stat_val.st_mode`` via ``cleanup_mode``.
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)
    # flags/extended_flags start out zero; callers adjust them if needed.
    return IndexEntry(
        ctime=stat_val.st_ctime,
        mtime=stat_val.st_mtime,
        dev=stat_val.st_dev,
        ino=stat_val.st_ino,
        mode=mode,
        uid=stat_val.st_uid,
        gid=stat_val.st_gid,
        size=stat_val.st_size,
        sha=hex_sha,
        flags=0,
        extended_flags=0,
    )
if sys.platform == "win32":
    # On Windows, creating symlinks either requires administrator privileges
    # or developer mode. Raise a more helpful error when we're unable to
    # create symlinks
    # https://github.com/jelmer/dulwich/issues/1005

    class WindowsSymlinkPermissionError(PermissionError):
        """PermissionError with a hint about enabling developer mode."""

        def __init__(self, errno: int, msg: str, filename: Optional[str]) -> None:
            # Call PermissionError's parent initializer directly so the
            # standard (errno, strerror, filename) tuple is preserved.
            super(PermissionError, self).__init__(
                errno,
                f"Unable to create symlink; do you have developer mode enabled? {msg}",
                filename,
            )

    def symlink(
        src: Union[str, bytes],
        dst: Union[str, bytes],
        target_is_directory: bool = False,
        *,
        dir_fd: Optional[int] = None,
    ) -> None:
        """Wrapper for os.symlink that raises WindowsSymlinkPermissionError."""
        try:
            return os.symlink(
                src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
            )
        except PermissionError as e:
            raise WindowsSymlinkPermissionError(
                e.errno or 0, e.strerror or "", e.filename
            ) from e
else:
    # On POSIX the stdlib implementation is used unchanged.
    symlink = os.symlink
  1043. def build_file_from_blob(
  1044. blob: Blob,
  1045. mode: int,
  1046. target_path: bytes,
  1047. *,
  1048. honor_filemode: bool = True,
  1049. tree_encoding: str = "utf-8",
  1050. symlink_fn: Optional[Callable] = None,
  1051. ) -> os.stat_result:
  1052. """Build a file or symlink on disk based on a Git object.
  1053. Args:
  1054. blob: The git object
  1055. mode: File mode
  1056. target_path: Path to write to
  1057. honor_filemode: An optional flag to honor core.filemode setting in
  1058. config file, default is core.filemode=True, change executable bit
  1059. symlink_fn: Function to use for creating symlinks
  1060. Returns: stat object for the file
  1061. """
  1062. try:
  1063. oldstat = os.lstat(target_path)
  1064. except FileNotFoundError:
  1065. oldstat = None
  1066. contents = blob.as_raw_string()
  1067. if stat.S_ISLNK(mode):
  1068. if oldstat:
  1069. _remove_file_with_readonly_handling(target_path)
  1070. if sys.platform == "win32":
  1071. # os.readlink on Python3 on Windows requires a unicode string.
  1072. contents_str = contents.decode(tree_encoding)
  1073. target_path_str = target_path.decode(tree_encoding)
  1074. (symlink_fn or symlink)(contents_str, target_path_str)
  1075. else:
  1076. (symlink_fn or symlink)(contents, target_path)
  1077. else:
  1078. if oldstat is not None and oldstat.st_size == len(contents):
  1079. with open(target_path, "rb") as f:
  1080. if f.read() == contents:
  1081. return oldstat
  1082. with open(target_path, "wb") as f:
  1083. # Write out file
  1084. f.write(contents)
  1085. if honor_filemode:
  1086. os.chmod(target_path, mode)
  1087. return os.lstat(target_path)
  1088. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  1089. def validate_path_element_default(element: bytes) -> bool:
  1090. return element.lower() not in INVALID_DOTNAMES
  1091. def validate_path_element_ntfs(element: bytes) -> bool:
  1092. stripped = element.rstrip(b". ").lower()
  1093. if stripped in INVALID_DOTNAMES:
  1094. return False
  1095. if stripped == b"git~1":
  1096. return False
  1097. return True
# HFS+ ignorable Unicode codepoints (from Git's utf8.c)
# HFS+ skips these code points when comparing filenames, so they must be
# stripped before checking a path element against ".git" and friends.
HFS_IGNORABLE_CHARS = {
    0x200C,  # ZERO WIDTH NON-JOINER
    0x200D,  # ZERO WIDTH JOINER
    0x200E,  # LEFT-TO-RIGHT MARK
    0x200F,  # RIGHT-TO-LEFT MARK
    0x202A,  # LEFT-TO-RIGHT EMBEDDING
    0x202B,  # RIGHT-TO-LEFT EMBEDDING
    0x202C,  # POP DIRECTIONAL FORMATTING
    0x202D,  # LEFT-TO-RIGHT OVERRIDE
    0x202E,  # RIGHT-TO-LEFT OVERRIDE
    0x206A,  # INHIBIT SYMMETRIC SWAPPING
    0x206B,  # ACTIVATE SYMMETRIC SWAPPING
    0x206C,  # INHIBIT ARABIC FORM SHAPING
    0x206D,  # ACTIVATE ARABIC FORM SHAPING
    0x206E,  # NATIONAL DIGIT SHAPES
    0x206F,  # NOMINAL DIGIT SHAPES
    0xFEFF,  # ZERO WIDTH NO-BREAK SPACE
}
  1117. def validate_path_element_hfs(element: bytes) -> bool:
  1118. """Validate path element for HFS+ filesystem.
  1119. Equivalent to Git's is_hfs_dotgit and related checks.
  1120. Uses NFD normalization and ignores HFS+ ignorable characters.
  1121. """
  1122. import unicodedata
  1123. try:
  1124. # Decode to Unicode
  1125. element_str = element.decode("utf-8", errors="strict")
  1126. except UnicodeDecodeError:
  1127. # Malformed UTF-8 - be conservative and reject
  1128. return False
  1129. # Remove HFS+ ignorable characters (like Git's next_hfs_char)
  1130. filtered = "".join(c for c in element_str if ord(c) not in HFS_IGNORABLE_CHARS)
  1131. # Normalize to NFD (HFS+ uses a variant of NFD)
  1132. normalized = unicodedata.normalize("NFD", filtered)
  1133. # Check against invalid names (case-insensitive)
  1134. normalized_bytes = normalized.encode("utf-8", errors="strict")
  1135. if normalized_bytes.lower() in INVALID_DOTNAMES:
  1136. return False
  1137. # Also check for 8.3 short name
  1138. if normalized_bytes.lower() == b"git~1":
  1139. return False
  1140. return True
  1141. def validate_path(
  1142. path: bytes,
  1143. element_validator: Callable[[bytes], bool] = validate_path_element_default,
  1144. ) -> bool:
  1145. """Default path validator that just checks for .git/."""
  1146. parts = path.split(b"/")
  1147. for p in parts:
  1148. if not element_validator(p):
  1149. return False
  1150. else:
  1151. return True
def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: ObjectContainer,
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
    symlink_fn: Optional[Callable] = None,
    blob_normalizer: Optional["BlobNormalizer"] = None,
    tree_encoding: str = "utf-8",
) -> None:
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.
      symlink_fn: Optional function to use for creating symlinks
      blob_normalizer: An optional BlobNormalizer to use for converting line
        endings when writing blobs to the working directory.
      tree_encoding: Encoding used for tree paths (default: utf-8)

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for entry in iter_tree_contents(object_store, tree_id):
        # Silently skip entries with unsafe path components (e.g. ".git").
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path, tree_encoding)
        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))
        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            # Submodule: only an empty placeholder directory is created.
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            assert isinstance(obj, Blob)
            # Apply blob normalization for checkout if normalizer is provided
            if blob_normalizer is not None:
                obj = blob_normalizer.checkout_normalize(obj, entry.path)
            st = build_file_from_blob(
                obj,
                entry.mode,
                full_path,
                honor_filemode=honor_filemode,
                tree_encoding=tree_encoding,
                symlink_fn=symlink_fn,
            )
        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        # default to a stage 0 index entry (normal)
        # when reading from the filesystem
        index[entry.path] = index_entry_from_stat(st, entry.sha)
    index.write()
  1230. def blob_from_path_and_mode(
  1231. fs_path: bytes, mode: int, tree_encoding: str = "utf-8"
  1232. ) -> Blob:
  1233. """Create a blob from a path and a stat object.
  1234. Args:
  1235. fs_path: Full file system path to file
  1236. mode: File mode
  1237. Returns: A `Blob` object
  1238. """
  1239. assert isinstance(fs_path, bytes)
  1240. blob = Blob()
  1241. if stat.S_ISLNK(mode):
  1242. if sys.platform == "win32":
  1243. # os.readlink on Python3 on Windows requires a unicode string.
  1244. blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
  1245. else:
  1246. blob.data = os.readlink(fs_path)
  1247. else:
  1248. with open(fs_path, "rb") as f:
  1249. blob.data = f.read()
  1250. return blob
  1251. def blob_from_path_and_stat(
  1252. fs_path: bytes, st: os.stat_result, tree_encoding: str = "utf-8"
  1253. ) -> Blob:
  1254. """Create a blob from a path and a stat object.
  1255. Args:
  1256. fs_path: Full file system path to file
  1257. st: A stat object
  1258. Returns: A `Blob` object
  1259. """
  1260. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
  1261. def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
  1262. """Read the head commit of a submodule.
  1263. Args:
  1264. path: path to the submodule
  1265. Returns: HEAD sha, None if not a valid head/repository
  1266. """
  1267. from .errors import NotGitRepository
  1268. from .repo import Repo
  1269. # Repo currently expects a "str", so decode if necessary.
  1270. # TODO(jelmer): Perhaps move this into Repo() ?
  1271. if not isinstance(path, str):
  1272. path = os.fsdecode(path)
  1273. try:
  1274. repo = Repo(path)
  1275. except NotGitRepository:
  1276. return None
  1277. try:
  1278. return repo.head()
  1279. except KeyError:
  1280. return None
  1281. def _has_directory_changed(tree_path: bytes, entry: IndexEntry) -> bool:
  1282. """Check if a directory has changed after getting an error.
  1283. When handling an error trying to create a blob from a path, call this
  1284. function. It will check if the path is a directory. If it's a directory
  1285. and a submodule, check the submodule head to see if it's has changed. If
  1286. not, consider the file as changed as Git tracked a file and not a
  1287. directory.
  1288. Return true if the given path should be considered as changed and False
  1289. otherwise or if the path is not a directory.
  1290. """
  1291. # This is actually a directory
  1292. if os.path.exists(os.path.join(tree_path, b".git")):
  1293. # Submodule
  1294. head = read_submodule_head(tree_path)
  1295. if entry.sha != head:
  1296. return True
  1297. else:
  1298. # The file was changed to a directory, so consider it removed.
  1299. return True
  1300. return False
# OS path separator as bytes, for splitting/joining byte-string paths.
os_sep_bytes = os.sep.encode("ascii")
  1302. def _ensure_parent_dir_exists(full_path: bytes) -> None:
  1303. """Ensure parent directory exists, checking no parent is a file."""
  1304. parent_dir = os.path.dirname(full_path)
  1305. if parent_dir and not os.path.exists(parent_dir):
  1306. # Check if any parent in the path is a file
  1307. parts = parent_dir.split(os_sep_bytes)
  1308. for i in range(len(parts)):
  1309. partial_path = os_sep_bytes.join(parts[: i + 1])
  1310. if (
  1311. partial_path
  1312. and os.path.exists(partial_path)
  1313. and not os.path.isdir(partial_path)
  1314. ):
  1315. # Parent path is a file, this is an error
  1316. raise OSError(
  1317. f"Cannot create directory, parent path is a file: {partial_path!r}"
  1318. )
  1319. os.makedirs(parent_dir)
  1320. def _remove_file_with_readonly_handling(path: bytes) -> None:
  1321. """Remove a file, handling read-only files on Windows.
  1322. Args:
  1323. path: Path to the file to remove
  1324. """
  1325. try:
  1326. os.unlink(path)
  1327. except PermissionError:
  1328. # On Windows, remove read-only attribute and retry
  1329. if sys.platform == "win32":
  1330. os.chmod(path, stat.S_IWRITE | stat.S_IREAD)
  1331. os.unlink(path)
  1332. else:
  1333. raise
  1334. def _remove_empty_parents(path: bytes, stop_at: bytes) -> None:
  1335. """Remove empty parent directories up to stop_at."""
  1336. parent = os.path.dirname(path)
  1337. while parent and parent != stop_at:
  1338. try:
  1339. os.rmdir(parent)
  1340. parent = os.path.dirname(parent)
  1341. except FileNotFoundError:
  1342. # Directory doesn't exist - stop trying
  1343. break
  1344. except OSError as e:
  1345. if e.errno == errno.ENOTEMPTY:
  1346. # Directory not empty - stop trying
  1347. break
  1348. raise
  1349. def _check_symlink_matches(
  1350. full_path: bytes, repo_object_store, entry_sha: bytes
  1351. ) -> bool:
  1352. """Check if symlink target matches expected target.
  1353. Returns True if symlink matches, False if it doesn't match.
  1354. """
  1355. try:
  1356. current_target = os.readlink(full_path)
  1357. blob_obj = repo_object_store[entry_sha]
  1358. expected_target = blob_obj.as_raw_string()
  1359. if isinstance(current_target, str):
  1360. current_target = current_target.encode()
  1361. return current_target == expected_target
  1362. except FileNotFoundError:
  1363. # Symlink doesn't exist
  1364. return False
  1365. except OSError as e:
  1366. if e.errno == errno.EINVAL:
  1367. # Not a symlink
  1368. return False
  1369. raise
def _check_file_matches(
    repo_object_store,
    full_path: bytes,
    entry_sha: bytes,
    entry_mode: int,
    current_stat: os.stat_result,
    honor_filemode: bool,
    blob_normalizer: Optional["BlobNormalizer"] = None,
    tree_path: Optional[bytes] = None,
) -> bool:
    """Check if a file on disk matches the expected git object.

    Args:
      repo_object_store: Object store used to look up the expected blob
      full_path: Filesystem path of the file to compare
      entry_sha: SHA of the blob expected at this path
      entry_mode: File mode recorded for the entry
      current_stat: lstat result for the on-disk file
      honor_filemode: Whether modes should be compared at all
      blob_normalizer: Optional normalizer applied to the blob before the
        content comparison (e.g. line-ending conversion)
      tree_path: Repository-relative path, needed for normalization

    Returns True if file matches, False if it doesn't match.
    """
    # Check mode first (if honor_filemode is True)
    if honor_filemode:
        current_mode = stat.S_IMODE(current_stat.st_mode)
        expected_mode = stat.S_IMODE(entry_mode)
        # For regular files, only check the user executable bit, not group/other permissions
        # This matches Git's behavior where umask differences don't count as modifications
        if stat.S_ISREG(current_stat.st_mode):
            # Normalize regular file modes to ignore group/other write permissions
            current_mode_normalized = (
                current_mode & 0o755
            )  # Keep only user rwx and all read+execute
            expected_mode_normalized = expected_mode & 0o755
            # For Git compatibility, regular files should be either 644 or 755
            if expected_mode_normalized not in (0o644, 0o755):
                expected_mode_normalized = 0o644  # Default for regular files
            if current_mode_normalized not in (0o644, 0o755):
                # Determine if it should be executable based on user execute bit
                if current_mode & 0o100:  # User execute bit is set
                    current_mode_normalized = 0o755
                else:
                    current_mode_normalized = 0o644
            if current_mode_normalized != expected_mode_normalized:
                return False
        else:
            # For non-regular files (symlinks, etc.), check mode exactly
            if current_mode != expected_mode:
                return False
    # If mode matches (or we don't care), check content via size first
    blob_obj = repo_object_store[entry_sha]
    if current_stat.st_size != blob_obj.raw_length():
        return False
    # Size matches, check actual content
    try:
        with open(full_path, "rb") as f:
            current_content = f.read()
            expected_content = blob_obj.as_raw_string()
            if blob_normalizer and tree_path is not None:
                normalized_blob = blob_normalizer.checkout_normalize(
                    blob_obj, tree_path
                )
                expected_content = normalized_blob.as_raw_string()
            return current_content == expected_content
    except (FileNotFoundError, PermissionError, IsADirectoryError):
        # File vanished or became unreadable between stat and read:
        # treat as not matching.
        return False
  1427. def _transition_to_submodule(repo, path, full_path, current_stat, entry, index):
  1428. """Transition any type to submodule."""
  1429. from .submodule import ensure_submodule_placeholder
  1430. if current_stat is not None and stat.S_ISDIR(current_stat.st_mode):
  1431. # Already a directory, just ensure .git file exists
  1432. ensure_submodule_placeholder(repo, path)
  1433. else:
  1434. # Remove whatever is there and create submodule
  1435. if current_stat is not None:
  1436. _remove_file_with_readonly_handling(full_path)
  1437. ensure_submodule_placeholder(repo, path)
  1438. st = os.lstat(full_path)
  1439. index[path] = index_entry_from_stat(st, entry.sha)
def _transition_to_file(
    object_store,
    path,
    full_path,
    current_stat,
    entry,
    index,
    honor_filemode,
    symlink_fn,
    blob_normalizer,
    tree_encoding="utf-8",
):
    """Transition any type to regular file or symlink.

    Args:
      object_store: Object store holding the target blob
      path: Repository-relative path of the entry
      full_path: Absolute filesystem path
      current_stat: lstat result for the current on-disk state, or None
      entry: Entry describing the desired target state (sha, mode)
      index: Index object updated with the resulting stat
      honor_filemode: Whether to apply mode bits to disk
      symlink_fn: Optional function used to create symlinks
      blob_normalizer: Optional normalizer (e.g. line endings) for checkout
      tree_encoding: Encoding for symlink targets on Windows (default utf-8)
    """
    # Check if we need to update
    if (
        current_stat is not None
        and stat.S_ISREG(current_stat.st_mode)
        and not stat.S_ISLNK(entry.mode)
    ):
        # File to file - check if update needed
        file_matches = _check_file_matches(
            object_store,
            full_path,
            entry.sha,
            entry.mode,
            current_stat,
            honor_filemode,
            blob_normalizer,
            path,
        )
        needs_update = not file_matches
    elif (
        current_stat is not None
        and stat.S_ISLNK(current_stat.st_mode)
        and stat.S_ISLNK(entry.mode)
    ):
        # Symlink to symlink - check if update needed
        symlink_matches = _check_symlink_matches(full_path, object_store, entry.sha)
        needs_update = not symlink_matches
    else:
        # Type change (or nothing on disk): always rewrite.
        needs_update = True
    if not needs_update:
        # Just update index - current_stat should always be valid here since we're not updating
        index[path] = index_entry_from_stat(current_stat, entry.sha)
        return
    # Remove existing entry if needed
    if current_stat is not None and stat.S_ISDIR(current_stat.st_mode):
        # Remove directory
        dir_contents = set(os.listdir(full_path))
        git_file_name = b".git" if isinstance(full_path, bytes) else ".git"
        if git_file_name in dir_contents:
            # Looks like a submodule; only remove it when nothing besides
            # the .git marker is present.
            if dir_contents != {git_file_name}:
                raise IsADirectoryError(
                    f"Cannot replace submodule with untracked files: {full_path!r}"
                )
            shutil.rmtree(full_path)
        else:
            try:
                os.rmdir(full_path)
            except OSError as e:
                if e.errno == errno.ENOTEMPTY:
                    raise IsADirectoryError(
                        f"Cannot replace non-empty directory with file: {full_path!r}"
                    )
                raise
    elif current_stat is not None:
        _remove_file_with_readonly_handling(full_path)
    # Ensure parent directory exists
    _ensure_parent_dir_exists(full_path)
    # Write the file
    blob_obj = object_store[entry.sha]
    assert isinstance(blob_obj, Blob)
    if blob_normalizer:
        blob_obj = blob_normalizer.checkout_normalize(blob_obj, path)
    st = build_file_from_blob(
        blob_obj,
        entry.mode,
        full_path,
        honor_filemode=honor_filemode,
        tree_encoding=tree_encoding,
        symlink_fn=symlink_fn,
    )
    index[path] = index_entry_from_stat(st, entry.sha)
def _transition_to_absent(repo, path, full_path, current_stat, index):
    """Remove any type of entry.

    Deletes the filesystem object at ``full_path`` (file, symlink, empty
    directory, or bare submodule directory), drops ``path`` from
    ``index``, then prunes now-empty parent directories.

    Args:
      repo: Repository object (its root path bounds parent pruning)
      path: Git tree path being removed (bytes)
      full_path: Filesystem path corresponding to ``path``
      current_stat: ``os.lstat`` result for ``full_path``, or None if it
        does not exist (in which case only index/parent cleanup applies)
      index: Index to remove the entry from
    """
    if current_stat is None:
        return

    if stat.S_ISDIR(current_stat.st_mode):
        # Check if it's a submodule directory
        dir_contents = set(os.listdir(full_path))
        git_file_name = b".git" if isinstance(full_path, bytes) else ".git"

        if git_file_name in dir_contents and dir_contents == {git_file_name}:
            # Bare submodule placeholder (only ".git" inside): safe to remove.
            shutil.rmtree(full_path)
        else:
            try:
                os.rmdir(full_path)
            except OSError as e:
                # Non-empty directories are left alone (they may hold
                # untracked files); some platforms report EEXIST instead
                # of ENOTEMPTY for this case.
                if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
                    raise
    else:
        _remove_file_with_readonly_handling(full_path)

    try:
        del index[path]
    except KeyError:
        # Entry was not tracked; nothing to drop from the index.
        pass

    # Try to remove empty parent directories
    _remove_empty_parents(
        full_path, repo.path if isinstance(repo.path, bytes) else repo.path.encode()
    )
def update_working_tree(
    repo: "Repo",
    old_tree_id: Optional[bytes],
    new_tree_id: bytes,
    change_iterator: Iterator["TreeChange"],
    honor_filemode: bool = True,
    validate_path_element: Optional[Callable[[bytes], bool]] = None,
    symlink_fn: Optional[Callable] = None,
    force_remove_untracked: bool = False,
    blob_normalizer: Optional["BlobNormalizer"] = None,
    tree_encoding: str = "utf-8",
    allow_overwrite_modified: bool = False,
) -> None:
    """Update the working tree and index to match a new tree.

    This function handles:
    - Adding new files
    - Updating modified files
    - Removing deleted files
    - Cleaning up empty directories

    Args:
      repo: Repository object
      old_tree_id: SHA of the tree before the update
      new_tree_id: SHA of the tree to update to
      change_iterator: Iterator of TreeChange objects to apply
      honor_filemode: An optional flag to honor core.filemode setting
      validate_path_element: Function to validate path elements to check out
      symlink_fn: Function to use for creating symlinks
      force_remove_untracked: If True, remove files that exist in working
        directory but not in target tree, even if old_tree_id is None
      blob_normalizer: An optional BlobNormalizer to use for converting line
        endings when writing blobs to the working directory.
      tree_encoding: Encoding used for tree paths (default: utf-8)
      allow_overwrite_modified: If False, raise an error when attempting to
        overwrite files that have been modified compared to old_tree_id

    NOTE(review): ``force_remove_untracked`` and ``new_tree_id`` are not
    referenced in this function body — confirm whether they are consumed
    by callers or are vestigial here.
    """
    if validate_path_element is None:
        validate_path_element = validate_path_element_default

    from .diff_tree import (
        CHANGE_ADD,
        CHANGE_COPY,
        CHANGE_DELETE,
        CHANGE_MODIFY,
        CHANGE_RENAME,
        CHANGE_UNCHANGED,
    )

    repo_path = repo.path if isinstance(repo.path, bytes) else repo.path.encode()
    index = repo.open_index()

    # Convert iterator to list since we need multiple passes
    changes = list(change_iterator)

    # Check for path conflicts where files need to become directories
    paths_becoming_dirs = set()
    for change in changes:
        if change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY):
            path = change.new.path
            if b"/" in path:  # This is a file inside a directory
                # Check if any parent path exists as a file in the old tree or changes
                parts = path.split(b"/")
                for i in range(1, len(parts)):
                    parent = b"/".join(parts[:i])
                    # See if this parent path is being deleted (was a file, becoming a dir)
                    for other_change in changes:
                        if (
                            other_change.type == CHANGE_DELETE
                            and other_change.old
                            and other_change.old.path == parent
                        ):
                            paths_becoming_dirs.add(parent)

    # Check if any path that needs to become a directory has been modified
    for path in paths_becoming_dirs:
        full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
        try:
            current_stat = os.lstat(full_path)
        except FileNotFoundError:
            continue  # File doesn't exist, nothing to check
        except OSError as e:
            raise OSError(
                f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
            ) from e

        if stat.S_ISREG(current_stat.st_mode):
            # Find the old entry for this path
            old_change = None
            for change in changes:
                if (
                    change.type == CHANGE_DELETE
                    and change.old
                    and change.old.path == path
                ):
                    old_change = change
                    break

            if old_change:
                # Check if file has been modified
                file_matches = _check_file_matches(
                    repo.object_store,
                    full_path,
                    old_change.old.sha,
                    old_change.old.mode,
                    current_stat,
                    honor_filemode,
                    blob_normalizer,
                    path,
                )
                if not file_matches:
                    # Refuse to discard local edits just to make room for a directory.
                    raise OSError(
                        f"Cannot replace modified file with directory: {path!r}"
                    )

    # Check for uncommitted modifications before making any changes
    if not allow_overwrite_modified and old_tree_id:
        for change in changes:
            # Only check files that are being modified or deleted
            if change.type in (CHANGE_MODIFY, CHANGE_DELETE) and change.old:
                path = change.old.path
                # Skip .git paths and anything failing path validation,
                # mirroring the skip logic in the apply loop below.
                if path.startswith(b".git") or not validate_path(
                    path, validate_path_element
                ):
                    continue

                full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
                try:
                    current_stat = os.lstat(full_path)
                except FileNotFoundError:
                    continue  # File doesn't exist, nothing to check
                except OSError as e:
                    raise OSError(
                        f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
                    ) from e

                if stat.S_ISREG(current_stat.st_mode):
                    # Check if working tree file differs from old tree
                    file_matches = _check_file_matches(
                        repo.object_store,
                        full_path,
                        change.old.sha,
                        change.old.mode,
                        current_stat,
                        honor_filemode,
                        blob_normalizer,
                        path,
                    )
                    if not file_matches:
                        from .errors import WorkingTreeModifiedError

                        raise WorkingTreeModifiedError(
                            f"Your local changes to '{path.decode('utf-8', errors='replace')}' "
                            f"would be overwritten by checkout. "
                            f"Please commit your changes or stash them before you switch branches."
                        )

    # Apply the changes
    for change in changes:
        if change.type == CHANGE_DELETE:
            # Remove file/directory
            path = change.old.path
            if path.startswith(b".git") or not validate_path(
                path, validate_path_element
            ):
                continue

            full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
            try:
                delete_stat: Optional[os.stat_result] = os.lstat(full_path)
            except FileNotFoundError:
                delete_stat = None
            except OSError as e:
                raise OSError(
                    f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
                ) from e

            _transition_to_absent(repo, path, full_path, delete_stat, index)

        elif change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_UNCHANGED):
            # Add or modify file
            path = change.new.path
            if path.startswith(b".git") or not validate_path(
                path, validate_path_element
            ):
                continue

            full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
            try:
                modify_stat: Optional[os.stat_result] = os.lstat(full_path)
            except FileNotFoundError:
                modify_stat = None
            except OSError as e:
                raise OSError(
                    f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
                ) from e

            # Gitlink entries become submodule placeholders; everything
            # else becomes a regular file or symlink.
            if S_ISGITLINK(change.new.mode):
                _transition_to_submodule(
                    repo, path, full_path, modify_stat, change.new, index
                )
            else:
                _transition_to_file(
                    repo.object_store,
                    path,
                    full_path,
                    modify_stat,
                    change.new,
                    index,
                    honor_filemode,
                    symlink_fn,
                    blob_normalizer,
                    tree_encoding,
                )

        elif change.type in (CHANGE_RENAME, CHANGE_COPY):
            # Handle rename/copy: remove old, add new
            old_path = change.old.path
            new_path = change.new.path

            if not old_path.startswith(b".git") and validate_path(
                old_path, validate_path_element
            ):
                old_full_path = _tree_to_fs_path(repo_path, old_path, tree_encoding)
                try:
                    old_current_stat = os.lstat(old_full_path)
                except FileNotFoundError:
                    old_current_stat = None
                except OSError as e:
                    raise OSError(
                        f"Cannot access {old_path.decode('utf-8', errors='replace')}: {e}"
                    ) from e

                _transition_to_absent(
                    repo, old_path, old_full_path, old_current_stat, index
                )

            if not new_path.startswith(b".git") and validate_path(
                new_path, validate_path_element
            ):
                new_full_path = _tree_to_fs_path(repo_path, new_path, tree_encoding)
                try:
                    new_current_stat = os.lstat(new_full_path)
                except FileNotFoundError:
                    new_current_stat = None
                except OSError as e:
                    raise OSError(
                        f"Cannot access {new_path.decode('utf-8', errors='replace')}: {e}"
                    ) from e

                if S_ISGITLINK(change.new.mode):
                    _transition_to_submodule(
                        repo,
                        new_path,
                        new_full_path,
                        new_current_stat,
                        change.new,
                        index,
                    )
                else:
                    _transition_to_file(
                        repo.object_store,
                        new_path,
                        new_full_path,
                        new_current_stat,
                        change.new,
                        index,
                        honor_filemode,
                        symlink_fn,
                        blob_normalizer,
                        tree_encoding,
                    )

    index.write()
def get_unstaged_changes(
    index: Index,
    root_path: Union[str, bytes],
    filter_blob_callback: Optional[Callable] = None,
) -> Generator[bytes, None, None]:
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
      filter_blob_callback: Optional callable ``(blob, tree_path) -> blob``
        applied to each working-tree blob before its id is compared
        against the index entry
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        if isinstance(entry, ConflictedIndexEntry):
            # Conflicted files are always unstaged
            yield tree_path
            continue

        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue

            # Skip special files (fifos, sockets, devices, ...)
            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue

            blob = blob_from_path_and_stat(full_path, st)

            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path
  1836. def _tree_to_fs_path(
  1837. root_path: bytes, tree_path: bytes, tree_encoding: str = "utf-8"
  1838. ) -> bytes:
  1839. """Convert a git tree path to a file system path.
  1840. Args:
  1841. root_path: Root filesystem path
  1842. tree_path: Git tree path as bytes (encoded with tree_encoding)
  1843. tree_encoding: Encoding used for tree paths (default: utf-8)
  1844. Returns: File system path.
  1845. """
  1846. assert isinstance(tree_path, bytes)
  1847. if os_sep_bytes != b"/":
  1848. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  1849. else:
  1850. sep_corrected_path = tree_path
  1851. # On Windows, we need to handle tree path encoding properly
  1852. if sys.platform == "win32":
  1853. # Decode from tree encoding, then re-encode for filesystem
  1854. try:
  1855. tree_path_str = sep_corrected_path.decode(tree_encoding)
  1856. sep_corrected_path = os.fsencode(tree_path_str)
  1857. except UnicodeDecodeError:
  1858. # If decoding fails, use the original bytes
  1859. pass
  1860. return os.path.join(root_path, sep_corrected_path)
  1861. def _fs_to_tree_path(fs_path: Union[str, bytes], tree_encoding: str = "utf-8") -> bytes:
  1862. """Convert a file system path to a git tree path.
  1863. Args:
  1864. fs_path: File system path.
  1865. tree_encoding: Encoding to use for tree paths (default: utf-8)
  1866. Returns: Git tree path as bytes (encoded with tree_encoding)
  1867. """
  1868. if not isinstance(fs_path, bytes):
  1869. fs_path_bytes = os.fsencode(fs_path)
  1870. else:
  1871. fs_path_bytes = fs_path
  1872. # On Windows, we need to ensure tree paths are properly encoded
  1873. if sys.platform == "win32":
  1874. try:
  1875. # Decode from filesystem encoding, then re-encode with tree encoding
  1876. fs_path_str = os.fsdecode(fs_path_bytes)
  1877. fs_path_bytes = fs_path_str.encode(tree_encoding)
  1878. except UnicodeDecodeError:
  1879. # If filesystem decoding fails, use the original bytes
  1880. pass
  1881. if os_sep_bytes != b"/":
  1882. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  1883. else:
  1884. tree_path = fs_path_bytes
  1885. return tree_path
  1886. def index_entry_from_directory(st: os.stat_result, path: bytes) -> Optional[IndexEntry]:
  1887. if os.path.exists(os.path.join(path, b".git")):
  1888. head = read_submodule_head(path)
  1889. if head is None:
  1890. return None
  1891. return index_entry_from_stat(st, head, mode=S_IFGITLINK)
  1892. return None
  1893. def index_entry_from_path(
  1894. path: bytes, object_store: Optional[ObjectContainer] = None
  1895. ) -> Optional[IndexEntry]:
  1896. """Create an index from a filesystem path.
  1897. This returns an index value for files, symlinks
  1898. and tree references. for directories and
  1899. non-existent files it returns None
  1900. Args:
  1901. path: Path to create an index entry for
  1902. object_store: Optional object store to
  1903. save new blobs in
  1904. Returns: An index entry; None for directories
  1905. """
  1906. assert isinstance(path, bytes)
  1907. st = os.lstat(path)
  1908. if stat.S_ISDIR(st.st_mode):
  1909. return index_entry_from_directory(st, path)
  1910. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  1911. blob = blob_from_path_and_stat(path, st)
  1912. if object_store is not None:
  1913. object_store.add_object(blob)
  1914. return index_entry_from_stat(st, blob.id)
  1915. return None
  1916. def iter_fresh_entries(
  1917. paths: Iterable[bytes],
  1918. root_path: bytes,
  1919. object_store: Optional[ObjectContainer] = None,
  1920. ) -> Iterator[tuple[bytes, Optional[IndexEntry]]]:
  1921. """Iterate over current versions of index entries on disk.
  1922. Args:
  1923. paths: Paths to iterate over
  1924. root_path: Root path to access from
  1925. object_store: Optional store to save new blobs in
  1926. Returns: Iterator over path, index_entry
  1927. """
  1928. for path in paths:
  1929. p = _tree_to_fs_path(root_path, path)
  1930. try:
  1931. entry = index_entry_from_path(p, object_store=object_store)
  1932. except (FileNotFoundError, IsADirectoryError):
  1933. entry = None
  1934. yield path, entry
  1935. def iter_fresh_objects(
  1936. paths: Iterable[bytes],
  1937. root_path: bytes,
  1938. include_deleted: bool = False,
  1939. object_store: Optional[ObjectContainer] = None,
  1940. ) -> Iterator[tuple[bytes, Optional[bytes], Optional[int]]]:
  1941. """Iterate over versions of objects on disk referenced by index.
  1942. Args:
  1943. root_path: Root path to access from
  1944. include_deleted: Include deleted entries with sha and
  1945. mode set to None
  1946. object_store: Optional object store to report new items to
  1947. Returns: Iterator over path, sha, mode
  1948. """
  1949. for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
  1950. if entry is None:
  1951. if include_deleted:
  1952. yield path, None, None
  1953. else:
  1954. yield path, entry.sha, cleanup_mode(entry.mode)
  1955. def refresh_index(index: Index, root_path: bytes) -> None:
  1956. """Refresh the contents of an index.
  1957. This is the equivalent to running 'git commit -a'.
  1958. Args:
  1959. index: Index to update
  1960. root_path: Root filesystem path
  1961. """
  1962. for path, entry in iter_fresh_entries(index, root_path):
  1963. if entry:
  1964. index[path] = entry
  1965. class locked_index:
  1966. """Lock the index while making modifications.
  1967. Works as a context manager.
  1968. """
  1969. _file: "_GitFile"
  1970. def __init__(self, path: Union[bytes, str]) -> None:
  1971. self._path = path
  1972. def __enter__(self) -> Index:
  1973. self._file = GitFile(self._path, "wb")
  1974. self._index = Index(self._path)
  1975. return self._index
  1976. def __exit__(
  1977. self,
  1978. exc_type: Optional[type],
  1979. exc_value: Optional[BaseException],
  1980. traceback: Optional[types.TracebackType],
  1981. ) -> None:
  1982. if exc_type is not None:
  1983. self._file.abort()
  1984. return
  1985. try:
  1986. from typing import BinaryIO, cast
  1987. f = SHA1Writer(cast(BinaryIO, self._file))
  1988. write_index_dict(cast(BinaryIO, f), self._index._byname)
  1989. except BaseException:
  1990. self._file.abort()
  1991. else:
  1992. f.close()