index.py 88 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Parser for the git index file format."""
  22. import errno
  23. import os
  24. import shutil
  25. import stat
  26. import struct
  27. import sys
  28. import types
  29. from collections.abc import Generator, Iterable, Iterator
  30. from dataclasses import dataclass
  31. from enum import Enum
  32. from typing import (
  33. IO,
  34. TYPE_CHECKING,
  35. Any,
  36. BinaryIO,
  37. Callable,
  38. Optional,
  39. Union,
  40. )
  41. if TYPE_CHECKING:
  42. from .config import Config
  43. from .diff_tree import TreeChange
  44. from .file import _GitFile
  45. from .filters import FilterBlobNormalizer
  46. from .object_store import BaseObjectStore
  47. from .repo import Repo
  48. from .file import GitFile
  49. from .object_store import iter_tree_contents
  50. from .objects import (
  51. S_IFGITLINK,
  52. S_ISGITLINK,
  53. Blob,
  54. ObjectID,
  55. Tree,
  56. TreeEntry,
  57. hex_to_sha,
  58. sha_to_hex,
  59. )
  60. from .pack import ObjectContainer, SHA1Reader, SHA1Writer
# Type alias for recursive tree structure used in commit_tree
if sys.version_info >= (3, 10):
    TreeDict = dict[bytes, Union["TreeDict", tuple[int, bytes]]]
else:
    # Recursive type aliases are not expressible before Python 3.10.
    TreeDict = dict[bytes, Any]

# 2-bit stage (during merge); shifted down by FLAG_STAGESHIFT to get the
# Stage enum value.
FLAG_STAGEMASK = 0x3000
FLAG_STAGESHIFT = 12
# Low 12 bits of the flags word hold the entry name length.
FLAG_NAMEMASK = 0x0FFF

# assume-valid
FLAG_VALID = 0x8000
# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

# Index format version written by default.
DEFAULT_VERSION = 2

# Index extension signatures (4 uppercase ASCII bytes each).
TREE_EXTENSION = b"TREE"
REUC_EXTENSION = b"REUC"
UNTR_EXTENSION = b"UNTR"
EOIE_EXTENSION = b"EOIE"
IEOT_EXTENSION = b"IEOT"
  85. def _encode_varint(value: int) -> bytes:
  86. """Encode an integer using variable-width encoding.
  87. Same format as used for OFS_DELTA pack entries and index v4 path compression.
  88. Uses 7 bits per byte, with the high bit indicating continuation.
  89. Args:
  90. value: Integer to encode
  91. Returns:
  92. Encoded bytes
  93. """
  94. if value == 0:
  95. return b"\x00"
  96. result = []
  97. while value > 0:
  98. byte = value & 0x7F # Take lower 7 bits
  99. value >>= 7
  100. if value > 0:
  101. byte |= 0x80 # Set continuation bit
  102. result.append(byte)
  103. return bytes(result)
  104. def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
  105. """Decode a variable-width encoded integer.
  106. Args:
  107. data: Bytes to decode from
  108. offset: Starting offset in data
  109. Returns:
  110. tuple of (decoded_value, new_offset)
  111. """
  112. value = 0
  113. shift = 0
  114. pos = offset
  115. while pos < len(data):
  116. byte = data[pos]
  117. pos += 1
  118. value |= (byte & 0x7F) << shift
  119. shift += 7
  120. if not (byte & 0x80): # No continuation bit
  121. break
  122. return value, pos
  123. def _compress_path(path: bytes, previous_path: bytes) -> bytes:
  124. """Compress a path relative to the previous path for index version 4.
  125. Args:
  126. path: Path to compress
  127. previous_path: Previous path for comparison
  128. Returns:
  129. Compressed path data (varint prefix_len + suffix)
  130. """
  131. # Find the common prefix length
  132. common_len = 0
  133. min_len = min(len(path), len(previous_path))
  134. for i in range(min_len):
  135. if path[i] == previous_path[i]:
  136. common_len += 1
  137. else:
  138. break
  139. # The number of bytes to remove from the end of previous_path
  140. # to get the common prefix
  141. remove_len = len(previous_path) - common_len
  142. # The suffix to append
  143. suffix = path[common_len:]
  144. # Encode: varint(remove_len) + suffix + NUL
  145. return _encode_varint(remove_len) + suffix + b"\x00"
  146. def _decompress_path(
  147. data: bytes, offset: int, previous_path: bytes
  148. ) -> tuple[bytes, int]:
  149. """Decompress a path from index version 4 compressed format.
  150. Args:
  151. data: Raw data containing compressed path
  152. offset: Starting offset in data
  153. previous_path: Previous path for decompression
  154. Returns:
  155. tuple of (decompressed_path, new_offset)
  156. """
  157. # Decode the number of bytes to remove from previous path
  158. remove_len, new_offset = _decode_varint(data, offset)
  159. # Find the NUL terminator for the suffix
  160. suffix_start = new_offset
  161. suffix_end = suffix_start
  162. while suffix_end < len(data) and data[suffix_end] != 0:
  163. suffix_end += 1
  164. if suffix_end >= len(data):
  165. raise ValueError("Unterminated path suffix in compressed entry")
  166. suffix = data[suffix_start:suffix_end]
  167. new_offset = suffix_end + 1 # Skip the NUL terminator
  168. # Reconstruct the path
  169. if remove_len > len(previous_path):
  170. raise ValueError(
  171. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  172. )
  173. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  174. path = prefix + suffix
  175. return path, new_offset
  176. def _decompress_path_from_stream(
  177. f: BinaryIO, previous_path: bytes
  178. ) -> tuple[bytes, int]:
  179. """Decompress a path from index version 4 compressed format, reading from stream.
  180. Args:
  181. f: File-like object to read from
  182. previous_path: Previous path for decompression
  183. Returns:
  184. tuple of (decompressed_path, bytes_consumed)
  185. """
  186. # Decode the varint for remove_len by reading byte by byte
  187. remove_len = 0
  188. shift = 0
  189. bytes_consumed = 0
  190. while True:
  191. byte_data = f.read(1)
  192. if not byte_data:
  193. raise ValueError("Unexpected end of file while reading varint")
  194. byte = byte_data[0]
  195. bytes_consumed += 1
  196. remove_len |= (byte & 0x7F) << shift
  197. shift += 7
  198. if not (byte & 0x80): # No continuation bit
  199. break
  200. # Read the suffix until NUL terminator
  201. suffix = b""
  202. while True:
  203. byte_data = f.read(1)
  204. if not byte_data:
  205. raise ValueError("Unexpected end of file while reading path suffix")
  206. byte = byte_data[0]
  207. bytes_consumed += 1
  208. if byte == 0: # NUL terminator
  209. break
  210. suffix += bytes([byte])
  211. # Reconstruct the path
  212. if remove_len > len(previous_path):
  213. raise ValueError(
  214. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  215. )
  216. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  217. path = prefix + suffix
  218. return path, bytes_consumed
class Stage(Enum):
    """Represents the stage of an index entry during merge conflicts."""

    # Stage 0 is a normal (non-conflicted) entry; stages 1-3 are the
    # three sides of a merge, stored in the 2-bit FLAG_STAGEMASK field.
    NORMAL = 0
    MERGE_CONFLICT_ANCESTOR = 1
    MERGE_CONFLICT_THIS = 2
    MERGE_CONFLICT_OTHER = 3
@dataclass
class SerializedIndexEntry:
    """Represents a serialized index entry as stored in the index file.

    This dataclass holds the raw data for an index entry before it's
    parsed into the more user-friendly IndexEntry format.
    """

    # Entry path as stored in the index.
    name: bytes
    # ctime/mtime may be plain seconds (int/float) or (secs, nsecs) pairs.
    ctime: Union[int, float, tuple[int, int]]
    mtime: Union[int, float, tuple[int, int]]
    dev: int
    ino: int
    # File mode bits (see the stat module).
    mode: int
    uid: int
    gid: int
    size: int
    # Object ID; read_cache_entry stores this as hex (via sha_to_hex).
    sha: bytes
    # Flags word; the low FLAG_NAMEMASK bits (name length) are masked out
    # when read back in.
    flags: int
    # v3+ extended flags (skip-worktree, intend-to-add); 0 otherwise.
    extended_flags: int

    def stage(self) -> Stage:
        """Extract the stage from the flags field.

        Returns:
          Stage enum value indicating merge conflict state
        """
        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
  249. @dataclass
  250. class IndexExtension:
  251. """Base class for index extensions."""
  252. signature: bytes
  253. data: bytes
  254. @classmethod
  255. def from_raw(cls, signature: bytes, data: bytes) -> "IndexExtension":
  256. """Create an extension from raw data.
  257. Args:
  258. signature: 4-byte extension signature
  259. data: Extension data
  260. Returns:
  261. Parsed extension object
  262. """
  263. if signature == TREE_EXTENSION:
  264. return TreeExtension.from_bytes(data)
  265. elif signature == REUC_EXTENSION:
  266. return ResolveUndoExtension.from_bytes(data)
  267. elif signature == UNTR_EXTENSION:
  268. return UntrackedExtension.from_bytes(data)
  269. else:
  270. # Unknown extension - just store raw data
  271. return cls(signature, data)
  272. def to_bytes(self) -> bytes:
  273. """Serialize extension to bytes."""
  274. return self.data
class TreeExtension(IndexExtension):
    """Tree cache extension."""

    def __init__(self, entries: list[tuple[bytes, bytes, int]]) -> None:
        """Initialize TreeExtension.

        Args:
          entries: List of tree cache entries (path, sha, flags)
        """
        self.entries = entries
        # Payload left empty: serialization is not implemented yet.
        super().__init__(TREE_EXTENSION, b"")

    @classmethod
    def from_bytes(cls, data: bytes) -> "TreeExtension":
        """Parse TreeExtension from bytes.

        Args:
          data: Raw bytes to parse
        Returns:
          TreeExtension instance
        """
        # TODO: Implement tree cache parsing; currently discards `data`
        # and returns an empty entry list.
        return cls([])

    def to_bytes(self) -> bytes:
        """Serialize TreeExtension to bytes.

        Returns:
          Serialized extension data
        """
        # TODO: Implement tree cache serialization; currently always empty.
        return b""
class ResolveUndoExtension(IndexExtension):
    """Resolve undo extension for recording merge conflicts."""

    def __init__(self, entries: list[tuple[bytes, list[tuple[int, bytes]]]]) -> None:
        """Initialize ResolveUndoExtension.

        Args:
          entries: List of (path, stages) where stages is a list of (stage, sha) tuples
        """
        self.entries = entries
        # Payload left empty: serialization is not implemented yet.
        super().__init__(REUC_EXTENSION, b"")

    @classmethod
    def from_bytes(cls, data: bytes) -> "ResolveUndoExtension":
        """Parse ResolveUndoExtension from bytes.

        Args:
          data: Raw bytes to parse
        Returns:
          ResolveUndoExtension instance
        """
        # TODO: Implement resolve undo parsing; currently discards `data`
        # and returns an empty entry list.
        return cls([])

    def to_bytes(self) -> bytes:
        """Serialize ResolveUndoExtension to bytes.

        Returns:
          Serialized extension data
        """
        # TODO: Implement resolve undo serialization; currently always empty.
        return b""
class UntrackedExtension(IndexExtension):
    """Untracked cache extension."""

    def __init__(self, data: bytes) -> None:
        """Initialize UntrackedExtension.

        Args:
          data: Raw untracked cache data
        """
        # The raw payload is stored verbatim; no parsing is performed.
        super().__init__(UNTR_EXTENSION, data)

    @classmethod
    def from_bytes(cls, data: bytes) -> "UntrackedExtension":
        """Parse UntrackedExtension from bytes.

        Args:
          data: Raw bytes to parse
        Returns:
          UntrackedExtension instance
        """
        return cls(data)
  344. @dataclass
  345. class IndexEntry:
  346. """Represents an entry in the Git index.
  347. This is a higher-level representation of an index entry that includes
  348. parsed data and convenience methods.
  349. """
  350. ctime: Union[int, float, tuple[int, int]]
  351. mtime: Union[int, float, tuple[int, int]]
  352. dev: int
  353. ino: int
  354. mode: int
  355. uid: int
  356. gid: int
  357. size: int
  358. sha: bytes
  359. flags: int = 0
  360. extended_flags: int = 0
  361. @classmethod
  362. def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
  363. """Create an IndexEntry from a SerializedIndexEntry.
  364. Args:
  365. serialized: SerializedIndexEntry to convert
  366. Returns:
  367. New IndexEntry instance
  368. """
  369. return cls(
  370. ctime=serialized.ctime,
  371. mtime=serialized.mtime,
  372. dev=serialized.dev,
  373. ino=serialized.ino,
  374. mode=serialized.mode,
  375. uid=serialized.uid,
  376. gid=serialized.gid,
  377. size=serialized.size,
  378. sha=serialized.sha,
  379. flags=serialized.flags,
  380. extended_flags=serialized.extended_flags,
  381. )
  382. def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
  383. """Serialize this entry with a given name and stage.
  384. Args:
  385. name: Path name for the entry
  386. stage: Merge conflict stage
  387. Returns:
  388. SerializedIndexEntry ready for writing to disk
  389. """
  390. # Clear out any existing stage bits, then set them from the Stage.
  391. new_flags = self.flags & ~FLAG_STAGEMASK
  392. new_flags |= stage.value << FLAG_STAGESHIFT
  393. return SerializedIndexEntry(
  394. name=name,
  395. ctime=self.ctime,
  396. mtime=self.mtime,
  397. dev=self.dev,
  398. ino=self.ino,
  399. mode=self.mode,
  400. uid=self.uid,
  401. gid=self.gid,
  402. size=self.size,
  403. sha=self.sha,
  404. flags=new_flags,
  405. extended_flags=self.extended_flags,
  406. )
  407. def stage(self) -> Stage:
  408. """Get the merge conflict stage of this entry.
  409. Returns:
  410. Stage enum value
  411. """
  412. return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
  413. @property
  414. def skip_worktree(self) -> bool:
  415. """Return True if the skip-worktree bit is set in extended_flags."""
  416. return bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
  417. def set_skip_worktree(self, skip: bool = True) -> None:
  418. """Helper method to set or clear the skip-worktree bit in extended_flags.
  419. Also sets FLAG_EXTENDED in self.flags if needed.
  420. """
  421. if skip:
  422. # Turn on the skip-worktree bit
  423. self.extended_flags |= EXTENDED_FLAG_SKIP_WORKTREE
  424. # Also ensure the main 'extended' bit is set in flags
  425. self.flags |= FLAG_EXTENDED
  426. else:
  427. # Turn off the skip-worktree bit
  428. self.extended_flags &= ~EXTENDED_FLAG_SKIP_WORKTREE
  429. # Optionally unset the main extended bit if no extended flags remain
  430. if self.extended_flags == 0:
  431. self.flags &= ~FLAG_EXTENDED
  432. class ConflictedIndexEntry:
  433. """Index entry that represents a conflict."""
  434. ancestor: Optional[IndexEntry]
  435. this: Optional[IndexEntry]
  436. other: Optional[IndexEntry]
  437. def __init__(
  438. self,
  439. ancestor: Optional[IndexEntry] = None,
  440. this: Optional[IndexEntry] = None,
  441. other: Optional[IndexEntry] = None,
  442. ) -> None:
  443. """Initialize ConflictedIndexEntry.
  444. Args:
  445. ancestor: The common ancestor entry
  446. this: The current branch entry
  447. other: The other branch entry
  448. """
  449. self.ancestor = ancestor
  450. self.this = this
  451. self.other = other
# NOTE(review): presumably raised by operations that require a fully
# merged index when conflicted (stage > 0) entries are present — confirm
# against callers elsewhere in the file.
class UnmergedEntries(Exception):
    """Unmerged entries exist in the index."""
  454. def pathsplit(path: bytes) -> tuple[bytes, bytes]:
  455. """Split a /-delimited path into a directory part and a basename.
  456. Args:
  457. path: The path to split.
  458. Returns:
  459. Tuple with directory name and basename
  460. """
  461. try:
  462. (dirname, basename) = path.rsplit(b"/", 1)
  463. except ValueError:
  464. return (b"", path)
  465. else:
  466. return (dirname, basename)
  467. def pathjoin(*args: bytes) -> bytes:
  468. """Join a /-delimited path."""
  469. return b"/".join([p for p in args if p])
  470. def read_cache_time(f: BinaryIO) -> tuple[int, int]:
  471. """Read a cache time.
  472. Args:
  473. f: File-like object to read from
  474. Returns:
  475. Tuple with seconds and nanoseconds
  476. """
  477. return struct.unpack(">LL", f.read(8))
  478. def write_cache_time(f: IO[bytes], t: Union[int, float, tuple[int, int]]) -> None:
  479. """Write a cache time.
  480. Args:
  481. f: File-like object to write to
  482. t: Time to write (as int, float or tuple with secs and nsecs)
  483. """
  484. if isinstance(t, int):
  485. t = (t, 0)
  486. elif isinstance(t, float):
  487. (secs, nsecs) = divmod(t, 1.0)
  488. t = (int(secs), int(nsecs * 1000000000))
  489. elif not isinstance(t, tuple):
  490. raise TypeError(t)
  491. f.write(struct.pack(">LL", *t))
def read_cache_entry(
    f: BinaryIO, version: int, previous_path: bytes = b""
) -> SerializedIndexEntry:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index version
      previous_path: Previous entry's path (for version 4 compression)
    Returns:
      SerializedIndexEntry with the sha as hex and name-length bits
      masked out of flags
    Raises:
      AssertionError: if the extended flag is set in a pre-v3 index
    """
    # Remember where the entry started so padding can be computed below.
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    # Fixed-width stat data: six 32-bit fields, 20-byte raw sha, 16-bit flags.
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError("extended flag set in index with version < 3")
        # v3+ only: an extra 16-bit word of extended flags follows.
        (extended_flags,) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    if version >= 4:
        # Version 4: paths are always compressed (name_len should be 0)
        name, _consumed = _decompress_path_from_stream(f, previous_path)
    else:
        # Versions < 4: regular name reading; the low flag bits hold the
        # name length.
        name = f.read(flags & FLAG_NAMEMASK)
    # Padding: pre-v4 entries are NUL-padded so that each entry occupies
    # a multiple of 8 bytes; skip past the pad bytes.
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return SerializedIndexEntry(
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        flags & ~FLAG_NAMEMASK,
        extended_flags,
    )
  544. def write_cache_entry(
  545. f: IO[bytes], entry: SerializedIndexEntry, version: int, previous_path: bytes = b""
  546. ) -> None:
  547. """Write an index entry to a file.
  548. Args:
  549. f: File object
  550. entry: IndexEntry to write
  551. version: Index format version
  552. previous_path: Previous entry's path (for version 4 compression)
  553. """
  554. beginoffset = f.tell()
  555. write_cache_time(f, entry.ctime)
  556. write_cache_time(f, entry.mtime)
  557. if version >= 4:
  558. # Version 4: use compression but set name_len to actual filename length
  559. # This matches how C Git implements index v4 flags
  560. compressed_path = _compress_path(entry.name, previous_path)
  561. flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
  562. else:
  563. # Versions < 4: include actual name length
  564. flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
  565. if entry.extended_flags:
  566. flags |= FLAG_EXTENDED
  567. if flags & FLAG_EXTENDED and version is not None and version < 3:
  568. raise AssertionError("unable to use extended flags in version < 3")
  569. f.write(
  570. struct.pack(
  571. b">LLLLLL20sH",
  572. entry.dev & 0xFFFFFFFF,
  573. entry.ino & 0xFFFFFFFF,
  574. entry.mode,
  575. entry.uid,
  576. entry.gid,
  577. entry.size,
  578. hex_to_sha(entry.sha),
  579. flags,
  580. )
  581. )
  582. if flags & FLAG_EXTENDED:
  583. f.write(struct.pack(b">H", entry.extended_flags))
  584. if version >= 4:
  585. # Version 4: always write compressed path
  586. f.write(compressed_path)
  587. else:
  588. # Versions < 4: write regular path and padding
  589. f.write(entry.name)
  590. real_size = (f.tell() - beginoffset + 8) & ~7
  591. f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
  592. class UnsupportedIndexFormat(Exception):
  593. """An unsupported index format was encountered."""
  594. def __init__(self, version: int) -> None:
  595. """Initialize UnsupportedIndexFormat exception.
  596. Args:
  597. version: The unsupported index format version
  598. """
  599. self.index_format_version = version
  600. def read_index_header(f: BinaryIO) -> tuple[int, int]:
  601. """Read an index header from a file.
  602. Returns:
  603. tuple of (version, num_entries)
  604. """
  605. header = f.read(4)
  606. if header != b"DIRC":
  607. raise AssertionError(f"Invalid index file header: {header!r}")
  608. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  609. if version not in (1, 2, 3, 4):
  610. raise UnsupportedIndexFormat(version)
  611. return version, num_entries
  612. def write_index_extension(f: IO[bytes], extension: IndexExtension) -> None:
  613. """Write an index extension.
  614. Args:
  615. f: File-like object to write to
  616. extension: Extension to write
  617. """
  618. data = extension.to_bytes()
  619. f.write(extension.signature)
  620. f.write(struct.pack(">I", len(data)))
  621. f.write(data)
  622. def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
  623. """Read an index file, yielding the individual entries."""
  624. version, num_entries = read_index_header(f)
  625. previous_path = b""
  626. for i in range(num_entries):
  627. entry = read_cache_entry(f, version, previous_path)
  628. previous_path = entry.name
  629. yield entry
def read_index_dict_with_version(
    f: BinaryIO,
) -> tuple[
    dict[bytes, Union[IndexEntry, ConflictedIndexEntry]], int, list[IndexExtension]
]:
    """Read an index file and return it as a dictionary along with the version.

    Conflicted paths (entries at a non-zero merge stage) are collapsed into a
    single ConflictedIndexEntry value per path. After the entries, any index
    extensions found before the trailing 20-byte SHA checksum are parsed too.

    Returns:
      tuple of (entries_dict, version, extensions)
    """
    version, num_entries = read_index_header(f)
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    previous_path = b""
    for i in range(num_entries):
        entry = read_cache_entry(f, version, previous_path)
        previous_path = entry.name
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Multiple stages for one path: fold them into one conflict record.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    # Read extensions
    extensions = []
    while True:
        # Check if we're at the end (20 bytes before EOF for SHA checksum)
        current_pos = f.tell()
        f.seek(0, 2)  # EOF
        eof_pos = f.tell()
        f.seek(current_pos)
        if current_pos >= eof_pos - 20:
            break
        # Try to read extension signature
        signature = f.read(4)
        if len(signature) < 4:
            break
        # Check if it's a valid extension signature (4 uppercase letters)
        if not all(65 <= b <= 90 for b in signature):
            # Not an extension, seek back
            f.seek(-4, 1)
            break
        # Read extension size
        size_data = f.read(4)
        if len(size_data) < 4:
            break
        size = struct.unpack(">I", size_data)[0]
        # Read extension data
        data = f.read(size)
        if len(data) < size:
            break
        extension = IndexExtension.from_raw(signature, data)
        extensions.append(extension)
    return ret, version, extensions
def read_index_dict(
    f: BinaryIO,
) -> dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]:
    """Read an index file and return it as a dictionary.

    Entries are keyed by path. A path that appears at several merge stages
    is represented by a single ConflictedIndexEntry value holding the
    ancestor/this/other stage entries.

    Args:
      f: File object to read from
    """
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    for entry in read_index(f):
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate per-stage entries in one record.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret
  714. def write_index(
  715. f: IO[bytes],
  716. entries: list[SerializedIndexEntry],
  717. version: Optional[int] = None,
  718. extensions: Optional[list[IndexExtension]] = None,
  719. ) -> None:
  720. """Write an index file.
  721. Args:
  722. f: File-like object to write to
  723. version: Version number to write
  724. entries: Iterable over the entries to write
  725. extensions: Optional list of extensions to write
  726. """
  727. if version is None:
  728. version = DEFAULT_VERSION
  729. # STEP 1: check if any extended_flags are set
  730. uses_extended_flags = any(e.extended_flags != 0 for e in entries)
  731. if uses_extended_flags and version < 3:
  732. # Force or bump the version to 3
  733. version = 3
  734. # The rest is unchanged, but you might insert a final check:
  735. if version < 3:
  736. # Double-check no extended flags appear
  737. for e in entries:
  738. if e.extended_flags != 0:
  739. raise AssertionError("Attempt to use extended flags in index < v3")
  740. # Proceed with the existing code to write the header and entries.
  741. f.write(b"DIRC")
  742. f.write(struct.pack(b">LL", version, len(entries)))
  743. previous_path = b""
  744. for entry in entries:
  745. write_cache_entry(f, entry, version=version, previous_path=previous_path)
  746. previous_path = entry.name
  747. # Write extensions
  748. if extensions:
  749. for extension in extensions:
  750. write_index_extension(f, extension)
  751. def write_index_dict(
  752. f: IO[bytes],
  753. entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]],
  754. version: Optional[int] = None,
  755. extensions: Optional[list[IndexExtension]] = None,
  756. ) -> None:
  757. """Write an index file based on the contents of a dictionary.
  758. being careful to sort by path and then by stage.
  759. """
  760. entries_list = []
  761. for key in sorted(entries):
  762. value = entries[key]
  763. if isinstance(value, ConflictedIndexEntry):
  764. if value.ancestor is not None:
  765. entries_list.append(
  766. value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)
  767. )
  768. if value.this is not None:
  769. entries_list.append(
  770. value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)
  771. )
  772. if value.other is not None:
  773. entries_list.append(
  774. value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)
  775. )
  776. else:
  777. entries_list.append(value.serialize(key, Stage.NORMAL))
  778. write_index(f, entries_list, version=version, extensions=extensions)
  779. def cleanup_mode(mode: int) -> int:
  780. """Cleanup a mode value.
  781. This will return a mode that can be stored in a tree object.
  782. Args:
  783. mode: Mode to clean up.
  784. Returns:
  785. mode
  786. """
  787. if stat.S_ISLNK(mode):
  788. return stat.S_IFLNK
  789. elif stat.S_ISDIR(mode):
  790. return stat.S_IFDIR
  791. elif S_ISGITLINK(mode):
  792. return S_IFGITLINK
  793. ret = stat.S_IFREG | 0o644
  794. if mode & 0o100:
  795. ret |= 0o111
  796. return ret
class Index:
    """A Git Index file."""

    # Maps path (bytes) to either a normal IndexEntry or, for conflicted
    # paths, a ConflictedIndexEntry holding the per-stage entries.
    _byname: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]

    def __init__(
        self,
        filename: Union[bytes, str, os.PathLike[str]],
        read: bool = True,
        skip_hash: bool = False,
        version: Optional[int] = None,
    ) -> None:
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
          skip_hash: Whether to skip SHA1 hash when writing (for manyfiles feature)
          version: Index format version to use (None = auto-detect from file or use default)
        """
        self._filename = os.fspath(filename)
        # TODO(jelmer): Store the version returned by read_index
        self._version = version
        self._skip_hash = skip_hash
        self._extensions: list[IndexExtension] = []
        self.clear()
        if read:
            self.read()

    @property
    def path(self) -> Union[bytes, str]:
        """Get the path to the index file.

        Returns:
          Path to the index file
        """
        return self._filename

    def __repr__(self) -> str:
        """Return string representation of Index."""
        return f"{self.__class__.__name__}({self._filename!r})"

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            # Filter out extensions with no meaningful data
            meaningful_extensions = []
            for ext in self._extensions:
                # Skip extensions that have empty data
                ext_data = ext.to_bytes()
                if ext_data:
                    meaningful_extensions.append(ext)
            if self._skip_hash:
                # When skipHash is enabled, write the index without computing SHA1
                write_index_dict(
                    f,
                    self._byname,
                    version=self._version,
                    extensions=meaningful_extensions,
                )
                # Write 20 zero bytes instead of SHA1
                f.write(b"\x00" * 20)
                f.close()
            else:
                sha1_writer = SHA1Writer(f)
                write_index_dict(
                    sha1_writer,
                    self._byname,
                    version=self._version,
                    extensions=meaningful_extensions,
                )
                sha1_writer.close()
        except:  # noqa: E722  # ensure the file is closed, then re-raise
            f.close()
            raise

    def read(self) -> None:
        """Read current contents of index from disk."""
        # A missing index file is treated as an empty index, not an error.
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            sha1_reader = SHA1Reader(f)
            entries, version, extensions = read_index_dict_with_version(sha1_reader)
            self._version = version
            self._extensions = extensions
            self.update(entries)
            # Extensions have already been read by read_index_dict_with_version
            sha1_reader.check_sha(allow_empty=True)
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, key: bytes) -> Union[IndexEntry, ConflictedIndexEntry]:
        """Retrieve entry by relative path and stage.

        Returns: Either a IndexEntry or a ConflictedIndexEntry
        Raises KeyError: if the entry does not exist
        """
        return self._byname[key]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths and stages in this index."""
        return iter(self._byname)

    def __contains__(self, key: bytes) -> bool:
        """Check if a path exists in the index."""
        return key in self._byname

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path.

        Raises UnmergedEntries: if the path is conflicted.
        """
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path.

        Raises UnmergedEntries: if the path is conflicted.
        """
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.mode

    def iterobjects(self) -> Iterable[tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            if isinstance(entry, ConflictedIndexEntry):
                raise UnmergedEntries
            yield path, entry.sha, cleanup_mode(entry.mode)

    def has_conflicts(self) -> bool:
        """Check if the index contains any conflicted entries.

        Returns:
          True if any entries are conflicted, False otherwise
        """
        for value in self._byname.values():
            if isinstance(value, ConflictedIndexEntry):
                return True
        return False

    def clear(self) -> None:
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(
        self, name: bytes, value: Union[IndexEntry, ConflictedIndexEntry]
    ) -> None:
        """Set an entry in the index."""
        assert isinstance(name, bytes)
        self._byname[name] = value

    def __delitem__(self, name: bytes) -> None:
        """Delete an entry from the index."""
        del self._byname[name]

    def iteritems(
        self,
    ) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        """Iterate over (path, entry) pairs in the index.

        Returns:
          Iterator of (path, entry) tuples
        """
        return iter(self._byname.items())

    def items(self) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        """Get an iterator over (path, entry) pairs.

        Returns:
          Iterator of (path, entry) tuples
        """
        return iter(self._byname.items())

    def update(
        self, entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]
    ) -> None:
        """Update the index with multiple entries.

        Args:
          entries: Dictionary mapping paths to index entries
        """
        for key, value in entries.items():
            self[key] = value

    def paths(self) -> Generator[bytes, None, None]:
        """Generate all paths in the index.

        Yields:
          Path names as bytes
        """
        yield from self._byname.keys()

    def changes_from_tree(
        self,
        object_store: ObjectContainer,
        tree: ObjectID,
        want_unchanged: bool = False,
    ) -> Generator[
        tuple[
            tuple[Optional[bytes], Optional[bytes]],
            tuple[Optional[int], Optional[int]],
            tuple[Optional[bytes], Optional[bytes]],
        ],
        None,
        None,
    ]:
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path: bytes) -> tuple[bytes, int]:
            entry = self[path]
            if hasattr(entry, "sha") and hasattr(entry, "mode"):
                return entry.sha, cleanup_mode(entry.mode)
            else:
                # Handle ConflictedIndexEntry case
                return b"", 0

        yield from changes_from_tree(
            self.paths(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        )

    def commit(self, object_store: ObjectContainer) -> bytes:
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
def commit_tree(
    object_store: ObjectContainer, blobs: Iterable[tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries
    Returns:
      SHA1 of the created tree.
    """
    # Nested dict mirroring the directory structure; b"" is the root tree.
    trees: dict[bytes, TreeDict] = {b"": {}}

    def add_tree(path: bytes) -> TreeDict:
        # Return the dict for `path`, creating it (and its ancestors,
        # recursively) on first use.
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree: TreeDict = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path: bytes) -> bytes:
        # Depth-first: build and store subtrees before their parent so the
        # parent can reference the child SHAs.
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")
  1046. def commit_index(object_store: ObjectContainer, index: Index) -> bytes:
  1047. """Create a new tree from an index.
  1048. Args:
  1049. object_store: Object store to save the tree in
  1050. index: Index file
  1051. Note: This function is deprecated, use index.commit() instead.
  1052. Returns: Root tree sha.
  1053. """
  1054. return commit_tree(object_store, index.iterobjects())
def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], tuple[bytes, int]],
    object_store: ObjectContainer,
    tree: Optional[bytes],
    want_unchanged: bool = False,
) -> Iterable[
    tuple[
        tuple[Optional[bytes], Optional[bytes]],
        tuple[Optional[int], Optional[int]],
        tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported
    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # TODO(jelmer): Support a include_trees option
    # Names not matched while walking the tree are additions (reported last).
    other_names = set(names)

    if tree is not None:
        for name, mode, sha in iter_tree_contents(object_store, tree):
            assert name is not None and mode is not None and sha is not None
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))

    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            # Listed in the working copy but not actually present; skip.
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))
  1100. def index_entry_from_stat(
  1101. stat_val: os.stat_result,
  1102. hex_sha: bytes,
  1103. mode: Optional[int] = None,
  1104. ) -> IndexEntry:
  1105. """Create a new index entry from a stat value.
  1106. Args:
  1107. stat_val: POSIX stat_result instance
  1108. hex_sha: Hex sha of the object
  1109. mode: Optional file mode, will be derived from stat if not provided
  1110. """
  1111. if mode is None:
  1112. mode = cleanup_mode(stat_val.st_mode)
  1113. return IndexEntry(
  1114. ctime=stat_val.st_ctime,
  1115. mtime=stat_val.st_mtime,
  1116. dev=stat_val.st_dev,
  1117. ino=stat_val.st_ino,
  1118. mode=mode,
  1119. uid=stat_val.st_uid,
  1120. gid=stat_val.st_gid,
  1121. size=stat_val.st_size,
  1122. sha=hex_sha,
  1123. flags=0,
  1124. extended_flags=0,
  1125. )
if sys.platform == "win32":
    # On Windows, creating symlinks either requires administrator privileges
    # or developer mode. Raise a more helpful error when we're unable to
    # create symlinks
    # https://github.com/jelmer/dulwich/issues/1005

    class WindowsSymlinkPermissionError(PermissionError):
        """Windows-specific error for symlink creation failures.

        This error is raised when symlink creation fails on Windows,
        typically due to lack of developer mode or administrator privileges.
        """

        def __init__(self, errno: int, msg: str, filename: Optional[str]) -> None:
            """Initialize WindowsSymlinkPermissionError."""
            # NOTE(review): super(PermissionError, self) skips PermissionError
            # in the MRO, so OSError.__init__ receives the (errno, strerror,
            # filename) triple directly — presumably intentional; confirm.
            super(PermissionError, self).__init__(
                errno,
                f"Unable to create symlink; do you have developer mode enabled? {msg}",
                filename,
            )

    def symlink(
        src: Union[str, bytes],
        dst: Union[str, bytes],
        target_is_directory: bool = False,
        *,
        dir_fd: Optional[int] = None,
    ) -> None:
        """Create a symbolic link on Windows with better error handling.

        Args:
          src: Source path for the symlink
          dst: Destination path where symlink will be created
          target_is_directory: Whether the target is a directory
          dir_fd: Optional directory file descriptor
        Raises:
          WindowsSymlinkPermissionError: If symlink creation fails due to permissions
        """
        try:
            return os.symlink(
                src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
            )
        except PermissionError as e:
            raise WindowsSymlinkPermissionError(
                e.errno or 0, e.strerror or "", e.filename
            ) from e
else:
    # Non-Windows platforms use os.symlink directly.
    symlink = os.symlink
def build_file_from_blob(
    blob: Blob,
    mode: int,
    target_path: bytes,
    *,
    honor_filemode: bool = True,
    tree_encoding: str = "utf-8",
    symlink_fn: Optional[
        Callable[
            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
            None,
        ]
    ] = None,
) -> os.stat_result:
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      tree_encoding: Encoding to use for tree contents
      symlink_fn: Function to use for creating symlinks
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        # No existing file at the target path.
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        # Symlinks can't be overwritten in place; remove any existing file.
        if oldstat:
            _remove_file_with_readonly_handling(target_path)
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            contents_str = contents.decode(tree_encoding)
            target_path_str = target_path.decode(tree_encoding)
            (symlink_fn or symlink)(contents_str, target_path_str)
        else:
            (symlink_fn or symlink)(contents, target_path)
    else:
        # Short-circuit: if the on-disk file already has identical contents,
        # skip the write and return its existing stat.
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat
        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)
        if honor_filemode:
            os.chmod(target_path, mode)
    return os.lstat(target_path)
# Normalized path elements that must never appear in a checkout path.
INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  1221. def _normalize_path_element_default(element: bytes) -> bytes:
  1222. """Normalize path element for default case-insensitive comparison."""
  1223. return element.lower()
  1224. def _normalize_path_element_ntfs(element: bytes) -> bytes:
  1225. """Normalize path element for NTFS filesystem."""
  1226. return element.rstrip(b". ").lower()
  1227. def _normalize_path_element_hfs(element: bytes) -> bytes:
  1228. """Normalize path element for HFS+ filesystem."""
  1229. import unicodedata
  1230. # Decode to Unicode (let UnicodeDecodeError bubble up)
  1231. element_str = element.decode("utf-8", errors="strict")
  1232. # Remove HFS+ ignorable characters
  1233. filtered = "".join(c for c in element_str if ord(c) not in HFS_IGNORABLE_CHARS)
  1234. # Normalize to NFD
  1235. normalized = unicodedata.normalize("NFD", filtered)
  1236. return normalized.lower().encode("utf-8", errors="strict")
  1237. def get_path_element_normalizer(config: "Config") -> Callable[[bytes], bytes]:
  1238. """Get the appropriate path element normalization function based on config.
  1239. Args:
  1240. config: Repository configuration object
  1241. Returns:
  1242. Function that normalizes path elements for the configured filesystem
  1243. """
  1244. import os
  1245. import sys
  1246. if config.get_boolean(b"core", b"protectNTFS", os.name == "nt"):
  1247. return _normalize_path_element_ntfs
  1248. elif config.get_boolean(b"core", b"protectHFS", sys.platform == "darwin"):
  1249. return _normalize_path_element_hfs
  1250. else:
  1251. return _normalize_path_element_default
  1252. def validate_path_element_default(element: bytes) -> bool:
  1253. """Validate a path element using default rules.
  1254. Args:
  1255. element: Path element to validate
  1256. Returns:
  1257. True if path element is valid, False otherwise
  1258. """
  1259. return _normalize_path_element_default(element) not in INVALID_DOTNAMES
  1260. def validate_path_element_ntfs(element: bytes) -> bool:
  1261. """Validate a path element using NTFS filesystem rules.
  1262. Args:
  1263. element: Path element to validate
  1264. Returns:
  1265. True if path element is valid for NTFS, False otherwise
  1266. """
  1267. normalized = _normalize_path_element_ntfs(element)
  1268. if normalized in INVALID_DOTNAMES:
  1269. return False
  1270. if normalized == b"git~1":
  1271. return False
  1272. return True
# HFS+ ignorable Unicode codepoints (from Git's utf8.c).
# HFS+ silently strips these from filenames, so they must be removed before
# comparing path elements against forbidden names like ".git".
HFS_IGNORABLE_CHARS = {
    0x200C,  # ZERO WIDTH NON-JOINER
    0x200D,  # ZERO WIDTH JOINER
    0x200E,  # LEFT-TO-RIGHT MARK
    0x200F,  # RIGHT-TO-LEFT MARK
    0x202A,  # LEFT-TO-RIGHT EMBEDDING
    0x202B,  # RIGHT-TO-LEFT EMBEDDING
    0x202C,  # POP DIRECTIONAL FORMATTING
    0x202D,  # LEFT-TO-RIGHT OVERRIDE
    0x202E,  # RIGHT-TO-LEFT OVERRIDE
    0x206A,  # INHIBIT SYMMETRIC SWAPPING
    0x206B,  # ACTIVATE SYMMETRIC SWAPPING
    0x206C,  # INHIBIT ARABIC FORM SHAPING
    0x206D,  # ACTIVATE ARABIC FORM SHAPING
    0x206E,  # NATIONAL DIGIT SHAPES
    0x206F,  # NOMINAL DIGIT SHAPES
    0xFEFF,  # ZERO WIDTH NO-BREAK SPACE
}
  1292. def validate_path_element_hfs(element: bytes) -> bool:
  1293. """Validate path element for HFS+ filesystem.
  1294. Equivalent to Git's is_hfs_dotgit and related checks.
  1295. Uses NFD normalization and ignores HFS+ ignorable characters.
  1296. """
  1297. try:
  1298. normalized = _normalize_path_element_hfs(element)
  1299. except UnicodeDecodeError:
  1300. # Malformed UTF-8 - be conservative and reject
  1301. return False
  1302. # Check against invalid names
  1303. if normalized in INVALID_DOTNAMES:
  1304. return False
  1305. # Also check for 8.3 short name
  1306. if normalized == b"git~1":
  1307. return False
  1308. return True
  1309. def validate_path(
  1310. path: bytes,
  1311. element_validator: Callable[[bytes], bool] = validate_path_element_default,
  1312. ) -> bool:
  1313. """Default path validator that just checks for .git/."""
  1314. parts = path.split(b"/")
  1315. for p in parts:
  1316. if not element_validator(p):
  1317. return False
  1318. else:
  1319. return True
def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: ObjectContainer,
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
    symlink_fn: Optional[
        Callable[
            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
            None,
        ]
    ] = None,
    blob_normalizer: Optional["FilterBlobNormalizer"] = None,
    tree_encoding: str = "utf-8",
) -> None:
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.
      symlink_fn: Function to use for creating symlinks
      blob_normalizer: An optional BlobNormalizer to use for converting line
        endings when writing blobs to the working directory.
      tree_encoding: Encoding used for tree paths (default: utf-8)

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for entry in iter_tree_contents(object_store, tree_id):
        assert (
            entry.path is not None and entry.mode is not None and entry.sha is not None
        )
        # Silently skip paths that fail validation (e.g. ".git" components).
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path, tree_encoding)

        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))

        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            # Submodule: only create the directory placeholder.
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            assert isinstance(obj, Blob)
            # Apply blob normalization for checkout if normalizer is provided
            if blob_normalizer is not None:
                obj = blob_normalizer.checkout_normalize(obj, entry.path)
            st = build_file_from_blob(
                obj,
                entry.mode,
                full_path,
                honor_filemode=honor_filemode,
                tree_encoding=tree_encoding,
                symlink_fn=symlink_fn,
            )

        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        # default to a stage 0 index entry (normal)
        # when reading from the filesystem
        index[entry.path] = index_entry_from_stat(st, entry.sha)

    index.write()
  1407. def blob_from_path_and_mode(
  1408. fs_path: bytes, mode: int, tree_encoding: str = "utf-8"
  1409. ) -> Blob:
  1410. """Create a blob from a path and a stat object.
  1411. Args:
  1412. fs_path: Full file system path to file
  1413. mode: File mode
  1414. tree_encoding: Encoding to use for tree contents
  1415. Returns: A `Blob` object
  1416. """
  1417. assert isinstance(fs_path, bytes)
  1418. blob = Blob()
  1419. if stat.S_ISLNK(mode):
  1420. if sys.platform == "win32":
  1421. # os.readlink on Python3 on Windows requires a unicode string.
  1422. blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
  1423. else:
  1424. blob.data = os.readlink(fs_path)
  1425. else:
  1426. with open(fs_path, "rb") as f:
  1427. blob.data = f.read()
  1428. return blob
  1429. def blob_from_path_and_stat(
  1430. fs_path: bytes, st: os.stat_result, tree_encoding: str = "utf-8"
  1431. ) -> Blob:
  1432. """Create a blob from a path and a stat object.
  1433. Args:
  1434. fs_path: Full file system path to file
  1435. st: A stat object
  1436. tree_encoding: Encoding to use for tree contents
  1437. Returns: A `Blob` object
  1438. """
  1439. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule
    Returns: HEAD sha, None if not a valid head/repository
    """
    # Imported here to avoid a module-level import cycle with .repo.
    from .errors import NotGitRepository
    from .repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        # Repository exists but has no HEAD (e.g. no commits yet).
        return None
def _has_directory_changed(tree_path: bytes, entry: IndexEntry) -> bool:
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it's a directory
    and a submodule, check the submodule head to see if it's has changed. If
    not, consider the file as changed as Git tracked a file and not a
    directory.

    Return true if the given path should be considered as changed and False
    otherwise or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        # Changed iff the recorded sha differs from the submodule's HEAD.
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True

    return False
# Native path separator as bytes, used to translate between git's
# "/"-separated tree paths and filesystem paths.
os_sep_bytes = os.sep.encode("ascii")
  1481. def _ensure_parent_dir_exists(full_path: bytes) -> None:
  1482. """Ensure parent directory exists, checking no parent is a file."""
  1483. parent_dir = os.path.dirname(full_path)
  1484. if parent_dir and not os.path.exists(parent_dir):
  1485. # Walk up the directory tree to find the first existing parent
  1486. current = parent_dir
  1487. parents_to_check: list[bytes] = []
  1488. while current and not os.path.exists(current):
  1489. parents_to_check.insert(0, current)
  1490. new_parent = os.path.dirname(current)
  1491. if new_parent == current:
  1492. # Reached the root or can't go up further
  1493. break
  1494. current = new_parent
  1495. # Check if the existing parent (if any) is a directory
  1496. if current and os.path.exists(current) and not os.path.isdir(current):
  1497. raise OSError(
  1498. f"Cannot create directory, parent path is a file: {current!r}"
  1499. )
  1500. # Now check each parent we need to create isn't blocked by an existing file
  1501. for parent_path in parents_to_check:
  1502. if os.path.exists(parent_path) and not os.path.isdir(parent_path):
  1503. raise OSError(
  1504. f"Cannot create directory, parent path is a file: {parent_path!r}"
  1505. )
  1506. os.makedirs(parent_dir)
  1507. def _remove_file_with_readonly_handling(path: bytes) -> None:
  1508. """Remove a file, handling read-only files on Windows.
  1509. Args:
  1510. path: Path to the file to remove
  1511. """
  1512. try:
  1513. os.unlink(path)
  1514. except PermissionError:
  1515. # On Windows, remove read-only attribute and retry
  1516. if sys.platform == "win32":
  1517. os.chmod(path, stat.S_IWRITE | stat.S_IREAD)
  1518. os.unlink(path)
  1519. else:
  1520. raise
  1521. def _remove_empty_parents(path: bytes, stop_at: bytes) -> None:
  1522. """Remove empty parent directories up to stop_at."""
  1523. parent = os.path.dirname(path)
  1524. while parent and parent != stop_at:
  1525. try:
  1526. os.rmdir(parent)
  1527. parent = os.path.dirname(parent)
  1528. except FileNotFoundError:
  1529. # Directory doesn't exist - stop trying
  1530. break
  1531. except OSError as e:
  1532. if e.errno == errno.ENOTEMPTY:
  1533. # Directory not empty - stop trying
  1534. break
  1535. raise
  1536. def _check_symlink_matches(
  1537. full_path: bytes, repo_object_store: "BaseObjectStore", entry_sha: bytes
  1538. ) -> bool:
  1539. """Check if symlink target matches expected target.
  1540. Returns True if symlink matches, False if it doesn't match.
  1541. """
  1542. try:
  1543. current_target = os.readlink(full_path)
  1544. blob_obj = repo_object_store[entry_sha]
  1545. expected_target = blob_obj.as_raw_string()
  1546. if isinstance(current_target, str):
  1547. current_target = current_target.encode()
  1548. return current_target == expected_target
  1549. except FileNotFoundError:
  1550. # Symlink doesn't exist
  1551. return False
  1552. except OSError as e:
  1553. if e.errno == errno.EINVAL:
  1554. # Not a symlink
  1555. return False
  1556. raise
  1557. def _check_file_matches(
  1558. repo_object_store: "BaseObjectStore",
  1559. full_path: bytes,
  1560. entry_sha: bytes,
  1561. entry_mode: int,
  1562. current_stat: os.stat_result,
  1563. honor_filemode: bool,
  1564. blob_normalizer: Optional["FilterBlobNormalizer"] = None,
  1565. tree_path: Optional[bytes] = None,
  1566. ) -> bool:
  1567. """Check if a file on disk matches the expected git object.
  1568. Returns True if file matches, False if it doesn't match.
  1569. """
  1570. # Check mode first (if honor_filemode is True)
  1571. if honor_filemode:
  1572. current_mode = stat.S_IMODE(current_stat.st_mode)
  1573. expected_mode = stat.S_IMODE(entry_mode)
  1574. # For regular files, only check the user executable bit, not group/other permissions
  1575. # This matches Git's behavior where umask differences don't count as modifications
  1576. if stat.S_ISREG(current_stat.st_mode):
  1577. # Normalize regular file modes to ignore group/other write permissions
  1578. current_mode_normalized = (
  1579. current_mode & 0o755
  1580. ) # Keep only user rwx and all read+execute
  1581. expected_mode_normalized = expected_mode & 0o755
  1582. # For Git compatibility, regular files should be either 644 or 755
  1583. if expected_mode_normalized not in (0o644, 0o755):
  1584. expected_mode_normalized = 0o644 # Default for regular files
  1585. if current_mode_normalized not in (0o644, 0o755):
  1586. # Determine if it should be executable based on user execute bit
  1587. if current_mode & 0o100: # User execute bit is set
  1588. current_mode_normalized = 0o755
  1589. else:
  1590. current_mode_normalized = 0o644
  1591. if current_mode_normalized != expected_mode_normalized:
  1592. return False
  1593. else:
  1594. # For non-regular files (symlinks, etc.), check mode exactly
  1595. if current_mode != expected_mode:
  1596. return False
  1597. # If mode matches (or we don't care), check content via size first
  1598. blob_obj = repo_object_store[entry_sha]
  1599. if current_stat.st_size != blob_obj.raw_length():
  1600. return False
  1601. # Size matches, check actual content
  1602. try:
  1603. with open(full_path, "rb") as f:
  1604. current_content = f.read()
  1605. expected_content = blob_obj.as_raw_string()
  1606. if blob_normalizer and tree_path is not None:
  1607. assert isinstance(blob_obj, Blob)
  1608. normalized_blob = blob_normalizer.checkout_normalize(
  1609. blob_obj, tree_path
  1610. )
  1611. expected_content = normalized_blob.as_raw_string()
  1612. return current_content == expected_content
  1613. except (FileNotFoundError, PermissionError, IsADirectoryError):
  1614. return False
  1615. def _transition_to_submodule(
  1616. repo: "Repo",
  1617. path: bytes,
  1618. full_path: bytes,
  1619. current_stat: Optional[os.stat_result],
  1620. entry: Union[IndexEntry, TreeEntry],
  1621. index: Index,
  1622. ) -> None:
  1623. """Transition any type to submodule."""
  1624. from .submodule import ensure_submodule_placeholder
  1625. if current_stat is not None and stat.S_ISDIR(current_stat.st_mode):
  1626. # Already a directory, just ensure .git file exists
  1627. ensure_submodule_placeholder(repo, path)
  1628. else:
  1629. # Remove whatever is there and create submodule
  1630. if current_stat is not None:
  1631. _remove_file_with_readonly_handling(full_path)
  1632. ensure_submodule_placeholder(repo, path)
  1633. st = os.lstat(full_path)
  1634. assert entry.sha is not None
  1635. index[path] = index_entry_from_stat(st, entry.sha)
def _transition_to_file(
    object_store: "BaseObjectStore",
    path: bytes,
    full_path: bytes,
    current_stat: Optional[os.stat_result],
    entry: Union[IndexEntry, TreeEntry],
    index: Index,
    honor_filemode: bool,
    symlink_fn: Optional[
        Callable[
            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
            None,
        ]
    ],
    blob_normalizer: Optional["FilterBlobNormalizer"],
    tree_encoding: str = "utf-8",
) -> None:
    """Transition any type to regular file or symlink.

    Args:
      object_store: Object store to read the target blob from
      path: Tree path of the entry ("/"-separated bytes)
      full_path: Absolute filesystem path for the entry
      current_stat: lstat result for what is currently on disk, or None
        if nothing exists at the path
      entry: Target entry; must have sha and mode set
      index: Index to record the resulting entry in
      honor_filemode: Whether to apply mode bits from the entry
      symlink_fn: Function used to create symlinks
      blob_normalizer: Optional normalizer applied to blob content on checkout
      tree_encoding: Encoding used for tree paths
    """
    assert entry.sha is not None and entry.mode is not None

    # Check if we need to update
    if (
        current_stat is not None
        and stat.S_ISREG(current_stat.st_mode)
        and not stat.S_ISLNK(entry.mode)
    ):
        # File to file - check if update needed
        file_matches = _check_file_matches(
            object_store,
            full_path,
            entry.sha,
            entry.mode,
            current_stat,
            honor_filemode,
            blob_normalizer,
            path,
        )
        needs_update = not file_matches
    elif (
        current_stat is not None
        and stat.S_ISLNK(current_stat.st_mode)
        and stat.S_ISLNK(entry.mode)
    ):
        # Symlink to symlink - check if update needed
        symlink_matches = _check_symlink_matches(full_path, object_store, entry.sha)
        needs_update = not symlink_matches
    else:
        # Type change (or nothing on disk yet): always (re)write.
        needs_update = True

    if not needs_update:
        # Just update index - current_stat should always be valid here since we're not updating
        assert current_stat is not None
        index[path] = index_entry_from_stat(current_stat, entry.sha)
        return

    # Remove existing entry if needed
    if current_stat is not None and stat.S_ISDIR(current_stat.st_mode):
        # Remove directory
        dir_contents = set(os.listdir(full_path))
        git_file_name = b".git" if isinstance(full_path, bytes) else ".git"
        if git_file_name in dir_contents:
            # Submodule placeholder: only removable when it contains
            # nothing besides the .git file.
            if dir_contents != {git_file_name}:
                raise IsADirectoryError(
                    f"Cannot replace submodule with untracked files: {full_path!r}"
                )
            shutil.rmtree(full_path)
        else:
            try:
                os.rmdir(full_path)
            except OSError as e:
                if e.errno == errno.ENOTEMPTY:
                    raise IsADirectoryError(
                        f"Cannot replace non-empty directory with file: {full_path!r}"
                    )
                raise
    elif current_stat is not None:
        _remove_file_with_readonly_handling(full_path)

    # Ensure parent directory exists
    _ensure_parent_dir_exists(full_path)

    # Write the file
    blob_obj = object_store[entry.sha]
    assert isinstance(blob_obj, Blob)
    if blob_normalizer:
        # Apply checkout-time filters (e.g. line-ending conversion).
        blob_obj = blob_normalizer.checkout_normalize(blob_obj, path)
    st = build_file_from_blob(
        blob_obj,
        entry.mode,
        full_path,
        honor_filemode=honor_filemode,
        tree_encoding=tree_encoding,
        symlink_fn=symlink_fn,
    )
    index[path] = index_entry_from_stat(st, entry.sha)
  1726. def _transition_to_absent(
  1727. repo: "Repo",
  1728. path: bytes,
  1729. full_path: bytes,
  1730. current_stat: Optional[os.stat_result],
  1731. index: Index,
  1732. ) -> None:
  1733. """Remove any type of entry."""
  1734. if current_stat is None:
  1735. return
  1736. if stat.S_ISDIR(current_stat.st_mode):
  1737. # Check if it's a submodule directory
  1738. dir_contents = set(os.listdir(full_path))
  1739. git_file_name = b".git" if isinstance(full_path, bytes) else ".git"
  1740. if git_file_name in dir_contents and dir_contents == {git_file_name}:
  1741. shutil.rmtree(full_path)
  1742. else:
  1743. try:
  1744. os.rmdir(full_path)
  1745. except OSError as e:
  1746. if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
  1747. raise
  1748. else:
  1749. _remove_file_with_readonly_handling(full_path)
  1750. try:
  1751. del index[path]
  1752. except KeyError:
  1753. pass
  1754. # Try to remove empty parent directories
  1755. _remove_empty_parents(
  1756. full_path, repo.path if isinstance(repo.path, bytes) else repo.path.encode()
  1757. )
  1758. def detect_case_only_renames(
  1759. changes: list["TreeChange"],
  1760. config: "Config",
  1761. ) -> list["TreeChange"]:
  1762. """Detect and transform case-only renames in a list of tree changes.
  1763. This function identifies file renames that only differ in case (e.g.,
  1764. README.txt -> readme.txt) and transforms matching ADD/DELETE pairs into
  1765. CHANGE_RENAME operations. It uses filesystem-appropriate path normalization
  1766. based on the repository configuration.
  1767. Args:
  1768. changes: List of TreeChange objects representing file changes
  1769. config: Repository configuration object
  1770. Returns:
  1771. New list of TreeChange objects with case-only renames converted to CHANGE_RENAME
  1772. """
  1773. from .diff_tree import (
  1774. CHANGE_ADD,
  1775. CHANGE_COPY,
  1776. CHANGE_DELETE,
  1777. CHANGE_MODIFY,
  1778. CHANGE_RENAME,
  1779. TreeChange,
  1780. )
  1781. # Build dictionaries of old and new paths with their normalized forms
  1782. old_paths_normalized = {}
  1783. new_paths_normalized = {}
  1784. old_changes = {} # Map from old path to change object
  1785. new_changes = {} # Map from new path to change object
  1786. # Get the appropriate normalizer based on config
  1787. normalize_func = get_path_element_normalizer(config)
  1788. def normalize_path(path: bytes) -> bytes:
  1789. """Normalize entire path using element normalization."""
  1790. return b"/".join(normalize_func(part) for part in path.split(b"/"))
  1791. # Pre-normalize all paths once to avoid repeated normalization
  1792. for change in changes:
  1793. if change.type == CHANGE_DELETE and change.old:
  1794. assert change.old.path is not None
  1795. try:
  1796. normalized = normalize_path(change.old.path)
  1797. except UnicodeDecodeError:
  1798. import logging
  1799. logging.warning(
  1800. "Skipping case-only rename detection for path with invalid UTF-8: %r",
  1801. change.old.path,
  1802. )
  1803. else:
  1804. old_paths_normalized[normalized] = change.old.path
  1805. old_changes[change.old.path] = change
  1806. elif change.type == CHANGE_RENAME and change.old:
  1807. assert change.old.path is not None
  1808. # Treat RENAME as DELETE + ADD for case-only detection
  1809. try:
  1810. normalized = normalize_path(change.old.path)
  1811. except UnicodeDecodeError:
  1812. import logging
  1813. logging.warning(
  1814. "Skipping case-only rename detection for path with invalid UTF-8: %r",
  1815. change.old.path,
  1816. )
  1817. else:
  1818. old_paths_normalized[normalized] = change.old.path
  1819. old_changes[change.old.path] = change
  1820. if (
  1821. change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY)
  1822. and change.new
  1823. ):
  1824. assert change.new.path is not None
  1825. try:
  1826. normalized = normalize_path(change.new.path)
  1827. except UnicodeDecodeError:
  1828. import logging
  1829. logging.warning(
  1830. "Skipping case-only rename detection for path with invalid UTF-8: %r",
  1831. change.new.path,
  1832. )
  1833. else:
  1834. new_paths_normalized[normalized] = change.new.path
  1835. new_changes[change.new.path] = change
  1836. # Find case-only renames and transform changes
  1837. case_only_renames = set()
  1838. new_rename_changes = []
  1839. for norm_path, old_path in old_paths_normalized.items():
  1840. if norm_path in new_paths_normalized:
  1841. new_path = new_paths_normalized[norm_path]
  1842. if old_path != new_path:
  1843. # Found a case-only rename
  1844. old_change = old_changes[old_path]
  1845. new_change = new_changes[new_path]
  1846. # Create a CHANGE_RENAME to replace the DELETE and ADD/MODIFY pair
  1847. if new_change.type == CHANGE_ADD:
  1848. # Simple case: DELETE + ADD becomes RENAME
  1849. rename_change = TreeChange(
  1850. CHANGE_RENAME, old_change.old, new_change.new
  1851. )
  1852. else:
  1853. # Complex case: DELETE + MODIFY becomes RENAME
  1854. # Use the old file from DELETE and new file from MODIFY
  1855. rename_change = TreeChange(
  1856. CHANGE_RENAME, old_change.old, new_change.new
  1857. )
  1858. new_rename_changes.append(rename_change)
  1859. # Mark the old changes for removal
  1860. case_only_renames.add(old_change)
  1861. case_only_renames.add(new_change)
  1862. # Return new list with original ADD/DELETE changes replaced by renames
  1863. result = [change for change in changes if change not in case_only_renames]
  1864. result.extend(new_rename_changes)
  1865. return result
def update_working_tree(
    repo: "Repo",
    old_tree_id: Optional[bytes],
    new_tree_id: bytes,
    change_iterator: Iterator["TreeChange"],
    honor_filemode: bool = True,
    validate_path_element: Optional[Callable[[bytes], bool]] = None,
    symlink_fn: Optional[
        Callable[
            [Union[str, bytes, os.PathLike[str]], Union[str, bytes, os.PathLike[str]]],
            None,
        ]
    ] = None,
    force_remove_untracked: bool = False,
    blob_normalizer: Optional["FilterBlobNormalizer"] = None,
    tree_encoding: str = "utf-8",
    allow_overwrite_modified: bool = False,
) -> None:
    """Update the working tree and index to match a new tree.

    This function handles:
    - Adding new files
    - Updating modified files
    - Removing deleted files
    - Cleaning up empty directories

    Args:
      repo: Repository object
      old_tree_id: SHA of the tree before the update
      new_tree_id: SHA of the tree to update to
      change_iterator: Iterator of TreeChange objects to apply
      honor_filemode: An optional flag to honor core.filemode setting
      validate_path_element: Function to validate path elements to check out
      symlink_fn: Function to use for creating symlinks
      force_remove_untracked: If True, remove files that exist in working
        directory but not in target tree, even if old_tree_id is None
      blob_normalizer: An optional BlobNormalizer to use for converting line
        endings when writing blobs to the working directory.
      tree_encoding: Encoding used for tree paths (default: utf-8)
      allow_overwrite_modified: If False, raise an error when attempting to
        overwrite files that have been modified compared to old_tree_id
    """
    if validate_path_element is None:
        validate_path_element = validate_path_element_default
    from .diff_tree import (
        CHANGE_ADD,
        CHANGE_COPY,
        CHANGE_DELETE,
        CHANGE_MODIFY,
        CHANGE_RENAME,
        CHANGE_UNCHANGED,
    )

    repo_path = repo.path if isinstance(repo.path, bytes) else repo.path.encode()
    index = repo.open_index()

    # Convert iterator to list since we need multiple passes
    changes = list(change_iterator)

    # Transform case-only renames on case-insensitive filesystems
    import platform

    default_ignore_case = platform.system() in ("Windows", "Darwin")
    config = repo.get_config()
    ignore_case = config.get_boolean((b"core",), b"ignorecase", default_ignore_case)
    if ignore_case:
        # NOTE(review): config is re-fetched here although the one read
        # above is still valid; harmless but redundant.
        config = repo.get_config()
        changes = detect_case_only_renames(changes, config)

    # Phase 1: find files that are deleted and whose path re-appears as a
    # parent directory of an added/modified file (file -> directory).
    # NOTE(review): the nested scan over `changes` is O(n^2) in the number
    # of changes; fine for typical checkouts, slow for huge ones.
    paths_becoming_dirs = set()
    for change in changes:
        if change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY):
            assert change.new is not None
            path = change.new.path
            assert path is not None
            if b"/" in path:  # This is a file inside a directory
                # Check if any parent path exists as a file in the old tree or changes
                parts = path.split(b"/")
                for i in range(1, len(parts)):
                    parent = b"/".join(parts[:i])
                    # See if this parent path is being deleted (was a file, becoming a dir)
                    for other_change in changes:
                        if (
                            other_change.type == CHANGE_DELETE
                            and other_change.old
                            and other_change.old.path == parent
                        ):
                            paths_becoming_dirs.add(parent)

    # Check if any path that needs to become a directory has been modified
    for path in paths_becoming_dirs:
        full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
        try:
            current_stat = os.lstat(full_path)
        except FileNotFoundError:
            continue  # File doesn't exist, nothing to check
        except OSError as e:
            raise OSError(
                f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
            ) from e
        if stat.S_ISREG(current_stat.st_mode):
            # Find the old entry for this path
            old_change = None
            for change in changes:
                if (
                    change.type == CHANGE_DELETE
                    and change.old
                    and change.old.path == path
                ):
                    old_change = change
                    break
            if old_change:
                # Check if file has been modified
                assert old_change.old is not None
                assert (
                    old_change.old.sha is not None and old_change.old.mode is not None
                )
                file_matches = _check_file_matches(
                    repo.object_store,
                    full_path,
                    old_change.old.sha,
                    old_change.old.mode,
                    current_stat,
                    honor_filemode,
                    blob_normalizer,
                    path,
                )
                if not file_matches:
                    raise OSError(
                        f"Cannot replace modified file with directory: {path!r}"
                    )

    # Phase 2: refuse to clobber local modifications unless explicitly allowed.
    # Check for uncommitted modifications before making any changes
    if not allow_overwrite_modified and old_tree_id:
        for change in changes:
            # Only check files that are being modified or deleted
            if change.type in (CHANGE_MODIFY, CHANGE_DELETE) and change.old:
                path = change.old.path
                assert path is not None
                if path.startswith(b".git") or not validate_path(
                    path, validate_path_element
                ):
                    continue
                full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
                try:
                    current_stat = os.lstat(full_path)
                except FileNotFoundError:
                    continue  # File doesn't exist, nothing to check
                except OSError as e:
                    raise OSError(
                        f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
                    ) from e
                if stat.S_ISREG(current_stat.st_mode):
                    # Check if working tree file differs from old tree
                    assert change.old.sha is not None and change.old.mode is not None
                    file_matches = _check_file_matches(
                        repo.object_store,
                        full_path,
                        change.old.sha,
                        change.old.mode,
                        current_stat,
                        honor_filemode,
                        blob_normalizer,
                        path,
                    )
                    if not file_matches:
                        from .errors import WorkingTreeModifiedError

                        raise WorkingTreeModifiedError(
                            f"Your local changes to '{path.decode('utf-8', errors='replace')}' "
                            f"would be overwritten by checkout. "
                            f"Please commit your changes or stash them before you switch branches."
                        )

    # Phase 3: apply the changes.  Deletions/renames remove the old path
    # first so that a RENAME frees its source before its target is written.
    # Apply the changes
    for change in changes:
        if change.type in (CHANGE_DELETE, CHANGE_RENAME):
            # Remove file/directory
            assert change.old is not None and change.old.path is not None
            path = change.old.path
            if path.startswith(b".git") or not validate_path(
                path, validate_path_element
            ):
                continue
            full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
            try:
                delete_stat: Optional[os.stat_result] = os.lstat(full_path)
            except FileNotFoundError:
                delete_stat = None
            except OSError as e:
                raise OSError(
                    f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
                ) from e
            _transition_to_absent(repo, path, full_path, delete_stat, index)

        if change.type in (
            CHANGE_ADD,
            CHANGE_MODIFY,
            CHANGE_UNCHANGED,
            CHANGE_COPY,
            CHANGE_RENAME,
        ):
            # Add or modify file
            assert (
                change.new is not None
                and change.new.path is not None
                and change.new.mode is not None
            )
            path = change.new.path
            if path.startswith(b".git") or not validate_path(
                path, validate_path_element
            ):
                continue
            full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
            try:
                modify_stat: Optional[os.stat_result] = os.lstat(full_path)
            except FileNotFoundError:
                modify_stat = None
            except OSError as e:
                raise OSError(
                    f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
                ) from e
            if S_ISGITLINK(change.new.mode):
                _transition_to_submodule(
                    repo, path, full_path, modify_stat, change.new, index
                )
            else:
                _transition_to_file(
                    repo.object_store,
                    path,
                    full_path,
                    modify_stat,
                    change.new,
                    index,
                    honor_filemode,
                    symlink_fn,
                    blob_normalizer,
                    tree_encoding,
                )

    index.write()
  2095. def _check_entry_for_changes(
  2096. tree_path: bytes,
  2097. entry: Union[IndexEntry, ConflictedIndexEntry],
  2098. root_path: bytes,
  2099. filter_blob_callback: Optional[Callable[[bytes, bytes], bytes]] = None,
  2100. ) -> Optional[bytes]:
  2101. """Check a single index entry for changes.
  2102. Args:
  2103. tree_path: Path in the tree
  2104. entry: Index entry to check
  2105. root_path: Root filesystem path
  2106. filter_blob_callback: Optional callback to filter blobs
  2107. Returns: tree_path if changed, None otherwise
  2108. """
  2109. if isinstance(entry, ConflictedIndexEntry):
  2110. # Conflicted files are always unstaged
  2111. return tree_path
  2112. full_path = _tree_to_fs_path(root_path, tree_path)
  2113. try:
  2114. st = os.lstat(full_path)
  2115. if stat.S_ISDIR(st.st_mode):
  2116. if _has_directory_changed(tree_path, entry):
  2117. return tree_path
  2118. return None
  2119. if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
  2120. return None
  2121. blob = blob_from_path_and_stat(full_path, st)
  2122. if filter_blob_callback is not None:
  2123. blob.data = filter_blob_callback(blob.data, tree_path)
  2124. except FileNotFoundError:
  2125. # The file was removed, so we assume that counts as
  2126. # different from whatever file used to exist.
  2127. return tree_path
  2128. else:
  2129. if blob.id != entry.sha:
  2130. return tree_path
  2131. return None
def get_unstaged_changes(
    index: Index,
    root_path: Union[str, bytes],
    filter_blob_callback: Optional[Callable[..., Any]] = None,
    preload_index: bool = False,
) -> Generator[bytes, None, None]:
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
      filter_blob_callback: Optional callback to filter blobs
      preload_index: If True, use parallel threads to check files (requires threading support)
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    if preload_index:
        # Use parallel processing for better performance on slow filesystems
        try:
            import multiprocessing
            from concurrent.futures import ThreadPoolExecutor
        except ImportError:
            # If threading is not available, fall back to serial processing
            preload_index = False
        else:
            # Collect all entries first
            entries = list(index.iteritems())
            # Use number of CPUs but cap at 8 threads to avoid overhead
            num_workers = min(multiprocessing.cpu_count(), 8)
            # Process entries in parallel
            with ThreadPoolExecutor(max_workers=num_workers) as executor:
                # Submit all tasks
                futures = [
                    executor.submit(
                        _check_entry_for_changes,
                        tree_path,
                        entry,
                        root_path,
                        filter_blob_callback,
                    )
                    for tree_path, entry in entries
                ]
                # Yield results as they complete
                # NOTE(review): futures are consumed in submission order, so
                # results stream in index order rather than completion order;
                # the stat/read work still overlaps across threads.
                for future in futures:
                    result = future.result()
                    if result is not None:
                        yield result

    # Reached either when preload_index was False from the start, or when
    # the parallel path above was unavailable and cleared the flag.
    if not preload_index:
        # Serial processing
        for tree_path, entry in index.iteritems():
            result = _check_entry_for_changes(
                tree_path, entry, root_path, filter_blob_callback
            )
            if result is not None:
                yield result
  2188. def _tree_to_fs_path(
  2189. root_path: bytes, tree_path: bytes, tree_encoding: str = "utf-8"
  2190. ) -> bytes:
  2191. """Convert a git tree path to a file system path.
  2192. Args:
  2193. root_path: Root filesystem path
  2194. tree_path: Git tree path as bytes (encoded with tree_encoding)
  2195. tree_encoding: Encoding used for tree paths (default: utf-8)
  2196. Returns: File system path.
  2197. """
  2198. assert isinstance(tree_path, bytes)
  2199. if os_sep_bytes != b"/":
  2200. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  2201. else:
  2202. sep_corrected_path = tree_path
  2203. # On Windows, we need to handle tree path encoding properly
  2204. if sys.platform == "win32":
  2205. # Decode from tree encoding, then re-encode for filesystem
  2206. try:
  2207. tree_path_str = sep_corrected_path.decode(tree_encoding)
  2208. sep_corrected_path = os.fsencode(tree_path_str)
  2209. except UnicodeDecodeError:
  2210. # If decoding fails, use the original bytes
  2211. pass
  2212. return os.path.join(root_path, sep_corrected_path)
  2213. def _fs_to_tree_path(fs_path: Union[str, bytes], tree_encoding: str = "utf-8") -> bytes:
  2214. """Convert a file system path to a git tree path.
  2215. Args:
  2216. fs_path: File system path.
  2217. tree_encoding: Encoding to use for tree paths (default: utf-8)
  2218. Returns: Git tree path as bytes (encoded with tree_encoding)
  2219. """
  2220. if not isinstance(fs_path, bytes):
  2221. fs_path_bytes = os.fsencode(fs_path)
  2222. else:
  2223. fs_path_bytes = fs_path
  2224. # On Windows, we need to ensure tree paths are properly encoded
  2225. if sys.platform == "win32":
  2226. try:
  2227. # Decode from filesystem encoding, then re-encode with tree encoding
  2228. fs_path_str = os.fsdecode(fs_path_bytes)
  2229. fs_path_bytes = fs_path_str.encode(tree_encoding)
  2230. except UnicodeDecodeError:
  2231. # If filesystem decoding fails, use the original bytes
  2232. pass
  2233. if os_sep_bytes != b"/":
  2234. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  2235. else:
  2236. tree_path = fs_path_bytes
  2237. return tree_path
  2238. def index_entry_from_directory(st: os.stat_result, path: bytes) -> Optional[IndexEntry]:
  2239. """Create an index entry for a directory.
  2240. This is only used for submodules (directories containing .git).
  2241. Args:
  2242. st: Stat result for the directory
  2243. path: Path to the directory
  2244. Returns:
  2245. IndexEntry for a submodule, or None if not a submodule
  2246. """
  2247. if os.path.exists(os.path.join(path, b".git")):
  2248. head = read_submodule_head(path)
  2249. if head is None:
  2250. return None
  2251. return index_entry_from_stat(st, head, mode=S_IFGITLINK)
  2252. return None
  2253. def index_entry_from_path(
  2254. path: bytes, object_store: Optional[ObjectContainer] = None
  2255. ) -> Optional[IndexEntry]:
  2256. """Create an index from a filesystem path.
  2257. This returns an index value for files, symlinks
  2258. and tree references. for directories and
  2259. non-existent files it returns None
  2260. Args:
  2261. path: Path to create an index entry for
  2262. object_store: Optional object store to
  2263. save new blobs in
  2264. Returns: An index entry; None for directories
  2265. """
  2266. assert isinstance(path, bytes)
  2267. st = os.lstat(path)
  2268. if stat.S_ISDIR(st.st_mode):
  2269. return index_entry_from_directory(st, path)
  2270. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  2271. blob = blob_from_path_and_stat(path, st)
  2272. if object_store is not None:
  2273. object_store.add_object(blob)
  2274. return index_entry_from_stat(st, blob.id)
  2275. return None
  2276. def iter_fresh_entries(
  2277. paths: Iterable[bytes],
  2278. root_path: bytes,
  2279. object_store: Optional[ObjectContainer] = None,
  2280. ) -> Iterator[tuple[bytes, Optional[IndexEntry]]]:
  2281. """Iterate over current versions of index entries on disk.
  2282. Args:
  2283. paths: Paths to iterate over
  2284. root_path: Root path to access from
  2285. object_store: Optional store to save new blobs in
  2286. Returns: Iterator over path, index_entry
  2287. """
  2288. for path in paths:
  2289. p = _tree_to_fs_path(root_path, path)
  2290. try:
  2291. entry = index_entry_from_path(p, object_store=object_store)
  2292. except (FileNotFoundError, IsADirectoryError):
  2293. entry = None
  2294. yield path, entry
  2295. def iter_fresh_objects(
  2296. paths: Iterable[bytes],
  2297. root_path: bytes,
  2298. include_deleted: bool = False,
  2299. object_store: Optional[ObjectContainer] = None,
  2300. ) -> Iterator[tuple[bytes, Optional[bytes], Optional[int]]]:
  2301. """Iterate over versions of objects on disk referenced by index.
  2302. Args:
  2303. paths: Paths to check
  2304. root_path: Root path to access from
  2305. include_deleted: Include deleted entries with sha and
  2306. mode set to None
  2307. object_store: Optional object store to report new items to
  2308. Returns: Iterator over path, sha, mode
  2309. """
  2310. for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
  2311. if entry is None:
  2312. if include_deleted:
  2313. yield path, None, None
  2314. else:
  2315. yield path, entry.sha, cleanup_mode(entry.mode)
  2316. def refresh_index(index: Index, root_path: bytes) -> None:
  2317. """Refresh the contents of an index.
  2318. This is the equivalent to running 'git commit -a'.
  2319. Args:
  2320. index: Index to update
  2321. root_path: Root filesystem path
  2322. """
  2323. for path, entry in iter_fresh_entries(index, root_path):
  2324. if entry:
  2325. index[path] = entry
  2326. class locked_index:
  2327. """Lock the index while making modifications.
  2328. Works as a context manager.
  2329. """
  2330. _file: "_GitFile"
  2331. def __init__(self, path: Union[bytes, str]) -> None:
  2332. """Initialize locked_index."""
  2333. self._path = path
  2334. def __enter__(self) -> Index:
  2335. """Enter context manager and lock index."""
  2336. f = GitFile(self._path, "wb")
  2337. assert isinstance(f, _GitFile) # GitFile in write mode always returns _GitFile
  2338. self._file = f
  2339. self._index = Index(self._path)
  2340. return self._index
  2341. def __exit__(
  2342. self,
  2343. exc_type: Optional[type],
  2344. exc_value: Optional[BaseException],
  2345. traceback: Optional[types.TracebackType],
  2346. ) -> None:
  2347. """Exit context manager and unlock index."""
  2348. if exc_type is not None:
  2349. self._file.abort()
  2350. return
  2351. try:
  2352. f = SHA1Writer(self._file)
  2353. write_index_dict(f, self._index._byname)
  2354. except BaseException:
  2355. self._file.abort()
  2356. else:
  2357. f.close()