index.py 101 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Parser for the git index file format."""
# Explicit public API of this module; controls what ``from dulwich.index
# import *`` exports and documents the supported surface.
__all__ = [
    "DEFAULT_VERSION",
    "EOIE_EXTENSION",
    "EXTENDED_FLAG_INTEND_TO_ADD",
    "EXTENDED_FLAG_SKIP_WORKTREE",
    "FLAG_EXTENDED",
    "FLAG_NAMEMASK",
    "FLAG_STAGEMASK",
    "FLAG_STAGESHIFT",
    "FLAG_VALID",
    "HFS_IGNORABLE_CHARS",
    "IEOT_EXTENSION",
    "INVALID_DOTNAMES",
    "REUC_EXTENSION",
    "SDIR_EXTENSION",
    "TREE_EXTENSION",
    "UNTR_EXTENSION",
    "Index",
    "IndexEntry",
    "IndexExtension",
    "ResolveUndoExtension",
    "SerializedIndexEntry",
    "SparseDirExtension",
    "Stage",
    "TreeDict",
    "TreeExtension",
    "UnmergedEntries",
    "UnsupportedIndexFormat",
    "UntrackedExtension",
    "blob_from_path_and_mode",
    "blob_from_path_and_stat",
    "build_file_from_blob",
    "build_index_from_tree",
    "changes_from_tree",
    "cleanup_mode",
    "commit_index",
    "commit_tree",
    "detect_case_only_renames",
    "get_path_element_normalizer",
    "get_unstaged_changes",
    "index_entry_from_stat",
    "pathjoin",
    "pathsplit",
    "read_cache_entry",
    "read_cache_time",
    "read_index",
    "read_index_dict",
    "read_index_dict_with_version",
    "read_index_header",
    "read_submodule_head",
    "update_working_tree",
    "validate_path",
    "validate_path_element_default",
    "validate_path_element_hfs",
    "validate_path_element_ntfs",
    "write_cache_entry",
    "write_cache_time",
    "write_index",
    "write_index_dict",
    "write_index_extension",
]
  83. import errno
  84. import os
  85. import shutil
  86. import stat
  87. import struct
  88. import sys
  89. import types
  90. from collections.abc import (
  91. Callable,
  92. Generator,
  93. Iterable,
  94. Iterator,
  95. Mapping,
  96. Sequence,
  97. Set,
  98. )
  99. from dataclasses import dataclass
  100. from enum import Enum
  101. from typing import (
  102. IO,
  103. TYPE_CHECKING,
  104. Any,
  105. BinaryIO,
  106. )
  107. if TYPE_CHECKING:
  108. from .config import Config
  109. from .diff_tree import TreeChange
  110. from .file import _GitFile
  111. from .filters import FilterBlobNormalizer
  112. from .object_store import BaseObjectStore
  113. from .repo import Repo
  114. from .file import GitFile
  115. from .object_store import iter_tree_contents
  116. from .objects import (
  117. S_IFGITLINK,
  118. S_ISGITLINK,
  119. Blob,
  120. ObjectID,
  121. Tree,
  122. TreeEntry,
  123. hex_to_sha,
  124. sha_to_hex,
  125. )
  126. from .pack import ObjectContainer, SHA1Reader, SHA1Writer
# Type alias for recursive tree structure used in commit_tree
TreeDict = dict[bytes, "TreeDict | tuple[int, ObjectID]"]

# 2-bit stage (during merge), stored in bits 12-13 of the flags field
FLAG_STAGEMASK = 0x3000
FLAG_STAGESHIFT = 12
# Lower 12 bits of the flags field hold the entry name length
FLAG_NAMEMASK = 0x0FFF
# assume-valid
FLAG_VALID = 0x8000
# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000
# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
# Index format version written when none is requested explicitly
DEFAULT_VERSION = 2
# Index extension signatures
TREE_EXTENSION = b"TREE"
REUC_EXTENSION = b"REUC"
UNTR_EXTENSION = b"UNTR"
EOIE_EXTENSION = b"EOIE"
IEOT_EXTENSION = b"IEOT"
SDIR_EXTENSION = b"sdir"  # Sparse directory extension
  149. def _encode_varint(value: int) -> bytes:
  150. """Encode an integer using variable-width encoding.
  151. Same format as used for OFS_DELTA pack entries and index v4 path compression.
  152. Uses 7 bits per byte, with the high bit indicating continuation.
  153. Args:
  154. value: Integer to encode
  155. Returns:
  156. Encoded bytes
  157. """
  158. if value == 0:
  159. return b"\x00"
  160. result = []
  161. while value > 0:
  162. byte = value & 0x7F # Take lower 7 bits
  163. value >>= 7
  164. if value > 0:
  165. byte |= 0x80 # Set continuation bit
  166. result.append(byte)
  167. return bytes(result)
  168. def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
  169. """Decode a variable-width encoded integer.
  170. Args:
  171. data: Bytes to decode from
  172. offset: Starting offset in data
  173. Returns:
  174. tuple of (decoded_value, new_offset)
  175. """
  176. value = 0
  177. shift = 0
  178. pos = offset
  179. while pos < len(data):
  180. byte = data[pos]
  181. pos += 1
  182. value |= (byte & 0x7F) << shift
  183. shift += 7
  184. if not (byte & 0x80): # No continuation bit
  185. break
  186. return value, pos
  187. def _compress_path(path: bytes, previous_path: bytes) -> bytes:
  188. """Compress a path relative to the previous path for index version 4.
  189. Args:
  190. path: Path to compress
  191. previous_path: Previous path for comparison
  192. Returns:
  193. Compressed path data (varint prefix_len + suffix)
  194. """
  195. # Find the common prefix length
  196. common_len = 0
  197. min_len = min(len(path), len(previous_path))
  198. for i in range(min_len):
  199. if path[i] == previous_path[i]:
  200. common_len += 1
  201. else:
  202. break
  203. # The number of bytes to remove from the end of previous_path
  204. # to get the common prefix
  205. remove_len = len(previous_path) - common_len
  206. # The suffix to append
  207. suffix = path[common_len:]
  208. # Encode: varint(remove_len) + suffix + NUL
  209. return _encode_varint(remove_len) + suffix + b"\x00"
  210. def _decompress_path(
  211. data: bytes, offset: int, previous_path: bytes
  212. ) -> tuple[bytes, int]:
  213. """Decompress a path from index version 4 compressed format.
  214. Args:
  215. data: Raw data containing compressed path
  216. offset: Starting offset in data
  217. previous_path: Previous path for decompression
  218. Returns:
  219. tuple of (decompressed_path, new_offset)
  220. """
  221. # Decode the number of bytes to remove from previous path
  222. remove_len, new_offset = _decode_varint(data, offset)
  223. # Find the NUL terminator for the suffix
  224. suffix_start = new_offset
  225. suffix_end = suffix_start
  226. while suffix_end < len(data) and data[suffix_end] != 0:
  227. suffix_end += 1
  228. if suffix_end >= len(data):
  229. raise ValueError("Unterminated path suffix in compressed entry")
  230. suffix = data[suffix_start:suffix_end]
  231. new_offset = suffix_end + 1 # Skip the NUL terminator
  232. # Reconstruct the path
  233. if remove_len > len(previous_path):
  234. raise ValueError(
  235. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  236. )
  237. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  238. path = prefix + suffix
  239. return path, new_offset
  240. def _decompress_path_from_stream(
  241. f: BinaryIO, previous_path: bytes
  242. ) -> tuple[bytes, int]:
  243. """Decompress a path from index version 4 compressed format, reading from stream.
  244. Args:
  245. f: File-like object to read from
  246. previous_path: Previous path for decompression
  247. Returns:
  248. tuple of (decompressed_path, bytes_consumed)
  249. """
  250. # Decode the varint for remove_len by reading byte by byte
  251. remove_len = 0
  252. shift = 0
  253. bytes_consumed = 0
  254. while True:
  255. byte_data = f.read(1)
  256. if not byte_data:
  257. raise ValueError("Unexpected end of file while reading varint")
  258. byte = byte_data[0]
  259. bytes_consumed += 1
  260. remove_len |= (byte & 0x7F) << shift
  261. shift += 7
  262. if not (byte & 0x80): # No continuation bit
  263. break
  264. # Read the suffix until NUL terminator
  265. suffix = b""
  266. while True:
  267. byte_data = f.read(1)
  268. if not byte_data:
  269. raise ValueError("Unexpected end of file while reading path suffix")
  270. byte = byte_data[0]
  271. bytes_consumed += 1
  272. if byte == 0: # NUL terminator
  273. break
  274. suffix += bytes([byte])
  275. # Reconstruct the path
  276. if remove_len > len(previous_path):
  277. raise ValueError(
  278. f"Invalid path compression: trying to remove {remove_len} bytes from {len(previous_path)}-byte path"
  279. )
  280. prefix = previous_path[:-remove_len] if remove_len > 0 else previous_path
  281. path = prefix + suffix
  282. return path, bytes_consumed
class Stage(Enum):
    """Represents the stage of an index entry during merge conflicts."""

    # Stage 0: ordinary, fully merged entry.
    NORMAL = 0
    # Stage 1: version from the common ancestor of the merged commits.
    MERGE_CONFLICT_ANCESTOR = 1
    # Stage 2: version from the current branch ("this"/ours).
    MERGE_CONFLICT_THIS = 2
    # Stage 3: version from the branch being merged in ("other"/theirs).
    MERGE_CONFLICT_OTHER = 3
  289. @dataclass
  290. class SerializedIndexEntry:
  291. """Represents a serialized index entry as stored in the index file.
  292. This dataclass holds the raw data for an index entry before it's
  293. parsed into the more user-friendly IndexEntry format.
  294. """
  295. name: bytes
  296. ctime: int | float | tuple[int, int]
  297. mtime: int | float | tuple[int, int]
  298. dev: int
  299. ino: int
  300. mode: int
  301. uid: int
  302. gid: int
  303. size: int
  304. sha: ObjectID
  305. flags: int
  306. extended_flags: int
  307. def stage(self) -> Stage:
  308. """Extract the stage from the flags field.
  309. Returns:
  310. Stage enum value indicating merge conflict state
  311. """
  312. return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
  313. def is_sparse_dir(self) -> bool:
  314. """Check if this entry represents a sparse directory.
  315. A sparse directory entry is a collapsed representation of an entire
  316. directory tree in a sparse index. It has:
  317. - Directory mode (0o040000)
  318. - SKIP_WORKTREE flag set
  319. - Path ending with '/'
  320. - SHA pointing to a tree object
  321. Returns:
  322. True if entry is a sparse directory entry
  323. """
  324. return (
  325. stat.S_ISDIR(self.mode)
  326. and bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
  327. and self.name.endswith(b"/")
  328. )
  329. @dataclass
  330. class IndexExtension:
  331. """Base class for index extensions."""
  332. signature: bytes
  333. data: bytes
  334. @classmethod
  335. def from_raw(cls, signature: bytes, data: bytes) -> "IndexExtension":
  336. """Create an extension from raw data.
  337. Args:
  338. signature: 4-byte extension signature
  339. data: Extension data
  340. Returns:
  341. Parsed extension object
  342. """
  343. if signature == TREE_EXTENSION:
  344. return TreeExtension.from_bytes(data)
  345. elif signature == REUC_EXTENSION:
  346. return ResolveUndoExtension.from_bytes(data)
  347. elif signature == UNTR_EXTENSION:
  348. return UntrackedExtension.from_bytes(data)
  349. elif signature == SDIR_EXTENSION:
  350. return SparseDirExtension.from_bytes(data)
  351. else:
  352. # Unknown extension - just store raw data
  353. return cls(signature, data)
  354. def to_bytes(self) -> bytes:
  355. """Serialize extension to bytes."""
  356. return self.data
  357. class TreeExtension(IndexExtension):
  358. """Tree cache extension."""
  359. def __init__(self, entries: list[tuple[bytes, bytes, int]]) -> None:
  360. """Initialize TreeExtension.
  361. Args:
  362. entries: List of tree cache entries (path, sha, flags)
  363. """
  364. self.entries = entries
  365. super().__init__(TREE_EXTENSION, b"")
  366. @classmethod
  367. def from_bytes(cls, data: bytes) -> "TreeExtension":
  368. """Parse TreeExtension from bytes.
  369. Args:
  370. data: Raw bytes to parse
  371. Returns:
  372. TreeExtension instance
  373. """
  374. # TODO: Implement tree cache parsing
  375. return cls([])
  376. def to_bytes(self) -> bytes:
  377. """Serialize TreeExtension to bytes.
  378. Returns:
  379. Serialized extension data
  380. """
  381. # TODO: Implement tree cache serialization
  382. return b""
  383. class ResolveUndoExtension(IndexExtension):
  384. """Resolve undo extension for recording merge conflicts."""
  385. def __init__(self, entries: list[tuple[bytes, list[tuple[int, bytes]]]]) -> None:
  386. """Initialize ResolveUndoExtension.
  387. Args:
  388. entries: List of (path, stages) where stages is a list of (stage, sha) tuples
  389. """
  390. self.entries = entries
  391. super().__init__(REUC_EXTENSION, b"")
  392. @classmethod
  393. def from_bytes(cls, data: bytes) -> "ResolveUndoExtension":
  394. """Parse ResolveUndoExtension from bytes.
  395. Args:
  396. data: Raw bytes to parse
  397. Returns:
  398. ResolveUndoExtension instance
  399. """
  400. # TODO: Implement resolve undo parsing
  401. return cls([])
  402. def to_bytes(self) -> bytes:
  403. """Serialize ResolveUndoExtension to bytes.
  404. Returns:
  405. Serialized extension data
  406. """
  407. # TODO: Implement resolve undo serialization
  408. return b""
  409. class UntrackedExtension(IndexExtension):
  410. """Untracked cache extension."""
  411. def __init__(self, data: bytes) -> None:
  412. """Initialize UntrackedExtension.
  413. Args:
  414. data: Raw untracked cache data
  415. """
  416. super().__init__(UNTR_EXTENSION, data)
  417. @classmethod
  418. def from_bytes(cls, data: bytes) -> "UntrackedExtension":
  419. """Parse UntrackedExtension from bytes.
  420. Args:
  421. data: Raw bytes to parse
  422. Returns:
  423. UntrackedExtension instance
  424. """
  425. return cls(data)
  426. class SparseDirExtension(IndexExtension):
  427. """Sparse directory extension.
  428. This extension indicates that the index contains sparse directory entries.
  429. Tools that don't understand sparse index should avoid interacting with
  430. the index when this extension is present.
  431. The extension data is empty - its presence is the signal.
  432. """
  433. def __init__(self) -> None:
  434. """Initialize SparseDirExtension."""
  435. super().__init__(SDIR_EXTENSION, b"")
  436. @classmethod
  437. def from_bytes(cls, data: bytes) -> "SparseDirExtension":
  438. """Parse SparseDirExtension from bytes.
  439. Args:
  440. data: Raw bytes to parse (should be empty)
  441. Returns:
  442. SparseDirExtension instance
  443. """
  444. return cls()
  445. def to_bytes(self) -> bytes:
  446. """Serialize SparseDirExtension to bytes.
  447. Returns:
  448. Empty bytes (extension presence is the signal)
  449. """
  450. return b""
  451. @dataclass
  452. class IndexEntry:
  453. """Represents an entry in the Git index.
  454. This is a higher-level representation of an index entry that includes
  455. parsed data and convenience methods.
  456. """
  457. ctime: int | float | tuple[int, int]
  458. mtime: int | float | tuple[int, int]
  459. dev: int
  460. ino: int
  461. mode: int
  462. uid: int
  463. gid: int
  464. size: int
  465. sha: ObjectID
  466. flags: int = 0
  467. extended_flags: int = 0
  468. @classmethod
  469. def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
  470. """Create an IndexEntry from a SerializedIndexEntry.
  471. Args:
  472. serialized: SerializedIndexEntry to convert
  473. Returns:
  474. New IndexEntry instance
  475. """
  476. return cls(
  477. ctime=serialized.ctime,
  478. mtime=serialized.mtime,
  479. dev=serialized.dev,
  480. ino=serialized.ino,
  481. mode=serialized.mode,
  482. uid=serialized.uid,
  483. gid=serialized.gid,
  484. size=serialized.size,
  485. sha=serialized.sha,
  486. flags=serialized.flags,
  487. extended_flags=serialized.extended_flags,
  488. )
  489. def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
  490. """Serialize this entry with a given name and stage.
  491. Args:
  492. name: Path name for the entry
  493. stage: Merge conflict stage
  494. Returns:
  495. SerializedIndexEntry ready for writing to disk
  496. """
  497. # Clear out any existing stage bits, then set them from the Stage.
  498. new_flags = self.flags & ~FLAG_STAGEMASK
  499. new_flags |= stage.value << FLAG_STAGESHIFT
  500. return SerializedIndexEntry(
  501. name=name,
  502. ctime=self.ctime,
  503. mtime=self.mtime,
  504. dev=self.dev,
  505. ino=self.ino,
  506. mode=self.mode,
  507. uid=self.uid,
  508. gid=self.gid,
  509. size=self.size,
  510. sha=self.sha,
  511. flags=new_flags,
  512. extended_flags=self.extended_flags,
  513. )
  514. def stage(self) -> Stage:
  515. """Get the merge conflict stage of this entry.
  516. Returns:
  517. Stage enum value
  518. """
  519. return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
  520. @property
  521. def skip_worktree(self) -> bool:
  522. """Return True if the skip-worktree bit is set in extended_flags."""
  523. return bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
  524. def set_skip_worktree(self, skip: bool = True) -> None:
  525. """Helper method to set or clear the skip-worktree bit in extended_flags.
  526. Also sets FLAG_EXTENDED in self.flags if needed.
  527. """
  528. if skip:
  529. # Turn on the skip-worktree bit
  530. self.extended_flags |= EXTENDED_FLAG_SKIP_WORKTREE
  531. # Also ensure the main 'extended' bit is set in flags
  532. self.flags |= FLAG_EXTENDED
  533. else:
  534. # Turn off the skip-worktree bit
  535. self.extended_flags &= ~EXTENDED_FLAG_SKIP_WORKTREE
  536. # Optionally unset the main extended bit if no extended flags remain
  537. if self.extended_flags == 0:
  538. self.flags &= ~FLAG_EXTENDED
  539. def is_sparse_dir(self, name: bytes) -> bool:
  540. """Check if this entry represents a sparse directory.
  541. A sparse directory entry is a collapsed representation of an entire
  542. directory tree in a sparse index. It has:
  543. - Directory mode (0o040000)
  544. - SKIP_WORKTREE flag set
  545. - Path ending with '/'
  546. - SHA pointing to a tree object
  547. Args:
  548. name: The path name for this entry (IndexEntry doesn't store name)
  549. Returns:
  550. True if entry is a sparse directory entry
  551. """
  552. return (
  553. stat.S_ISDIR(self.mode)
  554. and bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)
  555. and name.endswith(b"/")
  556. )
  557. class ConflictedIndexEntry:
  558. """Index entry that represents a conflict."""
  559. ancestor: IndexEntry | None
  560. this: IndexEntry | None
  561. other: IndexEntry | None
  562. def __init__(
  563. self,
  564. ancestor: IndexEntry | None = None,
  565. this: IndexEntry | None = None,
  566. other: IndexEntry | None = None,
  567. ) -> None:
  568. """Initialize ConflictedIndexEntry.
  569. Args:
  570. ancestor: The common ancestor entry
  571. this: The current branch entry
  572. other: The other branch entry
  573. """
  574. self.ancestor = ancestor
  575. self.this = this
  576. self.other = other
class UnmergedEntries(Exception):
    """Unmerged entries exist in the index.

    The index still contains conflicted entries (entries at a non-zero
    merge stage).
    """
  579. def pathsplit(path: bytes) -> tuple[bytes, bytes]:
  580. """Split a /-delimited path into a directory part and a basename.
  581. Args:
  582. path: The path to split.
  583. Returns:
  584. Tuple with directory name and basename
  585. """
  586. try:
  587. (dirname, basename) = path.rsplit(b"/", 1)
  588. except ValueError:
  589. return (b"", path)
  590. else:
  591. return (dirname, basename)
  592. def pathjoin(*args: bytes) -> bytes:
  593. """Join a /-delimited path."""
  594. return b"/".join([p for p in args if p])
  595. def read_cache_time(f: BinaryIO) -> tuple[int, int]:
  596. """Read a cache time.
  597. Args:
  598. f: File-like object to read from
  599. Returns:
  600. Tuple with seconds and nanoseconds
  601. """
  602. return struct.unpack(">LL", f.read(8))
  603. def write_cache_time(f: IO[bytes], t: int | float | tuple[int, int]) -> None:
  604. """Write a cache time.
  605. Args:
  606. f: File-like object to write to
  607. t: Time to write (as int, float or tuple with secs and nsecs)
  608. """
  609. if isinstance(t, int):
  610. t = (t, 0)
  611. elif isinstance(t, float):
  612. (secs, nsecs) = divmod(t, 1.0)
  613. t = (int(secs), int(nsecs * 1000000000))
  614. elif not isinstance(t, tuple):
  615. raise TypeError(t)
  616. f.write(struct.pack(">LL", *t))
def read_cache_entry(
    f: BinaryIO, version: int, previous_path: bytes = b""
) -> SerializedIndexEntry:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index version
      previous_path: Previous entry's path (for version 4 compression)

    Returns:
      A SerializedIndexEntry with the name-length bits masked out of flags.
    """
    # Remember where the entry started so the trailing padding (versions < 4)
    # can be computed relative to it.
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    # Fixed-size portion: six 32-bit stat fields, 20-byte SHA, 16-bit flags.
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        # Extended flags only exist in format version 3 and later.
        if version < 3:
            raise AssertionError("extended flag set in index with version < 3")
        (extended_flags,) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    if version >= 4:
        # Version 4: paths are always compressed (name_len should be 0)
        name, _consumed = _decompress_path_from_stream(f, previous_path)
    else:
        # Versions < 4: regular name reading
        name = f.read(flags & FLAG_NAMEMASK)
    # Padding:
    if version < 4:
        # Entries are zero-padded to a multiple of 8 bytes; skip the padding.
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return SerializedIndexEntry(
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        # Strip the name-length bits; callers recompute them on write.
        flags & ~FLAG_NAMEMASK,
        extended_flags,
    )
  669. def write_cache_entry(
  670. f: IO[bytes], entry: SerializedIndexEntry, version: int, previous_path: bytes = b""
  671. ) -> None:
  672. """Write an index entry to a file.
  673. Args:
  674. f: File object
  675. entry: IndexEntry to write
  676. version: Index format version
  677. previous_path: Previous entry's path (for version 4 compression)
  678. """
  679. beginoffset = f.tell()
  680. write_cache_time(f, entry.ctime)
  681. write_cache_time(f, entry.mtime)
  682. if version >= 4:
  683. # Version 4: use compression but set name_len to actual filename length
  684. # This matches how C Git implements index v4 flags
  685. compressed_path = _compress_path(entry.name, previous_path)
  686. flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
  687. else:
  688. # Versions < 4: include actual name length
  689. flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
  690. if entry.extended_flags:
  691. flags |= FLAG_EXTENDED
  692. if flags & FLAG_EXTENDED and version is not None and version < 3:
  693. raise AssertionError("unable to use extended flags in version < 3")
  694. f.write(
  695. struct.pack(
  696. b">LLLLLL20sH",
  697. entry.dev & 0xFFFFFFFF,
  698. entry.ino & 0xFFFFFFFF,
  699. entry.mode,
  700. entry.uid,
  701. entry.gid,
  702. entry.size,
  703. hex_to_sha(entry.sha),
  704. flags,
  705. )
  706. )
  707. if flags & FLAG_EXTENDED:
  708. f.write(struct.pack(b">H", entry.extended_flags))
  709. if version >= 4:
  710. # Version 4: always write compressed path
  711. f.write(compressed_path)
  712. else:
  713. # Versions < 4: write regular path and padding
  714. f.write(entry.name)
  715. real_size = (f.tell() - beginoffset + 8) & ~7
  716. f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
  717. class UnsupportedIndexFormat(Exception):
  718. """An unsupported index format was encountered."""
  719. def __init__(self, version: int) -> None:
  720. """Initialize UnsupportedIndexFormat exception.
  721. Args:
  722. version: The unsupported index format version
  723. """
  724. self.index_format_version = version
  725. def read_index_header(f: BinaryIO) -> tuple[int, int]:
  726. """Read an index header from a file.
  727. Returns:
  728. tuple of (version, num_entries)
  729. """
  730. header = f.read(4)
  731. if header != b"DIRC":
  732. raise AssertionError(f"Invalid index file header: {header!r}")
  733. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  734. if version not in (1, 2, 3, 4):
  735. raise UnsupportedIndexFormat(version)
  736. return version, num_entries
  737. def write_index_extension(f: IO[bytes], extension: IndexExtension) -> None:
  738. """Write an index extension.
  739. Args:
  740. f: File-like object to write to
  741. extension: Extension to write
  742. """
  743. data = extension.to_bytes()
  744. f.write(extension.signature)
  745. f.write(struct.pack(">I", len(data)))
  746. f.write(data)
  747. def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
  748. """Read an index file, yielding the individual entries."""
  749. version, num_entries = read_index_header(f)
  750. previous_path = b""
  751. for i in range(num_entries):
  752. entry = read_cache_entry(f, version, previous_path)
  753. previous_path = entry.name
  754. yield entry
def read_index_dict_with_version(
    f: BinaryIO,
) -> tuple[dict[bytes, IndexEntry | ConflictedIndexEntry], int, list[IndexExtension]]:
    """Read an index file and return it as a dictionary along with the version.

    Keys of the returned dict are path names; a conflicted path maps to a
    ConflictedIndexEntry holding its individual merge stages.

    Args:
      f: File-like object to read from

    Returns:
      tuple of (entries_dict, version, extensions)
    """
    version, num_entries = read_index_header(f)
    ret: dict[bytes, IndexEntry | ConflictedIndexEntry] = {}
    previous_path = b""
    for i in range(num_entries):
        entry = read_cache_entry(f, version, previous_path)
        previous_path = entry.name
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate the individual stages onto a
            # single ConflictedIndexEntry keyed by path.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    # Read extensions
    extensions = []
    while True:
        # Check if we're at the end (20 bytes before EOF for SHA checksum)
        current_pos = f.tell()
        f.seek(0, 2)  # EOF
        eof_pos = f.tell()
        f.seek(current_pos)
        if current_pos >= eof_pos - 20:
            break
        # Try to read extension signature
        signature = f.read(4)
        if len(signature) < 4:
            break
        # Check if it's a valid extension signature (4 uppercase letters)
        if not all(65 <= b <= 90 for b in signature):
            # Not an extension, seek back
            f.seek(-4, 1)
            break
        # Read extension size
        size_data = f.read(4)
        if len(size_data) < 4:
            break
        size = struct.unpack(">I", size_data)[0]
        # Read extension data
        data = f.read(size)
        if len(data) < size:
            break
        extension = IndexExtension.from_raw(signature, data)
        extensions.append(extension)
    return ret, version, extensions
def read_index_dict(
    f: BinaryIO,
) -> dict[bytes, IndexEntry | ConflictedIndexEntry]:
    """Read an index file and return it as a dictionary.

    Keys of the returned dict are path names; a conflicted path maps to a
    ConflictedIndexEntry holding its individual merge stages.

    Args:
      f: File object to read from.
    """
    ret: dict[bytes, IndexEntry | ConflictedIndexEntry] = {}
    for entry in read_index(f):
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            # Conflicted path: accumulate the individual stages onto a
            # single ConflictedIndexEntry keyed by path.
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret
  837. def write_index(
  838. f: IO[bytes],
  839. entries: Sequence[SerializedIndexEntry],
  840. version: int | None = None,
  841. extensions: Sequence[IndexExtension] | None = None,
  842. ) -> None:
  843. """Write an index file.
  844. Args:
  845. f: File-like object to write to
  846. version: Version number to write
  847. entries: Iterable over the entries to write
  848. extensions: Optional list of extensions to write
  849. """
  850. if version is None:
  851. version = DEFAULT_VERSION
  852. # STEP 1: check if any extended_flags are set
  853. uses_extended_flags = any(e.extended_flags != 0 for e in entries)
  854. if uses_extended_flags and version < 3:
  855. # Force or bump the version to 3
  856. version = 3
  857. # The rest is unchanged, but you might insert a final check:
  858. if version < 3:
  859. # Double-check no extended flags appear
  860. for e in entries:
  861. if e.extended_flags != 0:
  862. raise AssertionError("Attempt to use extended flags in index < v3")
  863. # Proceed with the existing code to write the header and entries.
  864. f.write(b"DIRC")
  865. f.write(struct.pack(b">LL", version, len(entries)))
  866. previous_path = b""
  867. for entry in entries:
  868. write_cache_entry(f, entry, version=version, previous_path=previous_path)
  869. previous_path = entry.name
  870. # Write extensions
  871. if extensions:
  872. for extension in extensions:
  873. write_index_extension(f, extension)
  874. def write_index_dict(
  875. f: IO[bytes],
  876. entries: Mapping[bytes, IndexEntry | ConflictedIndexEntry],
  877. version: int | None = None,
  878. extensions: Sequence[IndexExtension] | None = None,
  879. ) -> None:
  880. """Write an index file based on the contents of a dictionary.
  881. being careful to sort by path and then by stage.
  882. """
  883. entries_list = []
  884. for key in sorted(entries):
  885. value = entries[key]
  886. if isinstance(value, ConflictedIndexEntry):
  887. if value.ancestor is not None:
  888. entries_list.append(
  889. value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)
  890. )
  891. if value.this is not None:
  892. entries_list.append(
  893. value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)
  894. )
  895. if value.other is not None:
  896. entries_list.append(
  897. value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)
  898. )
  899. else:
  900. entries_list.append(value.serialize(key, Stage.NORMAL))
  901. write_index(f, entries_list, version=version, extensions=extensions)
  902. def cleanup_mode(mode: int) -> int:
  903. """Cleanup a mode value.
  904. This will return a mode that can be stored in a tree object.
  905. Args:
  906. mode: Mode to clean up.
  907. Returns:
  908. mode
  909. """
  910. if stat.S_ISLNK(mode):
  911. return stat.S_IFLNK
  912. elif stat.S_ISDIR(mode):
  913. return stat.S_IFDIR
  914. elif S_ISGITLINK(mode):
  915. return S_IFGITLINK
  916. ret = stat.S_IFREG | 0o644
  917. if mode & 0o100:
  918. ret |= 0o111
  919. return ret
class Index:
    """A Git Index file."""

    # In-memory mapping of path -> entry; the single source of truth for
    # this object's contents.
    _byname: dict[bytes, IndexEntry | ConflictedIndexEntry]

    def __init__(
        self,
        filename: bytes | str | os.PathLike[str],
        read: bool = True,
        skip_hash: bool = False,
        version: int | None = None,
        *,
        file_mode: int | None = None,
    ) -> None:
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
          skip_hash: Whether to skip SHA1 hash when writing (for manyfiles feature)
          version: Index format version to use (None = auto-detect from file or use default)
          file_mode: Optional file permission mask for shared repository
        """
        self._filename = os.fspath(filename)
        # TODO(jelmer): Store the version returned by read_index
        self._version = version
        self._skip_hash = skip_hash
        self._file_mode = file_mode
        self._extensions: list[IndexExtension] = []
        self.clear()
        if read:
            self.read()

    @property
    def path(self) -> bytes | str:
        """Get the path to the index file.

        Returns:
          Path to the index file
        """
        return self._filename

    def __repr__(self) -> str:
        """Return string representation of Index."""
        return f"{self.__class__.__name__}({self._filename!r})"

    def write(self) -> None:
        """Write current contents of index to disk."""
        mask = self._file_mode if self._file_mode is not None else 0o644
        f = GitFile(self._filename, "wb", mask=mask)
        try:
            # Filter out extensions with no meaningful data
            meaningful_extensions = []
            for ext in self._extensions:
                # Skip extensions that have empty data
                ext_data = ext.to_bytes()
                if ext_data:
                    meaningful_extensions.append(ext)
            if self._skip_hash:
                # When skipHash is enabled, write the index without computing SHA1
                write_index_dict(
                    f,
                    self._byname,
                    version=self._version,
                    extensions=meaningful_extensions,
                )
                # Write 20 zero bytes instead of SHA1
                f.write(b"\x00" * 20)
                f.close()
            else:
                sha1_writer = SHA1Writer(f)
                write_index_dict(
                    sha1_writer,
                    self._byname,
                    version=self._version,
                    extensions=meaningful_extensions,
                )
                sha1_writer.close()
        except:
            # On failure, close the file before propagating the error.
            f.close()
            raise

    def read(self) -> None:
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            sha1_reader = SHA1Reader(f)
            entries, version, extensions = read_index_dict_with_version(sha1_reader)
            self._version = version
            self._extensions = extensions
            self.update(entries)
            # Extensions have already been read by read_index_dict_with_version
            sha1_reader.check_sha(allow_empty=True)
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, key: bytes) -> IndexEntry | ConflictedIndexEntry:
        """Retrieve entry by relative path and stage.

        Returns: Either a IndexEntry or a ConflictedIndexEntry
        Raises KeyError: if the entry does not exist
        """
        return self._byname[key]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths and stages in this index."""
        return iter(self._byname)

    def __contains__(self, key: bytes) -> bool:
        """Check if a path exists in the index."""
        return key in self._byname

    def get_sha1(self, path: bytes) -> ObjectID:
        """Return the (git object) SHA1 for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.mode

    def iterobjects(self) -> Iterable[tuple[bytes, ObjectID, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            if isinstance(entry, ConflictedIndexEntry):
                raise UnmergedEntries
            yield path, entry.sha, cleanup_mode(entry.mode)

    def has_conflicts(self) -> bool:
        """Check if the index contains any conflicted entries.

        Returns:
          True if any entries are conflicted, False otherwise
        """
        for value in self._byname.values():
            if isinstance(value, ConflictedIndexEntry):
                return True
        return False

    def clear(self) -> None:
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(
        self, name: bytes, value: IndexEntry | ConflictedIndexEntry
    ) -> None:
        """Set an entry in the index."""
        assert isinstance(name, bytes)
        self._byname[name] = value

    def __delitem__(self, name: bytes) -> None:
        """Delete an entry from the index."""
        del self._byname[name]

    def iteritems(
        self,
    ) -> Iterator[tuple[bytes, IndexEntry | ConflictedIndexEntry]]:
        """Iterate over (path, entry) pairs in the index.

        Returns:
          Iterator of (path, entry) tuples
        """
        return iter(self._byname.items())

    def items(self) -> Iterator[tuple[bytes, IndexEntry | ConflictedIndexEntry]]:
        """Get an iterator over (path, entry) pairs.

        Returns:
          Iterator of (path, entry) tuples
        """
        return iter(self._byname.items())

    def update(self, entries: dict[bytes, IndexEntry | ConflictedIndexEntry]) -> None:
        """Update the index with multiple entries.

        Args:
          entries: Dictionary mapping paths to index entries
        """
        for key, value in entries.items():
            self[key] = value

    def paths(self) -> Generator[bytes, None, None]:
        """Generate all paths in the index.

        Yields:
          Path names as bytes
        """
        yield from self._byname.keys()

    def changes_from_tree(
        self,
        object_store: ObjectContainer,
        tree: ObjectID,
        want_unchanged: bool = False,
    ) -> Generator[
        tuple[
            tuple[bytes | None, bytes | None],
            tuple[int | None, int | None],
            tuple[bytes | None, bytes | None],
        ],
        None,
        None,
    ]:
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path: bytes) -> tuple[bytes, int]:
            entry = self[path]
            if hasattr(entry, "sha") and hasattr(entry, "mode"):
                return entry.sha, cleanup_mode(entry.mode)
            else:
                # Handle ConflictedIndexEntry case
                return b"", 0

        yield from changes_from_tree(
            self.paths(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        )

    def commit(self, object_store: ObjectContainer) -> ObjectID:
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())

    def is_sparse(self) -> bool:
        """Check if this index contains sparse directory entries.

        Returns:
          True if any sparse directory extension is present
        """
        return any(isinstance(ext, SparseDirExtension) for ext in self._extensions)

    def ensure_full_index(self, object_store: "BaseObjectStore") -> None:
        """Expand all sparse directory entries into full file entries.

        This converts a sparse index into a full index by recursively
        expanding any sparse directory entries into their constituent files.

        Args:
          object_store: Object store to read tree objects from

        Raises:
          KeyError: If a tree object referenced by a sparse dir entry doesn't exist
        """
        if not self.is_sparse():
            return
        # Find all sparse directory entries
        sparse_dirs = []
        for path, entry in list(self._byname.items()):
            if isinstance(entry, IndexEntry) and entry.is_sparse_dir(path):
                sparse_dirs.append((path, entry))
        # Expand each sparse directory
        for path, entry in sparse_dirs:
            # Remove the sparse directory entry
            del self._byname[path]
            # Get the tree object
            tree = object_store[entry.sha]
            if not isinstance(tree, Tree):
                raise ValueError(f"Sparse directory {path!r} points to non-tree object")
            # Recursively add all entries from the tree
            self._expand_tree(path.rstrip(b"/"), tree, object_store, entry)
        # Remove the sparse directory extension
        self._extensions = [
            ext for ext in self._extensions if not isinstance(ext, SparseDirExtension)
        ]

    def _expand_tree(
        self,
        prefix: bytes,
        tree: Tree,
        object_store: "BaseObjectStore",
        template_entry: IndexEntry,
    ) -> None:
        """Recursively expand a tree into index entries.

        Args:
          prefix: Path prefix for entries (without trailing slash)
          tree: Tree object to expand
          object_store: Object store to read nested trees from
          template_entry: Template entry to copy metadata from
        """
        for name, mode, sha in tree.items():
            if prefix:
                full_path = prefix + b"/" + name
            else:
                full_path = name
            if stat.S_ISDIR(mode):
                # Recursively expand subdirectories
                subtree = object_store[sha]
                if not isinstance(subtree, Tree):
                    raise ValueError(
                        f"Directory entry {full_path!r} points to non-tree object"
                    )
                self._expand_tree(full_path, subtree, object_store, template_entry)
            else:
                # Create an index entry for this file
                # Use the template entry for metadata but with the file's sha and mode
                new_entry = IndexEntry(
                    ctime=template_entry.ctime,
                    mtime=template_entry.mtime,
                    dev=template_entry.dev,
                    ino=template_entry.ino,
                    mode=mode,
                    uid=template_entry.uid,
                    gid=template_entry.gid,
                    size=0,  # Size is unknown from tree
                    sha=sha,
                    flags=0,
                    extended_flags=0,  # Don't copy skip-worktree flag
                )
                self._byname[full_path] = new_entry

    def convert_to_sparse(
        self,
        object_store: "BaseObjectStore",
        tree_sha: ObjectID,
        sparse_dirs: Set[bytes],
    ) -> None:
        """Convert full index entries to sparse directory entries.

        This collapses directories that are entirely outside the sparse
        checkout cone into single sparse directory entries.

        Args:
          object_store: Object store to read tree objects
          tree_sha: SHA of the tree (usually HEAD) to base sparse dirs on
          sparse_dirs: Set of directory paths (with trailing /) to collapse

        Raises:
          KeyError: If tree_sha or a subdirectory doesn't exist
        """
        if not sparse_dirs:
            return
        # Get the base tree
        tree = object_store[tree_sha]
        if not isinstance(tree, Tree):
            raise ValueError(f"tree_sha {tree_sha!r} is not a tree object")
        # For each sparse directory, find its tree SHA and create sparse entry
        for dir_path in sparse_dirs:
            dir_path_stripped = dir_path.rstrip(b"/")
            # Find the tree SHA for this directory
            subtree_sha = self._find_subtree_sha(tree, dir_path_stripped, object_store)
            if subtree_sha is None:
                # Directory doesn't exist in tree, skip it
                continue
            # Remove all entries under this directory
            entries_to_remove = [
                path
                for path in self._byname
                if path.startswith(dir_path) or path == dir_path_stripped
            ]
            for path in entries_to_remove:
                del self._byname[path]
            # Create a sparse directory entry
            # Use minimal metadata since it's not a real file
            from dulwich.objects import ObjectID

            sparse_entry = IndexEntry(
                ctime=0,
                mtime=0,
                dev=0,
                ino=0,
                mode=stat.S_IFDIR,
                uid=0,
                gid=0,
                size=0,
                sha=ObjectID(subtree_sha),
                flags=0,
                extended_flags=EXTENDED_FLAG_SKIP_WORKTREE,
            )
            self._byname[dir_path] = sparse_entry
        # Add sparse directory extension if not present
        if not self.is_sparse():
            self._extensions.append(SparseDirExtension())

    def _find_subtree_sha(
        self,
        tree: Tree,
        path: bytes,
        object_store: "BaseObjectStore",
    ) -> bytes | None:
        """Find the SHA of a subtree at a given path.

        Args:
          tree: Root tree object to search in
          path: Path to the subdirectory (no trailing slash)
          object_store: Object store to read nested trees from

        Returns:
          SHA of the subtree, or None if path doesn't exist
        """
        if not path:
            return tree.id
        parts = path.split(b"/")
        current_tree = tree
        for part in parts:
            # Look for this part in the current tree
            try:
                mode, sha = current_tree[part]
            except KeyError:
                return None
            if not stat.S_ISDIR(mode):
                # Path component is a file, not a directory
                return None
            # Load the next tree
            obj = object_store[sha]
            if not isinstance(obj, Tree):
                return None
            current_tree = obj
        return current_tree.id
  1306. def commit_tree(
  1307. object_store: ObjectContainer, blobs: Iterable[tuple[bytes, ObjectID, int]]
  1308. ) -> ObjectID:
  1309. """Commit a new tree.
  1310. Args:
  1311. object_store: Object store to add trees to
  1312. blobs: Iterable over blob path, sha, mode entries
  1313. Returns:
  1314. SHA1 of the created tree.
  1315. """
  1316. trees: dict[bytes, TreeDict] = {b"": {}}
  1317. def add_tree(path: bytes) -> TreeDict:
  1318. if path in trees:
  1319. return trees[path]
  1320. dirname, basename = pathsplit(path)
  1321. t = add_tree(dirname)
  1322. assert isinstance(basename, bytes)
  1323. newtree: TreeDict = {}
  1324. t[basename] = newtree
  1325. trees[path] = newtree
  1326. return newtree
  1327. for path, sha, mode in blobs:
  1328. tree_path, basename = pathsplit(path)
  1329. tree = add_tree(tree_path)
  1330. tree[basename] = (mode, sha)
  1331. def build_tree(path: bytes) -> ObjectID:
  1332. tree = Tree()
  1333. for basename, entry in trees[path].items():
  1334. if isinstance(entry, dict):
  1335. mode = stat.S_IFDIR
  1336. sha = build_tree(pathjoin(path, basename))
  1337. else:
  1338. (mode, sha) = entry
  1339. tree.add(basename, mode, sha)
  1340. object_store.add_object(tree)
  1341. return tree.id
  1342. return build_tree(b"")
  1343. def commit_index(object_store: ObjectContainer, index: Index) -> ObjectID:
  1344. """Create a new tree from an index.
  1345. Args:
  1346. object_store: Object store to save the tree in
  1347. index: Index file
  1348. Note: This function is deprecated, use index.commit() instead.
  1349. Returns: Root tree sha.
  1350. """
  1351. return commit_tree(object_store, index.iterobjects())
  1352. def changes_from_tree(
  1353. names: Iterable[bytes],
  1354. lookup_entry: Callable[[bytes], tuple[bytes, int]],
  1355. object_store: ObjectContainer,
  1356. tree: ObjectID | None,
  1357. want_unchanged: bool = False,
  1358. ) -> Iterable[
  1359. tuple[
  1360. tuple[bytes | None, bytes | None],
  1361. tuple[int | None, int | None],
  1362. tuple[bytes | None, bytes | None],
  1363. ]
  1364. ]:
  1365. """Find the differences between the contents of a tree and a working copy.
  1366. Args:
  1367. names: Iterable of names in the working copy
  1368. lookup_entry: Function to lookup an entry in the working copy
  1369. object_store: Object store to use for retrieving tree contents
  1370. tree: SHA1 of the root tree, or None for an empty tree
  1371. want_unchanged: Whether unchanged files should be reported
  1372. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  1373. (oldsha, newsha)
  1374. """
  1375. # TODO(jelmer): Support a include_trees option
  1376. other_names = set(names)
  1377. if tree is not None:
  1378. for name, mode, sha in iter_tree_contents(object_store, tree):
  1379. assert name is not None and mode is not None and sha is not None
  1380. try:
  1381. (other_sha, other_mode) = lookup_entry(name)
  1382. except KeyError:
  1383. # Was removed
  1384. yield ((name, None), (mode, None), (sha, None))
  1385. else:
  1386. other_names.remove(name)
  1387. if want_unchanged or other_sha != sha or other_mode != mode:
  1388. yield ((name, name), (mode, other_mode), (sha, other_sha))
  1389. # Mention added files
  1390. for name in other_names:
  1391. try:
  1392. (other_sha, other_mode) = lookup_entry(name)
  1393. except KeyError:
  1394. pass
  1395. else:
  1396. yield ((None, name), (None, other_mode), (None, other_sha))
  1397. def index_entry_from_stat(
  1398. stat_val: os.stat_result,
  1399. hex_sha: bytes,
  1400. mode: int | None = None,
  1401. ) -> IndexEntry:
  1402. """Create a new index entry from a stat value.
  1403. Args:
  1404. stat_val: POSIX stat_result instance
  1405. hex_sha: Hex sha of the object
  1406. mode: Optional file mode, will be derived from stat if not provided
  1407. """
  1408. if mode is None:
  1409. mode = cleanup_mode(stat_val.st_mode)
  1410. from dulwich.objects import ObjectID
  1411. # Use nanosecond precision when available to avoid precision loss
  1412. # through float representation
  1413. ctime: int | float | tuple[int, int]
  1414. mtime: int | float | tuple[int, int]
  1415. st_ctime_ns = getattr(stat_val, "st_ctime_ns", None)
  1416. if st_ctime_ns is not None:
  1417. ctime = (
  1418. st_ctime_ns // 1_000_000_000,
  1419. st_ctime_ns % 1_000_000_000,
  1420. )
  1421. else:
  1422. ctime = stat_val.st_ctime
  1423. st_mtime_ns = getattr(stat_val, "st_mtime_ns", None)
  1424. if st_mtime_ns is not None:
  1425. mtime = (
  1426. st_mtime_ns // 1_000_000_000,
  1427. st_mtime_ns % 1_000_000_000,
  1428. )
  1429. else:
  1430. mtime = stat_val.st_mtime
  1431. return IndexEntry(
  1432. ctime=ctime,
  1433. mtime=mtime,
  1434. dev=stat_val.st_dev,
  1435. ino=stat_val.st_ino,
  1436. mode=mode,
  1437. uid=stat_val.st_uid,
  1438. gid=stat_val.st_gid,
  1439. size=stat_val.st_size,
  1440. sha=ObjectID(hex_sha),
  1441. flags=0,
  1442. extended_flags=0,
  1443. )
if sys.platform == "win32":
    # On Windows, creating symlinks either requires administrator privileges
    # or developer mode. Raise a more helpful error when we're unable to
    # create symlinks
    # https://github.com/jelmer/dulwich/issues/1005

    class WindowsSymlinkPermissionError(PermissionError):
        """Windows-specific error for symlink creation failures.

        This error is raised when symlink creation fails on Windows,
        typically due to lack of developer mode or administrator privileges.
        """

        def __init__(self, errno: int, msg: str, filename: str | None) -> None:
            """Initialize WindowsSymlinkPermissionError."""
            super().__init__(
                errno,
                f"Unable to create symlink; do you have developer mode enabled? {msg}",
                filename,
            )

    def symlink(
        src: str | bytes,
        dst: str | bytes,
        target_is_directory: bool = False,
        *,
        dir_fd: int | None = None,
    ) -> None:
        """Create a symbolic link on Windows with better error handling.

        Args:
          src: Source path for the symlink
          dst: Destination path where symlink will be created
          target_is_directory: Whether the target is a directory
          dir_fd: Optional directory file descriptor

        Raises:
          WindowsSymlinkPermissionError: If symlink creation fails due to permissions
        """
        try:
            return os.symlink(
                src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
            )
        except PermissionError as e:
            # Re-raise with a developer-mode hint, preserving the original
            # error as the cause.
            raise WindowsSymlinkPermissionError(
                e.errno or 0, e.strerror or "", e.filename
            ) from e
else:
    # Non-Windows platforms: use os.symlink directly.
    symlink = os.symlink
def build_file_from_blob(
    blob: Blob,
    mode: int,
    target_path: bytes,
    *,
    honor_filemode: bool = True,
    tree_encoding: str = "utf-8",
    symlink_fn: Callable[
        [str | bytes | os.PathLike[str], str | bytes | os.PathLike[str]], None
    ]
    | None = None,
) -> os.stat_result:
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      tree_encoding: Encoding to use for tree contents
      symlink_fn: Function to use for creating symlinks
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        # Nothing on disk yet; we will create the entry below.
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        if oldstat:
            # Any existing entry must be removed before the symlink is created.
            _remove_file_with_readonly_handling(target_path)
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            contents_str = contents.decode(tree_encoding)
            target_path_str = target_path.decode(tree_encoding)
            (symlink_fn or symlink)(contents_str, target_path_str)
        else:
            (symlink_fn or symlink)(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            # Same size as on disk: skip the write if content is identical.
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat

        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)

        if honor_filemode:
            os.chmod(target_path, mode)

    return os.lstat(target_path)
  1536. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  1537. def _normalize_path_element_default(element: bytes) -> bytes:
  1538. """Normalize path element for default case-insensitive comparison."""
  1539. return element.lower()
  1540. def _normalize_path_element_ntfs(element: bytes) -> bytes:
  1541. """Normalize path element for NTFS filesystem."""
  1542. return element.rstrip(b". ").lower()
  1543. def _normalize_path_element_hfs(element: bytes) -> bytes:
  1544. """Normalize path element for HFS+ filesystem."""
  1545. import unicodedata
  1546. # Decode to Unicode (let UnicodeDecodeError bubble up)
  1547. element_str = element.decode("utf-8", errors="strict")
  1548. # Remove HFS+ ignorable characters
  1549. filtered = "".join(c for c in element_str if ord(c) not in HFS_IGNORABLE_CHARS)
  1550. # Normalize to NFD
  1551. normalized = unicodedata.normalize("NFD", filtered)
  1552. return normalized.lower().encode("utf-8", errors="strict")
  1553. def get_path_element_normalizer(config: "Config") -> Callable[[bytes], bytes]:
  1554. """Get the appropriate path element normalization function based on config.
  1555. Args:
  1556. config: Repository configuration object
  1557. Returns:
  1558. Function that normalizes path elements for the configured filesystem
  1559. """
  1560. import os
  1561. import sys
  1562. if config.get_boolean(b"core", b"protectNTFS", os.name == "nt"):
  1563. return _normalize_path_element_ntfs
  1564. elif config.get_boolean(b"core", b"protectHFS", sys.platform == "darwin"):
  1565. return _normalize_path_element_hfs
  1566. else:
  1567. return _normalize_path_element_default
  1568. def validate_path_element_default(element: bytes) -> bool:
  1569. """Validate a path element using default rules.
  1570. Args:
  1571. element: Path element to validate
  1572. Returns:
  1573. True if path element is valid, False otherwise
  1574. """
  1575. return _normalize_path_element_default(element) not in INVALID_DOTNAMES
  1576. def validate_path_element_ntfs(element: bytes) -> bool:
  1577. """Validate a path element using NTFS filesystem rules.
  1578. Args:
  1579. element: Path element to validate
  1580. Returns:
  1581. True if path element is valid for NTFS, False otherwise
  1582. """
  1583. normalized = _normalize_path_element_ntfs(element)
  1584. if normalized in INVALID_DOTNAMES:
  1585. return False
  1586. if normalized == b"git~1":
  1587. return False
  1588. return True
# HFS+ ignorable Unicode codepoints (from Git's utf8.c)
# These are stripped by _normalize_path_element_hfs() before comparison.
HFS_IGNORABLE_CHARS = {
    0x200C,  # ZERO WIDTH NON-JOINER
    0x200D,  # ZERO WIDTH JOINER
    0x200E,  # LEFT-TO-RIGHT MARK
    0x200F,  # RIGHT-TO-LEFT MARK
    0x202A,  # LEFT-TO-RIGHT EMBEDDING
    0x202B,  # RIGHT-TO-LEFT EMBEDDING
    0x202C,  # POP DIRECTIONAL FORMATTING
    0x202D,  # LEFT-TO-RIGHT OVERRIDE
    0x202E,  # RIGHT-TO-LEFT OVERRIDE
    0x206A,  # INHIBIT SYMMETRIC SWAPPING
    0x206B,  # ACTIVATE SYMMETRIC SWAPPING
    0x206C,  # INHIBIT ARABIC FORM SHAPING
    0x206D,  # ACTIVATE ARABIC FORM SHAPING
    0x206E,  # NATIONAL DIGIT SHAPES
    0x206F,  # NOMINAL DIGIT SHAPES
    0xFEFF,  # ZERO WIDTH NO-BREAK SPACE
}
  1608. def validate_path_element_hfs(element: bytes) -> bool:
  1609. """Validate path element for HFS+ filesystem.
  1610. Equivalent to Git's is_hfs_dotgit and related checks.
  1611. Uses NFD normalization and ignores HFS+ ignorable characters.
  1612. """
  1613. try:
  1614. normalized = _normalize_path_element_hfs(element)
  1615. except UnicodeDecodeError:
  1616. # Malformed UTF-8 - be conservative and reject
  1617. return False
  1618. # Check against invalid names
  1619. if normalized in INVALID_DOTNAMES:
  1620. return False
  1621. # Also check for 8.3 short name
  1622. if normalized == b"git~1":
  1623. return False
  1624. return True
  1625. def validate_path(
  1626. path: bytes,
  1627. element_validator: Callable[[bytes], bool] = validate_path_element_default,
  1628. ) -> bool:
  1629. """Default path validator that just checks for .git/."""
  1630. parts = path.split(b"/")
  1631. for p in parts:
  1632. if not element_validator(p):
  1633. return False
  1634. else:
  1635. return True
def build_index_from_tree(
    root_path: str | bytes,
    index_path: str | bytes,
    object_store: ObjectContainer,
    tree_id: ObjectID,
    honor_filemode: bool = True,
    validate_path_element: Callable[[bytes], bool] = validate_path_element_default,
    symlink_fn: Callable[
        [str | bytes | os.PathLike[str], str | bytes | os.PathLike[str]], None
    ]
    | None = None,
    blob_normalizer: "FilterBlobNormalizer | None" = None,
    tree_encoding: str = "utf-8",
) -> None:
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.
      symlink_fn: Function to use for creating symlinks
      blob_normalizer: An optional BlobNormalizer to use for converting line
        endings when writing blobs to the working directory.
      tree_encoding: Encoding used for tree paths (default: utf-8)

    Note: existing index is wiped and contents are not merged
      in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for entry in iter_tree_contents(object_store, tree_id):
        assert (
            entry.path is not None and entry.mode is not None and entry.sha is not None
        )
        # Unsafe path elements (e.g. ".git") are silently skipped.
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path, tree_encoding)

        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))

        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            # Submodule: only create an empty placeholder directory.
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            assert isinstance(obj, Blob)
            # Apply blob normalization for checkout if normalizer is provided
            if blob_normalizer is not None:
                obj = blob_normalizer.checkout_normalize(obj, entry.path)
            st = build_file_from_blob(
                obj,
                entry.mode,
                full_path,
                honor_filemode=honor_filemode,
                tree_encoding=tree_encoding,
                symlink_fn=symlink_fn,
            )

        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        # default to a stage 0 index entry (normal)
        # when reading from the filesystem
        index[entry.path] = index_entry_from_stat(st, entry.sha)

    index.write()
  1721. def blob_from_path_and_mode(
  1722. fs_path: bytes, mode: int, tree_encoding: str = "utf-8"
  1723. ) -> Blob:
  1724. """Create a blob from a path and a stat object.
  1725. Args:
  1726. fs_path: Full file system path to file
  1727. mode: File mode
  1728. tree_encoding: Encoding to use for tree contents
  1729. Returns: A `Blob` object
  1730. """
  1731. assert isinstance(fs_path, bytes)
  1732. blob = Blob()
  1733. if stat.S_ISLNK(mode):
  1734. if sys.platform == "win32":
  1735. # os.readlink on Python3 on Windows requires a unicode string.
  1736. blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
  1737. else:
  1738. blob.data = os.readlink(fs_path)
  1739. else:
  1740. with open(fs_path, "rb") as f:
  1741. blob.data = f.read()
  1742. return blob
  1743. def blob_from_path_and_stat(
  1744. fs_path: bytes, st: os.stat_result, tree_encoding: str = "utf-8"
  1745. ) -> Blob:
  1746. """Create a blob from a path and a stat object.
  1747. Args:
  1748. fs_path: Full file system path to file
  1749. st: A stat object
  1750. tree_encoding: Encoding to use for tree contents
  1751. Returns: A `Blob` object
  1752. """
  1753. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
  1754. def read_submodule_head(path: str | bytes) -> bytes | None:
  1755. """Read the head commit of a submodule.
  1756. Args:
  1757. path: path to the submodule
  1758. Returns: HEAD sha, None if not a valid head/repository
  1759. """
  1760. from .errors import NotGitRepository
  1761. from .repo import Repo
  1762. # Repo currently expects a "str", so decode if necessary.
  1763. # TODO(jelmer): Perhaps move this into Repo() ?
  1764. if not isinstance(path, str):
  1765. path = os.fsdecode(path)
  1766. try:
  1767. repo = Repo(path)
  1768. except NotGitRepository:
  1769. return None
  1770. try:
  1771. return repo.head()
  1772. except KeyError:
  1773. return None
  1774. def _has_directory_changed(tree_path: bytes, entry: IndexEntry) -> bool:
  1775. """Check if a directory has changed after getting an error.
  1776. When handling an error trying to create a blob from a path, call this
  1777. function. It will check if the path is a directory. If it's a directory
  1778. and a submodule, check the submodule head to see if it's has changed. If
  1779. not, consider the file as changed as Git tracked a file and not a
  1780. directory.
  1781. Return true if the given path should be considered as changed and False
  1782. otherwise or if the path is not a directory.
  1783. """
  1784. # This is actually a directory
  1785. if os.path.exists(os.path.join(tree_path, b".git")):
  1786. # Submodule
  1787. head = read_submodule_head(tree_path)
  1788. if entry.sha != head:
  1789. return True
  1790. else:
  1791. # The file was changed to a directory, so consider it removed.
  1792. return True
  1793. return False
  1794. os_sep_bytes = os.sep.encode("ascii")
  1795. def _ensure_parent_dir_exists(full_path: bytes) -> None:
  1796. """Ensure parent directory exists, checking no parent is a file."""
  1797. parent_dir = os.path.dirname(full_path)
  1798. if parent_dir and not os.path.exists(parent_dir):
  1799. # Walk up the directory tree to find the first existing parent
  1800. current = parent_dir
  1801. parents_to_check: list[bytes] = []
  1802. while current and not os.path.exists(current):
  1803. parents_to_check.insert(0, current)
  1804. new_parent = os.path.dirname(current)
  1805. if new_parent == current:
  1806. # Reached the root or can't go up further
  1807. break
  1808. current = new_parent
  1809. # Check if the existing parent (if any) is a directory
  1810. if current and os.path.exists(current) and not os.path.isdir(current):
  1811. raise OSError(
  1812. f"Cannot create directory, parent path is a file: {current!r}"
  1813. )
  1814. # Now check each parent we need to create isn't blocked by an existing file
  1815. for parent_path in parents_to_check:
  1816. if os.path.exists(parent_path) and not os.path.isdir(parent_path):
  1817. raise OSError(
  1818. f"Cannot create directory, parent path is a file: {parent_path!r}"
  1819. )
  1820. os.makedirs(parent_dir)
  1821. def _remove_file_with_readonly_handling(path: bytes) -> None:
  1822. """Remove a file, handling read-only files on Windows.
  1823. Args:
  1824. path: Path to the file to remove
  1825. """
  1826. try:
  1827. os.unlink(path)
  1828. except PermissionError:
  1829. # On Windows, remove read-only attribute and retry
  1830. if sys.platform == "win32":
  1831. os.chmod(path, stat.S_IWRITE | stat.S_IREAD)
  1832. os.unlink(path)
  1833. else:
  1834. raise
  1835. def _remove_empty_parents(path: bytes, stop_at: bytes) -> None:
  1836. """Remove empty parent directories up to stop_at."""
  1837. parent = os.path.dirname(path)
  1838. while parent and parent != stop_at:
  1839. try:
  1840. os.rmdir(parent)
  1841. parent = os.path.dirname(parent)
  1842. except FileNotFoundError:
  1843. # Directory doesn't exist - stop trying
  1844. break
  1845. except OSError as e:
  1846. if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
  1847. # Directory not empty - stop trying
  1848. break
  1849. raise
def _check_symlink_matches(
    full_path: bytes, repo_object_store: "BaseObjectStore", entry_sha: ObjectID
) -> bool:
    """Check if symlink target matches expected target.

    Args:
      full_path: Filesystem path of the symlink to inspect
      repo_object_store: Object store holding the expected blob
      entry_sha: SHA of the blob whose content is the expected link target

    Returns True if symlink matches, False if it doesn't match.
    """
    try:
        current_target = os.readlink(full_path)
        blob_obj = repo_object_store[entry_sha]
        expected_target = blob_obj.as_raw_string()
        if isinstance(current_target, str):
            # readlink returns str for str-like input; compare as bytes.
            current_target = current_target.encode()
        return current_target == expected_target
    except FileNotFoundError:
        # Symlink doesn't exist
        return False
    except OSError as e:
        if e.errno == errno.EINVAL:
            # Not a symlink
            return False
        raise
def _check_file_matches(
    repo_object_store: "BaseObjectStore",
    full_path: bytes,
    entry_sha: ObjectID,
    entry_mode: int,
    current_stat: os.stat_result,
    honor_filemode: bool,
    blob_normalizer: "FilterBlobNormalizer | None" = None,
    tree_path: bytes | None = None,
) -> bool:
    """Check if a file on disk matches the expected git object.

    Args:
      repo_object_store: Object store containing the expected blob
      full_path: Filesystem path of the file to compare
      entry_sha: Expected blob SHA
      entry_mode: Expected file mode bits
      current_stat: lstat result for the on-disk file
      honor_filemode: Whether to compare permission bits at all
      blob_normalizer: Optional normalizer applied to the blob before the
        content comparison
      tree_path: Repository-relative path, required for normalization

    Returns True if file matches, False if it doesn't match.
    """
    # Check mode first (if honor_filemode is True)
    if honor_filemode:
        current_mode = stat.S_IMODE(current_stat.st_mode)
        expected_mode = stat.S_IMODE(entry_mode)

        # For regular files, only check the user executable bit, not group/other permissions
        # This matches Git's behavior where umask differences don't count as modifications
        if stat.S_ISREG(current_stat.st_mode):
            # Normalize regular file modes to ignore group/other write permissions
            current_mode_normalized = (
                current_mode & 0o755
            )  # Keep only user rwx and all read+execute
            expected_mode_normalized = expected_mode & 0o755

            # For Git compatibility, regular files should be either 644 or 755
            if expected_mode_normalized not in (0o644, 0o755):
                expected_mode_normalized = 0o644  # Default for regular files
            if current_mode_normalized not in (0o644, 0o755):
                # Determine if it should be executable based on user execute bit
                if current_mode & 0o100:  # User execute bit is set
                    current_mode_normalized = 0o755
                else:
                    current_mode_normalized = 0o644

            if current_mode_normalized != expected_mode_normalized:
                return False
        else:
            # For non-regular files (symlinks, etc.), check mode exactly
            if current_mode != expected_mode:
                return False

    # If mode matches (or we don't care), check content via size first
    # NOTE(review): the size check compares against the *unnormalized* blob
    # length; a normalizer that changes content length could cause an early
    # mismatch here - confirm intended.
    blob_obj = repo_object_store[entry_sha]
    if current_stat.st_size != blob_obj.raw_length():
        return False

    # Size matches, check actual content
    try:
        with open(full_path, "rb") as f:
            current_content = f.read()
            expected_content = blob_obj.as_raw_string()
            # Apply checkout normalization before comparing, when configured.
            if blob_normalizer and tree_path is not None:
                assert isinstance(blob_obj, Blob)
                normalized_blob = blob_normalizer.checkout_normalize(
                    blob_obj, tree_path
                )
                expected_content = normalized_blob.as_raw_string()
            return current_content == expected_content
    except (FileNotFoundError, PermissionError, IsADirectoryError):
        return False
def _transition_to_submodule(
    repo: "Repo",
    path: bytes,
    full_path: bytes,
    current_stat: os.stat_result | None,
    entry: IndexEntry | TreeEntry,
    index: Index,
) -> None:
    """Transition any type to submodule.

    Args:
      repo: Repository object
      path: Repository-relative path of the submodule
      full_path: Absolute filesystem path
      current_stat: lstat result for the current path, or None if absent
      entry: Index or tree entry carrying the submodule's SHA
      index: Index to record the new entry in
    """
    from .submodule import ensure_submodule_placeholder

    if current_stat is not None and stat.S_ISDIR(current_stat.st_mode):
        # Already a directory, just ensure .git file exists
        ensure_submodule_placeholder(repo, path)
    else:
        # Remove whatever is there and create submodule
        if current_stat is not None:
            _remove_file_with_readonly_handling(full_path)
        ensure_submodule_placeholder(repo, path)

    st = os.lstat(full_path)
    assert entry.sha is not None
    index[path] = index_entry_from_stat(st, entry.sha)
def _transition_to_file(
    object_store: "BaseObjectStore",
    path: bytes,
    full_path: bytes,
    current_stat: os.stat_result | None,
    entry: IndexEntry | TreeEntry,
    index: Index,
    honor_filemode: bool,
    symlink_fn: Callable[
        [str | bytes | os.PathLike[str], str | bytes | os.PathLike[str]], None
    ]
    | None,
    blob_normalizer: "FilterBlobNormalizer | None",
    tree_encoding: str = "utf-8",
) -> None:
    """Transition any type to regular file or symlink.

    Args:
      object_store: Object store containing the target blob
      path: Repository-relative path of the entry
      full_path: Absolute filesystem path
      current_stat: lstat result for the current path, or None if absent
      entry: Index or tree entry carrying the target SHA and mode
      index: Index to record the new entry in
      honor_filemode: Whether to apply mode bits to the written file
      symlink_fn: Function to use for creating symlinks
      blob_normalizer: Optional normalizer for checkout content conversion
      tree_encoding: Encoding used for tree paths (default: utf-8)

    Raises:
      IsADirectoryError: if a non-empty directory (or a submodule with
        untracked files) is in the way
    """
    assert entry.sha is not None and entry.mode is not None
    # Check if we need to update
    if (
        current_stat is not None
        and stat.S_ISREG(current_stat.st_mode)
        and not stat.S_ISLNK(entry.mode)
    ):
        # File to file - check if update needed
        file_matches = _check_file_matches(
            object_store,
            full_path,
            entry.sha,
            entry.mode,
            current_stat,
            honor_filemode,
            blob_normalizer,
            path,
        )
        needs_update = not file_matches
    elif (
        current_stat is not None
        and stat.S_ISLNK(current_stat.st_mode)
        and stat.S_ISLNK(entry.mode)
    ):
        # Symlink to symlink - check if update needed
        symlink_matches = _check_symlink_matches(full_path, object_store, entry.sha)
        needs_update = not symlink_matches
    else:
        # Type change (or nothing on disk): always rewrite.
        needs_update = True

    if not needs_update:
        # Just update index - current_stat should always be valid here since we're not updating
        assert current_stat is not None
        index[path] = index_entry_from_stat(current_stat, entry.sha)
        return

    # Remove existing entry if needed
    if current_stat is not None and stat.S_ISDIR(current_stat.st_mode):
        # Remove directory
        dir_contents = set(os.listdir(full_path))
        git_file_name = b".git" if isinstance(full_path, bytes) else ".git"

        if git_file_name in dir_contents:
            # Submodule placeholder: only removable when nothing else is inside.
            if dir_contents != {git_file_name}:
                raise IsADirectoryError(
                    f"Cannot replace submodule with untracked files: {full_path!r}"
                )
            shutil.rmtree(full_path)
        else:
            try:
                os.rmdir(full_path)
            except OSError as e:
                if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
                    raise IsADirectoryError(
                        f"Cannot replace non-empty directory with file: {full_path!r}"
                    )
                raise
    elif current_stat is not None:
        _remove_file_with_readonly_handling(full_path)

    # Ensure parent directory exists
    _ensure_parent_dir_exists(full_path)

    # Write the file
    blob_obj = object_store[entry.sha]
    assert isinstance(blob_obj, Blob)
    if blob_normalizer:
        blob_obj = blob_normalizer.checkout_normalize(blob_obj, path)
    st = build_file_from_blob(
        blob_obj,
        entry.mode,
        full_path,
        honor_filemode=honor_filemode,
        tree_encoding=tree_encoding,
        symlink_fn=symlink_fn,
    )
    index[path] = index_entry_from_stat(st, entry.sha)
def _transition_to_absent(
    repo: "Repo",
    path: bytes,
    full_path: bytes,
    current_stat: os.stat_result | None,
    index: Index,
) -> None:
    """Remove any type of entry.

    Args:
      repo: Repository object (used to bound parent-directory cleanup)
      path: Repository-relative path of the entry to remove
      full_path: Absolute filesystem path
      current_stat: lstat result for the current path, or None if absent
      index: Index to drop the entry from
    """
    if current_stat is None:
        # Nothing on disk; still drop the index entry below? No - early exit
        # matches the original behavior: index untouched when path is absent.
        return

    if stat.S_ISDIR(current_stat.st_mode):
        # Check if it's a submodule directory
        dir_contents = set(os.listdir(full_path))
        git_file_name = b".git" if isinstance(full_path, bytes) else ".git"

        if git_file_name in dir_contents and dir_contents == {git_file_name}:
            # Bare submodule placeholder: safe to remove entirely.
            shutil.rmtree(full_path)
        else:
            try:
                os.rmdir(full_path)
            except OSError as e:
                # Leave non-empty directories in place (may hold untracked files).
                if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
                    raise
    else:
        _remove_file_with_readonly_handling(full_path)

    try:
        del index[path]
    except KeyError:
        pass

    # Try to remove empty parent directories
    _remove_empty_parents(
        full_path, repo.path if isinstance(repo.path, bytes) else repo.path.encode()
    )
  2070. def detect_case_only_renames(
  2071. changes: Sequence["TreeChange"],
  2072. config: "Config",
  2073. ) -> list["TreeChange"]:
  2074. """Detect and transform case-only renames in a list of tree changes.
  2075. This function identifies file renames that only differ in case (e.g.,
  2076. README.txt -> readme.txt) and transforms matching ADD/DELETE pairs into
  2077. CHANGE_RENAME operations. It uses filesystem-appropriate path normalization
  2078. based on the repository configuration.
  2079. Args:
  2080. changes: List of TreeChange objects representing file changes
  2081. config: Repository configuration object
  2082. Returns:
  2083. New list of TreeChange objects with case-only renames converted to CHANGE_RENAME
  2084. """
  2085. from .diff_tree import (
  2086. CHANGE_ADD,
  2087. CHANGE_COPY,
  2088. CHANGE_DELETE,
  2089. CHANGE_MODIFY,
  2090. CHANGE_RENAME,
  2091. TreeChange,
  2092. )
  2093. # Build dictionaries of old and new paths with their normalized forms
  2094. old_paths_normalized = {}
  2095. new_paths_normalized = {}
  2096. old_changes = {} # Map from old path to change object
  2097. new_changes = {} # Map from new path to change object
  2098. # Get the appropriate normalizer based on config
  2099. normalize_func = get_path_element_normalizer(config)
  2100. def normalize_path(path: bytes) -> bytes:
  2101. """Normalize entire path using element normalization."""
  2102. return b"/".join(normalize_func(part) for part in path.split(b"/"))
  2103. # Pre-normalize all paths once to avoid repeated normalization
  2104. for change in changes:
  2105. if change.type == CHANGE_DELETE and change.old:
  2106. assert change.old.path is not None
  2107. try:
  2108. normalized = normalize_path(change.old.path)
  2109. except UnicodeDecodeError:
  2110. import logging
  2111. logging.warning(
  2112. "Skipping case-only rename detection for path with invalid UTF-8: %r",
  2113. change.old.path,
  2114. )
  2115. else:
  2116. old_paths_normalized[normalized] = change.old.path
  2117. old_changes[change.old.path] = change
  2118. elif change.type == CHANGE_RENAME and change.old:
  2119. assert change.old.path is not None
  2120. # Treat RENAME as DELETE + ADD for case-only detection
  2121. try:
  2122. normalized = normalize_path(change.old.path)
  2123. except UnicodeDecodeError:
  2124. import logging
  2125. logging.warning(
  2126. "Skipping case-only rename detection for path with invalid UTF-8: %r",
  2127. change.old.path,
  2128. )
  2129. else:
  2130. old_paths_normalized[normalized] = change.old.path
  2131. old_changes[change.old.path] = change
  2132. if (
  2133. change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY)
  2134. and change.new
  2135. ):
  2136. assert change.new.path is not None
  2137. try:
  2138. normalized = normalize_path(change.new.path)
  2139. except UnicodeDecodeError:
  2140. import logging
  2141. logging.warning(
  2142. "Skipping case-only rename detection for path with invalid UTF-8: %r",
  2143. change.new.path,
  2144. )
  2145. else:
  2146. new_paths_normalized[normalized] = change.new.path
  2147. new_changes[change.new.path] = change
  2148. # Find case-only renames and transform changes
  2149. case_only_renames = set()
  2150. new_rename_changes = []
  2151. for norm_path, old_path in old_paths_normalized.items():
  2152. if norm_path in new_paths_normalized:
  2153. new_path = new_paths_normalized[norm_path]
  2154. if old_path != new_path:
  2155. # Found a case-only rename
  2156. old_change = old_changes[old_path]
  2157. new_change = new_changes[new_path]
  2158. # Create a CHANGE_RENAME to replace the DELETE and ADD/MODIFY pair
  2159. if new_change.type == CHANGE_ADD:
  2160. # Simple case: DELETE + ADD becomes RENAME
  2161. rename_change = TreeChange(
  2162. CHANGE_RENAME, old_change.old, new_change.new
  2163. )
  2164. else:
  2165. # Complex case: DELETE + MODIFY becomes RENAME
  2166. # Use the old file from DELETE and new file from MODIFY
  2167. rename_change = TreeChange(
  2168. CHANGE_RENAME, old_change.old, new_change.new
  2169. )
  2170. new_rename_changes.append(rename_change)
  2171. # Mark the old changes for removal
  2172. case_only_renames.add(old_change)
  2173. case_only_renames.add(new_change)
  2174. # Return new list with original ADD/DELETE changes replaced by renames
  2175. result = [change for change in changes if change not in case_only_renames]
  2176. result.extend(new_rename_changes)
  2177. return result
  2178. def update_working_tree(
  2179. repo: "Repo",
  2180. old_tree_id: bytes | None,
  2181. new_tree_id: bytes,
  2182. change_iterator: Iterator["TreeChange"],
  2183. honor_filemode: bool = True,
  2184. validate_path_element: Callable[[bytes], bool] | None = None,
  2185. symlink_fn: Callable[
  2186. [str | bytes | os.PathLike[str], str | bytes | os.PathLike[str]], None
  2187. ]
  2188. | None = None,
  2189. force_remove_untracked: bool = False,
  2190. blob_normalizer: "FilterBlobNormalizer | None" = None,
  2191. tree_encoding: str = "utf-8",
  2192. allow_overwrite_modified: bool = False,
  2193. ) -> None:
  2194. """Update the working tree and index to match a new tree.
  2195. This function handles:
  2196. - Adding new files
  2197. - Updating modified files
  2198. - Removing deleted files
  2199. - Cleaning up empty directories
  2200. Args:
  2201. repo: Repository object
  2202. old_tree_id: SHA of the tree before the update
  2203. new_tree_id: SHA of the tree to update to
  2204. change_iterator: Iterator of TreeChange objects to apply
  2205. honor_filemode: An optional flag to honor core.filemode setting
  2206. validate_path_element: Function to validate path elements to check out
  2207. symlink_fn: Function to use for creating symlinks
  2208. force_remove_untracked: If True, remove files that exist in working
  2209. directory but not in target tree, even if old_tree_id is None
  2210. blob_normalizer: An optional BlobNormalizer to use for converting line
  2211. endings when writing blobs to the working directory.
  2212. tree_encoding: Encoding used for tree paths (default: utf-8)
  2213. allow_overwrite_modified: If False, raise an error when attempting to
  2214. overwrite files that have been modified compared to old_tree_id
  2215. """
  2216. if validate_path_element is None:
  2217. validate_path_element = validate_path_element_default
  2218. from .diff_tree import (
  2219. CHANGE_ADD,
  2220. CHANGE_COPY,
  2221. CHANGE_DELETE,
  2222. CHANGE_MODIFY,
  2223. CHANGE_RENAME,
  2224. CHANGE_UNCHANGED,
  2225. )
  2226. repo_path = repo.path if isinstance(repo.path, bytes) else repo.path.encode()
  2227. index = repo.open_index()
  2228. # Convert iterator to list since we need multiple passes
  2229. changes = list(change_iterator)
  2230. # Transform case-only renames on case-insensitive filesystems
  2231. import platform
  2232. default_ignore_case = platform.system() in ("Windows", "Darwin")
  2233. config = repo.get_config()
  2234. ignore_case = config.get_boolean((b"core",), b"ignorecase", default_ignore_case)
  2235. if ignore_case:
  2236. config = repo.get_config()
  2237. changes = detect_case_only_renames(changes, config)
  2238. # Check for path conflicts where files need to become directories
  2239. paths_becoming_dirs = set()
  2240. for change in changes:
  2241. if change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY):
  2242. assert change.new is not None
  2243. path = change.new.path
  2244. assert path is not None
  2245. if b"/" in path: # This is a file inside a directory
  2246. # Check if any parent path exists as a file in the old tree or changes
  2247. parts = path.split(b"/")
  2248. for i in range(1, len(parts)):
  2249. parent = b"/".join(parts[:i])
  2250. # See if this parent path is being deleted (was a file, becoming a dir)
  2251. for other_change in changes:
  2252. if (
  2253. other_change.type == CHANGE_DELETE
  2254. and other_change.old
  2255. and other_change.old.path == parent
  2256. ):
  2257. paths_becoming_dirs.add(parent)
  2258. # Check if any path that needs to become a directory has been modified
  2259. for path in paths_becoming_dirs:
  2260. full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
  2261. try:
  2262. current_stat = os.lstat(full_path)
  2263. except FileNotFoundError:
  2264. continue # File doesn't exist, nothing to check
  2265. except OSError as e:
  2266. raise OSError(
  2267. f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
  2268. ) from e
  2269. if stat.S_ISREG(current_stat.st_mode):
  2270. # Find the old entry for this path
  2271. old_change = None
  2272. for change in changes:
  2273. if (
  2274. change.type == CHANGE_DELETE
  2275. and change.old
  2276. and change.old.path == path
  2277. ):
  2278. old_change = change
  2279. break
  2280. if old_change:
  2281. # Check if file has been modified
  2282. assert old_change.old is not None
  2283. assert (
  2284. old_change.old.sha is not None and old_change.old.mode is not None
  2285. )
  2286. file_matches = _check_file_matches(
  2287. repo.object_store,
  2288. full_path,
  2289. old_change.old.sha,
  2290. old_change.old.mode,
  2291. current_stat,
  2292. honor_filemode,
  2293. blob_normalizer,
  2294. path,
  2295. )
  2296. if not file_matches:
  2297. raise OSError(
  2298. f"Cannot replace modified file with directory: {path!r}"
  2299. )
  2300. # Check for uncommitted modifications before making any changes
  2301. if not allow_overwrite_modified and old_tree_id:
  2302. for change in changes:
  2303. # Only check files that are being modified or deleted
  2304. if change.type in (CHANGE_MODIFY, CHANGE_DELETE) and change.old:
  2305. path = change.old.path
  2306. assert path is not None
  2307. if path.startswith(b".git") or not validate_path(
  2308. path, validate_path_element
  2309. ):
  2310. continue
  2311. full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
  2312. try:
  2313. current_stat = os.lstat(full_path)
  2314. except FileNotFoundError:
  2315. continue # File doesn't exist, nothing to check
  2316. except OSError as e:
  2317. raise OSError(
  2318. f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
  2319. ) from e
  2320. if stat.S_ISREG(current_stat.st_mode):
  2321. # Check if working tree file differs from old tree
  2322. assert change.old.sha is not None and change.old.mode is not None
  2323. file_matches = _check_file_matches(
  2324. repo.object_store,
  2325. full_path,
  2326. change.old.sha,
  2327. change.old.mode,
  2328. current_stat,
  2329. honor_filemode,
  2330. blob_normalizer,
  2331. path,
  2332. )
  2333. if not file_matches:
  2334. from .errors import WorkingTreeModifiedError
  2335. raise WorkingTreeModifiedError(
  2336. f"Your local changes to '{path.decode('utf-8', errors='replace')}' "
  2337. f"would be overwritten by checkout. "
  2338. f"Please commit your changes or stash them before you switch branches."
  2339. )
  2340. # Apply the changes
  2341. for change in changes:
  2342. if change.type in (CHANGE_DELETE, CHANGE_RENAME):
  2343. # Remove file/directory
  2344. assert change.old is not None and change.old.path is not None
  2345. path = change.old.path
  2346. if path.startswith(b".git") or not validate_path(
  2347. path, validate_path_element
  2348. ):
  2349. continue
  2350. full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
  2351. try:
  2352. delete_stat: os.stat_result | None = os.lstat(full_path)
  2353. except FileNotFoundError:
  2354. delete_stat = None
  2355. except OSError as e:
  2356. raise OSError(
  2357. f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
  2358. ) from e
  2359. _transition_to_absent(repo, path, full_path, delete_stat, index)
  2360. if change.type in (
  2361. CHANGE_ADD,
  2362. CHANGE_MODIFY,
  2363. CHANGE_UNCHANGED,
  2364. CHANGE_COPY,
  2365. CHANGE_RENAME,
  2366. ):
  2367. # Add or modify file
  2368. assert (
  2369. change.new is not None
  2370. and change.new.path is not None
  2371. and change.new.mode is not None
  2372. )
  2373. path = change.new.path
  2374. if path.startswith(b".git") or not validate_path(
  2375. path, validate_path_element
  2376. ):
  2377. continue
  2378. full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
  2379. try:
  2380. modify_stat: os.stat_result | None = os.lstat(full_path)
  2381. except FileNotFoundError:
  2382. modify_stat = None
  2383. except OSError as e:
  2384. raise OSError(
  2385. f"Cannot access {path.decode('utf-8', errors='replace')}: {e}"
  2386. ) from e
  2387. if S_ISGITLINK(change.new.mode):
  2388. _transition_to_submodule(
  2389. repo, path, full_path, modify_stat, change.new, index
  2390. )
  2391. else:
  2392. _transition_to_file(
  2393. repo.object_store,
  2394. path,
  2395. full_path,
  2396. modify_stat,
  2397. change.new,
  2398. index,
  2399. honor_filemode,
  2400. symlink_fn,
  2401. blob_normalizer,
  2402. tree_encoding,
  2403. )
  2404. index.write()
  2405. def _stat_matches_entry(st: os.stat_result, entry: IndexEntry) -> bool:
  2406. """Check if filesystem stat matches index entry stat.
  2407. This is used to determine if a file might have changed without reading its content.
  2408. Git uses this optimization to avoid expensive filter operations on unchanged files.
  2409. Args:
  2410. st: Filesystem stat result
  2411. entry: Index entry to compare against
  2412. Returns: True if stat matches and file is likely unchanged
  2413. """
  2414. # Get entry mtime with nanosecond precision if available
  2415. if isinstance(entry.mtime, tuple):
  2416. entry_mtime_sec = entry.mtime[0]
  2417. entry_mtime_nsec = entry.mtime[1]
  2418. else:
  2419. entry_mtime_sec = int(entry.mtime)
  2420. entry_mtime_nsec = 0
  2421. # Compare modification time with nanosecond precision if available
  2422. # This is important for fast workflows (e.g., stash) where files can be
  2423. # modified multiple times within the same second
  2424. if hasattr(st, "st_mtime_ns"):
  2425. # Use nanosecond precision when available
  2426. st_mtime_nsec = st.st_mtime_ns
  2427. entry_mtime_nsec_total = entry_mtime_sec * 1_000_000_000 + entry_mtime_nsec
  2428. if st_mtime_nsec != entry_mtime_nsec_total:
  2429. return False
  2430. else:
  2431. # Fall back to second precision
  2432. if int(st.st_mtime) != entry_mtime_sec:
  2433. return False
  2434. # Compare file size
  2435. if st.st_size != entry.size:
  2436. return False
  2437. # If both mtime and size match, file is likely unchanged
  2438. return True
  2439. def _check_entry_for_changes(
  2440. tree_path: bytes,
  2441. entry: IndexEntry | ConflictedIndexEntry,
  2442. root_path: bytes,
  2443. filter_blob_callback: Callable[[Blob, bytes], Blob] | None = None,
  2444. ) -> bytes | None:
  2445. """Check a single index entry for changes.
  2446. Args:
  2447. tree_path: Path in the tree
  2448. entry: Index entry to check
  2449. root_path: Root filesystem path
  2450. filter_blob_callback: Optional callback to filter blobs
  2451. Returns: tree_path if changed, None otherwise
  2452. """
  2453. if isinstance(entry, ConflictedIndexEntry):
  2454. # Conflicted files are always unstaged
  2455. return tree_path
  2456. full_path = _tree_to_fs_path(root_path, tree_path)
  2457. try:
  2458. st = os.lstat(full_path)
  2459. if stat.S_ISDIR(st.st_mode):
  2460. if _has_directory_changed(tree_path, entry):
  2461. return tree_path
  2462. return None
  2463. if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
  2464. return None
  2465. # Optimization: If stat matches index entry (mtime and size unchanged),
  2466. # we can skip reading and filtering the file entirely. This is a significant
  2467. # performance improvement for repositories with many unchanged files.
  2468. # Even with filters (e.g., LFS), if the file hasn't been modified (stat unchanged),
  2469. # the filter output would be the same, so we can safely skip the expensive
  2470. # filter operation. This addresses performance issues with LFS repositories
  2471. # where filter operations can be very slow.
  2472. if _stat_matches_entry(st, entry):
  2473. return None
  2474. blob = blob_from_path_and_stat(full_path, st)
  2475. if filter_blob_callback is not None:
  2476. blob = filter_blob_callback(blob, tree_path)
  2477. except FileNotFoundError:
  2478. # The file was removed, so we assume that counts as
  2479. # different from whatever file used to exist.
  2480. return tree_path
  2481. else:
  2482. if blob.id != entry.sha:
  2483. return tree_path
  2484. return None
  2485. def get_unstaged_changes(
  2486. index: Index,
  2487. root_path: str | bytes,
  2488. filter_blob_callback: Callable[..., Any] | None = None,
  2489. preload_index: bool = False,
  2490. ) -> Generator[bytes, None, None]:
  2491. """Walk through an index and check for differences against working tree.
  2492. Args:
  2493. index: index to check
  2494. root_path: path in which to find files
  2495. filter_blob_callback: Optional callback to filter blobs
  2496. preload_index: If True, use parallel threads to check files (requires threading support)
  2497. Returns: iterator over paths with unstaged changes
  2498. """
  2499. # For each entry in the index check the sha1 & ensure not staged
  2500. if not isinstance(root_path, bytes):
  2501. root_path = os.fsencode(root_path)
  2502. if preload_index:
  2503. # Use parallel processing for better performance on slow filesystems
  2504. try:
  2505. import multiprocessing
  2506. from concurrent.futures import ThreadPoolExecutor
  2507. except ImportError:
  2508. # If threading is not available, fall back to serial processing
  2509. preload_index = False
  2510. else:
  2511. # Collect all entries first
  2512. entries = list(index.iteritems())
  2513. # Use number of CPUs but cap at 8 threads to avoid overhead
  2514. num_workers = min(multiprocessing.cpu_count(), 8)
  2515. # Process entries in parallel
  2516. with ThreadPoolExecutor(max_workers=num_workers) as executor:
  2517. # Submit all tasks
  2518. futures = [
  2519. executor.submit(
  2520. _check_entry_for_changes,
  2521. tree_path,
  2522. entry,
  2523. root_path,
  2524. filter_blob_callback,
  2525. )
  2526. for tree_path, entry in entries
  2527. ]
  2528. # Yield results as they complete
  2529. for future in futures:
  2530. result = future.result()
  2531. if result is not None:
  2532. yield result
  2533. if not preload_index:
  2534. # Serial processing
  2535. for tree_path, entry in index.iteritems():
  2536. result = _check_entry_for_changes(
  2537. tree_path, entry, root_path, filter_blob_callback
  2538. )
  2539. if result is not None:
  2540. yield result
  2541. def _tree_to_fs_path(
  2542. root_path: bytes, tree_path: bytes, tree_encoding: str = "utf-8"
  2543. ) -> bytes:
  2544. """Convert a git tree path to a file system path.
  2545. Args:
  2546. root_path: Root filesystem path
  2547. tree_path: Git tree path as bytes (encoded with tree_encoding)
  2548. tree_encoding: Encoding used for tree paths (default: utf-8)
  2549. Returns: File system path.
  2550. """
  2551. assert isinstance(tree_path, bytes)
  2552. if os_sep_bytes != b"/":
  2553. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  2554. else:
  2555. sep_corrected_path = tree_path
  2556. # On Windows, we need to handle tree path encoding properly
  2557. if sys.platform == "win32":
  2558. # Decode from tree encoding, then re-encode for filesystem
  2559. try:
  2560. tree_path_str = sep_corrected_path.decode(tree_encoding)
  2561. sep_corrected_path = os.fsencode(tree_path_str)
  2562. except UnicodeDecodeError:
  2563. # If decoding fails, use the original bytes
  2564. pass
  2565. return os.path.join(root_path, sep_corrected_path)
  2566. def _fs_to_tree_path(fs_path: str | bytes, tree_encoding: str = "utf-8") -> bytes:
  2567. """Convert a file system path to a git tree path.
  2568. Args:
  2569. fs_path: File system path.
  2570. tree_encoding: Encoding to use for tree paths (default: utf-8)
  2571. Returns: Git tree path as bytes (encoded with tree_encoding)
  2572. """
  2573. if not isinstance(fs_path, bytes):
  2574. fs_path_bytes = os.fsencode(fs_path)
  2575. else:
  2576. fs_path_bytes = fs_path
  2577. # On Windows, we need to ensure tree paths are properly encoded
  2578. if sys.platform == "win32":
  2579. try:
  2580. # Decode from filesystem encoding, then re-encode with tree encoding
  2581. fs_path_str = os.fsdecode(fs_path_bytes)
  2582. fs_path_bytes = fs_path_str.encode(tree_encoding)
  2583. except UnicodeDecodeError:
  2584. # If filesystem decoding fails, use the original bytes
  2585. pass
  2586. if os_sep_bytes != b"/":
  2587. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  2588. else:
  2589. tree_path = fs_path_bytes
  2590. return tree_path
  2591. def index_entry_from_directory(st: os.stat_result, path: bytes) -> IndexEntry | None:
  2592. """Create an index entry for a directory.
  2593. This is only used for submodules (directories containing .git).
  2594. Args:
  2595. st: Stat result for the directory
  2596. path: Path to the directory
  2597. Returns:
  2598. IndexEntry for a submodule, or None if not a submodule
  2599. """
  2600. if os.path.exists(os.path.join(path, b".git")):
  2601. head = read_submodule_head(path)
  2602. if head is None:
  2603. return None
  2604. return index_entry_from_stat(st, head, mode=S_IFGITLINK)
  2605. return None
  2606. def index_entry_from_path(
  2607. path: bytes, object_store: ObjectContainer | None = None
  2608. ) -> IndexEntry | None:
  2609. """Create an index from a filesystem path.
  2610. This returns an index value for files, symlinks
  2611. and tree references. for directories and
  2612. non-existent files it returns None
  2613. Args:
  2614. path: Path to create an index entry for
  2615. object_store: Optional object store to
  2616. save new blobs in
  2617. Returns: An index entry; None for directories
  2618. """
  2619. assert isinstance(path, bytes)
  2620. st = os.lstat(path)
  2621. if stat.S_ISDIR(st.st_mode):
  2622. return index_entry_from_directory(st, path)
  2623. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  2624. blob = blob_from_path_and_stat(path, st)
  2625. if object_store is not None:
  2626. object_store.add_object(blob)
  2627. return index_entry_from_stat(st, blob.id)
  2628. return None
  2629. def iter_fresh_entries(
  2630. paths: Iterable[bytes],
  2631. root_path: bytes,
  2632. object_store: ObjectContainer | None = None,
  2633. ) -> Iterator[tuple[bytes, IndexEntry | None]]:
  2634. """Iterate over current versions of index entries on disk.
  2635. Args:
  2636. paths: Paths to iterate over
  2637. root_path: Root path to access from
  2638. object_store: Optional store to save new blobs in
  2639. Returns: Iterator over path, index_entry
  2640. """
  2641. for path in paths:
  2642. p = _tree_to_fs_path(root_path, path)
  2643. try:
  2644. entry = index_entry_from_path(p, object_store=object_store)
  2645. except (FileNotFoundError, IsADirectoryError):
  2646. entry = None
  2647. yield path, entry
  2648. def iter_fresh_objects(
  2649. paths: Iterable[bytes],
  2650. root_path: bytes,
  2651. include_deleted: bool = False,
  2652. object_store: ObjectContainer | None = None,
  2653. ) -> Iterator[tuple[bytes, ObjectID | None, int | None]]:
  2654. """Iterate over versions of objects on disk referenced by index.
  2655. Args:
  2656. paths: Paths to check
  2657. root_path: Root path to access from
  2658. include_deleted: Include deleted entries with sha and
  2659. mode set to None
  2660. object_store: Optional object store to report new items to
  2661. Returns: Iterator over path, sha, mode
  2662. """
  2663. for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
  2664. if entry is None:
  2665. if include_deleted:
  2666. yield path, None, None
  2667. else:
  2668. yield path, entry.sha, cleanup_mode(entry.mode)
  2669. def refresh_index(index: Index, root_path: bytes) -> None:
  2670. """Refresh the contents of an index.
  2671. This is the equivalent to running 'git commit -a'.
  2672. Args:
  2673. index: Index to update
  2674. root_path: Root filesystem path
  2675. """
  2676. for path, entry in iter_fresh_entries(index, root_path):
  2677. if entry:
  2678. index[path] = entry
  2679. class locked_index:
  2680. """Lock the index while making modifications.
  2681. Works as a context manager.
  2682. """
  2683. _file: "_GitFile"
  2684. def __init__(self, path: bytes | str) -> None:
  2685. """Initialize locked_index."""
  2686. self._path = path
  2687. def __enter__(self) -> Index:
  2688. """Enter context manager and lock index."""
  2689. f = GitFile(self._path, "wb")
  2690. self._file = f
  2691. self._index = Index(self._path)
  2692. return self._index
  2693. def __exit__(
  2694. self,
  2695. exc_type: type | None,
  2696. exc_value: BaseException | None,
  2697. traceback: types.TracebackType | None,
  2698. ) -> None:
  2699. """Exit context manager and unlock index."""
  2700. if exc_type is not None:
  2701. self._file.abort()
  2702. return
  2703. try:
  2704. f = SHA1Writer(self._file)
  2705. write_index_dict(f, self._index._byname)
  2706. except BaseException:
  2707. self._file.abort()
  2708. else:
  2709. f.close()