# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parser for the git index file format."""

import os
import stat
import struct
import sys
from collections.abc import Iterable, Iterator
from dataclasses import dataclass
from enum import Enum
from typing import (
    Any,
    BinaryIO,
    Callable,
    Optional,
    Union,
)

from .file import GitFile
from .object_store import iter_tree_contents
from .objects import (
    S_IFGITLINK,
    S_ISGITLINK,
    Blob,
    ObjectID,
    Tree,
    hex_to_sha,
    sha_to_hex,
)
from .pack import ObjectContainer, SHA1Reader, SHA1Writer

# 2-bit stage (during merge)
FLAG_STAGEMASK = 0x3000
FLAG_STAGESHIFT = 12
FLAG_NAMEMASK = 0x0FFF

# assume-valid
FLAG_VALID = 0x8000

# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000

# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

DEFAULT_VERSION = 2
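
# Layout of the 16-bit per-entry flags word, per the git index-format
# documentation and the masks above:
#   bit 15: assume-valid (FLAG_VALID)
#   bit 14: extended (FLAG_EXTENDED, must be zero in version 2)
#   bits 13-12: merge stage (FLAG_STAGEMASK, shifted by FLAG_STAGESHIFT)
#   bits 11-0: name length, capped at FLAG_NAMEMASK
# Entries with FLAG_EXTENDED set (version 3+) carry a second 16-bit word
# holding the EXTENDED_FLAG_* bits.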


class Stage(Enum):
    NORMAL = 0
    MERGE_CONFLICT_ANCESTOR = 1
    MERGE_CONFLICT_THIS = 2
    MERGE_CONFLICT_OTHER = 3


@dataclass
class SerializedIndexEntry:
    name: bytes
    ctime: Union[int, float, tuple[int, int]]
    mtime: Union[int, float, tuple[int, int]]
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    size: int
    sha: bytes
    flags: int
    extended_flags: int

    def stage(self) -> Stage:
        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)


@dataclass
class IndexEntry:
    ctime: Union[int, float, tuple[int, int]]
    mtime: Union[int, float, tuple[int, int]]
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    size: int
    sha: bytes
    flags: int = 0
    extended_flags: int = 0

    @classmethod
    def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
        return cls(
            ctime=serialized.ctime,
            mtime=serialized.mtime,
            dev=serialized.dev,
            ino=serialized.ino,
            mode=serialized.mode,
            uid=serialized.uid,
            gid=serialized.gid,
            size=serialized.size,
            sha=serialized.sha,
            flags=serialized.flags,
            extended_flags=serialized.extended_flags,
        )

    def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
        # Clear out any existing stage bits, then set them from the Stage.
        new_flags = self.flags & ~FLAG_STAGEMASK
        new_flags |= stage.value << FLAG_STAGESHIFT
        return SerializedIndexEntry(
            name=name,
            ctime=self.ctime,
            mtime=self.mtime,
            dev=self.dev,
            ino=self.ino,
            mode=self.mode,
            uid=self.uid,
            gid=self.gid,
            size=self.size,
            sha=self.sha,
            flags=new_flags,
            extended_flags=self.extended_flags,
        )

    def stage(self) -> Stage:
        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)

    @property
    def skip_worktree(self) -> bool:
        """Return True if the skip-worktree bit is set in extended_flags."""
        return bool(self.extended_flags & EXTENDED_FLAG_SKIP_WORKTREE)

    def set_skip_worktree(self, skip: bool = True) -> None:
        """Helper method to set or clear the skip-worktree bit in extended_flags.

        Also sets FLAG_EXTENDED in self.flags if needed.
        """
        if skip:
            # Turn on the skip-worktree bit
            self.extended_flags |= EXTENDED_FLAG_SKIP_WORKTREE
            # Also ensure the main 'extended' bit is set in flags
            self.flags |= FLAG_EXTENDED
        else:
            # Turn off the skip-worktree bit
            self.extended_flags &= ~EXTENDED_FLAG_SKIP_WORKTREE
            # Optionally unset the main extended bit if no extended flags remain
            if self.extended_flags == 0:
                self.flags &= ~FLAG_EXTENDED
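

# Example (sketch): marking an entry for sparse checkout, assuming `entry` is
# an IndexEntry already present in an index:
#
#     entry.set_skip_worktree(True)
#     assert entry.skip_worktree
#     serialized = entry.serialize(b"path/in/tree", Stage.NORMAL)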


class ConflictedIndexEntry:
    """Index entry that represents a conflict."""

    ancestor: Optional[IndexEntry]
    this: Optional[IndexEntry]
    other: Optional[IndexEntry]

    def __init__(
        self,
        ancestor: Optional[IndexEntry] = None,
        this: Optional[IndexEntry] = None,
        other: Optional[IndexEntry] = None,
    ) -> None:
        self.ancestor = ancestor
        self.this = this
        self.other = other


class UnmergedEntries(Exception):
    """Unmerged entries exist in the index."""


def pathsplit(path: bytes) -> tuple[bytes, bytes]:
    """Split a /-delimited path into a directory part and a basename.

    Args:
      path: The path to split.
    Returns:
      Tuple with directory name and basename
    """
    try:
        (dirname, basename) = path.rsplit(b"/", 1)
    except ValueError:
        return (b"", path)
    else:
        return (dirname, basename)


def pathjoin(*args):
    """Join a /-delimited path."""
    return b"/".join([p for p in args if p])


def read_cache_time(f):
    """Read a cache time.

    Args:
      f: File-like object to read from
    Returns:
      Tuple with seconds and nanoseconds
    """
    return struct.unpack(">LL", f.read(8))


def write_cache_time(f, t) -> None:
    """Write a cache time.

    Args:
      f: File-like object to write to
      t: Time to write (as int, float or tuple with secs and nsecs)
    """
    if isinstance(t, int):
        t = (t, 0)
    elif isinstance(t, float):
        (secs, nsecs) = divmod(t, 1.0)
        t = (int(secs), int(nsecs * 1000000000))
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))
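

# For example, write_cache_time(f, 5.5) stores the pair (5, 500000000): the
# fractional part of a float timestamp is converted to nanoseconds.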


def read_cache_entry(f, version: int) -> SerializedIndexEntry:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index format version being read
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError("extended flag set in index with version < 3")
        (extended_flags,) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    name = f.read(flags & FLAG_NAMEMASK)
    # Padding:
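    # For index versions < 4, each entry is followed by 1-8 NUL bytes, padding
    # it to a multiple of 8 bytes while keeping the name NUL-terminated; the
    # read below skips that padding.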
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return SerializedIndexEntry(
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        flags & ~FLAG_NAMEMASK,
        extended_flags,
    )


def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
    """Write an index entry to a file.

    Args:
      f: File object
      entry: SerializedIndexEntry to write
      version: Index format version to write
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
    if entry.extended_flags:
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError("unable to use extended flags in version < 3")
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            entry.dev & 0xFFFFFFFF,
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    f.write(entry.name)
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.write(b"\0" * ((beginoffset + real_size) - f.tell()))


class UnsupportedIndexFormat(Exception):
    """An unsupported index format was encountered."""

    def __init__(self, version) -> None:
        self.index_format_version = version


def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
    """Read an index file, yielding the individual entries."""
    header = f.read(4)
    if header != b"DIRC":
        raise AssertionError(f"Invalid index file header: {header!r}")
    (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
    if version not in (1, 2, 3):
        raise UnsupportedIndexFormat(version)
    for i in range(num_entries):
        yield read_cache_entry(f, version)
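

# Example (sketch): listing the paths stored in an existing index file,
# assuming one exists at ".git/index":
#
#     with open(".git/index", "rb") as f:
#         for entry in read_index(f):
#             print(entry.name)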


def read_index_dict(f) -> dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]:
    """Read an index file and return it as a dictionary.

    The returned dictionary is keyed by path; entries for conflicted paths
    are collapsed into a single ConflictedIndexEntry holding the ancestor,
    this and other stages.

    Args:
      f: File object to read from
    """
    ret: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    for entry in read_index(f):
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret


def write_index(
    f: BinaryIO, entries: list[SerializedIndexEntry], version: Optional[int] = None
) -> None:
    """Write an index file.

    Args:
      f: File-like object to write to
      version: Version number to write
      entries: Iterable over the entries to write
    """
    if version is None:
        version = DEFAULT_VERSION
    # If any entry uses extended flags, bump the version to 3, the first
    # version that supports them.
    uses_extended_flags = any(e.extended_flags != 0 for e in entries)
    if uses_extended_flags and version < 3:
        version = 3
    if version < 3:
        # Double-check no extended flags appear
        for e in entries:
            if e.extended_flags != 0:
                raise AssertionError("Attempt to use extended flags in index < v3")
    # Write the header and entries.
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    for entry in entries:
        write_cache_entry(f, entry, version=version)


def write_index_dict(
    f: BinaryIO,
    entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary,
    sorting entries by path and then by stage.
    """
    entries_list = []
    for key in sorted(entries):
        value = entries[key]
        if isinstance(value, ConflictedIndexEntry):
            if value.ancestor is not None:
                entries_list.append(
                    value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)
                )
            if value.this is not None:
                entries_list.append(
                    value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)
                )
            if value.other is not None:
                entries_list.append(
                    value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)
                )
        else:
            entries_list.append(value.serialize(key, Stage.NORMAL))
    write_index(f, entries_list, version=version)


def cleanup_mode(mode: int) -> int:
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    Args:
      mode: Mode to clean up.
    Returns:
      mode
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    elif stat.S_ISDIR(mode):
        return stat.S_IFDIR
    elif S_ISGITLINK(mode):
        return S_IFGITLINK
    ret = stat.S_IFREG | 0o644
    if mode & 0o100:
        ret |= 0o111
    return ret
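

# For example, cleanup_mode(0o100664) returns 0o100644, while any regular-file
# mode with the owner-execute bit set (e.g. 0o100755) is normalized to 0o100755.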


class Index:
    """A Git Index file."""

    _byname: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]

    def __init__(self, filename: Union[bytes, str], read=True) -> None:
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
        """
        self._filename = filename
        # TODO(jelmer): Store the version returned by read_index
        self._version = None
        self.clear()
        if read:
            self.read()

    @property
    def path(self):
        return self._filename

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._filename!r})"

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            f = SHA1Writer(f)
            write_index_dict(f, self._byname, version=self._version)
        finally:
            f.close()

    def read(self) -> None:
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            f = SHA1Reader(f)
            self.update(read_index_dict(f))
            # FIXME: Additional data?
            f.read(os.path.getsize(self._filename) - f.tell() - 20)
            f.check_sha(allow_empty=True)
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, key: bytes) -> Union[IndexEntry, ConflictedIndexEntry]:
        """Retrieve entry by relative path and stage.

        Returns: Either an IndexEntry or a ConflictedIndexEntry
        Raises KeyError: if the entry does not exist
        """
        return self._byname[key]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths and stages in this index."""
        return iter(self._byname)

    def __contains__(self, key) -> bool:
        return key in self._byname

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.mode

    def iterobjects(self) -> Iterable[tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            if isinstance(entry, ConflictedIndexEntry):
                raise UnmergedEntries
            yield path, entry.sha, cleanup_mode(entry.mode)

    def has_conflicts(self) -> bool:
        for value in self._byname.values():
            if isinstance(value, ConflictedIndexEntry):
                return True
        return False

    def clear(self) -> None:
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(
        self, name: bytes, value: Union[IndexEntry, ConflictedIndexEntry]
    ) -> None:
        assert isinstance(name, bytes)
        self._byname[name] = value

    def __delitem__(self, name: bytes) -> None:
        del self._byname[name]

    def iteritems(
        self,
    ) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        return iter(self._byname.items())

    def items(self) -> Iterator[tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        return iter(self._byname.items())

    def update(
        self, entries: dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]
    ) -> None:
        for key, value in entries.items():
            self[key] = value

    def paths(self):
        yield from self._byname.keys()

    def changes_from_tree(
        self, object_store, tree: ObjectID, want_unchanged: bool = False
    ):
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
          newmode), (oldsha, newsha)
        """

        def lookup_entry(path):
            entry = self[path]
            return entry.sha, cleanup_mode(entry.mode)

        yield from changes_from_tree(
            self.paths(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        )

    def commit(self, object_store):
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
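

# Example (sketch): opening an existing index and committing it as a tree,
# assuming `repo` is a dulwich Repo with a working tree:
#
#     index = Index(repo.index_path())
#     tree_id = index.commit(repo.object_store)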


def commit_tree(
    object_store: ObjectContainer, blobs: Iterable[tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries
    Returns:
      SHA1 of the created tree.
    """
    trees: dict[bytes, Any] = {b"": {}}

    def add_tree(path):
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path):
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")
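

# Example (sketch): committing a one-file tree, assuming `blob` has already
# been added to `object_store`:
#
#     tree_id = commit_tree(object_store, [(b"docs/README", blob.id, 0o100644)])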


def commit_index(object_store: ObjectContainer, index: Index) -> bytes:
    """Create a new tree from an index.

    Args:
      object_store: Object store to save the tree in
      index: Index file
    Note: This function is deprecated, use index.commit() instead.
    Returns: Root tree sha.
    """
    return commit_tree(object_store, index.iterobjects())


def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], tuple[bytes, int]],
    object_store: ObjectContainer,
    tree: Optional[bytes],
    want_unchanged=False,
) -> Iterable[
    tuple[
        tuple[Optional[bytes], Optional[bytes]],
        tuple[Optional[int], Optional[int]],
        tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported
    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
      (oldsha, newsha)
    """
    # TODO(jelmer): Support a include_trees option
    other_names = set(names)
    if tree is not None:
        for name, mode, sha in iter_tree_contents(object_store, tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))
    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))


def index_entry_from_stat(
    stat_val,
    hex_sha: bytes,
    mode: Optional[int] = None,
):
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)
    return IndexEntry(
        ctime=stat_val.st_ctime,
        mtime=stat_val.st_mtime,
        dev=stat_val.st_dev,
        ino=stat_val.st_ino,
        mode=mode,
        uid=stat_val.st_uid,
        gid=stat_val.st_gid,
        size=stat_val.st_size,
        sha=hex_sha,
        flags=0,
        extended_flags=0,
    )


if sys.platform == "win32":
    # On Windows, creating symlinks either requires administrator privileges
    # or developer mode. Raise a more helpful error when we're unable to
    # create symlinks
    # https://github.com/jelmer/dulwich/issues/1005
    class WindowsSymlinkPermissionError(PermissionError):
        def __init__(self, errno, msg, filename) -> None:
            super(PermissionError, self).__init__(
                errno,
                f"Unable to create symlink; do you have developer mode enabled? {msg}",
                filename,
            )

    def symlink(src, dst, target_is_directory=False, *, dir_fd=None):
        try:
            return os.symlink(
                src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
            )
        except PermissionError as e:
            raise WindowsSymlinkPermissionError(e.errno, e.strerror, e.filename) from e
else:
    symlink = os.symlink


def build_file_from_blob(
    blob: Blob,
    mode: int,
    target_path: bytes,
    *,
    honor_filemode=True,
    tree_encoding="utf-8",
    symlink_fn=None,
):
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      symlink_fn: Function to use for creating symlinks
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        if oldstat:
            os.unlink(target_path)
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            contents = contents.decode(tree_encoding)  # type: ignore
            target_path = target_path.decode(tree_encoding)  # type: ignore
        (symlink_fn or symlink)(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat
        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)
        if honor_filemode:
            os.chmod(target_path, mode)
    return os.lstat(target_path)


INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element: bytes) -> bool:
    return element.lower() not in INVALID_DOTNAMES


def validate_path_element_ntfs(element: bytes) -> bool:
    stripped = element.rstrip(b". ").lower()
    if stripped in INVALID_DOTNAMES:
        return False
    if stripped == b"git~1":
        return False
    return True
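

# The NTFS validator also rejects names that only differ from the invalid
# ones by trailing dots/spaces or by 8.3 short-name aliasing: for example,
# validate_path_element_ntfs(b".git. ") and validate_path_element_ntfs(b"GIT~1")
# both return False.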


def validate_path(path: bytes, element_validator=validate_path_element_default) -> bool:
    """Default path validator that just checks for .git/."""
    parts = path.split(b"/")
    for p in parts:
        if not element_validator(p):
            return False
    else:
        return True


def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: ObjectContainer,
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element=validate_path_element_default,
    symlink_fn=None,
) -> None:
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.

    Note: existing index is wiped and contents are not merged
      in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for entry in iter_tree_contents(object_store, tree_id):
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)
        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))
        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            assert isinstance(obj, Blob)
            st = build_file_from_blob(
                obj,
                entry.mode,
                full_path,
                honor_filemode=honor_filemode,
                symlink_fn=symlink_fn,
            )
        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        # default to a stage 0 index entry (normal)
        # when reading from the filesystem
        index[entry.path] = index_entry_from_stat(st, entry.sha)
    index.write()
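

# Example (sketch): checking a tree out into a repository's working directory,
# assuming `repo` is a dulwich Repo and `tree_id` is the tree to materialize:
#
#     build_index_from_tree(
#         repo.path, repo.index_path(), repo.object_store, tree_id
#     )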


def blob_from_path_and_mode(fs_path: bytes, mode: int, tree_encoding="utf-8"):
    """Create a blob from a path and a mode.

    Args:
      fs_path: Full file system path to file
      mode: File mode
    Returns: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(mode):
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
        else:
            blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, "rb") as f:
            blob.data = f.read()
    return blob


def blob_from_path_and_stat(fs_path: bytes, st, tree_encoding="utf-8"):
    """Create a blob from a path and a stat object.

    Args:
      fs_path: Full file system path to file
      st: A stat object
    Returns: A `Blob` object
    """
    return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)


def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule
    Returns: HEAD sha, None if not a valid head/repository
    """
    from .errors import NotGitRepository
    from .repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        return None


def _has_directory_changed(tree_path: bytes, entry) -> bool:
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it's a directory
    and a submodule, check the submodule head to see if it has changed. If
    not, consider the file as changed as Git tracked a file and not a
    directory.

    Return True if the given path should be considered as changed and False
    otherwise or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True
    return False


def update_working_tree(
    repo,
    old_tree_id,
    new_tree_id,
    honor_filemode=True,
    validate_path_element=None,
    symlink_fn=None,
    force_remove_untracked=False,
):
    """Update the working tree and index to match a new tree.

    This function handles:
    - Adding new files
    - Updating modified files
    - Removing deleted files
    - Cleaning up empty directories

    Args:
      repo: Repository object
      old_tree_id: SHA of the tree before the update
      new_tree_id: SHA of the tree to update to
      honor_filemode: An optional flag to honor core.filemode setting
      validate_path_element: Function to validate path elements to check out
      symlink_fn: Function to use for creating symlinks
      force_remove_untracked: If True, remove files that exist in working
        directory but not in target tree, even if old_tree_id is None
    """
    # Set default validate_path_element if not provided
    if validate_path_element is None:
        validate_path_element = validate_path_element_default
    # Get the trees
    old_tree = repo[old_tree_id] if old_tree_id else None
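    # Looking up new_tree_id checks that the target tree is present in the
    # object store (a missing id raises KeyError); the returned object itself
    # is not used.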
    repo[new_tree_id]
    # Open the index
    index = repo.open_index()
    # Track which paths we've dealt with
    handled_paths = set()
    # Get repo path as string for comparisons
    repo_path_str = repo.path if isinstance(repo.path, str) else repo.path.decode()
    # First, update/add all files in the new tree
    for entry in iter_tree_contents(repo.object_store, new_tree_id):
        handled_paths.add(entry.path)
        # Skip .git directory
        if entry.path.startswith(b".git"):
            continue
        # Validate path element
        if not validate_path(entry.path, validate_path_element):
            continue
        # Build full path
        full_path = os.path.join(repo_path_str, entry.path.decode())
        # Get the blob
        blob = repo.object_store[entry.sha]
        # Ensure parent directory exists
        parent_dir = os.path.dirname(full_path)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        # Write the file
        st = build_file_from_blob(
            blob,
            entry.mode,
            full_path.encode(),
            honor_filemode=honor_filemode,
            symlink_fn=symlink_fn,
        )
        # Update index
        index[entry.path] = index_entry_from_stat(st, entry.sha)
    # Remove files that existed in old tree but not in new tree
    if old_tree:
        for entry in iter_tree_contents(repo.object_store, old_tree_id):
            if entry.path not in handled_paths:
                # Skip .git directory
                if entry.path.startswith(b".git"):
                    continue
                # File was deleted
                full_path = os.path.join(repo_path_str, entry.path.decode())
                # Remove from working tree
                if os.path.exists(full_path):
                    os.remove(full_path)
                # Remove from index
                if entry.path in index:
                    del index[entry.path]
                # Clean up empty directories
                dir_path = os.path.dirname(full_path)
                while (
                    dir_path and dir_path != repo_path_str and os.path.exists(dir_path)
                ):
                    try:
                        if not os.listdir(dir_path):
                            os.rmdir(dir_path)
                            dir_path = os.path.dirname(dir_path)
                        else:
                            break
                    except OSError:
                        break
    # If force_remove_untracked is True, remove any files in working directory
    # that are not in the target tree (useful for reset --hard)
    if force_remove_untracked:
        # Walk through all files in the working directory
        for root, dirs, files in os.walk(repo_path_str):
            # Skip .git directory
            if ".git" in dirs:
                dirs.remove(".git")
            for file in files:
                full_path = os.path.join(root, file)
                # Get relative path from repo root
                rel_path = os.path.relpath(full_path, repo_path_str)
                rel_path_bytes = rel_path.encode()
                # If this file is not in the target tree, remove it
                if rel_path_bytes not in handled_paths:
                    os.remove(full_path)
                    # Remove from index if present
                    if rel_path_bytes in index:
                        del index[rel_path_bytes]
        # Clean up empty directories
        for root, dirs, files in os.walk(repo_path_str, topdown=False):
            if ".git" in root:
                continue
            if root != repo_path_str and not files and not dirs:
                try:
                    os.rmdir(root)
                except OSError:
                    pass
    # Write the updated index
    index.write()
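

# Example (sketch): making the working tree match HEAD, assuming `repo` is a
# dulwich Repo with a working tree:
#
#     new_tree = repo[repo.head()].tree
#     update_working_tree(repo, old_tree_id=None, new_tree_id=new_tree)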


def get_unstaged_changes(
    index: Index, root_path: Union[str, bytes], filter_blob_callback=None
):
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        if isinstance(entry, ConflictedIndexEntry):
            # Conflicted files are always unstaged
            yield tree_path
            continue
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue
            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue
            blob = blob_from_path_and_stat(full_path, st)
            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path


os_sep_bytes = os.sep.encode("ascii")


def _tree_to_fs_path(root_path: bytes, tree_path: bytes):
    """Convert a git tree path to a file system path.

    Args:
      root_path: Root filesystem path
      tree_path: Git tree path as bytes
    Returns: File system path.
    """
    assert isinstance(tree_path, bytes)
    if os_sep_bytes != b"/":
        sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
    else:
        sep_corrected_path = tree_path
    return os.path.join(root_path, sep_corrected_path)


def _fs_to_tree_path(fs_path: Union[str, bytes]) -> bytes:
    """Convert a file system path to a git tree path.

    Args:
      fs_path: File system path.
    Returns: Git tree path as bytes
    """
    if not isinstance(fs_path, bytes):
        fs_path_bytes = os.fsencode(fs_path)
    else:
        fs_path_bytes = fs_path
    if os_sep_bytes != b"/":
        tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
    else:
        tree_path = fs_path_bytes
    return tree_path


def index_entry_from_directory(st, path: bytes) -> Optional[IndexEntry]:
    if os.path.exists(os.path.join(path, b".git")):
        head = read_submodule_head(path)
        if head is None:
            return None
        return index_entry_from_stat(st, head, mode=S_IFGITLINK)
    return None


def index_entry_from_path(
    path: bytes, object_store: Optional[ObjectContainer] = None
) -> Optional[IndexEntry]:
    """Create an index entry from a filesystem path.

    This returns an index value for files, symlinks and tree references.
    For directories and non-existent files it returns None.

    Args:
      path: Path to create an index entry for
      object_store: Optional object store to
        save new blobs in
    Returns: An index entry; None for directories
    """
    assert isinstance(path, bytes)
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        return index_entry_from_directory(st, path)
    if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
        blob = blob_from_path_and_stat(path, st)
        if object_store is not None:
            object_store.add_object(blob)
        return index_entry_from_stat(st, blob.id)
    return None


def iter_fresh_entries(
    paths: Iterable[bytes],
    root_path: bytes,
    object_store: Optional[ObjectContainer] = None,
) -> Iterator[tuple[bytes, Optional[IndexEntry]]]:
    """Iterate over current versions of index entries on disk.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      object_store: Optional store to save new blobs in
    Returns: Iterator over path, index_entry
    """
    for path in paths:
        p = _tree_to_fs_path(root_path, path)
        try:
            entry = index_entry_from_path(p, object_store=object_store)
        except (FileNotFoundError, IsADirectoryError):
            entry = None
        yield path, entry


def iter_fresh_objects(
    paths: Iterable[bytes], root_path: bytes, include_deleted=False, object_store=None
) -> Iterator[tuple[bytes, Optional[bytes], Optional[int]]]:
    """Iterate over versions of objects on disk referenced by index.

    Args:
      root_path: Root path to access from
      include_deleted: Include deleted entries with sha and
        mode set to None
      object_store: Optional object store to report new items to
    Returns: Iterator over path, sha, mode
    """
    for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
        if entry is None:
            if include_deleted:
                yield path, None, None
        else:
            yield path, entry.sha, cleanup_mode(entry.mode)


def refresh_index(index: Index, root_path: bytes) -> None:
    """Refresh the contents of an index.

    This is the equivalent of running 'git commit -a'.

    Args:
      index: Index to update
      root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        if entry:
            index[path] = entry


class locked_index:
    """Lock the index while making modifications.

    Works as a context manager.
    """

    def __init__(self, path: Union[bytes, str]) -> None:
        self._path = path

    def __enter__(self):
        self._file = GitFile(self._path, "wb")
        self._index = Index(self._path)
        return self._index

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            self._file.abort()
            return
        try:
            f = SHA1Writer(self._file)
            write_index_dict(f, self._index._byname)
        except BaseException:
            self._file.abort()
        else:
            f.close()
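

# Example (sketch): modifying the index under the index lock, assuming a
# checkout whose index lives at ".git/index":
#
#     with locked_index(".git/index") as index:
#         del index[b"obsolete-file.txt"]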