index.py

# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parser for the git index file format."""

import os
import stat
import struct
import sys
from dataclasses import dataclass
from enum import Enum
from typing import (
    Any,
    BinaryIO,
    Callable,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Tuple,
    Union,
)

from .file import GitFile
from .object_store import iter_tree_contents
from .objects import (
    S_IFGITLINK,
    S_ISGITLINK,
    Blob,
    ObjectID,
    Tree,
    hex_to_sha,
    sha_to_hex,
)
from .pack import ObjectContainer, SHA1Reader, SHA1Writer

# 2-bit stage (during merge)
FLAG_STAGEMASK = 0x3000
FLAG_STAGESHIFT = 12
FLAG_NAMEMASK = 0x0FFF

# assume-valid
FLAG_VALID = 0x8000

# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000

# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

DEFAULT_VERSION = 2


class Stage(Enum):
    NORMAL = 0
    MERGE_CONFLICT_ANCESTOR = 1
    MERGE_CONFLICT_THIS = 2
    MERGE_CONFLICT_OTHER = 3


@dataclass
class SerializedIndexEntry:
    name: bytes
    ctime: Union[int, float, Tuple[int, int]]
    mtime: Union[int, float, Tuple[int, int]]
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    size: int
    sha: bytes
    flags: int
    extended_flags: int

    def stage(self) -> Stage:
        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)


@dataclass
class IndexEntry:
    ctime: Union[int, float, Tuple[int, int]]
    mtime: Union[int, float, Tuple[int, int]]
    dev: int
    ino: int
    mode: int
    uid: int
    gid: int
    size: int
    sha: bytes

    @classmethod
    def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
        return cls(
            ctime=serialized.ctime,
            mtime=serialized.mtime,
            dev=serialized.dev,
            ino=serialized.ino,
            mode=serialized.mode,
            uid=serialized.uid,
            gid=serialized.gid,
            size=serialized.size,
            sha=serialized.sha,
        )

    def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
        return SerializedIndexEntry(
            name=name,
            ctime=self.ctime,
            mtime=self.mtime,
            dev=self.dev,
            ino=self.ino,
            mode=self.mode,
            uid=self.uid,
            gid=self.gid,
            size=self.size,
            sha=self.sha,
            flags=stage.value << FLAG_STAGESHIFT,
            extended_flags=0,
        )


class ConflictedIndexEntry:
    """Index entry that represents a conflict."""

    ancestor: Optional[IndexEntry]
    this: Optional[IndexEntry]
    other: Optional[IndexEntry]

    def __init__(
        self,
        ancestor: Optional[IndexEntry] = None,
        this: Optional[IndexEntry] = None,
        other: Optional[IndexEntry] = None,
    ) -> None:
        self.ancestor = ancestor
        self.this = this
        self.other = other


class UnmergedEntries(Exception):
    """Unmerged entries exist in the index."""


def pathsplit(path: bytes) -> Tuple[bytes, bytes]:
    """Split a /-delimited path into a directory part and a basename.

    Args:
      path: The path to split.
    Returns:
      Tuple with directory name and basename
    """
    try:
        (dirname, basename) = path.rsplit(b"/", 1)
    except ValueError:
        return (b"", path)
    else:
        return (dirname, basename)


def pathjoin(*args):
    """Join a /-delimited path."""
    return b"/".join([p for p in args if p])
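

# Illustrative sketch (not part of the original module): how the path helpers
# behave on git tree paths. The helper function below is hypothetical and only
# documents expected values.
def _example_path_helpers() -> None:
    """Minimal sketch of pathsplit()/pathjoin() on byte paths."""
    assert pathsplit(b"foo/bar/baz.txt") == (b"foo/bar", b"baz.txt")
    # A path without a slash has an empty directory part.
    assert pathsplit(b"toplevel.txt") == (b"", b"toplevel.txt")
    # Empty components are dropped when joining.
    assert pathjoin(b"", b"foo", b"bar") == b"foo/bar"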


def read_cache_time(f):
    """Read a cache time.

    Args:
      f: File-like object to read from
    Returns:
      Tuple with seconds and nanoseconds
    """
    return struct.unpack(">LL", f.read(8))


def write_cache_time(f, t):
    """Write a cache time.

    Args:
      f: File-like object to write to
      t: Time to write (as int, float or tuple with secs and nsecs)
    """
    if isinstance(t, int):
        t = (t, 0)
    elif isinstance(t, float):
        (secs, nsecs) = divmod(t, 1.0)
        t = (int(secs), int(nsecs * 1000000000))
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))
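

# Illustrative sketch (not part of the original module): cache times are two
# big-endian 32-bit words (seconds, nanoseconds). The helper below is
# hypothetical and just round-trips a float timestamp through a buffer.
def _example_cache_time_roundtrip() -> None:
    """Minimal sketch of write_cache_time()/read_cache_time()."""
    import io

    buf = io.BytesIO()
    write_cache_time(buf, 1234567890.5)  # float is split into secs/nsecs
    buf.seek(0)
    assert read_cache_time(buf) == (1234567890, 500000000)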


def read_cache_entry(f, version: int) -> SerializedIndexEntry:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError("extended flag set in index with version < 3")
        (extended_flags,) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    name = f.read(flags & FLAG_NAMEMASK)
    # Padding:
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return SerializedIndexEntry(
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        flags & ~FLAG_NAMEMASK,
        extended_flags,
    )


def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
    """Write an index entry to a file.

    Args:
      f: File object
      entry: SerializedIndexEntry to write
      version: Index format version
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
    if entry.extended_flags:
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError("unable to use extended flags in version < 3")
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            entry.dev & 0xFFFFFFFF,
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    f.write(entry.name)
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.write(b"\0" * ((beginoffset + real_size) - f.tell()))


class UnsupportedIndexFormat(Exception):
    """An unsupported index format was encountered."""

    def __init__(self, version) -> None:
        self.index_format_version = version


def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
    """Read an index file, yielding the individual entries."""
    header = f.read(4)
    if header != b"DIRC":
        raise AssertionError(f"Invalid index file header: {header!r}")
    (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
    if version not in (1, 2, 3):
        raise UnsupportedIndexFormat(version)
    for i in range(num_entries):
        yield read_cache_entry(f, version)


def read_index_dict(f) -> Dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]:
    """Read an index file and return it as a dictionary.

    The dict is keyed by path; entries for conflicted paths are collected
    into a single ConflictedIndexEntry per path.

    Args:
      f: File object to read from
    """
    ret: Dict[bytes, Union[IndexEntry, ConflictedIndexEntry]] = {}
    for entry in read_index(f):
        stage = entry.stage()
        if stage == Stage.NORMAL:
            ret[entry.name] = IndexEntry.from_serialized(entry)
        else:
            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
            if isinstance(existing, IndexEntry):
                raise AssertionError(f"Non-conflicted entry for {entry.name!r} exists")
            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
                existing.ancestor = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_THIS:
                existing.this = IndexEntry.from_serialized(entry)
            elif stage == Stage.MERGE_CONFLICT_OTHER:
                existing.other = IndexEntry.from_serialized(entry)
    return ret


def write_index(
    f: BinaryIO, entries: List[SerializedIndexEntry], version: Optional[int] = None
):
    """Write an index file.

    Args:
      f: File-like object to write to
      version: Version number to write
      entries: Iterable over the entries to write
    """
    if version is None:
        version = DEFAULT_VERSION
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    for entry in entries:
        write_cache_entry(f, entry, version)


def write_index_dict(
    f: BinaryIO,
    entries: Dict[bytes, Union[IndexEntry, ConflictedIndexEntry]],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary,
    being careful to sort by path and then by stage.
    """
    entries_list = []
    for key in sorted(entries):
        value = entries[key]
        if isinstance(value, ConflictedIndexEntry):
            if value.ancestor is not None:
                entries_list.append(
                    value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)
                )
            if value.this is not None:
                entries_list.append(
                    value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)
                )
            if value.other is not None:
                entries_list.append(
                    value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)
                )
        else:
            entries_list.append(value.serialize(key, Stage.NORMAL))
    write_index(f, entries_list, version=version)
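

# Illustrative sketch (not part of the original module): an index round-trip
# through an in-memory buffer. The entry values below are made up; a real
# entry would carry stat data from the filesystem and a valid object sha.
def _example_index_roundtrip() -> None:
    """Minimal sketch of write_index_dict() followed by read_index_dict()."""
    import io

    entry = IndexEntry(
        ctime=0, mtime=0, dev=0, ino=0, mode=0o100644,
        uid=0, gid=0, size=0, sha=b"0" * 40,
    )
    buf = io.BytesIO()
    write_index_dict(buf, {b"hello.txt": entry})
    buf.seek(0)
    entries = read_index_dict(buf)
    assert entries[b"hello.txt"].sha == b"0" * 40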


def cleanup_mode(mode: int) -> int:
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    Args:
      mode: Mode to clean up.
    Returns:
      mode
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    elif stat.S_ISDIR(mode):
        return stat.S_IFDIR
    elif S_ISGITLINK(mode):
        return S_IFGITLINK
    ret = stat.S_IFREG | 0o644
    if mode & 0o100:
        ret |= 0o111
    return ret
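

# Illustrative sketch (not part of the original module): cleanup_mode()
# normalizes filesystem modes to the few mode values git stores in trees
# (0o100644, 0o100755, symlink, directory, gitlink).
def _example_cleanup_mode() -> None:
    """Minimal sketch of mode normalization."""
    assert cleanup_mode(0o100664) == 0o100644  # group-writable file -> plain blob
    assert cleanup_mode(0o100700) == 0o100755  # any owner-exec bit -> executable blob
    assert cleanup_mode(0o120777) == stat.S_IFLNK  # symlink keeps only the file type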


class Index:
    """A Git Index file."""

    _byname: Dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]

    def __init__(self, filename: Union[bytes, str], read=True) -> None:
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
        """
        self._filename = filename
        # TODO(jelmer): Store the version returned by read_index
        self._version = None
        self.clear()
        if read:
            self.read()

    @property
    def path(self):
        return self._filename

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._filename!r})"

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            f = SHA1Writer(f)
            write_index_dict(f, self._byname, version=self._version)
        finally:
            f.close()

    def read(self):
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            f = SHA1Reader(f)
            self.update(read_index_dict(f))
            # FIXME: Additional data?
            f.read(os.path.getsize(self._filename) - f.tell() - 20)
            f.check_sha()
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, key: bytes) -> Union[IndexEntry, ConflictedIndexEntry]:
        """Retrieve entry by relative path.

        Returns: Either an IndexEntry or a ConflictedIndexEntry
        Raises KeyError: if the entry does not exist
        """
        return self._byname[key]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths in this index."""
        return iter(self._byname)

    def __contains__(self, key):
        return key in self._byname

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        value = self[path]
        if isinstance(value, ConflictedIndexEntry):
            raise UnmergedEntries
        return value.mode

    def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            if isinstance(entry, ConflictedIndexEntry):
                raise UnmergedEntries
            yield path, entry.sha, cleanup_mode(entry.mode)

    def has_conflicts(self) -> bool:
        for value in self._byname.values():
            if isinstance(value, ConflictedIndexEntry):
                return True
        return False

    def clear(self):
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(
        self, name: bytes, value: Union[IndexEntry, ConflictedIndexEntry]
    ) -> None:
        assert isinstance(name, bytes)
        self._byname[name] = value

    def __delitem__(self, name: bytes) -> None:
        del self._byname[name]

    def iteritems(
        self,
    ) -> Iterator[Tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        return iter(self._byname.items())

    def items(self) -> Iterator[Tuple[bytes, Union[IndexEntry, ConflictedIndexEntry]]]:
        return iter(self._byname.items())

    def update(self, entries: Dict[bytes, Union[IndexEntry, ConflictedIndexEntry]]):
        for key, value in entries.items():
            self[key] = value

    def paths(self):
        yield from self._byname.keys()

    def changes_from_tree(
        self, object_store, tree: ObjectID, want_unchanged: bool = False
    ):
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path):
            entry = self[path]
            return entry.sha, cleanup_mode(entry.mode)

        yield from changes_from_tree(
            self.paths(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        )

    def commit(self, object_store):
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
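

# Illustrative sketch (not part of the original module): typical read-only use
# of the Index class. The path below is hypothetical; any existing
# ".git/index" file would do.
def _example_read_index_file() -> None:
    """Minimal sketch: open an index file and list its entries."""
    index = Index(b".git/index")  # hypothetical path, assumes an existing repo
    for path in index:
        entry = index[path]
        if isinstance(entry, ConflictedIndexEntry):
            print(path, "has merge conflicts")
        else:
            print(path, entry.sha, oct(cleanup_mode(entry.mode)))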


def commit_tree(
    object_store: ObjectContainer, blobs: Iterable[Tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries
    Returns:
      SHA1 of the created tree.
    """
    trees: Dict[bytes, Any] = {b"": {}}

    def add_tree(path):
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path):
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")


def commit_index(object_store: ObjectContainer, index: Index) -> bytes:
    """Create a new tree from an index.

    Args:
      object_store: Object store to save the tree in
      index: Index file
    Note: This function is deprecated, use index.commit() instead.
    Returns: Root tree sha.
    """
    return commit_tree(object_store, index.iterobjects())


def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], Tuple[bytes, int]],
    object_store: ObjectContainer,
    tree: Optional[bytes],
    want_unchanged=False,
) -> Iterable[
    Tuple[
        Tuple[Optional[bytes], Optional[bytes]],
        Tuple[Optional[int], Optional[int]],
        Tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and
    a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported
    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # TODO(jelmer): Support an include_trees option
    other_names = set(names)
    if tree is not None:
        for name, mode, sha in iter_tree_contents(object_store, tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))
    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))
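

# Illustrative sketch (not part of the original module): diffing a fake
# working copy against the empty tree (tree=None). With no old tree, every
# known name is reported as an addition; object_store is not consulted in
# that case, so None is passed for it here.
def _example_changes_against_empty_tree() -> None:
    """Minimal sketch of changes_from_tree() with tree=None."""
    working_copy = {b"a.txt": (b"0" * 40, 0o100644)}
    changes = list(
        changes_from_tree(
            working_copy.keys(), working_copy.__getitem__, None, tree=None
        )
    )
    assert changes == [((None, b"a.txt"), (None, 0o100644), (None, b"0" * 40))]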


def index_entry_from_stat(
    stat_val,
    hex_sha: bytes,
    mode: Optional[int] = None,
):
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)
    return IndexEntry(
        stat_val.st_ctime,
        stat_val.st_mtime,
        stat_val.st_dev,
        stat_val.st_ino,
        mode,
        stat_val.st_uid,
        stat_val.st_gid,
        stat_val.st_size,
        hex_sha,
    )
  594. if sys.platform == "win32":
  595. # On Windows, creating symlinks either requires administrator privileges
  596. # or developer mode. Raise a more helpful error when we're unable to
  597. # create symlinks
  598. # https://github.com/jelmer/dulwich/issues/1005
  599. class WindowsSymlinkPermissionError(PermissionError):
  600. def __init__(self, errno, msg, filename) -> None:
  601. super(PermissionError, self).__init__(
  602. errno,
  603. "Unable to create symlink; "
  604. f"do you have developer mode enabled? {msg}",
  605. filename,
  606. )
  607. def symlink(src, dst, target_is_directory=False, *, dir_fd=None):
  608. try:
  609. return os.symlink(
  610. src, dst, target_is_directory=target_is_directory, dir_fd=dir_fd
  611. )
  612. except PermissionError as e:
  613. raise WindowsSymlinkPermissionError(e.errno, e.strerror, e.filename) from e
  614. else:
  615. symlink = os.symlink


def build_file_from_blob(
    blob: Blob,
    mode: int,
    target_path: bytes,
    *,
    honor_filemode=True,
    tree_encoding="utf-8",
    symlink_fn=None,
):
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      symlink_fn: Function to use for creating symlinks
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        if oldstat:
            os.unlink(target_path)
        if sys.platform == "win32":
            # os.symlink on Python 3 on Windows requires str arguments.
            contents = contents.decode(tree_encoding)  # type: ignore
            target_path = target_path.decode(tree_encoding)  # type: ignore
        (symlink_fn or symlink)(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat
        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)
        if honor_filemode:
            os.chmod(target_path, mode)
    return os.lstat(target_path)


INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element: bytes) -> bool:
    return element.lower() not in INVALID_DOTNAMES


def validate_path_element_ntfs(element: bytes) -> bool:
    stripped = element.rstrip(b". ").lower()
    if stripped in INVALID_DOTNAMES:
        return False
    if stripped == b"git~1":
        return False
    return True


def validate_path(path: bytes, element_validator=validate_path_element_default) -> bool:
    """Default path validator that just checks for .git/."""
    parts = path.split(b"/")
    for p in parts:
        if not element_validator(p):
            return False
    else:
        return True
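

# Illustrative sketch (not part of the original module): path validation
# rejects tree entries that could escape into the repository's own metadata
# during checkout.
def _example_validate_paths() -> None:
    """Minimal sketch of validate_path() and the NTFS-aware variant."""
    assert validate_path(b"src/main.py")
    assert not validate_path(b"src/.git/hooks/post-checkout")
    assert not validate_path(b"../escape.txt")
    # On NTFS, trailing dots/spaces and the GIT~1 short name are also refused.
    assert not validate_path(b"src/.GIT./config", validate_path_element_ntfs)
    assert not validate_path(b"GIT~1/config", validate_path_element_ntfs)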


def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: ObjectContainer,
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element=validate_path_element_default,
    symlink_fn=None,
):
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for entry in iter_tree_contents(object_store, tree_id):
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)
        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))
        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            assert isinstance(obj, Blob)
            st = build_file_from_blob(
                obj,
                entry.mode,
                full_path,
                honor_filemode=honor_filemode,
                symlink_fn=symlink_fn,
            )
        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        # default to a stage 0 index entry (normal)
        # when reading from the filesystem
        index[entry.path] = index_entry_from_stat(st, entry.sha)
    index.write()
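

# Illustrative sketch (not part of the original module): a bare-bones
# checkout. The repository path and target directory are hypothetical; this
# assumes an existing Repo with a commit at HEAD.
def _example_checkout_head(repo_path: str, target_dir: str) -> None:
    """Minimal sketch: materialize HEAD's tree into an empty directory."""
    from .repo import Repo

    repo = Repo(repo_path)
    tree_id = repo[repo.head()].tree  # tree of the HEAD commit
    build_index_from_tree(
        target_dir,
        os.path.join(target_dir, ".git-index"),  # hypothetical index location
        repo.object_store,
        tree_id,
    )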


def blob_from_path_and_mode(fs_path: bytes, mode: int, tree_encoding="utf-8"):
    """Create a blob from a path and a file mode.

    Args:
      fs_path: Full file system path to file
      mode: File mode
    Returns: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(mode):
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
        else:
            blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, "rb") as f:
            blob.data = f.read()
    return blob


def blob_from_path_and_stat(fs_path: bytes, st, tree_encoding="utf-8"):
    """Create a blob from a path and a stat object.

    Args:
      fs_path: Full file system path to file
      st: A stat object
    Returns: A `Blob` object
    """
    return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)


def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule
    Returns: HEAD sha, None if not a valid head/repository
    """
    from .errors import NotGitRepository
    from .repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        return None


def _has_directory_changed(tree_path: bytes, entry):
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it is a directory
    and a submodule, check the submodule head to see if it has changed. If
    it is not a submodule, consider the file changed, since Git was tracking
    a file and now finds a directory.

    Returns True if the given path should be considered changed and False
    otherwise, or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True

    return False


def get_unstaged_changes(
    index: Index, root_path: Union[str, bytes], filter_blob_callback=None
):
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        if isinstance(entry, ConflictedIndexEntry):
            # Conflicted files are always unstaged
            yield tree_path
            continue
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue
            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue
            blob = blob_from_path_and_stat(full_path, st)
            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path
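

# Illustrative sketch (not part of the original module): listing paths whose
# working-tree contents differ from the index. The repository location is
# hypothetical.
def _example_list_unstaged(repo_path: str) -> None:
    """Minimal sketch of get_unstaged_changes() against a checkout."""
    index = Index(os.path.join(repo_path, ".git", "index"))
    for tree_path in get_unstaged_changes(index, repo_path):
        print(b"modified: " + tree_path)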


os_sep_bytes = os.sep.encode("ascii")


def _tree_to_fs_path(root_path: bytes, tree_path: bytes):
    """Convert a git tree path to a file system path.

    Args:
      root_path: Root filesystem path
      tree_path: Git tree path as bytes
    Returns: File system path.
    """
    assert isinstance(tree_path, bytes)
    if os_sep_bytes != b"/":
        sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
    else:
        sep_corrected_path = tree_path
    return os.path.join(root_path, sep_corrected_path)


def _fs_to_tree_path(fs_path: Union[str, bytes]) -> bytes:
    """Convert a file system path to a git tree path.

    Args:
      fs_path: File system path.
    Returns: Git tree path as bytes
    """
    if not isinstance(fs_path, bytes):
        fs_path_bytes = os.fsencode(fs_path)
    else:
        fs_path_bytes = fs_path
    if os_sep_bytes != b"/":
        tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
    else:
        tree_path = fs_path_bytes
    return tree_path


def index_entry_from_directory(st, path: bytes) -> Optional[IndexEntry]:
    if os.path.exists(os.path.join(path, b".git")):
        head = read_submodule_head(path)
        if head is None:
            return None
        return index_entry_from_stat(st, head, mode=S_IFGITLINK)
    return None


def index_entry_from_path(
    path: bytes, object_store: Optional[ObjectContainer] = None
) -> Optional[IndexEntry]:
    """Create an index entry from a filesystem path.

    This returns an index value for files, symlinks
    and tree references. For directories and
    non-existent files it returns None.

    Args:
      path: Path to create an index entry for
      object_store: Optional object store to
        save new blobs in
    Returns: An index entry; None for directories
    """
    assert isinstance(path, bytes)
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        return index_entry_from_directory(st, path)
    if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
        blob = blob_from_path_and_stat(path, st)
        if object_store is not None:
            object_store.add_object(blob)
        return index_entry_from_stat(st, blob.id)
    return None


def iter_fresh_entries(
    paths: Iterable[bytes],
    root_path: bytes,
    object_store: Optional[ObjectContainer] = None,
) -> Iterator[Tuple[bytes, Optional[IndexEntry]]]:
    """Iterate over current versions of index entries on disk.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      object_store: Optional store to save new blobs in
    Returns: Iterator over path, index_entry
    """
    for path in paths:
        p = _tree_to_fs_path(root_path, path)
        try:
            entry = index_entry_from_path(p, object_store=object_store)
        except (FileNotFoundError, IsADirectoryError):
            entry = None
        yield path, entry


def iter_fresh_objects(
    paths: Iterable[bytes], root_path: bytes, include_deleted=False, object_store=None
) -> Iterator[Tuple[bytes, Optional[bytes], Optional[int]]]:
    """Iterate over versions of objects on disk referenced by index.

    Args:
      root_path: Root path to access from
      include_deleted: Include deleted entries with sha and
        mode set to None
      object_store: Optional object store to report new items to
    Returns: Iterator over path, sha, mode
    """
    for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
        if entry is None:
            if include_deleted:
                yield path, None, None
        else:
            yield path, entry.sha, cleanup_mode(entry.mode)


def refresh_index(index: Index, root_path: bytes):
    """Refresh the contents of an index.

    This re-reads every tracked path from the working tree, the same
    staging step that 'git commit -a' performs before committing.

    Args:
      index: Index to update
      root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        if entry:
            index[path] = entry


class locked_index:
    """Lock the index while making modifications.

    Works as a context manager.
    """

    def __init__(self, path: Union[bytes, str]) -> None:
        self._path = path

    def __enter__(self):
        self._file = GitFile(self._path, "wb")
        self._index = Index(self._path)
        return self._index

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            self._file.abort()
            return
        try:
            f = SHA1Writer(self._file)
            write_index_dict(f, self._index._byname)
        except BaseException:
            self._file.abort()
        else:
            f.close()
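

# Illustrative sketch (not part of the original module): modifying an index
# under the lock that locked_index provides. The path and entry name are
# hypothetical.
def _example_locked_update(index_path: str) -> None:
    """Minimal sketch: remove a path from the index while holding the lock."""
    with locked_index(index_path) as index:
        if b"old-file.txt" in index:
            del index[b"old-file.txt"]
    # On a clean exit the lock file is atomically written over the index;
    # on an exception it is aborted and the old index is left untouched.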