2
0

index.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Parser for the git index file format."""
  21. import collections
  22. import os
  23. import stat
  24. import struct
  25. import sys
  26. from typing import (
  27. Any,
  28. BinaryIO,
  29. Callable,
  30. Dict,
  31. List,
  32. Optional,
  33. TYPE_CHECKING,
  34. Iterable,
  35. Iterator,
  36. Tuple,
  37. )
  38. if TYPE_CHECKING:
  39. from dulwich.object_store import BaseObjectStore
  40. from dulwich.file import GitFile
  41. from dulwich.objects import (
  42. Blob,
  43. S_IFGITLINK,
  44. S_ISGITLINK,
  45. Tree,
  46. hex_to_sha,
  47. sha_to_hex,
  48. )
  49. from dulwich.pack import (
  50. SHA1Reader,
  51. SHA1Writer,
  52. )
  53. # TODO(jelmer): Switch to dataclass?
  54. IndexEntry = collections.namedtuple(
  55. "IndexEntry",
  56. [
  57. "ctime",
  58. "mtime",
  59. "dev",
  60. "ino",
  61. "mode",
  62. "uid",
  63. "gid",
  64. "size",
  65. "sha",
  66. "flags",
  67. "extended_flags",
  68. ],
  69. )
  70. # 2-bit stage (during merge)
  71. FLAG_STAGEMASK = 0x3000
  72. # assume-valid
  73. FLAG_VALID = 0x8000
  74. # extended flag (must be zero in version 2)
  75. FLAG_EXTENDED = 0x4000
  76. # used by sparse checkout
  77. EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
  78. # used by "git add -N"
  79. EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
  80. DEFAULT_VERSION = 2
  81. def pathsplit(path):
  82. """Split a /-delimited path into a directory part and a basename.
  83. Args:
  84. path: The path to split.
  85. Returns:
  86. Tuple with directory name and basename
  87. """
  88. try:
  89. (dirname, basename) = path.rsplit(b"/", 1)
  90. except ValueError:
  91. return (b"", path)
  92. else:
  93. return (dirname, basename)
  94. def pathjoin(*args):
  95. """Join a /-delimited path."""
  96. return b"/".join([p for p in args if p])
  97. def read_cache_time(f):
  98. """Read a cache time.
  99. Args:
  100. f: File-like object to read from
  101. Returns:
  102. Tuple with seconds and nanoseconds
  103. """
  104. return struct.unpack(">LL", f.read(8))
  105. def write_cache_time(f, t):
  106. """Write a cache time.
  107. Args:
  108. f: File-like object to write to
  109. t: Time to write (as int, float or tuple with secs and nsecs)
  110. """
  111. if isinstance(t, int):
  112. t = (t, 0)
  113. elif isinstance(t, float):
  114. (secs, nsecs) = divmod(t, 1.0)
  115. t = (int(secs), int(nsecs * 1000000000))
  116. elif not isinstance(t, tuple):
  117. raise TypeError(t)
  118. f.write(struct.pack(">LL", *t))
  119. def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]:
  120. """Read an entry from a cache file.
  121. Args:
  122. f: File-like object to read from
  123. Returns:
  124. tuple with: name, IndexEntry
  125. """
  126. beginoffset = f.tell()
  127. ctime = read_cache_time(f)
  128. mtime = read_cache_time(f)
  129. (
  130. dev,
  131. ino,
  132. mode,
  133. uid,
  134. gid,
  135. size,
  136. sha,
  137. flags,
  138. ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  139. if flags & FLAG_EXTENDED:
  140. if version < 3:
  141. raise AssertionError(
  142. 'extended flag set in index with version < 3')
  143. (extended_flags, ) = struct.unpack(">H", f.read(2))
  144. else:
  145. extended_flags = 0
  146. name = f.read((flags & 0x0FFF))
  147. # Padding:
  148. if version < 4:
  149. real_size = (f.tell() - beginoffset + 8) & ~7
  150. f.read((beginoffset + real_size) - f.tell())
  151. return (
  152. name,
  153. IndexEntry(
  154. ctime,
  155. mtime,
  156. dev,
  157. ino,
  158. mode,
  159. uid,
  160. gid,
  161. size,
  162. sha_to_hex(sha),
  163. flags & ~0x0FFF,
  164. extended_flags,
  165. ))
  166. def write_cache_entry(f, name, entry, version):
  167. """Write an index entry to a file.
  168. Args:
  169. f: File object
  170. entry: IndexEntry to write, tuple with:
  171. """
  172. beginoffset = f.tell()
  173. write_cache_time(f, entry.ctime)
  174. write_cache_time(f, entry.mtime)
  175. flags = len(name) | (entry.flags & ~0x0FFF)
  176. if entry.extended_flags:
  177. flags |= FLAG_EXTENDED
  178. if flags & FLAG_EXTENDED and version is not None and version < 3:
  179. raise AssertionError('unable to use extended flags in version < 3')
  180. f.write(
  181. struct.pack(
  182. b">LLLLLL20sH",
  183. entry.dev & 0xFFFFFFFF,
  184. entry.ino & 0xFFFFFFFF,
  185. entry.mode,
  186. entry.uid,
  187. entry.gid,
  188. entry.size,
  189. hex_to_sha(entry.sha),
  190. flags,
  191. )
  192. )
  193. if flags & FLAG_EXTENDED:
  194. f.write(struct.pack(b">H", entry.extended_flags))
  195. f.write(name)
  196. if version < 4:
  197. real_size = (f.tell() - beginoffset + 8) & ~7
  198. f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
  199. def read_index(f: BinaryIO):
  200. """Read an index file, yielding the individual entries."""
  201. header = f.read(4)
  202. if header != b"DIRC":
  203. raise AssertionError("Invalid index file header: %r" % header)
  204. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  205. assert version in (1, 2, 3), "index version is %r" % version
  206. for i in range(num_entries):
  207. yield read_cache_entry(f, version)
  208. def read_index_dict(f):
  209. """Read an index file and return it as a dictionary.
  210. Args:
  211. f: File object to read from
  212. """
  213. ret = {}
  214. for name, entry in read_index(f):
  215. ret[name] = entry
  216. return ret
  217. def write_index(f: BinaryIO, entries: List[Tuple[bytes, IndexEntry]], version: Optional[int] = None):
  218. """Write an index file.
  219. Args:
  220. f: File-like object to write to
  221. version: Version number to write
  222. entries: Iterable over the entries to write
  223. """
  224. if version is None:
  225. version = DEFAULT_VERSION
  226. f.write(b"DIRC")
  227. f.write(struct.pack(b">LL", version, len(entries)))
  228. for name, entry in entries:
  229. write_cache_entry(f, name, entry, version)
  230. def write_index_dict(
  231. f: BinaryIO,
  232. entries: Dict[bytes, IndexEntry],
  233. version: Optional[int] = None,
  234. ) -> None:
  235. """Write an index file based on the contents of a dictionary."""
  236. entries_list = []
  237. for name in sorted(entries):
  238. entries_list.append((name, entries[name]))
  239. write_index(f, entries_list, version=version)
  240. def cleanup_mode(mode: int) -> int:
  241. """Cleanup a mode value.
  242. This will return a mode that can be stored in a tree object.
  243. Args:
  244. mode: Mode to clean up.
  245. Returns:
  246. mode
  247. """
  248. if stat.S_ISLNK(mode):
  249. return stat.S_IFLNK
  250. elif stat.S_ISDIR(mode):
  251. return stat.S_IFDIR
  252. elif S_ISGITLINK(mode):
  253. return S_IFGITLINK
  254. ret = stat.S_IFREG | 0o644
  255. if mode & 0o100:
  256. ret |= 0o111
  257. return ret
  258. class Index(object):
  259. """A Git Index file."""
  260. def __init__(self, filename):
  261. """Open an index file.
  262. Args:
  263. filename: Path to the index file
  264. """
  265. self._filename = filename
  266. # TODO(jelmer): Store the version returned by read_index
  267. self._version = None
  268. self.clear()
  269. self.read()
  270. @property
  271. def path(self):
  272. return self._filename
  273. def __repr__(self):
  274. return "%s(%r)" % (self.__class__.__name__, self._filename)
  275. def write(self) -> None:
  276. """Write current contents of index to disk."""
  277. f = GitFile(self._filename, "wb")
  278. try:
  279. f = SHA1Writer(f)
  280. write_index_dict(f, self._byname, version=self._version)
  281. finally:
  282. f.close()
  283. def read(self):
  284. """Read current contents of index from disk."""
  285. if not os.path.exists(self._filename):
  286. return
  287. f = GitFile(self._filename, "rb")
  288. try:
  289. f = SHA1Reader(f)
  290. for name, entry in read_index(f):
  291. self[name] = entry
  292. # FIXME: Additional data?
  293. f.read(os.path.getsize(self._filename) - f.tell() - 20)
  294. f.check_sha()
  295. finally:
  296. f.close()
  297. def __len__(self) -> int:
  298. """Number of entries in this index file."""
  299. return len(self._byname)
  300. def __getitem__(self, name: bytes) -> IndexEntry:
  301. """Retrieve entry by relative path.
  302. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
  303. flags)
  304. """
  305. return self._byname[name]
  306. def __iter__(self) -> Iterator[bytes]:
  307. """Iterate over the paths in this index."""
  308. return iter(self._byname)
  309. def get_sha1(self, path: bytes) -> bytes:
  310. """Return the (git object) SHA1 for the object at a path."""
  311. return self[path].sha
  312. def get_mode(self, path: bytes) -> int:
  313. """Return the POSIX file mode for the object at a path."""
  314. return self[path].mode
  315. def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
  316. """Iterate over path, sha, mode tuples for use with commit_tree."""
  317. for path in self:
  318. entry = self[path]
  319. yield path, entry.sha, cleanup_mode(entry.mode)
  320. def clear(self):
  321. """Remove all contents from this index."""
  322. self._byname = {}
  323. def __setitem__(self, name, x):
  324. assert isinstance(name, bytes)
  325. assert len(x) == len(IndexEntry._fields)
  326. # Remove the old entry if any
  327. self._byname[name] = IndexEntry(*x)
  328. def __delitem__(self, name):
  329. assert isinstance(name, bytes)
  330. del self._byname[name]
  331. def iteritems(self):
  332. return self._byname.items()
  333. def items(self):
  334. return self._byname.items()
  335. def update(self, entries):
  336. for name, value in entries.items():
  337. self[name] = value
  338. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  339. """Find the differences between the contents of this index and a tree.
  340. Args:
  341. object_store: Object store to use for retrieving tree contents
  342. tree: SHA1 of the root tree
  343. want_unchanged: Whether unchanged files should be reported
  344. Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
  345. newmode), (oldsha, newsha)
  346. """
  347. def lookup_entry(path):
  348. entry = self[path]
  349. return entry.sha, cleanup_mode(entry.mode)
  350. for (name, mode, sha) in changes_from_tree(
  351. self._byname.keys(),
  352. lookup_entry,
  353. object_store,
  354. tree,
  355. want_unchanged=want_unchanged,
  356. ):
  357. yield (name, mode, sha)
  358. def commit(self, object_store):
  359. """Create a new tree from an index.
  360. Args:
  361. object_store: Object store to save the tree in
  362. Returns:
  363. Root tree SHA
  364. """
  365. return commit_tree(object_store, self.iterobjects())
  366. def commit_tree(
  367. object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]]
  368. ) -> bytes:
  369. """Commit a new tree.
  370. Args:
  371. object_store: Object store to add trees to
  372. blobs: Iterable over blob path, sha, mode entries
  373. Returns:
  374. SHA1 of the created tree.
  375. """
  376. trees = {b"": {}} # type: Dict[bytes, Any]
  377. def add_tree(path):
  378. if path in trees:
  379. return trees[path]
  380. dirname, basename = pathsplit(path)
  381. t = add_tree(dirname)
  382. assert isinstance(basename, bytes)
  383. newtree = {}
  384. t[basename] = newtree
  385. trees[path] = newtree
  386. return newtree
  387. for path, sha, mode in blobs:
  388. tree_path, basename = pathsplit(path)
  389. tree = add_tree(tree_path)
  390. tree[basename] = (mode, sha)
  391. def build_tree(path):
  392. tree = Tree()
  393. for basename, entry in trees[path].items():
  394. if isinstance(entry, dict):
  395. mode = stat.S_IFDIR
  396. sha = build_tree(pathjoin(path, basename))
  397. else:
  398. (mode, sha) = entry
  399. tree.add(basename, mode, sha)
  400. object_store.add_object(tree)
  401. return tree.id
  402. return build_tree(b"")
  403. def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes:
  404. """Create a new tree from an index.
  405. Args:
  406. object_store: Object store to save the tree in
  407. index: Index file
  408. Note: This function is deprecated, use index.commit() instead.
  409. Returns: Root tree sha.
  410. """
  411. return commit_tree(object_store, index.iterobjects())
  412. def changes_from_tree(
  413. names: Iterable[bytes],
  414. lookup_entry: Callable[[bytes], Tuple[bytes, int]],
  415. object_store: "BaseObjectStore",
  416. tree: Optional[bytes],
  417. want_unchanged=False,
  418. ) -> Iterable[
  419. Tuple[
  420. Tuple[Optional[bytes], Optional[bytes]],
  421. Tuple[Optional[int], Optional[int]],
  422. Tuple[Optional[bytes], Optional[bytes]],
  423. ]
  424. ]:
  425. """Find the differences between the contents of a tree and
  426. a working copy.
  427. Args:
  428. names: Iterable of names in the working copy
  429. lookup_entry: Function to lookup an entry in the working copy
  430. object_store: Object store to use for retrieving tree contents
  431. tree: SHA1 of the root tree, or None for an empty tree
  432. want_unchanged: Whether unchanged files should be reported
  433. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  434. (oldsha, newsha)
  435. """
  436. # TODO(jelmer): Support a include_trees option
  437. other_names = set(names)
  438. if tree is not None:
  439. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  440. try:
  441. (other_sha, other_mode) = lookup_entry(name)
  442. except KeyError:
  443. # Was removed
  444. yield ((name, None), (mode, None), (sha, None))
  445. else:
  446. other_names.remove(name)
  447. if want_unchanged or other_sha != sha or other_mode != mode:
  448. yield ((name, name), (mode, other_mode), (sha, other_sha))
  449. # Mention added files
  450. for name in other_names:
  451. try:
  452. (other_sha, other_mode) = lookup_entry(name)
  453. except KeyError:
  454. pass
  455. else:
  456. yield ((None, name), (None, other_mode), (None, other_sha))
  457. def index_entry_from_stat(
  458. stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
  459. extended_flags: Optional[int] = None
  460. ):
  461. """Create a new index entry from a stat value.
  462. Args:
  463. stat_val: POSIX stat_result instance
  464. hex_sha: Hex sha of the object
  465. flags: Index flags
  466. """
  467. if mode is None:
  468. mode = cleanup_mode(stat_val.st_mode)
  469. return IndexEntry(
  470. stat_val.st_ctime,
  471. stat_val.st_mtime,
  472. stat_val.st_dev,
  473. stat_val.st_ino,
  474. mode,
  475. stat_val.st_uid,
  476. stat_val.st_gid,
  477. stat_val.st_size,
  478. hex_sha,
  479. flags,
  480. extended_flags
  481. )
  482. def build_file_from_blob(
  483. blob, mode, target_path, honor_filemode=True, tree_encoding="utf-8"
  484. ):
  485. """Build a file or symlink on disk based on a Git object.
  486. Args:
  487. blob: The git object
  488. mode: File mode
  489. target_path: Path to write to
  490. honor_filemode: An optional flag to honor core.filemode setting in
  491. config file, default is core.filemode=True, change executable bit
  492. Returns: stat object for the file
  493. """
  494. try:
  495. oldstat = os.lstat(target_path)
  496. except FileNotFoundError:
  497. oldstat = None
  498. contents = blob.as_raw_string()
  499. if stat.S_ISLNK(mode):
  500. # FIXME: This will fail on Windows. What should we do instead?
  501. if oldstat:
  502. os.unlink(target_path)
  503. if sys.platform == "win32":
  504. # os.readlink on Python3 on Windows requires a unicode string.
  505. contents = contents.decode(tree_encoding)
  506. target_path = target_path.decode(tree_encoding)
  507. os.symlink(contents, target_path)
  508. else:
  509. if oldstat is not None and oldstat.st_size == len(contents):
  510. with open(target_path, "rb") as f:
  511. if f.read() == contents:
  512. return oldstat
  513. with open(target_path, "wb") as f:
  514. # Write out file
  515. f.write(contents)
  516. if honor_filemode:
  517. os.chmod(target_path, mode)
  518. return os.lstat(target_path)
  519. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  520. def validate_path_element_default(element):
  521. return element.lower() not in INVALID_DOTNAMES
  522. def validate_path_element_ntfs(element):
  523. stripped = element.rstrip(b". ").lower()
  524. if stripped in INVALID_DOTNAMES:
  525. return False
  526. if stripped == b"git~1":
  527. return False
  528. return True
  529. def validate_path(path, element_validator=validate_path_element_default):
  530. """Default path validator that just checks for .git/."""
  531. parts = path.split(b"/")
  532. for p in parts:
  533. if not element_validator(p):
  534. return False
  535. else:
  536. return True
  537. def build_index_from_tree(
  538. root_path,
  539. index_path,
  540. object_store,
  541. tree_id,
  542. honor_filemode=True,
  543. validate_path_element=validate_path_element_default,
  544. ):
  545. """Generate and materialize index from a tree
  546. Args:
  547. tree_id: Tree to materialize
  548. root_path: Target dir for materialized index files
  549. index_path: Target path for generated index
  550. object_store: Non-empty object store holding tree contents
  551. honor_filemode: An optional flag to honor core.filemode setting in
  552. config file, default is core.filemode=True, change executable bit
  553. validate_path_element: Function to validate path elements to check
  554. out; default just refuses .git and .. directories.
  555. Note: existing index is wiped and contents are not merged
  556. in a working dir. Suitable only for fresh clones.
  557. """
  558. index = Index(index_path)
  559. if not isinstance(root_path, bytes):
  560. root_path = os.fsencode(root_path)
  561. for entry in object_store.iter_tree_contents(tree_id):
  562. if not validate_path(entry.path, validate_path_element):
  563. continue
  564. full_path = _tree_to_fs_path(root_path, entry.path)
  565. if not os.path.exists(os.path.dirname(full_path)):
  566. os.makedirs(os.path.dirname(full_path))
  567. # TODO(jelmer): Merge new index into working tree
  568. if S_ISGITLINK(entry.mode):
  569. if not os.path.isdir(full_path):
  570. os.mkdir(full_path)
  571. st = os.lstat(full_path)
  572. # TODO(jelmer): record and return submodule paths
  573. else:
  574. obj = object_store[entry.sha]
  575. st = build_file_from_blob(
  576. obj, entry.mode, full_path, honor_filemode=honor_filemode
  577. )
  578. # Add file to index
  579. if not honor_filemode or S_ISGITLINK(entry.mode):
  580. # we can not use tuple slicing to build a new tuple,
  581. # because on windows that will convert the times to
  582. # longs, which causes errors further along
  583. st_tuple = (
  584. entry.mode,
  585. st.st_ino,
  586. st.st_dev,
  587. st.st_nlink,
  588. st.st_uid,
  589. st.st_gid,
  590. st.st_size,
  591. st.st_atime,
  592. st.st_mtime,
  593. st.st_ctime,
  594. )
  595. st = st.__class__(st_tuple)
  596. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  597. index.write()
  598. def blob_from_path_and_mode(fs_path, mode, tree_encoding="utf-8"):
  599. """Create a blob from a path and a stat object.
  600. Args:
  601. fs_path: Full file system path to file
  602. mode: File mode
  603. Returns: A `Blob` object
  604. """
  605. assert isinstance(fs_path, bytes)
  606. blob = Blob()
  607. if stat.S_ISLNK(mode):
  608. if sys.platform == "win32":
  609. # os.readlink on Python3 on Windows requires a unicode string.
  610. fs_path = os.fsdecode(fs_path)
  611. blob.data = os.readlink(fs_path).encode(tree_encoding)
  612. else:
  613. blob.data = os.readlink(fs_path)
  614. else:
  615. with open(fs_path, "rb") as f:
  616. blob.data = f.read()
  617. return blob
  618. def blob_from_path_and_stat(fs_path, st, tree_encoding="utf-8"):
  619. """Create a blob from a path and a stat object.
  620. Args:
  621. fs_path: Full file system path to file
  622. st: A stat object
  623. Returns: A `Blob` object
  624. """
  625. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
  626. def read_submodule_head(path):
  627. """Read the head commit of a submodule.
  628. Args:
  629. path: path to the submodule
  630. Returns: HEAD sha, None if not a valid head/repository
  631. """
  632. from dulwich.errors import NotGitRepository
  633. from dulwich.repo import Repo
  634. # Repo currently expects a "str", so decode if necessary.
  635. # TODO(jelmer): Perhaps move this into Repo() ?
  636. if not isinstance(path, str):
  637. path = os.fsdecode(path)
  638. try:
  639. repo = Repo(path)
  640. except NotGitRepository:
  641. return None
  642. try:
  643. return repo.head()
  644. except KeyError:
  645. return None
  646. def _has_directory_changed(tree_path, entry):
  647. """Check if a directory has changed after getting an error.
  648. When handling an error trying to create a blob from a path, call this
  649. function. It will check if the path is a directory. If it's a directory
  650. and a submodule, check the submodule head to see if it's has changed. If
  651. not, consider the file as changed as Git tracked a file and not a
  652. directory.
  653. Return true if the given path should be considered as changed and False
  654. otherwise or if the path is not a directory.
  655. """
  656. # This is actually a directory
  657. if os.path.exists(os.path.join(tree_path, b".git")):
  658. # Submodule
  659. head = read_submodule_head(tree_path)
  660. if entry.sha != head:
  661. return True
  662. else:
  663. # The file was changed to a directory, so consider it removed.
  664. return True
  665. return False
  666. def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
  667. """Walk through an index and check for differences against working tree.
  668. Args:
  669. index: index to check
  670. root_path: path in which to find files
  671. Returns: iterator over paths with unstaged changes
  672. """
  673. # For each entry in the index check the sha1 & ensure not staged
  674. if not isinstance(root_path, bytes):
  675. root_path = os.fsencode(root_path)
  676. for tree_path, entry in index.iteritems():
  677. full_path = _tree_to_fs_path(root_path, tree_path)
  678. try:
  679. st = os.lstat(full_path)
  680. if stat.S_ISDIR(st.st_mode):
  681. if _has_directory_changed(tree_path, entry):
  682. yield tree_path
  683. continue
  684. if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
  685. continue
  686. blob = blob_from_path_and_stat(full_path, st)
  687. if filter_blob_callback is not None:
  688. blob = filter_blob_callback(blob, tree_path)
  689. except FileNotFoundError:
  690. # The file was removed, so we assume that counts as
  691. # different from whatever file used to exist.
  692. yield tree_path
  693. else:
  694. if blob.id != entry.sha:
  695. yield tree_path
  696. os_sep_bytes = os.sep.encode("ascii")
  697. def _tree_to_fs_path(root_path, tree_path: bytes):
  698. """Convert a git tree path to a file system path.
  699. Args:
  700. root_path: Root filesystem path
  701. tree_path: Git tree path as bytes
  702. Returns: File system path.
  703. """
  704. assert isinstance(tree_path, bytes)
  705. if os_sep_bytes != b"/":
  706. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  707. else:
  708. sep_corrected_path = tree_path
  709. return os.path.join(root_path, sep_corrected_path)
  710. def _fs_to_tree_path(fs_path):
  711. """Convert a file system path to a git tree path.
  712. Args:
  713. fs_path: File system path.
  714. Returns: Git tree path as bytes
  715. """
  716. if not isinstance(fs_path, bytes):
  717. fs_path_bytes = os.fsencode(fs_path)
  718. else:
  719. fs_path_bytes = fs_path
  720. if os_sep_bytes != b"/":
  721. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  722. else:
  723. tree_path = fs_path_bytes
  724. return tree_path
  725. def index_entry_from_directory(st, path):
  726. if os.path.exists(os.path.join(path, b".git")):
  727. head = read_submodule_head(path)
  728. if head is None:
  729. return None
  730. return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
  731. return None
  732. def index_entry_from_path(path, object_store=None):
  733. """Create an index from a filesystem path.
  734. This returns an index value for files, symlinks
  735. and tree references. for directories and
  736. non-existent files it returns None
  737. Args:
  738. path: Path to create an index entry for
  739. object_store: Optional object store to
  740. save new blobs in
  741. Returns: An index entry; None for directories
  742. """
  743. assert isinstance(path, bytes)
  744. st = os.lstat(path)
  745. if stat.S_ISDIR(st.st_mode):
  746. return index_entry_from_directory(st, path)
  747. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  748. blob = blob_from_path_and_stat(path, st)
  749. if object_store is not None:
  750. object_store.add_object(blob)
  751. return index_entry_from_stat(st, blob.id, 0)
  752. return None
  753. def iter_fresh_entries(
  754. paths, root_path, object_store: Optional["BaseObjectStore"] = None
  755. ):
  756. """Iterate over current versions of index entries on disk.
  757. Args:
  758. paths: Paths to iterate over
  759. root_path: Root path to access from
  760. object_store: Optional store to save new blobs in
  761. Returns: Iterator over path, index_entry
  762. """
  763. for path in paths:
  764. p = _tree_to_fs_path(root_path, path)
  765. try:
  766. entry = index_entry_from_path(p, object_store=object_store)
  767. except (FileNotFoundError, IsADirectoryError):
  768. entry = None
  769. yield path, entry
  770. def iter_fresh_objects(paths, root_path, include_deleted=False, object_store=None):
  771. """Iterate over versions of objects on disk referenced by index.
  772. Args:
  773. root_path: Root path to access from
  774. include_deleted: Include deleted entries with sha and
  775. mode set to None
  776. object_store: Optional object store to report new items to
  777. Returns: Iterator over path, sha, mode
  778. """
  779. for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
  780. if entry is None:
  781. if include_deleted:
  782. yield path, None, None
  783. else:
  784. entry = IndexEntry(*entry)
  785. yield path, entry.sha, cleanup_mode(entry.mode)
  786. def refresh_index(index, root_path):
  787. """Refresh the contents of an index.
  788. This is the equivalent to running 'git commit -a'.
  789. Args:
  790. index: Index to update
  791. root_path: Root filesystem path
  792. """
  793. for path, entry in iter_fresh_entries(index, root_path):
  794. index[path] = path
  795. class locked_index(object):
  796. """Lock the index while making modifications.
  797. Works as a context manager.
  798. """
  799. def __init__(self, path):
  800. self._path = path
  801. def __enter__(self):
  802. self._file = GitFile(self._path, "wb")
  803. self._index = Index(self._path)
  804. return self._index
  805. def __exit__(self, exc_type, exc_value, traceback):
  806. if exc_type is not None:
  807. self._file.abort()
  808. return
  809. try:
  810. f = SHA1Writer(self._file)
  811. write_index_dict(f, self._index._byname)
  812. except BaseException:
  813. self._file.abort()
  814. else:
  815. f.close()