# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parser for the git index file format."""

import collections
import os
import stat
import struct
import sys
from typing import (
    Any,
    BinaryIO,
    Callable,
    Dict,
    List,
    Optional,
    TYPE_CHECKING,
    Iterable,
    Iterator,
    Tuple,
)

if TYPE_CHECKING:
    from dulwich.object_store import BaseObjectStore

from dulwich.file import GitFile
from dulwich.objects import (
    Blob,
    S_IFGITLINK,
    S_ISGITLINK,
    Tree,
    hex_to_sha,
    sha_to_hex,
)
from dulwich.pack import (
    SHA1Reader,
    SHA1Writer,
)

IndexEntry = collections.namedtuple(
    "IndexEntry",
    [
        "ctime",
        "mtime",
        "dev",
        "ino",
        "mode",
        "uid",
        "gid",
        "size",
        "sha",
        "flags",
    ],
)


FLAG_STAGEMASK = 0x3000
FLAG_VALID = 0x8000
FLAG_EXTENDED = 0x4000

DEFAULT_VERSION = 2
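
# Note on the flags field (index format v2): the low 12 bits of the 16-bit
# per-entry flags store the path name length (0xFFF when the name is 4095
# bytes or longer), FLAG_STAGEMASK covers the two merge-stage bits,
# FLAG_EXTENDED marks an extended entry and FLAG_VALID is the
# "assume unchanged" bit. read_cache_entry() strips the length bits with
# `flags & ~0x0FFF`; write_cache_entry() recomputes them from len(name).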


def pathsplit(path):
    """Split a /-delimited path into a directory part and a basename.

    Args:
      path: The path to split.

    Returns:
      Tuple with directory name and basename
    """
    try:
        (dirname, basename) = path.rsplit(b"/", 1)
    except ValueError:
        return (b"", path)
    else:
        return (dirname, basename)


def pathjoin(*args):
    """Join a /-delimited path."""
    return b"/".join([p for p in args if p])


def read_cache_time(f):
    """Read a cache time.

    Args:
      f: File-like object to read from
    Returns:
      Tuple with seconds and nanoseconds
    """
    return struct.unpack(">LL", f.read(8))


def write_cache_time(f, t):
    """Write a cache time.

    Args:
      f: File-like object to write to
      t: Time to write (as int, float or tuple with secs and nsecs)
    """
    if isinstance(t, int):
        t = (t, 0)
    elif isinstance(t, float):
        (secs, nsecs) = divmod(t, 1.0)
        t = (int(secs), int(nsecs * 1000000000))
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))
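
# Example (sketch): a float timestamp is split into whole seconds and
# nanoseconds before being packed as two big-endian 32-bit values, so
# write_cache_time(f, 1234567890.5) stores the pair (1234567890, 500000000).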


def read_cache_entry(f):
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
    Returns:
      Tuple with: name, ctime, mtime, dev, ino, mode, uid, gid, size, sha,
      flags
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    name = f.read((flags & 0x0FFF))
    # Padding:
    real_size = (f.tell() - beginoffset + 8) & ~7
    f.read((beginoffset + real_size) - f.tell())
    return (
        name,
        ctime,
        mtime,
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha_to_hex(sha),
        flags & ~0x0FFF,
    )
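
# Layout note: an index v2 entry is 62 bytes of fixed fields (two 8-byte
# ctime/mtime pairs, six 32-bit stat fields, a 20-byte raw SHA-1 and 16-bit
# flags) followed by the path name, and the whole record is NUL-padded to a
# multiple of 8 bytes. `(f.tell() - beginoffset + 8) & ~7` rounds the size up
# while guaranteeing at least one trailing NUL byte; write_cache_entry() below
# uses the same expression when emitting the padding.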


def write_cache_entry(f, entry):
    """Write an index entry to a file.

    Args:
      f: File object
      entry: Entry to write, tuple with:
        (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
    """
    beginoffset = f.tell()
    (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
    write_cache_time(f, ctime)
    write_cache_time(f, mtime)
    flags = len(name) | (flags & ~0x0FFF)
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            dev & 0xFFFFFFFF,
            ino & 0xFFFFFFFF,
            mode,
            uid,
            gid,
            size,
            hex_to_sha(sha),
            flags,
        )
    )
    f.write(name)
    real_size = (f.tell() - beginoffset + 8) & ~7
    f.write(b"\0" * ((beginoffset + real_size) - f.tell()))


def read_index(f: BinaryIO):
    """Read an index file, yielding the individual entries."""
    header = f.read(4)
    if header != b"DIRC":
        raise AssertionError("Invalid index file header: %r" % header)
    (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
    assert version in (1, 2)
    for i in range(num_entries):
        yield read_cache_entry(f)


def read_index_dict(f):
    """Read an index file and return it as a dictionary.

    Args:
      f: File object to read from
    """
    ret = {}
    for x in read_index(f):
        ret[x[0]] = IndexEntry(*x[1:])
    return ret


def write_index(f: BinaryIO, entries: List[Any], version: Optional[int] = None):
    """Write an index file.

    Args:
      f: File-like object to write to
      version: Version number to write
      entries: Iterable over the entries to write
    """
    if version is None:
        version = DEFAULT_VERSION
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    for x in entries:
        write_cache_entry(f, x)


def write_index_dict(
    f: BinaryIO,
    entries: Dict[bytes, IndexEntry],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary."""
    entries_list = []
    for name in sorted(entries):
        entries_list.append((name,) + tuple(entries[name]))
    write_index(f, entries_list, version=version)
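
# Minimal usage sketch (".git/index" is just an example path; the trailing
# SHA-1 checksum is not verified when reading a plain file object this way):
#
#     with open(".git/index", "rb") as f:
#         entries = read_index_dict(f)
#     for path, entry in entries.items():
#         print(path, entry.sha)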


def cleanup_mode(mode: int) -> int:
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    Args:
      mode: Mode to clean up.
    Returns:
      mode
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    elif stat.S_ISDIR(mode):
        return stat.S_IFDIR
    elif S_ISGITLINK(mode):
        return S_IFGITLINK
    ret = stat.S_IFREG | 0o644
    if mode & 0o100:
        ret |= 0o111
    return ret
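
# Examples (sketch): cleanup_mode() maps arbitrary filesystem modes onto the
# small set of modes git stores in tree objects:
#
#     cleanup_mode(0o100600)  -> 0o100644   (regular file)
#     cleanup_mode(0o100755)  -> 0o100755   (executable bit kept)
#     cleanup_mode(0o120777)  -> 0o120000   (symlink)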


class Index(object):
    """A Git Index file."""

    def __init__(self, filename):
        """Open an index file.

        Args:
          filename: Path to the index file
        """
        self._filename = filename
        # TODO(jelmer): Store the version returned by read_index
        self._version = None
        self.clear()
        self.read()

    @property
    def path(self):
        return self._filename

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._filename)

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            f = SHA1Writer(f)
            write_index_dict(f, self._byname, version=self._version)
        finally:
            f.close()

    def read(self):
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            f = SHA1Reader(f)
            for x in read_index(f):
                self[x[0]] = IndexEntry(*x[1:])
            # FIXME: Additional data?
            f.read(os.path.getsize(self._filename) - f.tell() - 20)
            f.check_sha()
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, name: bytes) -> IndexEntry:
        """Retrieve entry by relative path.

        Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
            flags)
        """
        return self._byname[name]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths in this index."""
        return iter(self._byname)

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        return self[path].sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        return self[path].mode

    def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            yield path, entry.sha, cleanup_mode(entry.mode)

    def iterblobs(self):
        import warnings

        warnings.warn("Use iterobjects() instead.", PendingDeprecationWarning)
        return self.iterobjects()

    def clear(self):
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(self, name, x):
        assert isinstance(name, bytes)
        assert len(x) == 10
        # Remove the old entry if any
        self._byname[name] = IndexEntry(*x)

    def __delitem__(self, name):
        assert isinstance(name, bytes)
        del self._byname[name]

    def iteritems(self):
        return self._byname.items()

    def items(self):
        return self._byname.items()

    def update(self, entries):
        for name, value in entries.items():
            self[name] = value

    def changes_from_tree(self, object_store, tree, want_unchanged=False):
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path):
            entry = self[path]
            return entry.sha, cleanup_mode(entry.mode)

        for (name, mode, sha) in changes_from_tree(
            self._byname.keys(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        ):
            yield (name, mode, sha)

    def commit(self, object_store):
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
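
# Minimal usage sketch for the Index class (".git/index" is an example path):
#
#     index = Index(".git/index")
#     for path in index:
#         entry = index[path]
#         print(path, entry.sha, oct(entry.mode))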


def commit_tree(
    object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries
    Returns:
      SHA1 of the created tree.
    """
    trees = {b"": {}}  # type: Dict[bytes, Any]

    def add_tree(path):
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path):
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")
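
# Worked example (sketch; sha_a and sha_b stand for existing blob ids): for
# the input
#
#     [(b"a.txt", sha_a, 0o100644), (b"dir/b.txt", sha_b, 0o100644)]
#
# commit_tree() first groups entries per directory into nested dicts
# ({b"": {b"a.txt": ..., b"dir": {...}}, b"dir": {b"b.txt": ...}}) and then
# build_tree() writes one Tree object per directory, depth-first, so the
# subtree for b"dir" is added to the object store before the root tree whose
# id is returned.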


def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes:
    """Create a new tree from an index.

    Args:
      object_store: Object store to save the tree in
      index: Index file
    Note: This function is deprecated, use index.commit() instead.
    Returns: Root tree sha.
    """
    return commit_tree(object_store, index.iterobjects())


def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], Tuple[bytes, int]],
    object_store: "BaseObjectStore",
    tree: Optional[bytes],
    want_unchanged=False,
) -> Iterable[
    Tuple[
        Tuple[Optional[bytes], Optional[bytes]],
        Tuple[Optional[int], Optional[int]],
        Tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and
    a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported
    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # TODO(jelmer): Support a include_trees option
    other_names = set(names)

    if tree is not None:
        for (name, mode, sha) in object_store.iter_tree_contents(tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))

    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))


def index_entry_from_stat(
    stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None
):
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
      flags: Index flags
      mode: Optional mode to use; derived from stat_val if not provided
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)

    return IndexEntry(
        stat_val.st_ctime,
        stat_val.st_mtime,
        stat_val.st_dev,
        stat_val.st_ino,
        mode,
        stat_val.st_uid,
        stat_val.st_gid,
        stat_val.st_size,
        hex_sha,
        flags,
    )


def build_file_from_blob(
    blob, mode, target_path, honor_filemode=True, tree_encoding="utf-8"
):
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git Blob object to write out
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        # FIXME: This will fail on Windows. What should we do instead?
        if oldstat:
            os.unlink(target_path)
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            contents = contents.decode(tree_encoding)
            target_path = target_path.decode(tree_encoding)
        os.symlink(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat

        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)

        if honor_filemode:
            os.chmod(target_path, mode)

    return os.lstat(target_path)


INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element):
    return element.lower() not in INVALID_DOTNAMES


def validate_path_element_ntfs(element):
    stripped = element.rstrip(b". ").lower()
    if stripped in INVALID_DOTNAMES:
        return False
    if stripped == b"git~1":
        return False
    return True


def validate_path(path, element_validator=validate_path_element_default):
    """Default path validator that just checks for .git/."""
    parts = path.split(b"/")
    for p in parts:
        if not element_validator(p):
            return False
    else:
        return True
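
# Examples (sketch): both validators reject path elements that could alias the
# .git directory; the NTFS variant also strips trailing dots and spaces and
# rejects b"git~1", the 8.3 short name of ".git":
#
#     validate_path_element_default(b".GIT")  -> False
#     validate_path_element_ntfs(b".git . ")  -> False
#     validate_path_element_ntfs(b"GIT~1")    -> False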


def build_index_from_tree(
    root_path,
    index_path,
    object_store,
    tree_id,
    honor_filemode=True,
    validate_path_element=validate_path_element_default,
):
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for entry in object_store.iter_tree_contents(tree_id):
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)

        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))

        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            st = build_file_from_blob(
                obj, entry.mode, full_path, honor_filemode=honor_filemode
            )

        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)

    index.write()
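
# Minimal usage sketch (assumes an opened dulwich Repo as `repo` and a tree
# sha `tree_id`; Repo.index_path() returns the path of the index file):
#
#     build_index_from_tree(
#         repo.path, repo.index_path(), repo.object_store, tree_id
#     )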


def blob_from_path_and_mode(fs_path, mode, tree_encoding="utf-8"):
    """Create a blob from a path and a file mode.

    Args:
      fs_path: Full file system path to file
      mode: File mode (used to detect symlinks)
    Returns: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(mode):
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            fs_path = os.fsdecode(fs_path)
            blob.data = os.readlink(fs_path).encode(tree_encoding)
        else:
            blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, "rb") as f:
            blob.data = f.read()
    return blob


def blob_from_path_and_stat(fs_path, st, tree_encoding="utf-8"):
    """Create a blob from a path and a stat object.

    Args:
      fs_path: Full file system path to file
      st: A stat object
    Returns: A `Blob` object
    """
    return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)


def read_submodule_head(path):
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule
    Returns: HEAD sha, None if not a valid head/repository
    """
    from dulwich.errors import NotGitRepository
    from dulwich.repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        return None


def _has_directory_changed(tree_path, entry):
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it's a directory
    and a submodule, check the submodule head to see if it has changed. If
    not, consider the file as changed, as Git tracked a file and not a
    directory.

    Return True if the given path should be considered as changed and False
    otherwise or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True

    return False


def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue

            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue

            blob = blob_from_path_and_stat(full_path, st)

            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path
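
# Usage sketch (assumes an opened dulwich Repo as `repo`):
#
#     index = repo.open_index()
#     for tree_path in get_unstaged_changes(index, repo.path):
#         print(tree_path.decode("utf-8", "replace"))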


os_sep_bytes = os.sep.encode("ascii")


def _tree_to_fs_path(root_path, tree_path: bytes):
    """Convert a git tree path to a file system path.

    Args:
      root_path: Root filesystem path
      tree_path: Git tree path as bytes

    Returns: File system path.
    """
    assert isinstance(tree_path, bytes)
    if os_sep_bytes != b"/":
        sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
    else:
        sep_corrected_path = tree_path
    return os.path.join(root_path, sep_corrected_path)


def _fs_to_tree_path(fs_path):
    """Convert a file system path to a git tree path.

    Args:
      fs_path: File system path.

    Returns: Git tree path as bytes
    """
    if not isinstance(fs_path, bytes):
        fs_path_bytes = os.fsencode(fs_path)
    else:
        fs_path_bytes = fs_path
    if os_sep_bytes != b"/":
        tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
    else:
        tree_path = fs_path_bytes
    return tree_path
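
# Example (sketch): on Windows, where os.sep is "\\", the two helpers
# translate separators in both directions:
#
#     _tree_to_fs_path(b"C:\\repo", b"dir/file.txt")  -> b"C:\\repo\\dir\\file.txt"
#     _fs_to_tree_path(b"dir\\file.txt")              -> b"dir/file.txt"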


def index_entry_from_path(path, object_store=None):
    """Create an index entry from a filesystem path.

    This returns an index value for files, symlinks
    and tree references. For directories and
    non-existent files it returns None.

    Args:
      path: Path to create an index entry for
      object_store: Optional object store to
        save new blobs in
    Returns: An index entry; None for directories
    """
    assert isinstance(path, bytes)
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        if os.path.exists(os.path.join(path, b".git")):
            head = read_submodule_head(path)
            if head is None:
                return None
            return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
        return None

    if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
        blob = blob_from_path_and_stat(path, st)
        if object_store is not None:
            object_store.add_object(blob)
        return index_entry_from_stat(st, blob.id, 0)

    return None
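
# Example sketch (the path is an arbitrary file in the current directory):
#
#     entry = index_entry_from_path(b"README.md")
#     if entry is not None:
#         print(entry.sha, oct(entry.mode))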


def iter_fresh_entries(
    paths, root_path, object_store: Optional["BaseObjectStore"] = None
):
    """Iterate over current versions of index entries on disk.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      object_store: Optional store to save new blobs in
    Returns: Iterator over path, index_entry
    """
    for path in paths:
        p = _tree_to_fs_path(root_path, path)
        try:
            entry = index_entry_from_path(p, object_store=object_store)
        except (FileNotFoundError, IsADirectoryError):
            entry = None
        yield path, entry


def iter_fresh_blobs(index, root_path):
    """Iterate over versions of blobs on disk referenced by index.

    Don't use this function; it removes missing entries from index.

    Args:
      index: Index file
      root_path: Root path to access from
    Returns: Iterator over path, sha, mode
    """
    import warnings

    warnings.warn("Use iter_fresh_objects instead.", PendingDeprecationWarning)
    for entry in iter_fresh_objects(index, root_path, include_deleted=True):
        if entry[1] is None:
            del index[entry[0]]
        else:
            yield entry


def iter_fresh_objects(paths, root_path, include_deleted=False, object_store=None):
    """Iterate over versions of objects on disk referenced by index.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      include_deleted: Include deleted entries with sha and
        mode set to None
      object_store: Optional object store to report new items to
    Returns: Iterator over path, sha, mode
    """
    for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
        if entry is None:
            if include_deleted:
                yield path, None, None
        else:
            entry = IndexEntry(*entry)
            yield path, entry.sha, cleanup_mode(entry.mode)


def refresh_index(index, root_path):
    """Refresh the contents of an index.

    This is the equivalent of the index refresh that 'git commit -a' performs
    before committing: every tracked path is re-read from the working tree.

    Args:
      index: Index to update
      root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        if entry:
            # Skip paths that no longer exist on disk; keep the rest current.
            index[path] = entry