# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parser for the git index file format."""

import collections
import os
import stat
import struct
import sys
from typing import (
    Any,
    BinaryIO,
    Callable,
    Dict,
    List,
    Optional,
    TYPE_CHECKING,
    Iterable,
    Iterator,
    Tuple,
)

if TYPE_CHECKING:
    from dulwich.object_store import BaseObjectStore

from dulwich.file import GitFile
from dulwich.objects import (
    Blob,
    S_IFGITLINK,
    S_ISGITLINK,
    Tree,
    hex_to_sha,
    sha_to_hex,
)
from dulwich.pack import (
    SHA1Reader,
    SHA1Writer,
)


# TODO(jelmer): Switch to dataclass?
IndexEntry = collections.namedtuple(
    "IndexEntry",
    [
        "ctime",
        "mtime",
        "dev",
        "ino",
        "mode",
        "uid",
        "gid",
        "size",
        "sha",
        "flags",
        "extended_flags",
    ],
)


# 2-bit stage (during merge)
FLAG_STAGEMASK = 0x3000

# assume-valid
FLAG_VALID = 0x8000

# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000

# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

DEFAULT_VERSION = 2


def pathsplit(path):
    """Split a /-delimited path into a directory part and a basename.

    Args:
      path: The path to split.

    Returns:
      Tuple with directory name and basename
    """
    try:
        (dirname, basename) = path.rsplit(b"/", 1)
    except ValueError:
        return (b"", path)
    else:
        return (dirname, basename)


def pathjoin(*args):
    """Join a /-delimited path."""
    return b"/".join([p for p in args if p])
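

# Illustration (editor's note, not part of the upstream module): pathsplit and
# pathjoin operate on byte strings with "/" separators, regardless of OS.
#
#     pathsplit(b"foo/bar/baz")      # -> (b"foo/bar", b"baz")
#     pathsplit(b"baz")              # -> (b"", b"baz")
#     pathjoin(b"foo", b"", b"baz")  # -> b"foo/baz" (empty parts are dropped)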


def read_cache_time(f):
    """Read a cache time.

    Args:
      f: File-like object to read from

    Returns:
      Tuple with seconds and nanoseconds
    """
    return struct.unpack(">LL", f.read(8))


def write_cache_time(f, t):
    """Write a cache time.

    Args:
      f: File-like object to write to
      t: Time to write (as int, float or tuple with secs and nsecs)
    """
    if isinstance(t, int):
        t = (t, 0)
    elif isinstance(t, float):
        (secs, nsecs) = divmod(t, 1.0)
        t = (int(secs), int(nsecs * 1000000000))
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))
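

# Illustration (editor's note): cache times are stored as two big-endian
# 32-bit words, seconds followed by nanoseconds. A float such as 1234.5 is
# therefore written as (1234, 500000000) and read back as that tuple.
#
#     import io
#     buf = io.BytesIO()
#     write_cache_time(buf, 1234.5)
#     buf.seek(0)
#     read_cache_time(buf)  # -> (1234, 500000000)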


def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index file version

    Returns:
      tuple with: name, IndexEntry
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError(
                'extended flag set in index with version < 3')
        (extended_flags, ) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    name = f.read((flags & 0x0FFF))
    # Padding:
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return (
        name,
        IndexEntry(
            ctime,
            mtime,
            dev,
            ino,
            mode,
            uid,
            gid,
            size,
            sha_to_hex(sha),
            flags & ~0x0FFF,
            extended_flags,
        ))


def write_cache_entry(f, name, entry, version):
    """Write an index entry to a file.

    Args:
      f: File object
      name: Entry name (path) as bytes
      entry: IndexEntry to write
      version: Index file version
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    flags = len(name) | (entry.flags & ~0x0FFF)
    if entry.extended_flags:
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError('unable to use extended flags in version < 3')
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            entry.dev & 0xFFFFFFFF,
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    f.write(name)
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.write(b"\0" * ((beginoffset + real_size) - f.tell()))


def read_index(f: BinaryIO):
    """Read an index file, yielding the individual entries."""
    header = f.read(4)
    if header != b"DIRC":
        raise AssertionError("Invalid index file header: %r" % header)
    (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
    assert version in (1, 2, 3), "index version is %r" % version
    for i in range(num_entries):
        yield read_cache_entry(f, version)


def read_index_dict(f):
    """Read an index file and return it as a dictionary.

    Args:
      f: File object to read from
    """
    ret = {}
    for name, entry in read_index(f):
        ret[name] = entry
    return ret
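

# Usage sketch (editor's illustration): reading an existing index file with
# read_index_dict. The ".git/index" path is assumed; any Git repository's
# index will do. Paths and SHAs in the index are byte strings.
#
#     with open(".git/index", "rb") as f:
#         entries = read_index_dict(f)
#     for path, entry in sorted(entries.items()):
#         print(path.decode("utf-8", "replace"), entry.sha.decode("ascii"))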


def write_index(
    f: BinaryIO,
    entries: List[Tuple[bytes, IndexEntry]],
    version: Optional[int] = None,
):
    """Write an index file.

    Args:
      f: File-like object to write to
      version: Version number to write
      entries: Iterable over the entries to write
    """
    if version is None:
        version = DEFAULT_VERSION
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    for name, entry in entries:
        write_cache_entry(f, name, entry, version)


def write_index_dict(
    f: BinaryIO,
    entries: Dict[bytes, IndexEntry],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary."""
    entries_list = []
    for name in sorted(entries):
        entries_list.append((name, entries[name]))
    write_index(f, entries_list, version=version)


def cleanup_mode(mode: int) -> int:
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    Args:
      mode: Mode to clean up.

    Returns:
      mode
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    elif stat.S_ISDIR(mode):
        return stat.S_IFDIR
    elif S_ISGITLINK(mode):
        return S_IFGITLINK
    ret = stat.S_IFREG | 0o644
    if mode & 0o100:
        ret |= 0o111
    return ret
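

# Illustration (editor's note): cleanup_mode normalizes arbitrary filesystem
# modes into the small set of modes Git stores in tree objects. Regular files
# become 0o100644 or 0o100755 depending only on the owner-execute bit.
#
#     oct(cleanup_mode(0o100600))  # -> '0o100644'
#     oct(cleanup_mode(0o100775))  # -> '0o100755'
#     oct(cleanup_mode(0o120777))  # -> '0o120000' (symlink)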


class Index(object):
    """A Git Index file."""

    def __init__(self, filename):
        """Open an index file.

        Args:
          filename: Path to the index file
        """
        self._filename = filename
        # TODO(jelmer): Store the version returned by read_index
        self._version = None
        self.clear()
        self.read()

    @property
    def path(self):
        return self._filename

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._filename)

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            f = SHA1Writer(f)
            write_index_dict(f, self._byname, version=self._version)
        finally:
            f.close()

    def read(self):
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            f = SHA1Reader(f)
            for name, entry in read_index(f):
                self[name] = entry
            # FIXME: Additional data?
            f.read(os.path.getsize(self._filename) - f.tell() - 20)
            f.check_sha()
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, name: bytes) -> IndexEntry:
        """Retrieve entry by relative path.

        Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
            flags)
        """
        return self._byname[name]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths in this index."""
        return iter(self._byname)

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        return self[path].sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        return self[path].mode

    def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            yield path, entry.sha, cleanup_mode(entry.mode)

    def iterblobs(self):
        import warnings

        warnings.warn("Use iterobjects() instead.", PendingDeprecationWarning)
        return self.iterobjects()

    def clear(self):
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(self, name, x):
        assert isinstance(name, bytes)
        assert len(x) == len(IndexEntry._fields)
        # Remove the old entry if any
        self._byname[name] = IndexEntry(*x)

    def __delitem__(self, name):
        assert isinstance(name, bytes)
        del self._byname[name]

    def iteritems(self):
        return self._byname.items()

    def items(self):
        return self._byname.items()

    def update(self, entries):
        for name, value in entries.items():
            self[name] = value

    def changes_from_tree(self, object_store, tree, want_unchanged=False):
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported

        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path):
            entry = self[path]
            return entry.sha, cleanup_mode(entry.mode)

        for (name, mode, sha) in changes_from_tree(
            self._byname.keys(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        ):
            yield (name, mode, sha)

    def commit(self, object_store):
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in

        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
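

# Usage sketch (editor's illustration): opening a repository's index and
# inspecting a few entries. The repository path is assumed.
#
#     idx = Index(".git/index")
#     len(idx)                     # number of staged paths
#     paths = list(idx)            # paths are byte strings
#     idx.get_sha1(paths[0])       # hex sha of the staged blob
#     oct(idx.get_mode(paths[0]))  # stat mode recorded for that entry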


def commit_tree(
    object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries

    Returns:
      SHA1 of the created tree.
    """
    trees = {b"": {}}  # type: Dict[bytes, Any]

    def add_tree(path):
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path):
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")
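

# Usage sketch (editor's illustration): building a tree from in-memory blobs.
# Assumes MemoryObjectStore from dulwich.object_store; the path and file
# contents are made up.
#
#     from dulwich.object_store import MemoryObjectStore
#     from dulwich.objects import Blob
#
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b"hello\n")
#     store.add_object(blob)
#     tree_id = commit_tree(store, [(b"docs/hello.txt", blob.id, 0o100644)])
#     store[tree_id]  # the root Tree, containing the b"docs" subtree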


def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes:
    """Create a new tree from an index.

    Args:
      object_store: Object store to save the tree in
      index: Index file

    Note: This function is deprecated, use index.commit() instead.

    Returns: Root tree sha.
    """
    return commit_tree(object_store, index.iterobjects())


def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], Tuple[bytes, int]],
    object_store: "BaseObjectStore",
    tree: Optional[bytes],
    want_unchanged=False,
) -> Iterable[
    Tuple[
        Tuple[Optional[bytes], Optional[bytes]],
        Tuple[Optional[int], Optional[int]],
        Tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and
    a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported

    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # TODO(jelmer): Support an include_trees option
    other_names = set(names)

    if tree is not None:
        for (name, mode, sha) in object_store.iter_tree_contents(tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))

    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))
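

# Usage sketch (editor's illustration): diffing a tree against an in-memory
# "working copy" expressed as a dict. Everything here is hypothetical except
# the changes_from_tree signature defined above.
#
#     working = {b"a.txt": (b"1" * 40, 0o100644)}   # path -> (sha, mode)
#
#     def lookup(path):
#         return working[path]
#
#     # With tree=None every path in `working` is reported as an addition:
#     list(changes_from_tree(working.keys(), lookup, object_store=None,
#                            tree=None))
#     # -> [((None, b'a.txt'), (None, 33188), (None, b'111...111'))]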


def index_entry_from_stat(
    stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
    extended_flags: Optional[int] = None
):
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
      flags: Index flags
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)

    return IndexEntry(
        stat_val.st_ctime,
        stat_val.st_mtime,
        stat_val.st_dev,
        stat_val.st_ino,
        mode,
        stat_val.st_uid,
        stat_val.st_gid,
        stat_val.st_size,
        hex_sha,
        flags,
        extended_flags
    )
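

# Usage sketch (editor's illustration): building an index entry for a file on
# disk. The path and the all-zero sha are placeholders; in real use the sha
# comes from the blob that was written to the object store.
#
#     st = os.lstat("setup.py")
#     entry = index_entry_from_stat(st, b"0" * 40, 0)
#     entry.mode == cleanup_mode(st.st_mode)  # True; mode defaults to cleanup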


def build_file_from_blob(
    blob, mode, target_path, honor_filemode=True, tree_encoding="utf-8"
):
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit

    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        # FIXME: This will fail on Windows. What should we do instead?
        if oldstat:
            os.unlink(target_path)
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            contents = contents.decode(tree_encoding)
            target_path = target_path.decode(tree_encoding)
        os.symlink(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat

        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)

        if honor_filemode:
            os.chmod(target_path, mode)

    return os.lstat(target_path)


INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element):
    return element.lower() not in INVALID_DOTNAMES


def validate_path_element_ntfs(element):
    stripped = element.rstrip(b". ").lower()
    if stripped in INVALID_DOTNAMES:
        return False
    if stripped == b"git~1":
        return False
    return True


def validate_path(path, element_validator=validate_path_element_default):
    """Default path validator that just checks for .git/."""
    parts = path.split(b"/")
    for p in parts:
        if not element_validator(p):
            return False
    else:
        return True
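

# Illustration (editor's note): path validation rejects any component that
# could alias the .git directory. The NTFS variant also rejects trailing
# dots/spaces and the 8.3 short name "git~1".
#
#     validate_path(b"src/main.py")                               # -> True
#     validate_path(b"nested/.git/hooks/post-checkout")           # -> False
#     validate_path(b"GIT~1/config", validate_path_element_ntfs)  # -> False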


def build_index_from_tree(
    root_path,
    index_path,
    object_store,
    tree_id,
    honor_filemode=True,
    validate_path_element=validate_path_element_default,
):
    """Generate and materialize index from a tree

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for entry in object_store.iter_tree_contents(tree_id):
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)

        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))

        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            st = build_file_from_blob(
                obj, entry.mode, full_path, honor_filemode=honor_filemode
            )

        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)

    index.write()
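

# Usage sketch (editor's illustration): checking a tree out into an empty
# directory, the way a fresh clone would. Assumes an existing dulwich Repo
# object; the target directory (and its .git subdirectory) is hypothetical
# and must already exist so the index file can be written.
#
#     from dulwich.repo import Repo
#
#     repo = Repo(".")
#     tree_id = repo[repo.head()].tree
#     build_index_from_tree(
#         "/tmp/fresh-checkout", "/tmp/fresh-checkout/.git/index",
#         repo.object_store, tree_id)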


def blob_from_path_and_mode(fs_path, mode, tree_encoding="utf-8"):
    """Create a blob from a path and a mode.

    Args:
      fs_path: Full file system path to file
      mode: File mode (used to detect symlinks)

    Returns: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(mode):
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            fs_path = os.fsdecode(fs_path)
            blob.data = os.readlink(fs_path).encode(tree_encoding)
        else:
            blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, "rb") as f:
            blob.data = f.read()
    return blob


def blob_from_path_and_stat(fs_path, st, tree_encoding="utf-8"):
    """Create a blob from a path and a stat object.

    Args:
      fs_path: Full file system path to file
      st: A stat object

    Returns: A `Blob` object
    """
    return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)


def read_submodule_head(path):
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule

    Returns: HEAD sha, None if not a valid head/repository
    """
    from dulwich.errors import NotGitRepository
    from dulwich.repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        return None


def _has_directory_changed(tree_path, entry):
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it's a directory
    and a submodule, check the submodule head to see if it has changed. If
    not, consider the file as changed as Git tracked a file and not a
    directory.

    Return True if the given path should be considered as changed and False
    otherwise or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True

    return False


def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files

    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue

            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue

            blob = blob_from_path_and_stat(full_path, st)

            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path
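

# Usage sketch (editor's illustration): listing files whose working-tree
# contents differ from what the index records. Assumes the current directory
# is the root of a Git work tree.
#
#     idx = Index(".git/index")
#     for path in get_unstaged_changes(idx, "."):
#         print("modified or missing:", path.decode("utf-8", "replace"))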


os_sep_bytes = os.sep.encode("ascii")


def _tree_to_fs_path(root_path, tree_path: bytes):
    """Convert a git tree path to a file system path.

    Args:
      root_path: Root filesystem path
      tree_path: Git tree path as bytes

    Returns: File system path.
    """
    assert isinstance(tree_path, bytes)
    if os_sep_bytes != b"/":
        sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
    else:
        sep_corrected_path = tree_path
    return os.path.join(root_path, sep_corrected_path)


def _fs_to_tree_path(fs_path):
    """Convert a file system path to a git tree path.

    Args:
      fs_path: File system path.

    Returns: Git tree path as bytes
    """
    if not isinstance(fs_path, bytes):
        fs_path_bytes = os.fsencode(fs_path)
    else:
        fs_path_bytes = fs_path
    if os_sep_bytes != b"/":
        tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
    else:
        tree_path = fs_path_bytes
    return tree_path
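

# Illustration (editor's note): tree paths always use b"/", while filesystem
# paths use the local separator. On Windows (os.sep == "\\"):
#
#     _tree_to_fs_path(b"C:\\repo", b"docs/readme.md")
#     # -> b"C:\\repo\\docs\\readme.md"
#     _fs_to_tree_path("docs\\readme.md")  # -> b"docs/readme.md"
#
# On POSIX systems both helpers leave the separators untouched.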


def index_entry_from_path(path, object_store=None):
    """Create an index entry from a filesystem path.

    This returns an index value for files, symlinks
    and tree references. For directories and
    non-existent files it returns None

    Args:
      path: Path to create an index entry for
      object_store: Optional object store to
        save new blobs in

    Returns: An index entry; None for directories
    """
    assert isinstance(path, bytes)
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        if os.path.exists(os.path.join(path, b".git")):
            head = read_submodule_head(path)
            if head is None:
                return None
            return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
        return None

    if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
        blob = blob_from_path_and_stat(path, st)
        if object_store is not None:
            object_store.add_object(blob)
        return index_entry_from_stat(st, blob.id, 0)

    return None
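

# Usage sketch (editor's illustration): staging a single file by combining
# index_entry_from_path with an Index. The repository layout and file name
# are assumed; the object store is passed so the new blob actually gets
# stored.
#
#     from dulwich.repo import Repo
#
#     repo = Repo(".")
#     idx = Index(repo.index_path())
#     entry = index_entry_from_path(b"README.md", repo.object_store)
#     if entry is not None:
#         idx[b"README.md"] = entry
#         idx.write()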


def iter_fresh_entries(
    paths, root_path, object_store: Optional["BaseObjectStore"] = None
):
    """Iterate over current versions of index entries on disk.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      object_store: Optional store to save new blobs in

    Returns: Iterator over path, index_entry
    """
    for path in paths:
        p = _tree_to_fs_path(root_path, path)
        try:
            entry = index_entry_from_path(p, object_store=object_store)
        except (FileNotFoundError, IsADirectoryError):
            entry = None
        yield path, entry


def iter_fresh_blobs(index, root_path):
    """Iterate over versions of blobs on disk referenced by index.

    Don't use this function; it removes missing entries from index.

    Args:
      index: Index file
      root_path: Root path to access from

    Returns: Iterator over path, sha, mode
    """
    import warnings

    warnings.warn("Use iter_fresh_objects instead.", PendingDeprecationWarning)
    for entry in iter_fresh_objects(index, root_path, include_deleted=True):
        if entry[1] is None:
            del index[entry[0]]
        else:
            yield entry


def iter_fresh_objects(
    paths, root_path, include_deleted=False, object_store=None
):
    """Iterate over versions of objects on disk referenced by index.

    Args:
      paths: Paths to check
      root_path: Root path to access from
      include_deleted: Include deleted entries with sha and
        mode set to None
      object_store: Optional object store to report new items to

    Returns: Iterator over path, sha, mode
    """
    for path, entry in iter_fresh_entries(
        paths, root_path, object_store=object_store
    ):
        if entry is None:
            if include_deleted:
                yield path, None, None
        else:
            entry = IndexEntry(*entry)
            yield path, entry.sha, cleanup_mode(entry.mode)


def refresh_index(index, root_path):
    """Refresh the contents of an index.

    This is the equivalent of running 'git commit -a'.

    Args:
      index: Index to update
      root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        if entry:
            index[path] = entry
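

# Usage sketch (editor's illustration): refreshing every tracked path so the
# index matches the current working-tree contents, then writing it back out.
# Assumes the current directory is a Git work tree; note the byte-string
# root path, which _tree_to_fs_path joins with byte tree paths.
#
#     idx = Index(".git/index")
#     refresh_index(idx, b".")
#     idx.write()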