
# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parser for the git index file format."""

import collections
import os
import stat
import struct
import sys
from typing import (
    Any,
    BinaryIO,
    Callable,
    Dict,
    List,
    Optional,
    TYPE_CHECKING,
    Iterable,
    Iterator,
    Tuple,
)

if TYPE_CHECKING:
    from dulwich.object_store import BaseObjectStore

from dulwich.file import GitFile
from dulwich.objects import (
    Blob,
    S_IFGITLINK,
    S_ISGITLINK,
    Tree,
    hex_to_sha,
    sha_to_hex,
)
from dulwich.pack import (
    SHA1Reader,
    SHA1Writer,
)

# TODO(jelmer): Switch to dataclass?
IndexEntry = collections.namedtuple(
    "IndexEntry",
    [
        "ctime",
        "mtime",
        "dev",
        "ino",
        "mode",
        "uid",
        "gid",
        "size",
        "sha",
        "flags",
        "extended_flags",
    ],
)

# 2-bit stage (during merge)
FLAG_STAGEMASK = 0x3000

# assume-valid
FLAG_VALID = 0x8000

# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000

# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

DEFAULT_VERSION = 2


def pathsplit(path):
    """Split a /-delimited path into a directory part and a basename.

    Args:
      path: The path to split.
    Returns:
      Tuple with directory name and basename
    """
    try:
        (dirname, basename) = path.rsplit(b"/", 1)
    except ValueError:
        return (b"", path)
    else:
        return (dirname, basename)


def pathjoin(*args):
    """Join a /-delimited path."""
    return b"/".join([p for p in args if p])

def read_cache_time(f):
    """Read a cache time.

    Args:
      f: File-like object to read from
    Returns:
      Tuple with seconds and nanoseconds
    """
    return struct.unpack(">LL", f.read(8))


def write_cache_time(f, t):
    """Write a cache time.

    Args:
      f: File-like object to write to
      t: Time to write (as int, float or tuple with secs and nsecs)
    """
    if isinstance(t, int):
        t = (t, 0)
    elif isinstance(t, float):
        (secs, nsecs) = divmod(t, 1.0)
        t = (int(secs), int(nsecs * 1000000000))
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))

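# Example (illustrative, not part of the original module): cache times are
# stored on disk as two big-endian 32-bit words, seconds then nanoseconds.
#
#   import io
#   buf = io.BytesIO()
#   write_cache_time(buf, 1234567890.5)
#   buf.seek(0)
#   read_cache_time(buf)   # -> (1234567890, 500000000)
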
def read_cache_entry(f, version: int) -> Tuple[bytes, IndexEntry]:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index version to read
    Returns:
      tuple with: name, IndexEntry
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError(
                'extended flag set in index with version < 3')
        (extended_flags, ) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    name = f.read((flags & 0x0FFF))
    # Padding:
    real_size = (f.tell() - beginoffset + 8) & ~7
    f.read((beginoffset + real_size) - f.tell())
    return (
        name,
        IndexEntry(
            ctime,
            mtime,
            dev,
            ino,
            mode,
            uid,
            gid,
            size,
            sha_to_hex(sha),
            flags & ~0x0FFF,
            extended_flags,
        ))


def write_cache_entry(f, name, entry, version=None):
    """Write an index entry to a file.

    Args:
      f: File object
      name: Entry name (path), as bytes
      entry: IndexEntry to write
      version: Index version
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    flags = len(name) | (entry.flags & ~0x0FFF)
    if entry.extended_flags:
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError('unable to use extended flags in version < 3')
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            entry.dev & 0xFFFFFFFF,
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    f.write(name)
    real_size = (f.tell() - beginoffset + 8) & ~7
    f.write(b"\0" * ((beginoffset + real_size) - f.tell()))


def read_index(f: BinaryIO):
    """Read an index file, yielding the individual entries."""
    header = f.read(4)
    if header != b"DIRC":
        raise AssertionError("Invalid index file header: %r" % header)
    (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
    assert version in (1, 2, 3), "index version is %r" % version
    for i in range(num_entries):
        yield read_cache_entry(f, version)


def read_index_dict(f):
    """Read an index file and return it as a dictionary.

    Args:
      f: File object to read from
    """
    ret = {}
    for name, entry in read_index(f):
        ret[name] = entry
    return ret


def write_index(
    f: BinaryIO,
    entries: List[Tuple[bytes, IndexEntry]],
    version: Optional[int] = None,
):
    """Write an index file.

    Args:
      f: File-like object to write to
      version: Version number to write
      entries: List of the entries to write
    """
    if version is None:
        version = DEFAULT_VERSION
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    for name, entry in entries:
        write_cache_entry(f, name, entry, version)


def write_index_dict(
    f: BinaryIO,
    entries: Dict[bytes, IndexEntry],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary."""
    entries_list = []
    for name in sorted(entries):
        entries_list.append((name, entries[name]))
    write_index(f, entries_list, version=version)

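# Example (illustrative, not part of the original module): round-tripping an
# index through the dict-based helpers.  The file locations are hypothetical.
#
#   with open(".git/index", "rb") as f:
#       entries = read_index_dict(f)
#   with open("/tmp/index-copy", "wb") as f:
#       write_index_dict(f, entries)
#
# These helpers handle only the entry records; the Index class below also
# verifies and writes the trailing SHA-1 checksum via SHA1Reader/SHA1Writer.
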
def cleanup_mode(mode: int) -> int:
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    Args:
      mode: Mode to clean up.
    Returns:
      mode
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    elif stat.S_ISDIR(mode):
        return stat.S_IFDIR
    elif S_ISGITLINK(mode):
        return S_IFGITLINK
    ret = stat.S_IFREG | 0o644
    if mode & 0o100:
        ret |= 0o111
    return ret

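# Example (illustrative, not part of the original module): git only stores a
# small set of modes in tree objects, so filesystem modes are normalized.
#
#   cleanup_mode(0o100644)  -> 0o100644        (regular file)
#   cleanup_mode(0o100640)  -> 0o100644        (group/other bits dropped)
#   cleanup_mode(0o100755)  -> 0o100755        (any owner-exec bit -> 755)
#   cleanup_mode(0o040755)  -> stat.S_IFDIR    (directories keep no permission bits)
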
class Index(object):
    """A Git Index file."""

    def __init__(self, filename):
        """Open an index file.

        Args:
          filename: Path to the index file
        """
        self._filename = filename
        # TODO(jelmer): Store the version returned by read_index
        self._version = None
        self.clear()
        self.read()

    @property
    def path(self):
        return self._filename

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._filename)

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            f = SHA1Writer(f)
            write_index_dict(f, self._byname, version=self._version)
        finally:
            f.close()

    def read(self):
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            f = SHA1Reader(f)
            for name, entry in read_index(f):
                self[name] = entry
            # FIXME: Additional data?
            f.read(os.path.getsize(self._filename) - f.tell() - 20)
            f.check_sha()
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, name: bytes) -> IndexEntry:
        """Retrieve entry by relative path.

        Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
            flags)
        """
        return self._byname[name]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths in this index."""
        return iter(self._byname)

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        return self[path].sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        return self[path].mode

    def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            yield path, entry.sha, cleanup_mode(entry.mode)

    def iterblobs(self):
        import warnings

        warnings.warn("Use iterobjects() instead.", PendingDeprecationWarning)
        return self.iterobjects()

    def clear(self):
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(self, name, x):
        assert isinstance(name, bytes)
        assert len(x) == len(IndexEntry._fields)
        # Remove the old entry if any
        self._byname[name] = IndexEntry(*x)

    def __delitem__(self, name):
        assert isinstance(name, bytes)
        del self._byname[name]

    def iteritems(self):
        return self._byname.items()

    def items(self):
        return self._byname.items()

    def update(self, entries):
        for name, value in entries.items():
            self[name] = value

    def changes_from_tree(self, object_store, tree, want_unchanged=False):
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path):
            entry = self[path]
            return entry.sha, cleanup_mode(entry.mode)

        for (name, mode, sha) in changes_from_tree(
            self._byname.keys(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        ):
            yield (name, mode, sha)

    def commit(self, object_store):
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())

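# Example (illustrative, not part of the original module): typical use of the
# Index class.  The repository path is hypothetical.
#
#   from dulwich.repo import Repo
#
#   repo = Repo("/path/to/repo")
#   index = Index(repo.index_path())
#   for path in index:
#       entry = index[path]
#       print(path, entry.sha, oct(entry.mode))
#   tree_id = index.commit(repo.object_store)   # build tree objects from the index
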
def commit_tree(
    object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries
    Returns:
      SHA1 of the created tree.
    """
    trees = {b"": {}}  # type: Dict[bytes, Any]

    def add_tree(path):
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path):
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")

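# Example (illustrative, not part of the original module): building a tree
# hierarchy directly from (path, sha, mode) triples.  The blob contents and
# store are just for demonstration.
#
#   from dulwich.object_store import MemoryObjectStore
#   from dulwich.objects import Blob
#
#   store = MemoryObjectStore()
#   blob = Blob.from_string(b"hello\n")
#   store.add_object(blob)
#   tree_id = commit_tree(store, [(b"docs/readme.txt", blob.id, 0o100644)])
#   # tree_id names a root tree containing a "docs" subtree with readme.txt.
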
def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes:
    """Create a new tree from an index.

    Args:
      object_store: Object store to save the tree in
      index: Index file
    Note: This function is deprecated, use index.commit() instead.
    Returns: Root tree sha.
    """
    return commit_tree(object_store, index.iterobjects())


def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], Tuple[bytes, int]],
    object_store: "BaseObjectStore",
    tree: Optional[bytes],
    want_unchanged=False,
) -> Iterable[
    Tuple[
        Tuple[Optional[bytes], Optional[bytes]],
        Tuple[Optional[int], Optional[int]],
        Tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and
    a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported
    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # TODO(jelmer): Support a include_trees option
    other_names = set(names)
    if tree is not None:
        for (name, mode, sha) in object_store.iter_tree_contents(tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))
    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))

def index_entry_from_stat(
    stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
    extended_flags: Optional[int] = None
):
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
      flags: Index flags
      mode: Optional mode override (derived from stat_val otherwise)
      extended_flags: Optional extended flags
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)

    return IndexEntry(
        stat_val.st_ctime,
        stat_val.st_mtime,
        stat_val.st_dev,
        stat_val.st_ino,
        mode,
        stat_val.st_uid,
        stat_val.st_gid,
        stat_val.st_size,
        hex_sha,
        flags,
        extended_flags
    )

def build_file_from_blob(
    blob, mode, target_path, honor_filemode=True, tree_encoding="utf-8"
):
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        # FIXME: This will fail on Windows. What should we do instead?
        if oldstat:
            os.unlink(target_path)
        if sys.platform == "win32":
            # os.symlink on Python3 on Windows requires a unicode string.
            contents = contents.decode(tree_encoding)
            target_path = target_path.decode(tree_encoding)
        os.symlink(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat
        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)
        if honor_filemode:
            os.chmod(target_path, mode)
    return os.lstat(target_path)

INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element):
    return element.lower() not in INVALID_DOTNAMES


def validate_path_element_ntfs(element):
    stripped = element.rstrip(b". ").lower()
    if stripped in INVALID_DOTNAMES:
        return False
    if stripped == b"git~1":
        return False
    return True


def validate_path(path, element_validator=validate_path_element_default):
    """Default path validator that just checks for .git/."""
    parts = path.split(b"/")
    for p in parts:
        if not element_validator(p):
            return False
    else:
        return True

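# Example (illustrative, not part of the original module): rejecting paths that
# could escape into the .git directory on checkout.
#
#   validate_path(b"src/main.py")                               -> True
#   validate_path(b".git/hooks/post-update")                    -> False
#   validate_path(b"GIT~1/config", validate_path_element_ntfs)  -> False
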
def build_index_from_tree(
    root_path,
    index_path,
    object_store,
    tree_id,
    honor_filemode=True,
    validate_path_element=validate_path_element_default,
):
    """Generate and materialize index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for entry in object_store.iter_tree_contents(tree_id):
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)

        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))

        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            st = build_file_from_blob(
                obj, entry.mode, full_path, honor_filemode=honor_filemode
            )

        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)

    index.write()

def blob_from_path_and_mode(fs_path, mode, tree_encoding="utf-8"):
    """Create a blob from a path and a file mode.

    Args:
      fs_path: Full file system path to file
      mode: File mode
    Returns: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(mode):
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            fs_path = os.fsdecode(fs_path)
            blob.data = os.readlink(fs_path).encode(tree_encoding)
        else:
            blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, "rb") as f:
            blob.data = f.read()
    return blob


def blob_from_path_and_stat(fs_path, st, tree_encoding="utf-8"):
    """Create a blob from a path and a stat object.

    Args:
      fs_path: Full file system path to file
      st: A stat object
    Returns: A `Blob` object
    """
    return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)

def read_submodule_head(path):
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule
    Returns: HEAD sha, None if not a valid head/repository
    """
    from dulwich.errors import NotGitRepository
    from dulwich.repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        return None


def _has_directory_changed(tree_path, entry):
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it is a directory
    and a submodule, check the submodule head to see if it has changed. If
    it is not a submodule, consider the file as changed, as Git tracked a
    file and not a directory.

    Return True if the given path should be considered as changed and False
    otherwise or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True

    return False

def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None):
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue

            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue

            blob = blob_from_path_and_stat(full_path, st)

            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path

os_sep_bytes = os.sep.encode("ascii")


def _tree_to_fs_path(root_path, tree_path: bytes):
    """Convert a git tree path to a file system path.

    Args:
      root_path: Root filesystem path
      tree_path: Git tree path as bytes
    Returns: File system path.
    """
    assert isinstance(tree_path, bytes)
    if os_sep_bytes != b"/":
        sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
    else:
        sep_corrected_path = tree_path
    return os.path.join(root_path, sep_corrected_path)


def _fs_to_tree_path(fs_path):
    """Convert a file system path to a git tree path.

    Args:
      fs_path: File system path.
    Returns: Git tree path as bytes
    """
    if not isinstance(fs_path, bytes):
        fs_path_bytes = os.fsencode(fs_path)
    else:
        fs_path_bytes = fs_path
    if os_sep_bytes != b"/":
        tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
    else:
        tree_path = fs_path_bytes
    return tree_path

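# Example (illustrative, not part of the original module): on Windows, where
# os.sep is "\\", tree paths and filesystem paths differ; on POSIX they match.
#
#   _tree_to_fs_path(b"C:\\repo", b"src/main.py")  # -> b"C:\\repo\\src\\main.py" on Windows
#   _fs_to_tree_path(b"src\\main.py")              # -> b"src/main.py" on Windows
#   _fs_to_tree_path("src/main.py")                # str input is os.fsencode()d first
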
def index_entry_from_path(path, object_store=None):
    """Create an index entry from a filesystem path.

    This returns an index value for files, symlinks
    and tree references. For directories and
    non-existent files it returns None.

    Args:
      path: Path to create an index entry for
      object_store: Optional object store to
        save new blobs in
    Returns: An index entry; None for directories
    """
    assert isinstance(path, bytes)
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        if os.path.exists(os.path.join(path, b".git")):
            head = read_submodule_head(path)
            if head is None:
                return None
            return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
        return None

    if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
        blob = blob_from_path_and_stat(path, st)
        if object_store is not None:
            object_store.add_object(blob)
        return index_entry_from_stat(st, blob.id, 0)

    return None


def iter_fresh_entries(
    paths, root_path, object_store: Optional["BaseObjectStore"] = None
):
    """Iterate over current versions of index entries on disk.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      object_store: Optional store to save new blobs in
    Returns: Iterator over path, index_entry
    """
    for path in paths:
        p = _tree_to_fs_path(root_path, path)
        try:
            entry = index_entry_from_path(p, object_store=object_store)
        except (FileNotFoundError, IsADirectoryError):
            entry = None
        yield path, entry

def iter_fresh_blobs(index, root_path):
    """Iterate over versions of blobs on disk referenced by index.

    Don't use this function; it removes missing entries from index.

    Args:
      index: Index file
      root_path: Root path to access from
    Returns: Iterator over path, sha, mode
    """
    import warnings

    warnings.warn("Use iter_fresh_objects instead.", PendingDeprecationWarning)
    for entry in iter_fresh_objects(index, root_path, include_deleted=True):
        if entry[1] is None:
            del index[entry[0]]
        else:
            yield entry

def iter_fresh_objects(paths, root_path, include_deleted=False, object_store=None):
    """Iterate over versions of objects on disk referenced by index.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      include_deleted: Include deleted entries with sha and
        mode set to None
      object_store: Optional object store to report new items to
    Returns: Iterator over path, sha, mode
    """
    for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store):
        if entry is None:
            if include_deleted:
                yield path, None, None
        else:
            entry = IndexEntry(*entry)
            yield path, entry.sha, cleanup_mode(entry.mode)

def refresh_index(index, root_path):
    """Refresh the contents of an index.

    This is equivalent to running 'git commit -a'.

    Args:
      index: Index to update
      root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        if entry:
            index[path] = entry
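
# Example (illustrative, not part of the original module): refreshing an index
# from the working tree and listing paths with unstaged changes.  The
# repository path is hypothetical; root_path is passed as bytes so that the
# path-conversion helpers above receive a consistent type.
#
#   from dulwich.repo import Repo
#
#   repo = Repo("/path/to/repo")
#   index = repo.open_index()
#   refresh_index(index, os.fsencode(repo.path))
#   index.write()
#   for path in get_unstaged_changes(index, repo.path):
#       print("modified:", path)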