index.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Parser for the git index file format."""
  21. import collections
  22. import os
  23. import stat
  24. import struct
  25. import sys
  26. from dulwich.file import GitFile
  27. from dulwich.objects import (
  28. Blob,
  29. S_IFGITLINK,
  30. S_ISGITLINK,
  31. Tree,
  32. hex_to_sha,
  33. sha_to_hex,
  34. )
  35. from dulwich.pack import (
  36. SHA1Reader,
  37. SHA1Writer,
  38. )
# Record type for one index entry. The path is not part of the record; the
# readers in this module use it as the key the entry is stored under.
IndexEntry = collections.namedtuple(
    'IndexEntry', [
        'ctime', 'mtime', 'dev', 'ino', 'mode', 'uid', 'gid', 'size', 'sha',
        'flags'])


# Bit masks for the upper bits of the 16-bit flags field of an entry (the
# low 12 bits, 0x0fff, carry the length of the path name).
FLAG_STAGEMASK = 0x3000
FLAG_VALID = 0x8000
FLAG_EXTENDED = 0x4000
  46. def pathsplit(path):
  47. """Split a /-delimited path into a directory part and a basename.
  48. Args:
  49. path: The path to split.
  50. Returns:
  51. Tuple with directory name and basename
  52. """
  53. try:
  54. (dirname, basename) = path.rsplit(b"/", 1)
  55. except ValueError:
  56. return (b"", path)
  57. else:
  58. return (dirname, basename)
  59. def pathjoin(*args):
  60. """Join a /-delimited path.
  61. """
  62. return b"/".join([p for p in args if p])
  63. def read_cache_time(f):
  64. """Read a cache time.
  65. Args:
  66. f: File-like object to read from
  67. Returns:
  68. Tuple with seconds and nanoseconds
  69. """
  70. return struct.unpack(">LL", f.read(8))
  71. def write_cache_time(f, t):
  72. """Write a cache time.
  73. Args:
  74. f: File-like object to write to
  75. t: Time to write (as int, float or tuple with secs and nsecs)
  76. """
  77. if isinstance(t, int):
  78. t = (t, 0)
  79. elif isinstance(t, float):
  80. (secs, nsecs) = divmod(t, 1.0)
  81. t = (int(secs), int(nsecs * 1000000000))
  82. elif not isinstance(t, tuple):
  83. raise TypeError(t)
  84. f.write(struct.pack(">LL", *t))
  85. def read_cache_entry(f):
  86. """Read an entry from a cache file.
  87. Args:
  88. f: File-like object to read from
  89. Returns:
  90. tuple with: device, inode, mode, uid, gid, size, sha, flags
  91. """
  92. beginoffset = f.tell()
  93. ctime = read_cache_time(f)
  94. mtime = read_cache_time(f)
  95. (dev, ino, mode, uid, gid, size, sha, flags, ) = \
  96. struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  97. name = f.read((flags & 0x0fff))
  98. # Padding:
  99. real_size = ((f.tell() - beginoffset + 8) & ~7)
  100. f.read((beginoffset + real_size) - f.tell())
  101. return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
  102. sha_to_hex(sha), flags & ~0x0fff)
  103. def write_cache_entry(f, entry):
  104. """Write an index entry to a file.
  105. Args:
  106. f: File object
  107. entry: Entry to write, tuple with:
  108. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  109. """
  110. beginoffset = f.tell()
  111. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
  112. write_cache_time(f, ctime)
  113. write_cache_time(f, mtime)
  114. flags = len(name) | (flags & ~0x0fff)
  115. f.write(struct.pack(
  116. b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF,
  117. mode, uid, gid, size, hex_to_sha(sha), flags))
  118. f.write(name)
  119. real_size = ((f.tell() - beginoffset + 8) & ~7)
  120. f.write(b'\0' * ((beginoffset + real_size) - f.tell()))
  121. def read_index(f):
  122. """Read an index file, yielding the individual entries."""
  123. header = f.read(4)
  124. if header != b'DIRC':
  125. raise AssertionError("Invalid index file header: %r" % header)
  126. (version, num_entries) = struct.unpack(b'>LL', f.read(4 * 2))
  127. assert version in (1, 2)
  128. for i in range(num_entries):
  129. yield read_cache_entry(f)
  130. def read_index_dict(f):
  131. """Read an index file and return it as a dictionary.
  132. Args:
  133. f: File object to read from
  134. """
  135. ret = {}
  136. for x in read_index(f):
  137. ret[x[0]] = IndexEntry(*x[1:])
  138. return ret
  139. def write_index(f, entries):
  140. """Write an index file.
  141. Args:
  142. f: File-like object to write to
  143. entries: Iterable over the entries to write
  144. """
  145. f.write(b'DIRC')
  146. f.write(struct.pack(b'>LL', 2, len(entries)))
  147. for x in entries:
  148. write_cache_entry(f, x)
  149. def write_index_dict(f, entries):
  150. """Write an index file based on the contents of a dictionary.
  151. """
  152. entries_list = []
  153. for name in sorted(entries):
  154. entries_list.append((name,) + tuple(entries[name]))
  155. write_index(f, entries_list)
  156. def cleanup_mode(mode):
  157. """Cleanup a mode value.
  158. This will return a mode that can be stored in a tree object.
  159. Args:
  160. mode: Mode to clean up.
  161. """
  162. if stat.S_ISLNK(mode):
  163. return stat.S_IFLNK
  164. elif stat.S_ISDIR(mode):
  165. return stat.S_IFDIR
  166. elif S_ISGITLINK(mode):
  167. return S_IFGITLINK
  168. ret = stat.S_IFREG | 0o644
  169. if mode & 0o100:
  170. ret |= 0o111
  171. return ret
  172. class Index(object):
  173. """A Git Index file."""
  174. def __init__(self, filename):
  175. """Open an index file.
  176. Args:
  177. filename: Path to the index file
  178. """
  179. self._filename = filename
  180. self.clear()
  181. self.read()
  182. @property
  183. def path(self):
  184. return self._filename
  185. def __repr__(self):
  186. return "%s(%r)" % (self.__class__.__name__, self._filename)
  187. def write(self):
  188. """Write current contents of index to disk."""
  189. f = GitFile(self._filename, 'wb')
  190. try:
  191. f = SHA1Writer(f)
  192. write_index_dict(f, self._byname)
  193. finally:
  194. f.close()
  195. def read(self):
  196. """Read current contents of index from disk."""
  197. if not os.path.exists(self._filename):
  198. return
  199. f = GitFile(self._filename, 'rb')
  200. try:
  201. f = SHA1Reader(f)
  202. for x in read_index(f):
  203. self[x[0]] = IndexEntry(*x[1:])
  204. # FIXME: Additional data?
  205. f.read(os.path.getsize(self._filename)-f.tell()-20)
  206. f.check_sha()
  207. finally:
  208. f.close()
  209. def __len__(self):
  210. """Number of entries in this index file."""
  211. return len(self._byname)
  212. def __getitem__(self, name):
  213. """Retrieve entry by relative path.
  214. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
  215. flags)
  216. """
  217. return self._byname[name]
  218. def __iter__(self):
  219. """Iterate over the paths in this index."""
  220. return iter(self._byname)
  221. def get_sha1(self, path):
  222. """Return the (git object) SHA1 for the object at a path."""
  223. return self[path].sha
  224. def get_mode(self, path):
  225. """Return the POSIX file mode for the object at a path."""
  226. return self[path].mode
  227. def iterobjects(self):
  228. """Iterate over path, sha, mode tuples for use with commit_tree."""
  229. for path in self:
  230. entry = self[path]
  231. yield path, entry.sha, cleanup_mode(entry.mode)
  232. def iterblobs(self):
  233. import warnings
  234. warnings.warn('Use iterobjects() instead.', PendingDeprecationWarning)
  235. return self.iterobjects()
  236. def clear(self):
  237. """Remove all contents from this index."""
  238. self._byname = {}
  239. def __setitem__(self, name, x):
  240. assert isinstance(name, bytes)
  241. assert len(x) == 10
  242. # Remove the old entry if any
  243. self._byname[name] = IndexEntry(*x)
  244. def __delitem__(self, name):
  245. assert isinstance(name, bytes)
  246. del self._byname[name]
  247. def iteritems(self):
  248. return self._byname.items()
  249. def items(self):
  250. return self._byname.items()
  251. def update(self, entries):
  252. for name, value in entries.items():
  253. self[name] = value
  254. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  255. """Find the differences between the contents of this index and a tree.
  256. Args:
  257. object_store: Object store to use for retrieving tree contents
  258. tree: SHA1 of the root tree
  259. want_unchanged: Whether unchanged files should be reported
  260. Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
  261. newmode), (oldsha, newsha)
  262. """
  263. def lookup_entry(path):
  264. entry = self[path]
  265. return entry.sha, cleanup_mode(entry.mode)
  266. for (name, mode, sha) in changes_from_tree(
  267. self._byname.keys(), lookup_entry, object_store, tree,
  268. want_unchanged=want_unchanged):
  269. yield (name, mode, sha)
  270. def commit(self, object_store):
  271. """Create a new tree from an index.
  272. Args:
  273. object_store: Object store to save the tree in
  274. Returns:
  275. Root tree SHA
  276. """
  277. return commit_tree(object_store, self.iterobjects())
  278. def commit_tree(object_store, blobs):
  279. """Commit a new tree.
  280. Args:
  281. object_store: Object store to add trees to
  282. blobs: Iterable over blob path, sha, mode entries
  283. Returns:
  284. SHA1 of the created tree.
  285. """
  286. trees = {b'': {}}
  287. def add_tree(path):
  288. if path in trees:
  289. return trees[path]
  290. dirname, basename = pathsplit(path)
  291. t = add_tree(dirname)
  292. assert isinstance(basename, bytes)
  293. newtree = {}
  294. t[basename] = newtree
  295. trees[path] = newtree
  296. return newtree
  297. for path, sha, mode in blobs:
  298. tree_path, basename = pathsplit(path)
  299. tree = add_tree(tree_path)
  300. tree[basename] = (mode, sha)
  301. def build_tree(path):
  302. tree = Tree()
  303. for basename, entry in trees[path].items():
  304. if isinstance(entry, dict):
  305. mode = stat.S_IFDIR
  306. sha = build_tree(pathjoin(path, basename))
  307. else:
  308. (mode, sha) = entry
  309. tree.add(basename, mode, sha)
  310. object_store.add_object(tree)
  311. return tree.id
  312. return build_tree(b'')
  313. def commit_index(object_store, index):
  314. """Create a new tree from an index.
  315. Args:
  316. object_store: Object store to save the tree in
  317. index: Index file
  318. Note: This function is deprecated, use index.commit() instead.
  319. Returns: Root tree sha.
  320. """
  321. return commit_tree(object_store, index.iterobjects())
  322. def changes_from_tree(names, lookup_entry, object_store, tree,
  323. want_unchanged=False):
  324. """Find the differences between the contents of a tree and
  325. a working copy.
  326. Args:
  327. names: Iterable of names in the working copy
  328. lookup_entry: Function to lookup an entry in the working copy
  329. object_store: Object store to use for retrieving tree contents
  330. tree: SHA1 of the root tree, or None for an empty tree
  331. want_unchanged: Whether unchanged files should be reported
  332. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  333. (oldsha, newsha)
  334. """
  335. # TODO(jelmer): Support a include_trees option
  336. other_names = set(names)
  337. if tree is not None:
  338. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  339. try:
  340. (other_sha, other_mode) = lookup_entry(name)
  341. except KeyError:
  342. # Was removed
  343. yield ((name, None), (mode, None), (sha, None))
  344. else:
  345. other_names.remove(name)
  346. if (want_unchanged or other_sha != sha or other_mode != mode):
  347. yield ((name, name), (mode, other_mode), (sha, other_sha))
  348. # Mention added files
  349. for name in other_names:
  350. try:
  351. (other_sha, other_mode) = lookup_entry(name)
  352. except KeyError:
  353. pass
  354. else:
  355. yield ((None, name), (None, other_mode), (None, other_sha))
  356. def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
  357. """Create a new index entry from a stat value.
  358. Args:
  359. stat_val: POSIX stat_result instance
  360. hex_sha: Hex sha of the object
  361. flags: Index flags
  362. """
  363. if mode is None:
  364. mode = cleanup_mode(stat_val.st_mode)
  365. return IndexEntry(
  366. stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev,
  367. stat_val.st_ino, mode, stat_val.st_uid,
  368. stat_val.st_gid, stat_val.st_size, hex_sha, flags)
  369. def build_file_from_blob(blob, mode, target_path, honor_filemode=True,
  370. tree_encoding='utf-8'):
  371. """Build a file or symlink on disk based on a Git object.
  372. Args:
  373. obj: The git object
  374. mode: File mode
  375. target_path: Path to write to
  376. honor_filemode: An optional flag to honor core.filemode setting in
  377. config file, default is core.filemode=True, change executable bit
  378. Returns: stat object for the file
  379. """
  380. try:
  381. oldstat = os.lstat(target_path)
  382. except FileNotFoundError:
  383. oldstat = None
  384. contents = blob.as_raw_string()
  385. if stat.S_ISLNK(mode):
  386. # FIXME: This will fail on Windows. What should we do instead?
  387. if oldstat:
  388. os.unlink(target_path)
  389. if sys.platform == 'win32':
  390. # os.readlink on Python3 on Windows requires a unicode string.
  391. contents = contents.decode(tree_encoding)
  392. target_path = target_path.decode(tree_encoding)
  393. os.symlink(contents, target_path)
  394. else:
  395. if oldstat is not None and oldstat.st_size == len(contents):
  396. with open(target_path, 'rb') as f:
  397. if f.read() == contents:
  398. return oldstat
  399. with open(target_path, 'wb') as f:
  400. # Write out file
  401. f.write(contents)
  402. if honor_filemode:
  403. os.chmod(target_path, mode)
  404. return os.lstat(target_path)
  405. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  406. def validate_path_element_default(element):
  407. return element.lower() not in INVALID_DOTNAMES
  408. def validate_path_element_ntfs(element):
  409. stripped = element.rstrip(b". ").lower()
  410. if stripped in INVALID_DOTNAMES:
  411. return False
  412. if stripped == b"git~1":
  413. return False
  414. return True
  415. def validate_path(path, element_validator=validate_path_element_default):
  416. """Default path validator that just checks for .git/."""
  417. parts = path.split(b"/")
  418. for p in parts:
  419. if not element_validator(p):
  420. return False
  421. else:
  422. return True
  423. def build_index_from_tree(root_path, index_path, object_store, tree_id,
  424. honor_filemode=True,
  425. validate_path_element=validate_path_element_default):
  426. """Generate and materialize index from a tree
  427. Args:
  428. tree_id: Tree to materialize
  429. root_path: Target dir for materialized index files
  430. index_path: Target path for generated index
  431. object_store: Non-empty object store holding tree contents
  432. honor_filemode: An optional flag to honor core.filemode setting in
  433. config file, default is core.filemode=True, change executable bit
  434. validate_path_element: Function to validate path elements to check
  435. out; default just refuses .git and .. directories.
  436. Note: existing index is wiped and contents are not merged
  437. in a working dir. Suitable only for fresh clones.
  438. """
  439. index = Index(index_path)
  440. if not isinstance(root_path, bytes):
  441. root_path = os.fsencode(root_path)
  442. for entry in object_store.iter_tree_contents(tree_id):
  443. if not validate_path(entry.path, validate_path_element):
  444. continue
  445. full_path = _tree_to_fs_path(root_path, entry.path)
  446. if not os.path.exists(os.path.dirname(full_path)):
  447. os.makedirs(os.path.dirname(full_path))
  448. # TODO(jelmer): Merge new index into working tree
  449. if S_ISGITLINK(entry.mode):
  450. if not os.path.isdir(full_path):
  451. os.mkdir(full_path)
  452. st = os.lstat(full_path)
  453. # TODO(jelmer): record and return submodule paths
  454. else:
  455. obj = object_store[entry.sha]
  456. try:
  457. st = build_file_from_blob(
  458. obj, entry.mode, full_path, honor_filemode=honor_filemode)
  459. except OSError as e:
  460. if e.errno == 92 and sys.platform == 'darwin':
  461. # Our filename isn't supported by the platform :(
  462. import warnings
  463. warnings.warn(
  464. 'Unable to write ile %s: %s', full_path, e.strerror)
  465. continue
  466. else:
  467. raise
  468. # Add file to index
  469. if not honor_filemode or S_ISGITLINK(entry.mode):
  470. # we can not use tuple slicing to build a new tuple,
  471. # because on windows that will convert the times to
  472. # longs, which causes errors further along
  473. st_tuple = (entry.mode, st.st_ino, st.st_dev, st.st_nlink,
  474. st.st_uid, st.st_gid, st.st_size, st.st_atime,
  475. st.st_mtime, st.st_ctime)
  476. st = st.__class__(st_tuple)
  477. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  478. index.write()
  479. def blob_from_path_and_stat(fs_path, st, tree_encoding='utf-8'):
  480. """Create a blob from a path and a stat object.
  481. Args:
  482. fs_path: Full file system path to file
  483. st: A stat object
  484. Returns: A `Blob` object
  485. """
  486. assert isinstance(fs_path, bytes)
  487. blob = Blob()
  488. if stat.S_ISLNK(st.st_mode):
  489. if sys.platform == 'win32':
  490. # os.readlink on Python3 on Windows requires a unicode string.
  491. fs_path = os.fsdecode(fs_path)
  492. blob.data = os.readlink(fs_path).encode(tree_encoding)
  493. else:
  494. blob.data = os.readlink(fs_path)
  495. else:
  496. with open(fs_path, 'rb') as f:
  497. blob.data = f.read()
  498. return blob
  499. def read_submodule_head(path):
  500. """Read the head commit of a submodule.
  501. Args:
  502. path: path to the submodule
  503. Returns: HEAD sha, None if not a valid head/repository
  504. """
  505. from dulwich.errors import NotGitRepository
  506. from dulwich.repo import Repo
  507. # Repo currently expects a "str", so decode if necessary.
  508. # TODO(jelmer): Perhaps move this into Repo() ?
  509. if not isinstance(path, str):
  510. path = os.fsdecode(path)
  511. try:
  512. repo = Repo(path)
  513. except NotGitRepository:
  514. return None
  515. try:
  516. return repo.head()
  517. except KeyError:
  518. return None
  519. def _has_directory_changed(tree_path, entry):
  520. """Check if a directory has changed after getting an error.
  521. When handling an error trying to create a blob from a path, call this
  522. function. It will check if the path is a directory. If it's a directory
  523. and a submodule, check the submodule head to see if it's has changed. If
  524. not, consider the file as changed as Git tracked a file and not a
  525. directory.
  526. Return true if the given path should be considered as changed and False
  527. otherwise or if the path is not a directory.
  528. """
  529. # This is actually a directory
  530. if os.path.exists(os.path.join(tree_path, b'.git')):
  531. # Submodule
  532. head = read_submodule_head(tree_path)
  533. if entry.sha != head:
  534. return True
  535. else:
  536. # The file was changed to a directory, so consider it removed.
  537. return True
  538. return False
  539. def get_unstaged_changes(index, root_path, filter_blob_callback=None):
  540. """Walk through an index and check for differences against working tree.
  541. Args:
  542. index: index to check
  543. root_path: path in which to find files
  544. Returns: iterator over paths with unstaged changes
  545. """
  546. # For each entry in the index check the sha1 & ensure not staged
  547. if not isinstance(root_path, bytes):
  548. root_path = os.fsencode(root_path)
  549. for tree_path, entry in index.iteritems():
  550. full_path = _tree_to_fs_path(root_path, tree_path)
  551. try:
  552. st = os.lstat(full_path)
  553. if stat.S_ISDIR(st.st_mode):
  554. if _has_directory_changed(tree_path, entry):
  555. yield tree_path
  556. continue
  557. if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
  558. continue
  559. blob = blob_from_path_and_stat(full_path, st)
  560. if filter_blob_callback is not None:
  561. blob = filter_blob_callback(blob, tree_path)
  562. except FileNotFoundError:
  563. # The file was removed, so we assume that counts as
  564. # different from whatever file used to exist.
  565. yield tree_path
  566. else:
  567. if blob.id != entry.sha:
  568. yield tree_path
  569. os_sep_bytes = os.sep.encode('ascii')
  570. def _tree_to_fs_path(root_path, tree_path):
  571. """Convert a git tree path to a file system path.
  572. Args:
  573. root_path: Root filesystem path
  574. tree_path: Git tree path as bytes
  575. Returns: File system path.
  576. """
  577. assert isinstance(tree_path, bytes)
  578. if os_sep_bytes != b'/':
  579. sep_corrected_path = tree_path.replace(b'/', os_sep_bytes)
  580. else:
  581. sep_corrected_path = tree_path
  582. return os.path.join(root_path, sep_corrected_path)
  583. def _fs_to_tree_path(fs_path, fs_encoding=None):
  584. """Convert a file system path to a git tree path.
  585. Args:
  586. fs_path: File system path.
  587. fs_encoding: File system encoding
  588. Returns: Git tree path as bytes
  589. """
  590. if not isinstance(fs_path, bytes):
  591. fs_path_bytes = fs_path.encode(fs_encoding)
  592. else:
  593. fs_path_bytes = fs_path
  594. if os_sep_bytes != b'/':
  595. tree_path = fs_path_bytes.replace(os_sep_bytes, b'/')
  596. else:
  597. tree_path = fs_path_bytes
  598. return tree_path
  599. def index_entry_from_path(path, object_store=None):
  600. """Create an index from a filesystem path.
  601. This returns an index value for files, symlinks
  602. and tree references. for directories and
  603. non-existant files it returns None
  604. Args:
  605. path: Path to create an index entry for
  606. object_store: Optional object store to
  607. save new blobs in
  608. Returns: An index entry; None for directories
  609. """
  610. assert isinstance(path, bytes)
  611. st = os.lstat(path)
  612. if stat.S_ISDIR(st.st_mode):
  613. if os.path.exists(os.path.join(path, b'.git')):
  614. head = read_submodule_head(path)
  615. if head is None:
  616. return None
  617. return index_entry_from_stat(
  618. st, head, 0, mode=S_IFGITLINK)
  619. return None
  620. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  621. blob = blob_from_path_and_stat(path, st)
  622. if object_store is not None:
  623. object_store.add_object(blob)
  624. return index_entry_from_stat(st, blob.id, 0)
  625. return None
  626. def iter_fresh_entries(paths, root_path, object_store=None):
  627. """Iterate over current versions of index entries on disk.
  628. Args:
  629. paths: Paths to iterate over
  630. root_path: Root path to access from
  631. store: Optional store to save new blobs in
  632. Returns: Iterator over path, index_entry
  633. """
  634. for path in paths:
  635. p = _tree_to_fs_path(root_path, path)
  636. try:
  637. entry = index_entry_from_path(p, object_store=object_store)
  638. except (FileNotFoundError, IsADirectoryError):
  639. entry = None
  640. yield path, entry
  641. def iter_fresh_blobs(index, root_path):
  642. """Iterate over versions of blobs on disk referenced by index.
  643. Don't use this function; it removes missing entries from index.
  644. Args:
  645. index: Index file
  646. root_path: Root path to access from
  647. include_deleted: Include deleted entries with sha and
  648. mode set to None
  649. Returns: Iterator over path, sha, mode
  650. """
  651. import warnings
  652. warnings.warn(PendingDeprecationWarning,
  653. "Use iter_fresh_objects instead.")
  654. for entry in iter_fresh_objects(
  655. index, root_path, include_deleted=True):
  656. if entry[1] is None:
  657. del index[entry[0]]
  658. else:
  659. yield entry
  660. def iter_fresh_objects(paths, root_path, include_deleted=False,
  661. object_store=None):
  662. """Iterate over versions of objecs on disk referenced by index.
  663. Args:
  664. index: Index file
  665. root_path: Root path to access from
  666. include_deleted: Include deleted entries with sha and
  667. mode set to None
  668. object_store: Optional object store to report new items to
  669. Returns: Iterator over path, sha, mode
  670. """
  671. for path, entry in iter_fresh_entries(paths, root_path,
  672. object_store=object_store):
  673. if entry is None:
  674. if include_deleted:
  675. yield path, None, None
  676. else:
  677. entry = IndexEntry(*entry)
  678. yield path, entry.sha, cleanup_mode(entry.mode)
  679. def refresh_index(index, root_path):
  680. """Refresh the contents of an index.
  681. This is the equivalent to running 'git commit -a'.
  682. Args:
  683. index: Index to update
  684. root_path: Root filesystem path
  685. """
  686. for path, entry in iter_fresh_entries(index, root_path):
  687. index[path] = path