index.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Parser for the git index file format."""
  21. import collections
  22. import os
  23. import stat
  24. import struct
  25. import sys
  26. from typing import (Any, BinaryIO, Callable, Dict, Iterable, Iterator, List,
  27. Optional, Tuple, Union)
  28. from .file import GitFile
  29. from .object_store import iter_tree_contents
  30. from .objects import (S_IFGITLINK, S_ISGITLINK, Blob, ObjectID, Tree,
  31. hex_to_sha, sha_to_hex)
  32. from .pack import ObjectContainer, SHA1Reader, SHA1Writer
  33. # TODO(jelmer): Switch to dataclass?
  34. IndexEntry = collections.namedtuple(
  35. "IndexEntry",
  36. [
  37. "ctime",
  38. "mtime",
  39. "dev",
  40. "ino",
  41. "mode",
  42. "uid",
  43. "gid",
  44. "size",
  45. "sha",
  46. "flags",
  47. "extended_flags",
  48. ],
  49. )
  50. # 2-bit stage (during merge)
  51. FLAG_STAGEMASK = 0x3000
  52. # assume-valid
  53. FLAG_VALID = 0x8000
  54. # extended flag (must be zero in version 2)
  55. FLAG_EXTENDED = 0x4000
  56. # used by sparse checkout
  57. EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
  58. # used by "git add -N"
  59. EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
  60. DEFAULT_VERSION = 2
  61. def pathsplit(path: bytes) -> Tuple[bytes, bytes]:
  62. """Split a /-delimited path into a directory part and a basename.
  63. Args:
  64. path: The path to split.
  65. Returns:
  66. Tuple with directory name and basename
  67. """
  68. try:
  69. (dirname, basename) = path.rsplit(b"/", 1)
  70. except ValueError:
  71. return (b"", path)
  72. else:
  73. return (dirname, basename)
  74. def pathjoin(*args):
  75. """Join a /-delimited path."""
  76. return b"/".join([p for p in args if p])
  77. def read_cache_time(f):
  78. """Read a cache time.
  79. Args:
  80. f: File-like object to read from
  81. Returns:
  82. Tuple with seconds and nanoseconds
  83. """
  84. return struct.unpack(">LL", f.read(8))
  85. def write_cache_time(f, t):
  86. """Write a cache time.
  87. Args:
  88. f: File-like object to write to
  89. t: Time to write (as int, float or tuple with secs and nsecs)
  90. """
  91. if isinstance(t, int):
  92. t = (t, 0)
  93. elif isinstance(t, float):
  94. (secs, nsecs) = divmod(t, 1.0)
  95. t = (int(secs), int(nsecs * 1000000000))
  96. elif not isinstance(t, tuple):
  97. raise TypeError(t)
  98. f.write(struct.pack(">LL", *t))
  99. def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]:
  100. """Read an entry from a cache file.
  101. Args:
  102. f: File-like object to read from
  103. Returns:
  104. tuple with: name, IndexEntry
  105. """
  106. beginoffset = f.tell()
  107. ctime = read_cache_time(f)
  108. mtime = read_cache_time(f)
  109. (
  110. dev,
  111. ino,
  112. mode,
  113. uid,
  114. gid,
  115. size,
  116. sha,
  117. flags,
  118. ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  119. if flags & FLAG_EXTENDED:
  120. if version < 3:
  121. raise AssertionError(
  122. 'extended flag set in index with version < 3')
  123. (extended_flags, ) = struct.unpack(">H", f.read(2))
  124. else:
  125. extended_flags = 0
  126. name = f.read(flags & 0x0FFF)
  127. # Padding:
  128. if version < 4:
  129. real_size = (f.tell() - beginoffset + 8) & ~7
  130. f.read((beginoffset + real_size) - f.tell())
  131. return (
  132. name,
  133. IndexEntry(
  134. ctime,
  135. mtime,
  136. dev,
  137. ino,
  138. mode,
  139. uid,
  140. gid,
  141. size,
  142. sha_to_hex(sha),
  143. flags & ~0x0FFF,
  144. extended_flags,
  145. ))
  146. def write_cache_entry(f, name: bytes, entry: IndexEntry, version: int) -> None:
  147. """Write an index entry to a file.
  148. Args:
  149. f: File object
  150. entry: IndexEntry to write, tuple with:
  151. """
  152. beginoffset = f.tell()
  153. write_cache_time(f, entry.ctime)
  154. write_cache_time(f, entry.mtime)
  155. flags = len(name) | (entry.flags & ~0x0FFF)
  156. if entry.extended_flags:
  157. flags |= FLAG_EXTENDED
  158. if flags & FLAG_EXTENDED and version is not None and version < 3:
  159. raise AssertionError('unable to use extended flags in version < 3')
  160. f.write(
  161. struct.pack(
  162. b">LLLLLL20sH",
  163. entry.dev & 0xFFFFFFFF,
  164. entry.ino & 0xFFFFFFFF,
  165. entry.mode,
  166. entry.uid,
  167. entry.gid,
  168. entry.size,
  169. hex_to_sha(entry.sha),
  170. flags,
  171. )
  172. )
  173. if flags & FLAG_EXTENDED:
  174. f.write(struct.pack(b">H", entry.extended_flags))
  175. f.write(name)
  176. if version < 4:
  177. real_size = (f.tell() - beginoffset + 8) & ~7
  178. f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
  179. class UnsupportedIndexFormat(Exception):
  180. """An unsupported index format was encountered."""
  181. def __init__(self, version):
  182. self.index_format_version = version
  183. def read_index(f: BinaryIO):
  184. """Read an index file, yielding the individual entries."""
  185. header = f.read(4)
  186. if header != b"DIRC":
  187. raise AssertionError("Invalid index file header: %r" % header)
  188. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  189. if version not in (1, 2, 3):
  190. raise UnsupportedIndexFormat(version)
  191. for i in range(num_entries):
  192. yield read_cache_entry(f, version)
  193. def read_index_dict(f) -> Dict[bytes, IndexEntry]:
  194. """Read an index file and return it as a dictionary.
  195. Args:
  196. f: File object to read from
  197. """
  198. ret = {}
  199. for name, entry in read_index(f):
  200. ret[name] = entry
  201. return ret
  202. def write_index(f: BinaryIO, entries: List[Tuple[bytes, IndexEntry]], version: Optional[int] = None):
  203. """Write an index file.
  204. Args:
  205. f: File-like object to write to
  206. version: Version number to write
  207. entries: Iterable over the entries to write
  208. """
  209. if version is None:
  210. version = DEFAULT_VERSION
  211. f.write(b"DIRC")
  212. f.write(struct.pack(b">LL", version, len(entries)))
  213. for name, entry in entries:
  214. write_cache_entry(f, name, entry, version)
  215. def write_index_dict(
  216. f: BinaryIO,
  217. entries: Dict[bytes, IndexEntry],
  218. version: Optional[int] = None,
  219. ) -> None:
  220. """Write an index file based on the contents of a dictionary."""
  221. entries_list = []
  222. for name in sorted(entries):
  223. entries_list.append((name, entries[name]))
  224. write_index(f, entries_list, version=version)
  225. def cleanup_mode(mode: int) -> int:
  226. """Cleanup a mode value.
  227. This will return a mode that can be stored in a tree object.
  228. Args:
  229. mode: Mode to clean up.
  230. Returns:
  231. mode
  232. """
  233. if stat.S_ISLNK(mode):
  234. return stat.S_IFLNK
  235. elif stat.S_ISDIR(mode):
  236. return stat.S_IFDIR
  237. elif S_ISGITLINK(mode):
  238. return S_IFGITLINK
  239. ret = stat.S_IFREG | 0o644
  240. if mode & 0o100:
  241. ret |= 0o111
  242. return ret
  243. class Index:
  244. """A Git Index file."""
  245. def __init__(self, filename: Union[bytes, str], read=True):
  246. """Create an index object associated with the given filename.
  247. Args:
  248. filename: Path to the index file
  249. read: Whether to initialize the index from the given file, should it exist.
  250. """
  251. self._filename = filename
  252. # TODO(jelmer): Store the version returned by read_index
  253. self._version = None
  254. self.clear()
  255. if read:
  256. self.read()
  257. @property
  258. def path(self):
  259. return self._filename
  260. def __repr__(self):
  261. return "{}({!r})".format(self.__class__.__name__, self._filename)
  262. def write(self) -> None:
  263. """Write current contents of index to disk."""
  264. f = GitFile(self._filename, "wb")
  265. try:
  266. f = SHA1Writer(f)
  267. write_index_dict(f, self._byname, version=self._version)
  268. finally:
  269. f.close()
  270. def read(self):
  271. """Read current contents of index from disk."""
  272. if not os.path.exists(self._filename):
  273. return
  274. f = GitFile(self._filename, "rb")
  275. try:
  276. f = SHA1Reader(f)
  277. for name, entry in read_index(f):
  278. self[name] = entry
  279. # FIXME: Additional data?
  280. f.read(os.path.getsize(self._filename) - f.tell() - 20)
  281. f.check_sha()
  282. finally:
  283. f.close()
  284. def __len__(self) -> int:
  285. """Number of entries in this index file."""
  286. return len(self._byname)
  287. def __getitem__(self, name: bytes) -> IndexEntry:
  288. """Retrieve entry by relative path.
  289. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
  290. flags)
  291. """
  292. return self._byname[name]
  293. def __iter__(self) -> Iterator[bytes]:
  294. """Iterate over the paths in this index."""
  295. return iter(self._byname)
  296. def get_sha1(self, path: bytes) -> bytes:
  297. """Return the (git object) SHA1 for the object at a path."""
  298. return self[path].sha
  299. def get_mode(self, path: bytes) -> int:
  300. """Return the POSIX file mode for the object at a path."""
  301. return self[path].mode
  302. def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
  303. """Iterate over path, sha, mode tuples for use with commit_tree."""
  304. for path in self:
  305. entry = self[path]
  306. yield path, entry.sha, cleanup_mode(entry.mode)
  307. def clear(self):
  308. """Remove all contents from this index."""
  309. self._byname = {}
  310. def __setitem__(self, name: bytes, x: IndexEntry):
  311. assert isinstance(name, bytes)
  312. assert len(x) == len(IndexEntry._fields)
  313. # Remove the old entry if any
  314. self._byname[name] = IndexEntry(*x)
  315. def __delitem__(self, name: bytes):
  316. assert isinstance(name, bytes)
  317. del self._byname[name]
  318. def iteritems(self) -> Iterator[Tuple[bytes, IndexEntry]]:
  319. return self._byname.items()
  320. def items(self) -> Iterator[Tuple[bytes, IndexEntry]]:
  321. return self._byname.items()
  322. def update(self, entries: Dict[bytes, IndexEntry]):
  323. for name, value in entries.items():
  324. self[name] = value
  325. def changes_from_tree(
  326. self, object_store, tree: ObjectID, want_unchanged: bool = False):
  327. """Find the differences between the contents of this index and a tree.
  328. Args:
  329. object_store: Object store to use for retrieving tree contents
  330. tree: SHA1 of the root tree
  331. want_unchanged: Whether unchanged files should be reported
  332. Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
  333. newmode), (oldsha, newsha)
  334. """
  335. def lookup_entry(path):
  336. entry = self[path]
  337. return entry.sha, cleanup_mode(entry.mode)
  338. yield from changes_from_tree(
  339. self._byname.keys(),
  340. lookup_entry,
  341. object_store,
  342. tree,
  343. want_unchanged=want_unchanged,
  344. )
  345. def commit(self, object_store):
  346. """Create a new tree from an index.
  347. Args:
  348. object_store: Object store to save the tree in
  349. Returns:
  350. Root tree SHA
  351. """
  352. return commit_tree(object_store, self.iterobjects())
  353. def commit_tree(
  354. object_store: ObjectContainer, blobs: Iterable[Tuple[bytes, bytes, int]]
  355. ) -> bytes:
  356. """Commit a new tree.
  357. Args:
  358. object_store: Object store to add trees to
  359. blobs: Iterable over blob path, sha, mode entries
  360. Returns:
  361. SHA1 of the created tree.
  362. """
  363. trees: Dict[bytes, Any] = {b"": {}}
  364. def add_tree(path):
  365. if path in trees:
  366. return trees[path]
  367. dirname, basename = pathsplit(path)
  368. t = add_tree(dirname)
  369. assert isinstance(basename, bytes)
  370. newtree = {}
  371. t[basename] = newtree
  372. trees[path] = newtree
  373. return newtree
  374. for path, sha, mode in blobs:
  375. tree_path, basename = pathsplit(path)
  376. tree = add_tree(tree_path)
  377. tree[basename] = (mode, sha)
  378. def build_tree(path):
  379. tree = Tree()
  380. for basename, entry in trees[path].items():
  381. if isinstance(entry, dict):
  382. mode = stat.S_IFDIR
  383. sha = build_tree(pathjoin(path, basename))
  384. else:
  385. (mode, sha) = entry
  386. tree.add(basename, mode, sha)
  387. object_store.add_object(tree)
  388. return tree.id
  389. return build_tree(b"")
  390. def commit_index(object_store: ObjectContainer, index: Index) -> bytes:
  391. """Create a new tree from an index.
  392. Args:
  393. object_store: Object store to save the tree in
  394. index: Index file
  395. Note: This function is deprecated, use index.commit() instead.
  396. Returns: Root tree sha.
  397. """
  398. return commit_tree(object_store, index.iterobjects())
  399. def changes_from_tree(
  400. names: Iterable[bytes],
  401. lookup_entry: Callable[[bytes], Tuple[bytes, int]],
  402. object_store: ObjectContainer,
  403. tree: Optional[bytes],
  404. want_unchanged=False,
  405. ) -> Iterable[
  406. Tuple[
  407. Tuple[Optional[bytes], Optional[bytes]],
  408. Tuple[Optional[int], Optional[int]],
  409. Tuple[Optional[bytes], Optional[bytes]],
  410. ]
  411. ]:
  412. """Find the differences between the contents of a tree and
  413. a working copy.
  414. Args:
  415. names: Iterable of names in the working copy
  416. lookup_entry: Function to lookup an entry in the working copy
  417. object_store: Object store to use for retrieving tree contents
  418. tree: SHA1 of the root tree, or None for an empty tree
  419. want_unchanged: Whether unchanged files should be reported
  420. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  421. (oldsha, newsha)
  422. """
  423. # TODO(jelmer): Support a include_trees option
  424. other_names = set(names)
  425. if tree is not None:
  426. for (name, mode, sha) in iter_tree_contents(object_store, tree):
  427. try:
  428. (other_sha, other_mode) = lookup_entry(name)
  429. except KeyError:
  430. # Was removed
  431. yield ((name, None), (mode, None), (sha, None))
  432. else:
  433. other_names.remove(name)
  434. if want_unchanged or other_sha != sha or other_mode != mode:
  435. yield ((name, name), (mode, other_mode), (sha, other_sha))
  436. # Mention added files
  437. for name in other_names:
  438. try:
  439. (other_sha, other_mode) = lookup_entry(name)
  440. except KeyError:
  441. pass
  442. else:
  443. yield ((None, name), (None, other_mode), (None, other_sha))
  444. def index_entry_from_stat(
  445. stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
  446. extended_flags: Optional[int] = None
  447. ):
  448. """Create a new index entry from a stat value.
  449. Args:
  450. stat_val: POSIX stat_result instance
  451. hex_sha: Hex sha of the object
  452. flags: Index flags
  453. """
  454. if mode is None:
  455. mode = cleanup_mode(stat_val.st_mode)
  456. return IndexEntry(
  457. stat_val.st_ctime,
  458. stat_val.st_mtime,
  459. stat_val.st_dev,
  460. stat_val.st_ino,
  461. mode,
  462. stat_val.st_uid,
  463. stat_val.st_gid,
  464. stat_val.st_size,
  465. hex_sha,
  466. flags,
  467. extended_flags
  468. )
  469. if sys.platform == 'win32':
  470. # On Windows, creating symlinks either requires administrator privileges
  471. # or developer mode. Raise a more helpful error when we're unable to
  472. # create symlinks
  473. # https://github.com/jelmer/dulwich/issues/1005
  474. class WindowsSymlinkPermissionError(PermissionError):
  475. def __init__(self, errno, msg, filename):
  476. super(PermissionError, self).__init__(
  477. errno, "Unable to create symlink; "
  478. "do you have developer mode enabled? %s" % msg,
  479. filename)
  480. def symlink(src, dst, target_is_directory=False, *, dir_fd=None):
  481. try:
  482. return os.symlink(
  483. src, dst, target_is_directory=target_is_directory,
  484. dir_fd=dir_fd)
  485. except PermissionError as e:
  486. raise WindowsSymlinkPermissionError(
  487. e.errno, e.strerror, e.filename) from e
  488. else:
  489. symlink = os.symlink
  490. def build_file_from_blob(
  491. blob: Blob, mode: int, target_path: bytes, *, honor_filemode=True,
  492. tree_encoding="utf-8", symlink_fn=None
  493. ):
  494. """Build a file or symlink on disk based on a Git object.
  495. Args:
  496. blob: The git object
  497. mode: File mode
  498. target_path: Path to write to
  499. honor_filemode: An optional flag to honor core.filemode setting in
  500. config file, default is core.filemode=True, change executable bit
  501. symlink: Function to use for creating symlinks
  502. Returns: stat object for the file
  503. """
  504. try:
  505. oldstat = os.lstat(target_path)
  506. except FileNotFoundError:
  507. oldstat = None
  508. contents = blob.as_raw_string()
  509. if stat.S_ISLNK(mode):
  510. if oldstat:
  511. os.unlink(target_path)
  512. if sys.platform == "win32":
  513. # os.readlink on Python3 on Windows requires a unicode string.
  514. contents = contents.decode(tree_encoding) # type: ignore
  515. target_path = target_path.decode(tree_encoding) # type: ignore
  516. (symlink_fn or symlink)(contents, target_path)
  517. else:
  518. if oldstat is not None and oldstat.st_size == len(contents):
  519. with open(target_path, "rb") as f:
  520. if f.read() == contents:
  521. return oldstat
  522. with open(target_path, "wb") as f:
  523. # Write out file
  524. f.write(contents)
  525. if honor_filemode:
  526. os.chmod(target_path, mode)
  527. return os.lstat(target_path)
  528. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  529. def validate_path_element_default(element: bytes) -> bool:
  530. return element.lower() not in INVALID_DOTNAMES
  531. def validate_path_element_ntfs(element: bytes) -> bool:
  532. stripped = element.rstrip(b". ").lower()
  533. if stripped in INVALID_DOTNAMES:
  534. return False
  535. if stripped == b"git~1":
  536. return False
  537. return True
  538. def validate_path(path: bytes,
  539. element_validator=validate_path_element_default) -> bool:
  540. """Default path validator that just checks for .git/."""
  541. parts = path.split(b"/")
  542. for p in parts:
  543. if not element_validator(p):
  544. return False
  545. else:
  546. return True
  547. def build_index_from_tree(
  548. root_path: Union[str, bytes],
  549. index_path: Union[str, bytes],
  550. object_store: ObjectContainer,
  551. tree_id: bytes,
  552. honor_filemode: bool = True,
  553. validate_path_element=validate_path_element_default,
  554. symlink_fn=None
  555. ):
  556. """Generate and materialize index from a tree
  557. Args:
  558. tree_id: Tree to materialize
  559. root_path: Target dir for materialized index files
  560. index_path: Target path for generated index
  561. object_store: Non-empty object store holding tree contents
  562. honor_filemode: An optional flag to honor core.filemode setting in
  563. config file, default is core.filemode=True, change executable bit
  564. validate_path_element: Function to validate path elements to check
  565. out; default just refuses .git and .. directories.
  566. Note: existing index is wiped and contents are not merged
  567. in a working dir. Suitable only for fresh clones.
  568. """
  569. index = Index(index_path, read=False)
  570. if not isinstance(root_path, bytes):
  571. root_path = os.fsencode(root_path)
  572. for entry in iter_tree_contents(object_store, tree_id):
  573. if not validate_path(entry.path, validate_path_element):
  574. continue
  575. full_path = _tree_to_fs_path(root_path, entry.path)
  576. if not os.path.exists(os.path.dirname(full_path)):
  577. os.makedirs(os.path.dirname(full_path))
  578. # TODO(jelmer): Merge new index into working tree
  579. if S_ISGITLINK(entry.mode):
  580. if not os.path.isdir(full_path):
  581. os.mkdir(full_path)
  582. st = os.lstat(full_path)
  583. # TODO(jelmer): record and return submodule paths
  584. else:
  585. obj = object_store[entry.sha]
  586. assert isinstance(obj, Blob)
  587. st = build_file_from_blob(
  588. obj, entry.mode, full_path,
  589. honor_filemode=honor_filemode,
  590. symlink_fn=symlink_fn,
  591. )
  592. # Add file to index
  593. if not honor_filemode or S_ISGITLINK(entry.mode):
  594. # we can not use tuple slicing to build a new tuple,
  595. # because on windows that will convert the times to
  596. # longs, which causes errors further along
  597. st_tuple = (
  598. entry.mode,
  599. st.st_ino,
  600. st.st_dev,
  601. st.st_nlink,
  602. st.st_uid,
  603. st.st_gid,
  604. st.st_size,
  605. st.st_atime,
  606. st.st_mtime,
  607. st.st_ctime,
  608. )
  609. st = st.__class__(st_tuple)
  610. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  611. index.write()
  612. def blob_from_path_and_mode(fs_path: bytes, mode: int,
  613. tree_encoding="utf-8"):
  614. """Create a blob from a path and a stat object.
  615. Args:
  616. fs_path: Full file system path to file
  617. mode: File mode
  618. Returns: A `Blob` object
  619. """
  620. assert isinstance(fs_path, bytes)
  621. blob = Blob()
  622. if stat.S_ISLNK(mode):
  623. if sys.platform == "win32":
  624. # os.readlink on Python3 on Windows requires a unicode string.
  625. blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
  626. else:
  627. blob.data = os.readlink(fs_path)
  628. else:
  629. with open(fs_path, "rb") as f:
  630. blob.data = f.read()
  631. return blob
  632. def blob_from_path_and_stat(fs_path: bytes, st, tree_encoding="utf-8"):
  633. """Create a blob from a path and a stat object.
  634. Args:
  635. fs_path: Full file system path to file
  636. st: A stat object
  637. Returns: A `Blob` object
  638. """
  639. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
  640. def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
  641. """Read the head commit of a submodule.
  642. Args:
  643. path: path to the submodule
  644. Returns: HEAD sha, None if not a valid head/repository
  645. """
  646. from .errors import NotGitRepository
  647. from .repo import Repo
  648. # Repo currently expects a "str", so decode if necessary.
  649. # TODO(jelmer): Perhaps move this into Repo() ?
  650. if not isinstance(path, str):
  651. path = os.fsdecode(path)
  652. try:
  653. repo = Repo(path)
  654. except NotGitRepository:
  655. return None
  656. try:
  657. return repo.head()
  658. except KeyError:
  659. return None
  660. def _has_directory_changed(tree_path: bytes, entry):
  661. """Check if a directory has changed after getting an error.
  662. When handling an error trying to create a blob from a path, call this
  663. function. It will check if the path is a directory. If it's a directory
  664. and a submodule, check the submodule head to see if it's has changed. If
  665. not, consider the file as changed as Git tracked a file and not a
  666. directory.
  667. Return true if the given path should be considered as changed and False
  668. otherwise or if the path is not a directory.
  669. """
  670. # This is actually a directory
  671. if os.path.exists(os.path.join(tree_path, b".git")):
  672. # Submodule
  673. head = read_submodule_head(tree_path)
  674. if entry.sha != head:
  675. return True
  676. else:
  677. # The file was changed to a directory, so consider it removed.
  678. return True
  679. return False
  680. def get_unstaged_changes(
  681. index: Index, root_path: Union[str, bytes],
  682. filter_blob_callback=None):
  683. """Walk through an index and check for differences against working tree.
  684. Args:
  685. index: index to check
  686. root_path: path in which to find files
  687. Returns: iterator over paths with unstaged changes
  688. """
  689. # For each entry in the index check the sha1 & ensure not staged
  690. if not isinstance(root_path, bytes):
  691. root_path = os.fsencode(root_path)
  692. for tree_path, entry in index.iteritems():
  693. full_path = _tree_to_fs_path(root_path, tree_path)
  694. try:
  695. st = os.lstat(full_path)
  696. if stat.S_ISDIR(st.st_mode):
  697. if _has_directory_changed(tree_path, entry):
  698. yield tree_path
  699. continue
  700. if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
  701. continue
  702. blob = blob_from_path_and_stat(full_path, st)
  703. if filter_blob_callback is not None:
  704. blob = filter_blob_callback(blob, tree_path)
  705. except FileNotFoundError:
  706. # The file was removed, so we assume that counts as
  707. # different from whatever file used to exist.
  708. yield tree_path
  709. else:
  710. if blob.id != entry.sha:
  711. yield tree_path
  712. os_sep_bytes = os.sep.encode("ascii")
  713. def _tree_to_fs_path(root_path: bytes, tree_path: bytes):
  714. """Convert a git tree path to a file system path.
  715. Args:
  716. root_path: Root filesystem path
  717. tree_path: Git tree path as bytes
  718. Returns: File system path.
  719. """
  720. assert isinstance(tree_path, bytes)
  721. if os_sep_bytes != b"/":
  722. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  723. else:
  724. sep_corrected_path = tree_path
  725. return os.path.join(root_path, sep_corrected_path)
  726. def _fs_to_tree_path(fs_path: Union[str, bytes]) -> bytes:
  727. """Convert a file system path to a git tree path.
  728. Args:
  729. fs_path: File system path.
  730. Returns: Git tree path as bytes
  731. """
  732. if not isinstance(fs_path, bytes):
  733. fs_path_bytes = os.fsencode(fs_path)
  734. else:
  735. fs_path_bytes = fs_path
  736. if os_sep_bytes != b"/":
  737. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  738. else:
  739. tree_path = fs_path_bytes
  740. return tree_path
  741. def index_entry_from_directory(st, path: bytes) -> Optional[IndexEntry]:
  742. if os.path.exists(os.path.join(path, b".git")):
  743. head = read_submodule_head(path)
  744. if head is None:
  745. return None
  746. return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
  747. return None
  748. def index_entry_from_path(
  749. path: bytes, object_store: Optional[ObjectContainer] = None
  750. ) -> Optional[IndexEntry]:
  751. """Create an index from a filesystem path.
  752. This returns an index value for files, symlinks
  753. and tree references. for directories and
  754. non-existent files it returns None
  755. Args:
  756. path: Path to create an index entry for
  757. object_store: Optional object store to
  758. save new blobs in
  759. Returns: An index entry; None for directories
  760. """
  761. assert isinstance(path, bytes)
  762. st = os.lstat(path)
  763. if stat.S_ISDIR(st.st_mode):
  764. return index_entry_from_directory(st, path)
  765. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  766. blob = blob_from_path_and_stat(path, st)
  767. if object_store is not None:
  768. object_store.add_object(blob)
  769. return index_entry_from_stat(st, blob.id, 0)
  770. return None
  771. def iter_fresh_entries(
  772. paths: Iterable[bytes], root_path: bytes,
  773. object_store: Optional[ObjectContainer] = None
  774. ) -> Iterator[Tuple[bytes, Optional[IndexEntry]]]:
  775. """Iterate over current versions of index entries on disk.
  776. Args:
  777. paths: Paths to iterate over
  778. root_path: Root path to access from
  779. object_store: Optional store to save new blobs in
  780. Returns: Iterator over path, index_entry
  781. """
  782. for path in paths:
  783. p = _tree_to_fs_path(root_path, path)
  784. try:
  785. entry = index_entry_from_path(p, object_store=object_store)
  786. except (FileNotFoundError, IsADirectoryError):
  787. entry = None
  788. yield path, entry
  789. def iter_fresh_objects(
  790. paths: Iterable[bytes], root_path: bytes, include_deleted=False,
  791. object_store=None) -> Iterator[
  792. Tuple[bytes, Optional[bytes], Optional[int]]]:
  793. """Iterate over versions of objects on disk referenced by index.
  794. Args:
  795. root_path: Root path to access from
  796. include_deleted: Include deleted entries with sha and
  797. mode set to None
  798. object_store: Optional object store to report new items to
  799. Returns: Iterator over path, sha, mode
  800. """
  801. for path, entry in iter_fresh_entries(
  802. paths, root_path, object_store=object_store):
  803. if entry is None:
  804. if include_deleted:
  805. yield path, None, None
  806. else:
  807. entry = IndexEntry(*entry)
  808. yield path, entry.sha, cleanup_mode(entry.mode)
  809. def refresh_index(index: Index, root_path: bytes):
  810. """Refresh the contents of an index.
  811. This is the equivalent to running 'git commit -a'.
  812. Args:
  813. index: Index to update
  814. root_path: Root filesystem path
  815. """
  816. for path, entry in iter_fresh_entries(index, root_path):
  817. if entry:
  818. index[path] = entry
  819. class locked_index:
  820. """Lock the index while making modifications.
  821. Works as a context manager.
  822. """
  823. def __init__(self, path: Union[bytes, str]):
  824. self._path = path
  825. def __enter__(self):
  826. self._file = GitFile(self._path, "wb")
  827. self._index = Index(self._path)
  828. return self._index
  829. def __exit__(self, exc_type, exc_value, traceback):
  830. if exc_type is not None:
  831. self._file.abort()
  832. return
  833. try:
  834. f = SHA1Writer(self._file)
  835. write_index_dict(f, self._index._byname)
  836. except BaseException:
  837. self._file.abort()
  838. else:
  839. f.close()