
# index.py -- File parser/writer for the git index file
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parser for the git index file format."""

import collections
import os
import stat
import struct
import sys
from typing import (
    Any,
    BinaryIO,
    Callable,
    Dict,
    List,
    Optional,
    TYPE_CHECKING,
    Iterable,
    Iterator,
    Tuple,
    Union,
)

if TYPE_CHECKING:
    from dulwich.object_store import BaseObjectStore

from dulwich.file import GitFile
from dulwich.objects import (
    Blob,
    S_IFGITLINK,
    S_ISGITLINK,
    Tree,
    hex_to_sha,
    sha_to_hex,
    ObjectID,
)
from dulwich.pack import (
    SHA1Reader,
    SHA1Writer,
)

# TODO(jelmer): Switch to dataclass?
IndexEntry = collections.namedtuple(
    "IndexEntry",
    [
        "ctime",
        "mtime",
        "dev",
        "ino",
        "mode",
        "uid",
        "gid",
        "size",
        "sha",
        "flags",
        "extended_flags",
    ],
)

# 2-bit stage (during merge)
FLAG_STAGEMASK = 0x3000

# assume-valid
FLAG_VALID = 0x8000

# extended flag (must be zero in version 2)
FLAG_EXTENDED = 0x4000

# used by sparse checkout
EXTENDED_FLAG_SKIP_WORKTREE = 0x4000

# used by "git add -N"
EXTENDED_FLAG_INTEND_TO_ADD = 0x2000

DEFAULT_VERSION = 2


def pathsplit(path: bytes) -> Tuple[bytes, bytes]:
    """Split a /-delimited path into a directory part and a basename.

    Args:
      path: The path to split.
    Returns:
      Tuple with directory name and basename
    """
    try:
        (dirname, basename) = path.rsplit(b"/", 1)
    except ValueError:
        return (b"", path)
    else:
        return (dirname, basename)


def pathjoin(*args):
    """Join a /-delimited path."""
    return b"/".join([p for p in args if p])


def read_cache_time(f):
    """Read a cache time.

    Args:
      f: File-like object to read from
    Returns:
      Tuple with seconds and nanoseconds
    """
    return struct.unpack(">LL", f.read(8))


def write_cache_time(f, t):
    """Write a cache time.

    Args:
      f: File-like object to write to
      t: Time to write (as int, float or tuple with secs and nsecs)
    """
    if isinstance(t, int):
        t = (t, 0)
    elif isinstance(t, float):
        (secs, nsecs) = divmod(t, 1.0)
        t = (int(secs), int(nsecs * 1000000000))
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))
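

# Illustrative usage sketch (not part of the original module): shows how a
# cache time round-trips through the (seconds, nanoseconds) wire format used
# above.  The helper name and values are hypothetical.
def _example_cache_time_roundtrip():
    from io import BytesIO

    buf = BytesIO()
    write_cache_time(buf, (1234567890, 0))  # (seconds, nanoseconds)
    buf.seek(0)
    return read_cache_time(buf)  # -> (1234567890, 0)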


def read_cache_entry(f, version: int) -> Tuple[bytes, IndexEntry]:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: Index format version
    Returns:
      Tuple with: name, IndexEntry
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError(
                'extended flag set in index with version < 3')
        (extended_flags, ) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    name = f.read((flags & 0x0FFF))
    # Padding:
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return (
        name,
        IndexEntry(
            ctime,
            mtime,
            dev,
            ino,
            mode,
            uid,
            gid,
            size,
            sha_to_hex(sha),
            flags & ~0x0FFF,
            extended_flags,
        ))


def write_cache_entry(f, name: bytes, entry: IndexEntry, version: int) -> None:
    """Write an index entry to a file.

    Args:
      f: File object
      name: Entry name (path) as bytes
      entry: IndexEntry to write
      version: Index format version
    """
    beginoffset = f.tell()
    write_cache_time(f, entry.ctime)
    write_cache_time(f, entry.mtime)
    flags = len(name) | (entry.flags & ~0x0FFF)
    if entry.extended_flags:
        flags |= FLAG_EXTENDED
    if flags & FLAG_EXTENDED and version is not None and version < 3:
        raise AssertionError('unable to use extended flags in version < 3')
    f.write(
        struct.pack(
            b">LLLLLL20sH",
            entry.dev & 0xFFFFFFFF,
            entry.ino & 0xFFFFFFFF,
            entry.mode,
            entry.uid,
            entry.gid,
            entry.size,
            hex_to_sha(entry.sha),
            flags,
        )
    )
    if flags & FLAG_EXTENDED:
        f.write(struct.pack(b">H", entry.extended_flags))
    f.write(name)
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
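

# Illustrative usage sketch (not part of the original module): serialize one
# version-2 index entry and read it back.  All stat-cache values are made up;
# the SHA is the well-known hash of the empty blob.
def _example_cache_entry_roundtrip():
    from io import BytesIO

    entry = IndexEntry(
        ctime=(1234567890, 0), mtime=(1234567890, 0),
        dev=1, ino=2, mode=0o100644, uid=1000, gid=1000, size=0,
        sha=b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",  # empty-blob SHA
        flags=0, extended_flags=0)
    buf = BytesIO()
    write_cache_entry(buf, b"hello.txt", entry, version=2)
    buf.seek(0)
    return read_cache_entry(buf, version=2)  # -> (b"hello.txt", IndexEntry(...))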


class UnsupportedIndexFormat(Exception):
    """An unsupported index format was encountered."""

    def __init__(self, version):
        self.index_format_version = version


def read_index(f: BinaryIO):
    """Read an index file, yielding the individual entries."""
    header = f.read(4)
    if header != b"DIRC":
        raise AssertionError("Invalid index file header: %r" % header)
    (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
    if version not in (1, 2, 3):
        raise UnsupportedIndexFormat(version)
    for i in range(num_entries):
        yield read_cache_entry(f, version)


def read_index_dict(f) -> Dict[bytes, IndexEntry]:
    """Read an index file and return it as a dictionary.

    Args:
      f: File object to read from
    """
    ret = {}
    for name, entry in read_index(f):
        ret[name] = entry
    return ret


def write_index(f: BinaryIO, entries: List[Tuple[bytes, IndexEntry]],
                version: Optional[int] = None):
    """Write an index file.

    Args:
      f: File-like object to write to
      entries: Iterable over the entries to write
      version: Version number to write
    """
    if version is None:
        version = DEFAULT_VERSION
    f.write(b"DIRC")
    f.write(struct.pack(b">LL", version, len(entries)))
    for name, entry in entries:
        write_cache_entry(f, name, entry, version)


def write_index_dict(
    f: BinaryIO,
    entries: Dict[bytes, IndexEntry],
    version: Optional[int] = None,
) -> None:
    """Write an index file based on the contents of a dictionary."""
    entries_list = []
    for name in sorted(entries):
        entries_list.append((name, entries[name]))
    write_index(f, entries_list, version=version)
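

# Illustrative usage sketch (not part of the original module): write a
# one-entry index to an in-memory buffer and parse it back as a dictionary.
# The entry values are arbitrary; the SHA is the empty-blob hash.
def _example_index_roundtrip():
    from io import BytesIO

    entry = IndexEntry(
        (1234567890, 0), (1234567890, 0), 1, 2, 0o100644, 1000, 1000, 0,
        b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0)
    buf = BytesIO()
    write_index_dict(buf, {b"hello.txt": entry})
    buf.seek(0)
    return read_index_dict(buf)  # -> {b"hello.txt": IndexEntry(...)}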


def cleanup_mode(mode: int) -> int:
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    Args:
      mode: Mode to clean up.
    Returns:
      mode
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    elif stat.S_ISDIR(mode):
        return stat.S_IFDIR
    elif S_ISGITLINK(mode):
        return S_IFGITLINK
    ret = stat.S_IFREG | 0o644
    if mode & 0o100:
        ret |= 0o111
    return ret
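

# Illustrative usage sketch (not part of the original module): cleanup_mode()
# collapses arbitrary on-disk modes to the small set git stores in trees.
def _example_cleanup_modes():
    assert cleanup_mode(0o100664) == 0o100644  # group-write bit is dropped
    assert cleanup_mode(0o100775) == 0o100755  # executable bit is kept
    assert cleanup_mode(0o040755) == stat.S_IFDIR  # directories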


class Index(object):
    """A Git Index file."""

    def __init__(self, filename: Union[bytes, str], read=True):
        """Create an index object associated with the given filename.

        Args:
          filename: Path to the index file
          read: Whether to initialize the index from the given file, should it exist.
        """
        self._filename = filename
        # TODO(jelmer): Store the version returned by read_index
        self._version = None
        self.clear()
        if read:
            self.read()

    @property
    def path(self):
        return self._filename

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._filename)

    def write(self) -> None:
        """Write current contents of index to disk."""
        f = GitFile(self._filename, "wb")
        try:
            f = SHA1Writer(f)
            write_index_dict(f, self._byname, version=self._version)
        finally:
            f.close()

    def read(self):
        """Read current contents of index from disk."""
        if not os.path.exists(self._filename):
            return
        f = GitFile(self._filename, "rb")
        try:
            f = SHA1Reader(f)
            for name, entry in read_index(f):
                self[name] = entry
            # FIXME: Additional data?
            f.read(os.path.getsize(self._filename) - f.tell() - 20)
            f.check_sha()
        finally:
            f.close()

    def __len__(self) -> int:
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, name: bytes) -> IndexEntry:
        """Retrieve entry by relative path.

        Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
            flags)
        """
        return self._byname[name]

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the paths in this index."""
        return iter(self._byname)

    def get_sha1(self, path: bytes) -> bytes:
        """Return the (git object) SHA1 for the object at a path."""
        return self[path].sha

    def get_mode(self, path: bytes) -> int:
        """Return the POSIX file mode for the object at a path."""
        return self[path].mode

    def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            yield path, entry.sha, cleanup_mode(entry.mode)

    def clear(self):
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(self, name: bytes, x: IndexEntry):
        assert isinstance(name, bytes)
        assert len(x) == len(IndexEntry._fields)
        # Remove the old entry if any
        self._byname[name] = IndexEntry(*x)

    def __delitem__(self, name: bytes):
        assert isinstance(name, bytes)
        del self._byname[name]

    def iteritems(self) -> Iterator[Tuple[bytes, IndexEntry]]:
        return self._byname.items()

    def items(self) -> Iterator[Tuple[bytes, IndexEntry]]:
        return self._byname.items()

    def update(self, entries: Dict[bytes, IndexEntry]):
        for name, value in entries.items():
            self[name] = value

    def changes_from_tree(
            self, object_store, tree: ObjectID, want_unchanged: bool = False):
        """Find the differences between the contents of this index and a tree.

        Args:
          object_store: Object store to use for retrieving tree contents
          tree: SHA1 of the root tree
          want_unchanged: Whether unchanged files should be reported
        Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
            newmode), (oldsha, newsha)
        """

        def lookup_entry(path):
            entry = self[path]
            return entry.sha, cleanup_mode(entry.mode)

        for (name, mode, sha) in changes_from_tree(
            self._byname.keys(),
            lookup_entry,
            object_store,
            tree,
            want_unchanged=want_unchanged,
        ):
            yield (name, mode, sha)

    def commit(self, object_store):
        """Create a new tree from an index.

        Args:
          object_store: Object store to save the tree in
        Returns:
          Root tree SHA
        """
        return commit_tree(object_store, self.iterobjects())
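

# Illustrative usage sketch (not part of the original module): open an index
# file and list its paths and blob SHAs.  The default path is hypothetical.
def _example_list_index(index_path=".git/index"):
    index = Index(index_path)
    return [(name, index[name].sha) for name in index]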


def commit_tree(
    object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]]
) -> bytes:
    """Commit a new tree.

    Args:
      object_store: Object store to add trees to
      blobs: Iterable over blob path, sha, mode entries
    Returns:
      SHA1 of the created tree.
    """
    trees = {b"": {}}  # type: Dict[bytes, Any]

    def add_tree(path):
        if path in trees:
            return trees[path]
        dirname, basename = pathsplit(path)
        t = add_tree(dirname)
        assert isinstance(basename, bytes)
        newtree = {}
        t[basename] = newtree
        trees[path] = newtree
        return newtree

    for path, sha, mode in blobs:
        tree_path, basename = pathsplit(path)
        tree = add_tree(tree_path)
        tree[basename] = (mode, sha)

    def build_tree(path):
        tree = Tree()
        for basename, entry in trees[path].items():
            if isinstance(entry, dict):
                mode = stat.S_IFDIR
                sha = build_tree(pathjoin(path, basename))
            else:
                (mode, sha) = entry
            tree.add(basename, mode, sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b"")
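

# Illustrative usage sketch (not part of the original module): build a tree
# from a single blob using dulwich's in-memory object store.  The path and
# contents are hypothetical.
def _example_commit_tree():
    from dulwich.object_store import MemoryObjectStore

    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello\n")
    store.add_object(blob)
    # (path, sha, mode) triples, as Index.iterobjects() would yield them.
    return commit_tree(store, [(b"hello.txt", blob.id, 0o100644)])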


def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes:
    """Create a new tree from an index.

    Args:
      object_store: Object store to save the tree in
      index: Index file
    Note: This function is deprecated, use index.commit() instead.
    Returns: Root tree sha.
    """
    return commit_tree(object_store, index.iterobjects())


def changes_from_tree(
    names: Iterable[bytes],
    lookup_entry: Callable[[bytes], Tuple[bytes, int]],
    object_store: "BaseObjectStore",
    tree: Optional[bytes],
    want_unchanged=False,
) -> Iterable[
    Tuple[
        Tuple[Optional[bytes], Optional[bytes]],
        Tuple[Optional[int], Optional[int]],
        Tuple[Optional[bytes], Optional[bytes]],
    ]
]:
    """Find the differences between the contents of a tree and
    a working copy.

    Args:
      names: Iterable of names in the working copy
      lookup_entry: Function to lookup an entry in the working copy
      object_store: Object store to use for retrieving tree contents
      tree: SHA1 of the root tree, or None for an empty tree
      want_unchanged: Whether unchanged files should be reported
    Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # TODO(jelmer): Support a include_trees option
    other_names = set(names)
    if tree is not None:
        for (name, mode, sha) in object_store.iter_tree_contents(tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
            else:
                other_names.remove(name)
                if want_unchanged or other_sha != sha or other_mode != mode:
                    yield ((name, name), (mode, other_mode), (sha, other_sha))
    # Mention added files
    for name in other_names:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            pass
        else:
            yield ((None, name), (None, other_mode), (None, other_sha))


def index_entry_from_stat(
    stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
    extended_flags: Optional[int] = None
):
    """Create a new index entry from a stat value.

    Args:
      stat_val: POSIX stat_result instance
      hex_sha: Hex sha of the object
      flags: Index flags
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)

    return IndexEntry(
        stat_val.st_ctime,
        stat_val.st_mtime,
        stat_val.st_dev,
        stat_val.st_ino,
        mode,
        stat_val.st_uid,
        stat_val.st_gid,
        stat_val.st_size,
        hex_sha,
        flags,
        extended_flags
    )


if sys.platform == 'win32':
    # On Windows, creating symlinks either requires administrator privileges
    # or developer mode. Raise a more helpful error when we're unable to
    # create symlinks
    # https://github.com/jelmer/dulwich/issues/1005

    class WindowsSymlinkPermissionError(PermissionError):

        def __init__(self, errno, msg, filename):
            super(PermissionError, self).__init__(
                errno, "Unable to create symlink; "
                "do you have developer mode enabled? %s" % msg,
                filename)

    def symlink(src, dst, target_is_directory=False, *, dir_fd=None):
        try:
            return os.symlink(
                src, dst, target_is_directory=target_is_directory,
                dir_fd=dir_fd)
        except PermissionError as e:
            raise WindowsSymlinkPermissionError(
                e.errno, e.strerror, e.filename) from e
else:
    symlink = os.symlink


def build_file_from_blob(
    blob: Blob, mode: int, target_path: bytes, *, honor_filemode=True,
    tree_encoding="utf-8", symlink_fn=None
):
    """Build a file or symlink on disk based on a Git object.

    Args:
      blob: The git object
      mode: File mode
      target_path: Path to write to
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      symlink_fn: Function to use for creating symlinks
    Returns: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except FileNotFoundError:
        oldstat = None
    contents = blob.as_raw_string()
    if stat.S_ISLNK(mode):
        if oldstat:
            os.unlink(target_path)
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            contents = contents.decode(tree_encoding)  # type: ignore
            target_path = target_path.decode(tree_encoding)  # type: ignore
        (symlink_fn or symlink)(contents, target_path)
    else:
        if oldstat is not None and oldstat.st_size == len(contents):
            with open(target_path, "rb") as f:
                if f.read() == contents:
                    return oldstat
        with open(target_path, "wb") as f:
            # Write out file
            f.write(contents)
        if honor_filemode:
            os.chmod(target_path, mode)
    return os.lstat(target_path)
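

# Illustrative usage sketch (not part of the original module): materialize a
# blob as a regular file in a fresh temporary directory.  The file name and
# contents are hypothetical.
def _example_build_file():
    import tempfile

    blob = Blob.from_string(b"hello\n")
    target = os.path.join(os.fsencode(tempfile.mkdtemp()), b"hello.txt")
    return build_file_from_blob(blob, 0o100644, target)  # -> stat result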


INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element: bytes) -> bool:
    return element.lower() not in INVALID_DOTNAMES


def validate_path_element_ntfs(element: bytes) -> bool:
    stripped = element.rstrip(b". ").lower()
    if stripped in INVALID_DOTNAMES:
        return False
    if stripped == b"git~1":
        return False
    return True


def validate_path(path: bytes,
                  element_validator=validate_path_element_default) -> bool:
    """Default path validator that just checks for .git/."""
    parts = path.split(b"/")
    for p in parts:
        if not element_validator(p):
            return False
    else:
        return True
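

# Illustrative usage sketch (not part of the original module): the default
# validator refuses any path containing a ".git" component; the NTFS variant
# also catches case and trailing-dot tricks.  Paths here are hypothetical.
def _example_validate_paths():
    assert validate_path(b"src/index.py")
    assert not validate_path(b".git/config")
    assert not validate_path(b"sub/.GIT./hooks", validate_path_element_ntfs)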


def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: "BaseObjectStore",
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element=validate_path_element_default,
    symlink_fn=None
):
    """Generate and materialize an index from a tree.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.
      symlink_fn: Function to use for creating symlinks

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for entry in object_store.iter_tree_contents(tree_id):
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)

        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))

        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            st = build_file_from_blob(
                obj, entry.mode, full_path,
                honor_filemode=honor_filemode,
                symlink_fn=symlink_fn,
            )

        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)

    index.write()


def blob_from_path_and_mode(fs_path: bytes, mode: int,
                            tree_encoding="utf-8"):
    """Create a blob from a path and a file mode.

    Args:
      fs_path: Full file system path to file
      mode: File mode
    Returns: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(mode):
        if sys.platform == "win32":
            # os.readlink on Python3 on Windows requires a unicode string.
            blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
        else:
            blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, "rb") as f:
            blob.data = f.read()
    return blob


def blob_from_path_and_stat(fs_path: bytes, st, tree_encoding="utf-8"):
    """Create a blob from a path and a stat object.

    Args:
      fs_path: Full file system path to file
      st: A stat object
    Returns: A `Blob` object
    """
    return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)


def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
    """Read the head commit of a submodule.

    Args:
      path: path to the submodule
    Returns: HEAD sha, None if not a valid head/repository
    """
    from dulwich.errors import NotGitRepository
    from dulwich.repo import Repo

    # Repo currently expects a "str", so decode if necessary.
    # TODO(jelmer): Perhaps move this into Repo() ?
    if not isinstance(path, str):
        path = os.fsdecode(path)
    try:
        repo = Repo(path)
    except NotGitRepository:
        return None
    try:
        return repo.head()
    except KeyError:
        return None


def _has_directory_changed(tree_path: bytes, entry):
    """Check if a directory has changed after getting an error.

    When handling an error trying to create a blob from a path, call this
    function. It will check if the path is a directory. If it is a directory
    and a submodule, check the submodule head to see if it has changed. If
    not, consider the file as changed, as Git tracked a file and not a
    directory.

    Returns True if the given path should be considered as changed and False
    otherwise or if the path is not a directory.
    """
    # This is actually a directory
    if os.path.exists(os.path.join(tree_path, b".git")):
        # Submodule
        head = read_submodule_head(tree_path)
        if entry.sha != head:
            return True
    else:
        # The file was changed to a directory, so consider it removed.
        return True

    return False


def get_unstaged_changes(
        index: Index, root_path: Union[str, bytes],
        filter_blob_callback=None):
    """Walk through an index and check for differences against working tree.

    Args:
      index: index to check
      root_path: path in which to find files
    Returns: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)

    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        try:
            st = os.lstat(full_path)
            if stat.S_ISDIR(st.st_mode):
                if _has_directory_changed(tree_path, entry):
                    yield tree_path
                continue

            if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
                continue

            blob = blob_from_path_and_stat(full_path, st)

            if filter_blob_callback is not None:
                blob = filter_blob_callback(blob, tree_path)
        except FileNotFoundError:
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
        else:
            if blob.id != entry.sha:
                yield tree_path
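

# Illustrative usage sketch (not part of the original module): list the tree
# paths that differ between the working tree and the index.  The default
# paths are hypothetical.
def _example_unstaged_paths(index_path=b".git/index", root_path=b"."):
    index = Index(index_path)
    return list(get_unstaged_changes(index, root_path))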


os_sep_bytes = os.sep.encode("ascii")


def _tree_to_fs_path(root_path: bytes, tree_path: bytes):
    """Convert a git tree path to a file system path.

    Args:
      root_path: Root filesystem path
      tree_path: Git tree path as bytes
    Returns: File system path.
    """
    assert isinstance(tree_path, bytes)
    if os_sep_bytes != b"/":
        sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
    else:
        sep_corrected_path = tree_path
    return os.path.join(root_path, sep_corrected_path)


def _fs_to_tree_path(fs_path: Union[str, bytes]) -> bytes:
    """Convert a file system path to a git tree path.

    Args:
      fs_path: File system path.
    Returns: Git tree path as bytes
    """
    if not isinstance(fs_path, bytes):
        fs_path_bytes = os.fsencode(fs_path)
    else:
        fs_path_bytes = fs_path
    if os_sep_bytes != b"/":
        tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
    else:
        tree_path = fs_path_bytes
    return tree_path
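

# Illustrative usage sketch (not part of the original module): convert between
# git tree paths and filesystem paths.  On POSIX the separator is unchanged;
# on Windows it is translated.  The paths used here are hypothetical.
def _example_path_conversion():
    fs_path = _tree_to_fs_path(b"/tmp/repo", b"docs/readme.md")
    tree_path = _fs_to_tree_path(b"docs" + os_sep_bytes + b"readme.md")
    return fs_path, tree_path  # tree_path is always b"docs/readme.md"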


def index_entry_from_directory(st, path: bytes) -> Optional[IndexEntry]:
    if os.path.exists(os.path.join(path, b".git")):
        head = read_submodule_head(path)
        if head is None:
            return None
        return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
    return None


def index_entry_from_path(
        path: bytes, object_store: Optional["BaseObjectStore"] = None
) -> Optional[IndexEntry]:
    """Create an index entry from a filesystem path.

    This returns an index value for files, symlinks
    and tree references. For directories and
    non-existent files it returns None.

    Args:
      path: Path to create an index entry for
      object_store: Optional object store to
        save new blobs in
    Returns: An index entry; None for directories
    """
    assert isinstance(path, bytes)
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        return index_entry_from_directory(st, path)

    if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
        blob = blob_from_path_and_stat(path, st)
        if object_store is not None:
            object_store.add_object(blob)
        return index_entry_from_stat(st, blob.id, 0)

    return None


def iter_fresh_entries(
    paths: Iterable[bytes], root_path: bytes,
    object_store: Optional["BaseObjectStore"] = None
) -> Iterator[Tuple[bytes, Optional[IndexEntry]]]:
    """Iterate over current versions of index entries on disk.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      object_store: Optional store to save new blobs in
    Returns: Iterator over path, index_entry
    """
    for path in paths:
        p = _tree_to_fs_path(root_path, path)
        try:
            entry = index_entry_from_path(p, object_store=object_store)
        except (FileNotFoundError, IsADirectoryError):
            entry = None
        yield path, entry


def iter_fresh_objects(
        paths: Iterable[bytes], root_path: bytes, include_deleted=False,
        object_store=None) -> Iterator[
            Tuple[bytes, Optional[bytes], Optional[int]]]:
    """Iterate over versions of objects on disk referenced by index.

    Args:
      paths: Paths to iterate over
      root_path: Root path to access from
      include_deleted: Include deleted entries with sha and
        mode set to None
      object_store: Optional object store to report new items to
    Returns: Iterator over path, sha, mode
    """
    for path, entry in iter_fresh_entries(
            paths, root_path, object_store=object_store):
        if entry is None:
            if include_deleted:
                yield path, None, None
        else:
            entry = IndexEntry(*entry)
            yield path, entry.sha, cleanup_mode(entry.mode)


def refresh_index(index: Index, root_path: bytes):
    """Refresh the contents of an index.

    This is the equivalent of staging all tracked files, as 'git commit -a'
    does before committing.

    Args:
      index: Index to update
      root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        if entry:
            index[path] = entry
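

# Illustrative usage sketch (not part of the original module): re-stat every
# file tracked by an index, fold the fresh entries back in and write the
# result to disk.  The default paths are hypothetical.
def _example_refresh(index_path=b".git/index", root_path=b"."):
    index = Index(index_path)
    refresh_index(index, root_path)
    index.write()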


class locked_index(object):
    """Lock the index while making modifications.

    Works as a context manager.
    """

    def __init__(self, path: Union[bytes, str]):
        self._path = path

    def __enter__(self):
        self._file = GitFile(self._path, "wb")
        self._index = Index(self._path)
        return self._index

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            self._file.abort()
            return
        try:
            f = SHA1Writer(self._file)
            write_index_dict(f, self._index._byname)
        except BaseException:
            self._file.abort()
        else:
            f.close()