index.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Parser for the git index file format."""
  21. import collections
  22. import os
  23. import stat
  24. import struct
  25. import sys
  26. from typing import (
  27. Any,
  28. BinaryIO,
  29. Callable,
  30. Dict,
  31. List,
  32. Optional,
  33. TYPE_CHECKING,
  34. Iterable,
  35. Iterator,
  36. Tuple,
  37. Union,
  38. )
  39. if TYPE_CHECKING:
  40. from dulwich.object_store import BaseObjectStore
  41. from dulwich.file import GitFile
  42. from dulwich.objects import (
  43. Blob,
  44. S_IFGITLINK,
  45. S_ISGITLINK,
  46. Tree,
  47. hex_to_sha,
  48. sha_to_hex,
  49. ObjectID,
  50. )
  51. from dulwich.pack import (
  52. SHA1Reader,
  53. SHA1Writer,
  54. )
  55. # TODO(jelmer): Switch to dataclass?
  56. IndexEntry = collections.namedtuple(
  57. "IndexEntry",
  58. [
  59. "ctime",
  60. "mtime",
  61. "dev",
  62. "ino",
  63. "mode",
  64. "uid",
  65. "gid",
  66. "size",
  67. "sha",
  68. "flags",
  69. "extended_flags",
  70. ],
  71. )
  72. # 2-bit stage (during merge)
  73. FLAG_STAGEMASK = 0x3000
  74. # assume-valid
  75. FLAG_VALID = 0x8000
  76. # extended flag (must be zero in version 2)
  77. FLAG_EXTENDED = 0x4000
  78. # used by sparse checkout
  79. EXTENDED_FLAG_SKIP_WORKTREE = 0x4000
  80. # used by "git add -N"
  81. EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
  82. DEFAULT_VERSION = 2
  83. def pathsplit(path: bytes) -> Tuple[bytes, bytes]:
  84. """Split a /-delimited path into a directory part and a basename.
  85. Args:
  86. path: The path to split.
  87. Returns:
  88. Tuple with directory name and basename
  89. """
  90. try:
  91. (dirname, basename) = path.rsplit(b"/", 1)
  92. except ValueError:
  93. return (b"", path)
  94. else:
  95. return (dirname, basename)
  96. def pathjoin(*args):
  97. """Join a /-delimited path."""
  98. return b"/".join([p for p in args if p])
  99. def read_cache_time(f):
  100. """Read a cache time.
  101. Args:
  102. f: File-like object to read from
  103. Returns:
  104. Tuple with seconds and nanoseconds
  105. """
  106. return struct.unpack(">LL", f.read(8))
  107. def write_cache_time(f, t):
  108. """Write a cache time.
  109. Args:
  110. f: File-like object to write to
  111. t: Time to write (as int, float or tuple with secs and nsecs)
  112. """
  113. if isinstance(t, int):
  114. t = (t, 0)
  115. elif isinstance(t, float):
  116. (secs, nsecs) = divmod(t, 1.0)
  117. t = (int(secs), int(nsecs * 1000000000))
  118. elif not isinstance(t, tuple):
  119. raise TypeError(t)
  120. f.write(struct.pack(">LL", *t))
  121. def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]:
  122. """Read an entry from a cache file.
  123. Args:
  124. f: File-like object to read from
  125. Returns:
  126. tuple with: name, IndexEntry
  127. """
  128. beginoffset = f.tell()
  129. ctime = read_cache_time(f)
  130. mtime = read_cache_time(f)
  131. (
  132. dev,
  133. ino,
  134. mode,
  135. uid,
  136. gid,
  137. size,
  138. sha,
  139. flags,
  140. ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  141. if flags & FLAG_EXTENDED:
  142. if version < 3:
  143. raise AssertionError(
  144. 'extended flag set in index with version < 3')
  145. (extended_flags, ) = struct.unpack(">H", f.read(2))
  146. else:
  147. extended_flags = 0
  148. name = f.read(flags & 0x0FFF)
  149. # Padding:
  150. if version < 4:
  151. real_size = (f.tell() - beginoffset + 8) & ~7
  152. f.read((beginoffset + real_size) - f.tell())
  153. return (
  154. name,
  155. IndexEntry(
  156. ctime,
  157. mtime,
  158. dev,
  159. ino,
  160. mode,
  161. uid,
  162. gid,
  163. size,
  164. sha_to_hex(sha),
  165. flags & ~0x0FFF,
  166. extended_flags,
  167. ))
  168. def write_cache_entry(f, name: bytes, entry: IndexEntry, version: int) -> None:
  169. """Write an index entry to a file.
  170. Args:
  171. f: File object
  172. entry: IndexEntry to write, tuple with:
  173. """
  174. beginoffset = f.tell()
  175. write_cache_time(f, entry.ctime)
  176. write_cache_time(f, entry.mtime)
  177. flags = len(name) | (entry.flags & ~0x0FFF)
  178. if entry.extended_flags:
  179. flags |= FLAG_EXTENDED
  180. if flags & FLAG_EXTENDED and version is not None and version < 3:
  181. raise AssertionError('unable to use extended flags in version < 3')
  182. f.write(
  183. struct.pack(
  184. b">LLLLLL20sH",
  185. entry.dev & 0xFFFFFFFF,
  186. entry.ino & 0xFFFFFFFF,
  187. entry.mode,
  188. entry.uid,
  189. entry.gid,
  190. entry.size,
  191. hex_to_sha(entry.sha),
  192. flags,
  193. )
  194. )
  195. if flags & FLAG_EXTENDED:
  196. f.write(struct.pack(b">H", entry.extended_flags))
  197. f.write(name)
  198. if version < 4:
  199. real_size = (f.tell() - beginoffset + 8) & ~7
  200. f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
  201. class UnsupportedIndexFormat(Exception):
  202. """An unsupported index format was encountered."""
  203. def __init__(self, version):
  204. self.index_format_version = version
  205. def read_index(f: BinaryIO):
  206. """Read an index file, yielding the individual entries."""
  207. header = f.read(4)
  208. if header != b"DIRC":
  209. raise AssertionError("Invalid index file header: %r" % header)
  210. (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2))
  211. if version not in (1, 2, 3):
  212. raise UnsupportedIndexFormat(version)
  213. for i in range(num_entries):
  214. yield read_cache_entry(f, version)
  215. def read_index_dict(f) -> Dict[bytes, IndexEntry]:
  216. """Read an index file and return it as a dictionary.
  217. Args:
  218. f: File object to read from
  219. """
  220. ret = {}
  221. for name, entry in read_index(f):
  222. ret[name] = entry
  223. return ret
  224. def write_index(f: BinaryIO, entries: List[Tuple[bytes, IndexEntry]], version: Optional[int] = None):
  225. """Write an index file.
  226. Args:
  227. f: File-like object to write to
  228. version: Version number to write
  229. entries: Iterable over the entries to write
  230. """
  231. if version is None:
  232. version = DEFAULT_VERSION
  233. f.write(b"DIRC")
  234. f.write(struct.pack(b">LL", version, len(entries)))
  235. for name, entry in entries:
  236. write_cache_entry(f, name, entry, version)
  237. def write_index_dict(
  238. f: BinaryIO,
  239. entries: Dict[bytes, IndexEntry],
  240. version: Optional[int] = None,
  241. ) -> None:
  242. """Write an index file based on the contents of a dictionary."""
  243. entries_list = []
  244. for name in sorted(entries):
  245. entries_list.append((name, entries[name]))
  246. write_index(f, entries_list, version=version)
  247. def cleanup_mode(mode: int) -> int:
  248. """Cleanup a mode value.
  249. This will return a mode that can be stored in a tree object.
  250. Args:
  251. mode: Mode to clean up.
  252. Returns:
  253. mode
  254. """
  255. if stat.S_ISLNK(mode):
  256. return stat.S_IFLNK
  257. elif stat.S_ISDIR(mode):
  258. return stat.S_IFDIR
  259. elif S_ISGITLINK(mode):
  260. return S_IFGITLINK
  261. ret = stat.S_IFREG | 0o644
  262. if mode & 0o100:
  263. ret |= 0o111
  264. return ret
  265. class Index:
  266. """A Git Index file."""
  267. def __init__(self, filename: Union[bytes, str], read=True):
  268. """Create an index object associated with the given filename.
  269. Args:
  270. filename: Path to the index file
  271. read: Whether to initialize the index from the given file, should it exist.
  272. """
  273. self._filename = filename
  274. # TODO(jelmer): Store the version returned by read_index
  275. self._version = None
  276. self.clear()
  277. if read:
  278. self.read()
  279. @property
  280. def path(self):
  281. return self._filename
  282. def __repr__(self):
  283. return "{}({!r})".format(self.__class__.__name__, self._filename)
  284. def write(self) -> None:
  285. """Write current contents of index to disk."""
  286. f = GitFile(self._filename, "wb")
  287. try:
  288. f = SHA1Writer(f)
  289. write_index_dict(f, self._byname, version=self._version)
  290. finally:
  291. f.close()
  292. def read(self):
  293. """Read current contents of index from disk."""
  294. if not os.path.exists(self._filename):
  295. return
  296. f = GitFile(self._filename, "rb")
  297. try:
  298. f = SHA1Reader(f)
  299. for name, entry in read_index(f):
  300. self[name] = entry
  301. # FIXME: Additional data?
  302. f.read(os.path.getsize(self._filename) - f.tell() - 20)
  303. f.check_sha()
  304. finally:
  305. f.close()
  306. def __len__(self) -> int:
  307. """Number of entries in this index file."""
  308. return len(self._byname)
  309. def __getitem__(self, name: bytes) -> IndexEntry:
  310. """Retrieve entry by relative path.
  311. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
  312. flags)
  313. """
  314. return self._byname[name]
  315. def __iter__(self) -> Iterator[bytes]:
  316. """Iterate over the paths in this index."""
  317. return iter(self._byname)
  318. def get_sha1(self, path: bytes) -> bytes:
  319. """Return the (git object) SHA1 for the object at a path."""
  320. return self[path].sha
  321. def get_mode(self, path: bytes) -> int:
  322. """Return the POSIX file mode for the object at a path."""
  323. return self[path].mode
  324. def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
  325. """Iterate over path, sha, mode tuples for use with commit_tree."""
  326. for path in self:
  327. entry = self[path]
  328. yield path, entry.sha, cleanup_mode(entry.mode)
  329. def clear(self):
  330. """Remove all contents from this index."""
  331. self._byname = {}
  332. def __setitem__(self, name: bytes, x: IndexEntry):
  333. assert isinstance(name, bytes)
  334. assert len(x) == len(IndexEntry._fields)
  335. # Remove the old entry if any
  336. self._byname[name] = IndexEntry(*x)
  337. def __delitem__(self, name: bytes):
  338. assert isinstance(name, bytes)
  339. del self._byname[name]
  340. def iteritems(self) -> Iterator[Tuple[bytes, IndexEntry]]:
  341. return self._byname.items()
  342. def items(self) -> Iterator[Tuple[bytes, IndexEntry]]:
  343. return self._byname.items()
  344. def update(self, entries: Dict[bytes, IndexEntry]):
  345. for name, value in entries.items():
  346. self[name] = value
  347. def changes_from_tree(
  348. self, object_store, tree: ObjectID, want_unchanged: bool = False):
  349. """Find the differences between the contents of this index and a tree.
  350. Args:
  351. object_store: Object store to use for retrieving tree contents
  352. tree: SHA1 of the root tree
  353. want_unchanged: Whether unchanged files should be reported
  354. Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
  355. newmode), (oldsha, newsha)
  356. """
  357. def lookup_entry(path):
  358. entry = self[path]
  359. return entry.sha, cleanup_mode(entry.mode)
  360. yield from changes_from_tree(
  361. self._byname.keys(),
  362. lookup_entry,
  363. object_store,
  364. tree,
  365. want_unchanged=want_unchanged,
  366. )
  367. def commit(self, object_store):
  368. """Create a new tree from an index.
  369. Args:
  370. object_store: Object store to save the tree in
  371. Returns:
  372. Root tree SHA
  373. """
  374. return commit_tree(object_store, self.iterobjects())
  375. def commit_tree(
  376. object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]]
  377. ) -> bytes:
  378. """Commit a new tree.
  379. Args:
  380. object_store: Object store to add trees to
  381. blobs: Iterable over blob path, sha, mode entries
  382. Returns:
  383. SHA1 of the created tree.
  384. """
  385. trees: Dict[bytes, Any] = {b"": {}}
  386. def add_tree(path):
  387. if path in trees:
  388. return trees[path]
  389. dirname, basename = pathsplit(path)
  390. t = add_tree(dirname)
  391. assert isinstance(basename, bytes)
  392. newtree = {}
  393. t[basename] = newtree
  394. trees[path] = newtree
  395. return newtree
  396. for path, sha, mode in blobs:
  397. tree_path, basename = pathsplit(path)
  398. tree = add_tree(tree_path)
  399. tree[basename] = (mode, sha)
  400. def build_tree(path):
  401. tree = Tree()
  402. for basename, entry in trees[path].items():
  403. if isinstance(entry, dict):
  404. mode = stat.S_IFDIR
  405. sha = build_tree(pathjoin(path, basename))
  406. else:
  407. (mode, sha) = entry
  408. tree.add(basename, mode, sha)
  409. object_store.add_object(tree)
  410. return tree.id
  411. return build_tree(b"")
  412. def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes:
  413. """Create a new tree from an index.
  414. Args:
  415. object_store: Object store to save the tree in
  416. index: Index file
  417. Note: This function is deprecated, use index.commit() instead.
  418. Returns: Root tree sha.
  419. """
  420. return commit_tree(object_store, index.iterobjects())
  421. def changes_from_tree(
  422. names: Iterable[bytes],
  423. lookup_entry: Callable[[bytes], Tuple[bytes, int]],
  424. object_store: "BaseObjectStore",
  425. tree: Optional[bytes],
  426. want_unchanged=False,
  427. ) -> Iterable[
  428. Tuple[
  429. Tuple[Optional[bytes], Optional[bytes]],
  430. Tuple[Optional[int], Optional[int]],
  431. Tuple[Optional[bytes], Optional[bytes]],
  432. ]
  433. ]:
  434. """Find the differences between the contents of a tree and
  435. a working copy.
  436. Args:
  437. names: Iterable of names in the working copy
  438. lookup_entry: Function to lookup an entry in the working copy
  439. object_store: Object store to use for retrieving tree contents
  440. tree: SHA1 of the root tree, or None for an empty tree
  441. want_unchanged: Whether unchanged files should be reported
  442. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  443. (oldsha, newsha)
  444. """
  445. # TODO(jelmer): Support a include_trees option
  446. other_names = set(names)
  447. if tree is not None:
  448. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  449. try:
  450. (other_sha, other_mode) = lookup_entry(name)
  451. except KeyError:
  452. # Was removed
  453. yield ((name, None), (mode, None), (sha, None))
  454. else:
  455. other_names.remove(name)
  456. if want_unchanged or other_sha != sha or other_mode != mode:
  457. yield ((name, name), (mode, other_mode), (sha, other_sha))
  458. # Mention added files
  459. for name in other_names:
  460. try:
  461. (other_sha, other_mode) = lookup_entry(name)
  462. except KeyError:
  463. pass
  464. else:
  465. yield ((None, name), (None, other_mode), (None, other_sha))
  466. def index_entry_from_stat(
  467. stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
  468. extended_flags: Optional[int] = None
  469. ):
  470. """Create a new index entry from a stat value.
  471. Args:
  472. stat_val: POSIX stat_result instance
  473. hex_sha: Hex sha of the object
  474. flags: Index flags
  475. """
  476. if mode is None:
  477. mode = cleanup_mode(stat_val.st_mode)
  478. return IndexEntry(
  479. stat_val.st_ctime,
  480. stat_val.st_mtime,
  481. stat_val.st_dev,
  482. stat_val.st_ino,
  483. mode,
  484. stat_val.st_uid,
  485. stat_val.st_gid,
  486. stat_val.st_size,
  487. hex_sha,
  488. flags,
  489. extended_flags
  490. )
  491. if sys.platform == 'win32':
  492. # On Windows, creating symlinks either requires administrator privileges
  493. # or developer mode. Raise a more helpful error when we're unable to
  494. # create symlinks
  495. # https://github.com/jelmer/dulwich/issues/1005
  496. class WindowsSymlinkPermissionError(PermissionError):
  497. def __init__(self, errno, msg, filename):
  498. super(PermissionError, self).__init__(
  499. errno, "Unable to create symlink; "
  500. "do you have developer mode enabled? %s" % msg,
  501. filename)
  502. def symlink(src, dst, target_is_directory=False, *, dir_fd=None):
  503. try:
  504. return os.symlink(
  505. src, dst, target_is_directory=target_is_directory,
  506. dir_fd=dir_fd)
  507. except PermissionError as e:
  508. raise WindowsSymlinkPermissionError(
  509. e.errno, e.strerror, e.filename) from e
  510. else:
  511. symlink = os.symlink
  512. def build_file_from_blob(
  513. blob: Blob, mode: int, target_path: bytes, *, honor_filemode=True,
  514. tree_encoding="utf-8", symlink_fn=None
  515. ):
  516. """Build a file or symlink on disk based on a Git object.
  517. Args:
  518. blob: The git object
  519. mode: File mode
  520. target_path: Path to write to
  521. honor_filemode: An optional flag to honor core.filemode setting in
  522. config file, default is core.filemode=True, change executable bit
  523. symlink: Function to use for creating symlinks
  524. Returns: stat object for the file
  525. """
  526. try:
  527. oldstat = os.lstat(target_path)
  528. except FileNotFoundError:
  529. oldstat = None
  530. contents = blob.as_raw_string()
  531. if stat.S_ISLNK(mode):
  532. if oldstat:
  533. os.unlink(target_path)
  534. if sys.platform == "win32":
  535. # os.readlink on Python3 on Windows requires a unicode string.
  536. contents = contents.decode(tree_encoding) # type: ignore
  537. target_path = target_path.decode(tree_encoding) # type: ignore
  538. (symlink_fn or symlink)(contents, target_path)
  539. else:
  540. if oldstat is not None and oldstat.st_size == len(contents):
  541. with open(target_path, "rb") as f:
  542. if f.read() == contents:
  543. return oldstat
  544. with open(target_path, "wb") as f:
  545. # Write out file
  546. f.write(contents)
  547. if honor_filemode:
  548. os.chmod(target_path, mode)
  549. return os.lstat(target_path)
# Path elements rejected by the path element validators: the repository
# metadata directory, the current/parent directory references and the
# empty element.
INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  551. def validate_path_element_default(element: bytes) -> bool:
  552. return element.lower() not in INVALID_DOTNAMES
  553. def validate_path_element_ntfs(element: bytes) -> bool:
  554. stripped = element.rstrip(b". ").lower()
  555. if stripped in INVALID_DOTNAMES:
  556. return False
  557. if stripped == b"git~1":
  558. return False
  559. return True
  560. def validate_path(path: bytes,
  561. element_validator=validate_path_element_default) -> bool:
  562. """Default path validator that just checks for .git/."""
  563. parts = path.split(b"/")
  564. for p in parts:
  565. if not element_validator(p):
  566. return False
  567. else:
  568. return True
def build_index_from_tree(
    root_path: Union[str, bytes],
    index_path: Union[str, bytes],
    object_store: "BaseObjectStore",
    tree_id: bytes,
    honor_filemode: bool = True,
    validate_path_element=validate_path_element_default,
    symlink_fn=None
):
    """Generate and materialize index from a tree

    Every entry of the tree is written below root_path, recorded in a new
    index, and the index is written to index_path at the end.

    Args:
      tree_id: Tree to materialize
      root_path: Target dir for materialized index files
      index_path: Target path for generated index
      object_store: Non-empty object store holding tree contents
      honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
      validate_path_element: Function to validate path elements to check
        out; default just refuses .git and .. directories.
      symlink_fn: Function used to create symlinks; handed through to
        build_file_from_blob

    Note: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """
    # Start from an empty index; an existing file at index_path is not read.
    index = Index(index_path, read=False)
    if not isinstance(root_path, bytes):
        root_path = os.fsencode(root_path)
    for entry in object_store.iter_tree_contents(tree_id):
        # Entries with a rejected path element are silently skipped.
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)
        if not os.path.exists(os.path.dirname(full_path)):
            os.makedirs(os.path.dirname(full_path))
        # TODO(jelmer): Merge new index into working tree
        if S_ISGITLINK(entry.mode):
            # Gitlinks are materialized as a (possibly empty) directory.
            if not os.path.isdir(full_path):
                os.mkdir(full_path)
            st = os.lstat(full_path)
            # TODO(jelmer): record and return submodule paths
        else:
            obj = object_store[entry.sha]
            st = build_file_from_blob(
                obj, entry.mode, full_path,
                honor_filemode=honor_filemode,
                symlink_fn=symlink_fn,
            )
        # Add file to index
        if not honor_filemode or S_ISGITLINK(entry.mode):
            # Rebuild the stat result so that it carries the mode from the
            # tree entry instead of the mode found on disk.
            # we can not use tuple slicing to build a new tuple,
            # because on windows that will convert the times to
            # longs, which causes errors further along
            st_tuple = (
                entry.mode,
                st.st_ino,
                st.st_dev,
                st.st_nlink,
                st.st_uid,
                st.st_gid,
                st.st_size,
                st.st_atime,
                st.st_mtime,
                st.st_ctime,
            )
            st = st.__class__(st_tuple)
        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
    index.write()
  633. def blob_from_path_and_mode(fs_path: bytes, mode: int,
  634. tree_encoding="utf-8"):
  635. """Create a blob from a path and a stat object.
  636. Args:
  637. fs_path: Full file system path to file
  638. mode: File mode
  639. Returns: A `Blob` object
  640. """
  641. assert isinstance(fs_path, bytes)
  642. blob = Blob()
  643. if stat.S_ISLNK(mode):
  644. if sys.platform == "win32":
  645. # os.readlink on Python3 on Windows requires a unicode string.
  646. blob.data = os.readlink(os.fsdecode(fs_path)).encode(tree_encoding)
  647. else:
  648. blob.data = os.readlink(fs_path)
  649. else:
  650. with open(fs_path, "rb") as f:
  651. blob.data = f.read()
  652. return blob
  653. def blob_from_path_and_stat(fs_path: bytes, st, tree_encoding="utf-8"):
  654. """Create a blob from a path and a stat object.
  655. Args:
  656. fs_path: Full file system path to file
  657. st: A stat object
  658. Returns: A `Blob` object
  659. """
  660. return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding)
  661. def read_submodule_head(path: Union[str, bytes]) -> Optional[bytes]:
  662. """Read the head commit of a submodule.
  663. Args:
  664. path: path to the submodule
  665. Returns: HEAD sha, None if not a valid head/repository
  666. """
  667. from dulwich.errors import NotGitRepository
  668. from dulwich.repo import Repo
  669. # Repo currently expects a "str", so decode if necessary.
  670. # TODO(jelmer): Perhaps move this into Repo() ?
  671. if not isinstance(path, str):
  672. path = os.fsdecode(path)
  673. try:
  674. repo = Repo(path)
  675. except NotGitRepository:
  676. return None
  677. try:
  678. return repo.head()
  679. except KeyError:
  680. return None
  681. def _has_directory_changed(tree_path: bytes, entry):
  682. """Check if a directory has changed after getting an error.
  683. When handling an error trying to create a blob from a path, call this
  684. function. It will check if the path is a directory. If it's a directory
  685. and a submodule, check the submodule head to see if it's has changed. If
  686. not, consider the file as changed as Git tracked a file and not a
  687. directory.
  688. Return true if the given path should be considered as changed and False
  689. otherwise or if the path is not a directory.
  690. """
  691. # This is actually a directory
  692. if os.path.exists(os.path.join(tree_path, b".git")):
  693. # Submodule
  694. head = read_submodule_head(tree_path)
  695. if entry.sha != head:
  696. return True
  697. else:
  698. # The file was changed to a directory, so consider it removed.
  699. return True
  700. return False
  701. def get_unstaged_changes(
  702. index: Index, root_path: Union[str, bytes],
  703. filter_blob_callback=None):
  704. """Walk through an index and check for differences against working tree.
  705. Args:
  706. index: index to check
  707. root_path: path in which to find files
  708. Returns: iterator over paths with unstaged changes
  709. """
  710. # For each entry in the index check the sha1 & ensure not staged
  711. if not isinstance(root_path, bytes):
  712. root_path = os.fsencode(root_path)
  713. for tree_path, entry in index.iteritems():
  714. full_path = _tree_to_fs_path(root_path, tree_path)
  715. try:
  716. st = os.lstat(full_path)
  717. if stat.S_ISDIR(st.st_mode):
  718. if _has_directory_changed(tree_path, entry):
  719. yield tree_path
  720. continue
  721. if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode):
  722. continue
  723. blob = blob_from_path_and_stat(full_path, st)
  724. if filter_blob_callback is not None:
  725. blob = filter_blob_callback(blob, tree_path)
  726. except FileNotFoundError:
  727. # The file was removed, so we assume that counts as
  728. # different from whatever file used to exist.
  729. yield tree_path
  730. else:
  731. if blob.id != entry.sha:
  732. yield tree_path
# The platform's path separator as bytes, used when converting between git
# tree paths ("/"-separated) and filesystem paths.
os_sep_bytes = os.sep.encode("ascii")
  734. def _tree_to_fs_path(root_path: bytes, tree_path: bytes):
  735. """Convert a git tree path to a file system path.
  736. Args:
  737. root_path: Root filesystem path
  738. tree_path: Git tree path as bytes
  739. Returns: File system path.
  740. """
  741. assert isinstance(tree_path, bytes)
  742. if os_sep_bytes != b"/":
  743. sep_corrected_path = tree_path.replace(b"/", os_sep_bytes)
  744. else:
  745. sep_corrected_path = tree_path
  746. return os.path.join(root_path, sep_corrected_path)
  747. def _fs_to_tree_path(fs_path: Union[str, bytes]) -> bytes:
  748. """Convert a file system path to a git tree path.
  749. Args:
  750. fs_path: File system path.
  751. Returns: Git tree path as bytes
  752. """
  753. if not isinstance(fs_path, bytes):
  754. fs_path_bytes = os.fsencode(fs_path)
  755. else:
  756. fs_path_bytes = fs_path
  757. if os_sep_bytes != b"/":
  758. tree_path = fs_path_bytes.replace(os_sep_bytes, b"/")
  759. else:
  760. tree_path = fs_path_bytes
  761. return tree_path
  762. def index_entry_from_directory(st, path: bytes) -> Optional[IndexEntry]:
  763. if os.path.exists(os.path.join(path, b".git")):
  764. head = read_submodule_head(path)
  765. if head is None:
  766. return None
  767. return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
  768. return None
  769. def index_entry_from_path(
  770. path: bytes, object_store: Optional["BaseObjectStore"] = None
  771. ) -> Optional[IndexEntry]:
  772. """Create an index from a filesystem path.
  773. This returns an index value for files, symlinks
  774. and tree references. for directories and
  775. non-existent files it returns None
  776. Args:
  777. path: Path to create an index entry for
  778. object_store: Optional object store to
  779. save new blobs in
  780. Returns: An index entry; None for directories
  781. """
  782. assert isinstance(path, bytes)
  783. st = os.lstat(path)
  784. if stat.S_ISDIR(st.st_mode):
  785. return index_entry_from_directory(st, path)
  786. if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
  787. blob = blob_from_path_and_stat(path, st)
  788. if object_store is not None:
  789. object_store.add_object(blob)
  790. return index_entry_from_stat(st, blob.id, 0)
  791. return None
  792. def iter_fresh_entries(
  793. paths: Iterable[bytes], root_path: bytes,
  794. object_store: Optional["BaseObjectStore"] = None
  795. ) -> Iterator[Tuple[bytes, Optional[IndexEntry]]]:
  796. """Iterate over current versions of index entries on disk.
  797. Args:
  798. paths: Paths to iterate over
  799. root_path: Root path to access from
  800. object_store: Optional store to save new blobs in
  801. Returns: Iterator over path, index_entry
  802. """
  803. for path in paths:
  804. p = _tree_to_fs_path(root_path, path)
  805. try:
  806. entry = index_entry_from_path(p, object_store=object_store)
  807. except (FileNotFoundError, IsADirectoryError):
  808. entry = None
  809. yield path, entry
  810. def iter_fresh_objects(
  811. paths: Iterable[bytes], root_path: bytes, include_deleted=False,
  812. object_store=None) -> Iterator[
  813. Tuple[bytes, Optional[bytes], Optional[int]]]:
  814. """Iterate over versions of objects on disk referenced by index.
  815. Args:
  816. root_path: Root path to access from
  817. include_deleted: Include deleted entries with sha and
  818. mode set to None
  819. object_store: Optional object store to report new items to
  820. Returns: Iterator over path, sha, mode
  821. """
  822. for path, entry in iter_fresh_entries(
  823. paths, root_path, object_store=object_store):
  824. if entry is None:
  825. if include_deleted:
  826. yield path, None, None
  827. else:
  828. entry = IndexEntry(*entry)
  829. yield path, entry.sha, cleanup_mode(entry.mode)
  830. def refresh_index(index: Index, root_path: bytes):
  831. """Refresh the contents of an index.
  832. This is the equivalent to running 'git commit -a'.
  833. Args:
  834. index: Index to update
  835. root_path: Root filesystem path
  836. """
  837. for path, entry in iter_fresh_entries(index, root_path):
  838. if entry:
  839. index[path] = entry
  840. class locked_index:
  841. """Lock the index while making modifications.
  842. Works as a context manager.
  843. """
  844. def __init__(self, path: Union[bytes, str]):
  845. self._path = path
  846. def __enter__(self):
  847. self._file = GitFile(self._path, "wb")
  848. self._index = Index(self._path)
  849. return self._index
  850. def __exit__(self, exc_type, exc_value, traceback):
  851. if exc_type is not None:
  852. self._file.abort()
  853. return
  854. try:
  855. f = SHA1Writer(self._file)
  856. write_index_dict(f, self._index._byname)
  857. except BaseException:
  858. self._file.abort()
  859. else:
  860. f.close()