index.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) any later version of the license.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Parser for the git index file format."""
  19. import collections
  20. import errno
  21. import os
  22. import stat
  23. import struct
  24. import sys
  25. from dulwich.file import GitFile
  26. from dulwich.objects import (
  27. Blob,
  28. S_IFGITLINK,
  29. S_ISGITLINK,
  30. Tree,
  31. hex_to_sha,
  32. sha_to_hex,
  33. )
  34. from dulwich.pack import (
  35. SHA1Reader,
  36. SHA1Writer,
  37. )
  38. IndexEntry = collections.namedtuple(
  39. 'IndexEntry', [
  40. 'ctime', 'mtime', 'dev', 'ino', 'mode', 'uid', 'gid', 'size', 'sha',
  41. 'flags'])
  42. def pathsplit(path):
  43. """Split a /-delimited path into a directory part and a basename.
  44. :param path: The path to split.
  45. :return: Tuple with directory name and basename
  46. """
  47. try:
  48. (dirname, basename) = path.rsplit(b"/", 1)
  49. except ValueError:
  50. return (b"", path)
  51. else:
  52. return (dirname, basename)
  53. def pathjoin(*args):
  54. """Join a /-delimited path.
  55. """
  56. return b"/".join([p for p in args if p])
  57. def read_cache_time(f):
  58. """Read a cache time.
  59. :param f: File-like object to read from
  60. :return: Tuple with seconds and nanoseconds
  61. """
  62. return struct.unpack(">LL", f.read(8))
  63. def write_cache_time(f, t):
  64. """Write a cache time.
  65. :param f: File-like object to write to
  66. :param t: Time to write (as int, float or tuple with secs and nsecs)
  67. """
  68. if isinstance(t, int):
  69. t = (t, 0)
  70. elif isinstance(t, float):
  71. (secs, nsecs) = divmod(t, 1.0)
  72. t = (int(secs), int(nsecs * 1000000000))
  73. elif not isinstance(t, tuple):
  74. raise TypeError(t)
  75. f.write(struct.pack(">LL", *t))
  76. def read_cache_entry(f):
  77. """Read an entry from a cache file.
  78. :param f: File-like object to read from
  79. :return: tuple with: device, inode, mode, uid, gid, size, sha, flags
  80. """
  81. beginoffset = f.tell()
  82. ctime = read_cache_time(f)
  83. mtime = read_cache_time(f)
  84. (dev, ino, mode, uid, gid, size, sha, flags, ) = \
  85. struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  86. name = f.read((flags & 0x0fff))
  87. # Padding:
  88. real_size = ((f.tell() - beginoffset + 8) & ~7)
  89. f.read((beginoffset + real_size) - f.tell())
  90. return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
  91. sha_to_hex(sha), flags & ~0x0fff)
  92. def write_cache_entry(f, entry):
  93. """Write an index entry to a file.
  94. :param f: File object
  95. :param entry: Entry to write, tuple with:
  96. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  97. """
  98. beginoffset = f.tell()
  99. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
  100. write_cache_time(f, ctime)
  101. write_cache_time(f, mtime)
  102. flags = len(name) | (flags &~ 0x0fff)
  103. f.write(struct.pack(b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF, mode, uid, gid, size, hex_to_sha(sha), flags))
  104. f.write(name)
  105. real_size = ((f.tell() - beginoffset + 8) & ~7)
  106. f.write(b'\0' * ((beginoffset + real_size) - f.tell()))
  107. def read_index(f):
  108. """Read an index file, yielding the individual entries."""
  109. header = f.read(4)
  110. if header != b'DIRC':
  111. raise AssertionError("Invalid index file header: %r" % header)
  112. (version, num_entries) = struct.unpack(b'>LL', f.read(4 * 2))
  113. assert version in (1, 2)
  114. for i in range(num_entries):
  115. yield read_cache_entry(f)
  116. def read_index_dict(f):
  117. """Read an index file and return it as a dictionary.
  118. :param f: File object to read from
  119. """
  120. ret = {}
  121. for x in read_index(f):
  122. ret[x[0]] = IndexEntry(*x[1:])
  123. return ret
  124. def write_index(f, entries):
  125. """Write an index file.
  126. :param f: File-like object to write to
  127. :param entries: Iterable over the entries to write
  128. """
  129. f.write(b'DIRC')
  130. f.write(struct.pack(b'>LL', 2, len(entries)))
  131. for x in entries:
  132. write_cache_entry(f, x)
  133. def write_index_dict(f, entries):
  134. """Write an index file based on the contents of a dictionary.
  135. """
  136. entries_list = []
  137. for name in sorted(entries):
  138. entries_list.append((name,) + tuple(entries[name]))
  139. write_index(f, entries_list)
  140. def cleanup_mode(mode):
  141. """Cleanup a mode value.
  142. This will return a mode that can be stored in a tree object.
  143. :param mode: Mode to clean up.
  144. """
  145. if stat.S_ISLNK(mode):
  146. return stat.S_IFLNK
  147. elif stat.S_ISDIR(mode):
  148. return stat.S_IFDIR
  149. elif S_ISGITLINK(mode):
  150. return S_IFGITLINK
  151. ret = stat.S_IFREG | 0o644
  152. ret |= (mode & 0o111)
  153. return ret
  154. class Index(object):
  155. """A Git Index file."""
  156. def __init__(self, filename):
  157. """Open an index file.
  158. :param filename: Path to the index file
  159. """
  160. self._filename = filename
  161. self.clear()
  162. self.read()
  163. @property
  164. def path(self):
  165. return self._filename
  166. def __repr__(self):
  167. return "%s(%r)" % (self.__class__.__name__, self._filename)
  168. def write(self):
  169. """Write current contents of index to disk."""
  170. f = GitFile(self._filename, 'wb')
  171. try:
  172. f = SHA1Writer(f)
  173. write_index_dict(f, self._byname)
  174. finally:
  175. f.close()
  176. def read(self):
  177. """Read current contents of index from disk."""
  178. if not os.path.exists(self._filename):
  179. return
  180. f = GitFile(self._filename, 'rb')
  181. try:
  182. f = SHA1Reader(f)
  183. for x in read_index(f):
  184. self[x[0]] = IndexEntry(*x[1:])
  185. # FIXME: Additional data?
  186. f.read(os.path.getsize(self._filename)-f.tell()-20)
  187. f.check_sha()
  188. finally:
  189. f.close()
  190. def __len__(self):
  191. """Number of entries in this index file."""
  192. return len(self._byname)
  193. def __getitem__(self, name):
  194. """Retrieve entry by relative path.
  195. :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  196. """
  197. return self._byname[name]
  198. def __iter__(self):
  199. """Iterate over the paths in this index."""
  200. return iter(self._byname)
  201. def get_sha1(self, path):
  202. """Return the (git object) SHA1 for the object at a path."""
  203. return self[path].sha
  204. def get_mode(self, path):
  205. """Return the POSIX file mode for the object at a path."""
  206. return self[path].mode
  207. def iterblobs(self):
  208. """Iterate over path, sha, mode tuples for use with commit_tree."""
  209. for path in self:
  210. entry = self[path]
  211. yield path, entry.sha, cleanup_mode(entry.mode)
  212. def clear(self):
  213. """Remove all contents from this index."""
  214. self._byname = {}
  215. def __setitem__(self, name, x):
  216. assert isinstance(name, bytes)
  217. assert len(x) == 10
  218. # Remove the old entry if any
  219. self._byname[name] = x
  220. def __delitem__(self, name):
  221. assert isinstance(name, bytes)
  222. del self._byname[name]
  223. def iteritems(self):
  224. return self._byname.items()
  225. def update(self, entries):
  226. for name, value in entries.items():
  227. self[name] = value
  228. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  229. """Find the differences between the contents of this index and a tree.
  230. :param object_store: Object store to use for retrieving tree contents
  231. :param tree: SHA1 of the root tree
  232. :param want_unchanged: Whether unchanged files should be reported
  233. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
  234. """
  235. def lookup_entry(path):
  236. entry = self[path]
  237. return entry.sha, entry.mode
  238. for (name, mode, sha) in changes_from_tree(self._byname.keys(),
  239. lookup_entry, object_store, tree,
  240. want_unchanged=want_unchanged):
  241. yield (name, mode, sha)
  242. def commit(self, object_store):
  243. """Create a new tree from an index.
  244. :param object_store: Object store to save the tree in
  245. :return: Root tree SHA
  246. """
  247. return commit_tree(object_store, self.iterblobs())
  248. def commit_tree(object_store, blobs):
  249. """Commit a new tree.
  250. :param object_store: Object store to add trees to
  251. :param blobs: Iterable over blob path, sha, mode entries
  252. :return: SHA1 of the created tree.
  253. """
  254. trees = {b'': {}}
  255. def add_tree(path):
  256. if path in trees:
  257. return trees[path]
  258. dirname, basename = pathsplit(path)
  259. t = add_tree(dirname)
  260. assert isinstance(basename, bytes)
  261. newtree = {}
  262. t[basename] = newtree
  263. trees[path] = newtree
  264. return newtree
  265. for path, sha, mode in blobs:
  266. tree_path, basename = pathsplit(path)
  267. tree = add_tree(tree_path)
  268. tree[basename] = (mode, sha)
  269. def build_tree(path):
  270. tree = Tree()
  271. for basename, entry in trees[path].items():
  272. if isinstance(entry, dict):
  273. mode = stat.S_IFDIR
  274. sha = build_tree(pathjoin(path, basename))
  275. else:
  276. (mode, sha) = entry
  277. tree.add(basename, mode, sha)
  278. object_store.add_object(tree)
  279. return tree.id
  280. return build_tree(b'')
  281. def commit_index(object_store, index):
  282. """Create a new tree from an index.
  283. :param object_store: Object store to save the tree in
  284. :param index: Index file
  285. :note: This function is deprecated, use index.commit() instead.
  286. :return: Root tree sha.
  287. """
  288. return commit_tree(object_store, index.iterblobs())
  289. def changes_from_tree(names, lookup_entry, object_store, tree,
  290. want_unchanged=False):
  291. """Find the differences between the contents of a tree and
  292. a working copy.
  293. :param names: Iterable of names in the working copy
  294. :param lookup_entry: Function to lookup an entry in the working copy
  295. :param object_store: Object store to use for retrieving tree contents
  296. :param tree: SHA1 of the root tree, or None for an empty tree
  297. :param want_unchanged: Whether unchanged files should be reported
  298. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  299. (oldsha, newsha)
  300. """
  301. other_names = set(names)
  302. if tree is not None:
  303. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  304. try:
  305. (other_sha, other_mode) = lookup_entry(name)
  306. except KeyError:
  307. # Was removed
  308. yield ((name, None), (mode, None), (sha, None))
  309. else:
  310. other_names.remove(name)
  311. if (want_unchanged or other_sha != sha or other_mode != mode):
  312. yield ((name, name), (mode, other_mode), (sha, other_sha))
  313. # Mention added files
  314. for name in other_names:
  315. (other_sha, other_mode) = lookup_entry(name)
  316. yield ((None, name), (None, other_mode), (None, other_sha))
  317. def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
  318. """Create a new index entry from a stat value.
  319. :param stat_val: POSIX stat_result instance
  320. :param hex_sha: Hex sha of the object
  321. :param flags: Index flags
  322. """
  323. if mode is None:
  324. mode = cleanup_mode(stat_val.st_mode)
  325. return (stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev,
  326. stat_val.st_ino, mode, stat_val.st_uid,
  327. stat_val.st_gid, stat_val.st_size, hex_sha, flags)
  328. def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
  329. """Build a file or symlink on disk based on a Git object.
  330. :param obj: The git object
  331. :param mode: File mode
  332. :param target_path: Path to write to
  333. :param honor_filemode: An optional flag to honor core.filemode setting in
  334. config file, default is core.filemode=True, change executable bit
  335. """
  336. if stat.S_ISLNK(mode):
  337. # FIXME: This will fail on Windows. What should we do instead?
  338. src_path = blob.as_raw_string()
  339. try:
  340. os.symlink(src_path, target_path)
  341. except OSError as e:
  342. if e.errno == errno.EEXIST:
  343. os.unlink(target_path)
  344. os.symlink(src_path, target_path)
  345. else:
  346. raise
  347. else:
  348. with open(target_path, 'wb') as f:
  349. # Write out file
  350. f.write(blob.as_raw_string())
  351. if honor_filemode:
  352. os.chmod(target_path, mode)
  353. INVALID_DOTNAMES = (b".git", b".", b"..", b"")
  354. def validate_path_element_default(element):
  355. return element.lower() not in INVALID_DOTNAMES
  356. def validate_path_element_ntfs(element):
  357. stripped = element.rstrip(b". ").lower()
  358. if stripped in INVALID_DOTNAMES:
  359. return False
  360. if stripped == b"git~1":
  361. return False
  362. return True
  363. def validate_path(path, element_validator=validate_path_element_default):
  364. """Default path validator that just checks for .git/."""
  365. parts = path.split(b"/")
  366. for p in parts:
  367. if not element_validator(p):
  368. return False
  369. else:
  370. return True
  371. def build_index_from_tree(root_path, index_path, object_store, tree_id,
  372. honor_filemode=True,
  373. validate_path_element=validate_path_element_default):
  374. """Generate and materialize index from a tree
  375. :param tree_id: Tree to materialize
  376. :param root_path: Target dir for materialized index files
  377. :param index_path: Target path for generated index
  378. :param object_store: Non-empty object store holding tree contents
  379. :param honor_filemode: An optional flag to honor core.filemode setting in
  380. config file, default is core.filemode=True, change executable bit
  381. :param validate_path_element: Function to validate path elements to check out;
  382. default just refuses .git and .. directories.
  383. :note:: existing index is wiped and contents are not merged
  384. in a working dir. Suitable only for fresh clones.
  385. """
  386. index = Index(index_path)
  387. if not isinstance(root_path, bytes):
  388. root_path = root_path.encode(sys.getfilesystemencoding())
  389. for entry in object_store.iter_tree_contents(tree_id):
  390. if not validate_path(entry.path, validate_path_element):
  391. continue
  392. full_path = _tree_to_fs_path(root_path, entry.path)
  393. if not os.path.exists(os.path.dirname(full_path)):
  394. os.makedirs(os.path.dirname(full_path))
  395. # FIXME: Merge new index into working tree
  396. obj = object_store[entry.sha]
  397. build_file_from_blob(obj, entry.mode, full_path,
  398. honor_filemode=honor_filemode)
  399. # Add file to index
  400. st = os.lstat(full_path)
  401. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  402. index.write()
  403. def blob_from_path_and_stat(fs_path, st):
  404. """Create a blob from a path and a stat object.
  405. :param fs_path: Full file system path to file
  406. :param st: A stat object
  407. :return: A `Blob` object
  408. """
  409. assert isinstance(fs_path, bytes)
  410. blob = Blob()
  411. if not stat.S_ISLNK(st.st_mode):
  412. with open(fs_path, 'rb') as f:
  413. blob.data = f.read()
  414. else:
  415. blob.data = os.readlink(fs_path)
  416. return blob
  417. def get_unstaged_changes(index, root_path):
  418. """Walk through an index and check for differences against working tree.
  419. :param index: index to check
  420. :param root_path: path in which to find files
  421. :return: iterator over paths with unstaged changes
  422. """
  423. # For each entry in the index check the sha1 & ensure not staged
  424. if not isinstance(root_path, bytes):
  425. root_path = root_path.encode(sys.getfilesystemencoding())
  426. for tree_path, entry in index.iteritems():
  427. full_path = _tree_to_fs_path(root_path, tree_path)
  428. blob = blob_from_path_and_stat(full_path, os.lstat(full_path))
  429. if blob.id != entry.sha:
  430. yield tree_path
  431. os_sep_bytes = os.sep.encode('ascii')
  432. def _tree_to_fs_path(root_path, tree_path):
  433. """Convert a git tree path to a file system path.
  434. :param root_path: Root filesystem path
  435. :param tree_path: Git tree path as bytes
  436. :return: File system path.
  437. """
  438. assert isinstance(tree_path, bytes)
  439. if os_sep_bytes != b'/':
  440. sep_corrected_path = tree_path.replace(b'/', os_sep_bytes)
  441. else:
  442. sep_corrected_path = tree_path
  443. return os.path.join(root_path, sep_corrected_path)
  444. def _fs_to_tree_path(fs_path, fs_encoding=None):
  445. """Convert a file system path to a git tree path.
  446. :param fs_path: File system path.
  447. :param fs_encoding: File system encoding
  448. :return: Git tree path as bytes
  449. """
  450. if fs_encoding is None:
  451. fs_encoding = sys.getfilesystemencoding()
  452. if not isinstance(fs_path, bytes):
  453. fs_path_bytes = fs_path.encode(fs_encoding)
  454. else:
  455. fs_path_bytes = fs_path
  456. if os_sep_bytes != b'/':
  457. tree_path = fs_path_bytes.replace(os_sep_bytes, b'/')
  458. else:
  459. tree_path = fs_path_bytes
  460. return tree_path