index.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) any later version of the license.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Parser for the git index file format."""
  19. import collections
  20. import errno
  21. import os
  22. import stat
  23. import struct
  24. import sys
  25. from dulwich.file import GitFile
  26. from dulwich.objects import (
  27. Blob,
  28. S_IFGITLINK,
  29. S_ISGITLINK,
  30. Tree,
  31. hex_to_sha,
  32. sha_to_hex,
  33. )
  34. from dulwich.pack import (
  35. SHA1Reader,
  36. SHA1Writer,
  37. )
  38. if sys.version_info[0] == 2:
  39. iteritems = lambda d: d.iteritems()
  40. else:
  41. iteritems = lambda d: d.items()
  42. IndexEntry = collections.namedtuple(
  43. 'IndexEntry', [
  44. 'ctime', 'mtime', 'dev', 'ino', 'mode', 'uid', 'gid', 'size', 'sha',
  45. 'flags'])
  46. def pathsplit(path):
  47. """Split a /-delimited path into a directory part and a basename.
  48. :param path: The path to split.
  49. :return: Tuple with directory name and basename
  50. """
  51. try:
  52. (dirname, basename) = path.rsplit(b"/", 1)
  53. except ValueError:
  54. return (b"", path)
  55. else:
  56. return (dirname, basename)
  57. def pathjoin(*args):
  58. """Join a /-delimited path.
  59. """
  60. return b"/".join([p for p in args if p])
  61. def read_cache_time(f):
  62. """Read a cache time.
  63. :param f: File-like object to read from
  64. :return: Tuple with seconds and nanoseconds
  65. """
  66. return struct.unpack(">LL", f.read(8))
  67. def write_cache_time(f, t):
  68. """Write a cache time.
  69. :param f: File-like object to write to
  70. :param t: Time to write (as int, float or tuple with secs and nsecs)
  71. """
  72. if isinstance(t, int):
  73. t = (t, 0)
  74. elif isinstance(t, float):
  75. (secs, nsecs) = divmod(t, 1.0)
  76. t = (int(secs), int(nsecs * 1000000000))
  77. elif not isinstance(t, tuple):
  78. raise TypeError(t)
  79. f.write(struct.pack(">LL", *t))
  80. def read_cache_entry(f):
  81. """Read an entry from a cache file.
  82. :param f: File-like object to read from
  83. :return: tuple with: device, inode, mode, uid, gid, size, sha, flags
  84. """
  85. beginoffset = f.tell()
  86. ctime = read_cache_time(f)
  87. mtime = read_cache_time(f)
  88. (dev, ino, mode, uid, gid, size, sha, flags, ) = \
  89. struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  90. name = f.read((flags & 0x0fff))
  91. # Padding:
  92. real_size = ((f.tell() - beginoffset + 8) & ~7)
  93. f.read((beginoffset + real_size) - f.tell())
  94. return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
  95. sha_to_hex(sha), flags & ~0x0fff)
  96. def write_cache_entry(f, entry):
  97. """Write an index entry to a file.
  98. :param f: File object
  99. :param entry: Entry to write, tuple with:
  100. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  101. """
  102. beginoffset = f.tell()
  103. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
  104. write_cache_time(f, ctime)
  105. write_cache_time(f, mtime)
  106. flags = len(name) | (flags &~ 0x0fff)
  107. f.write(struct.pack(b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF, mode, uid, gid, size, hex_to_sha(sha), flags))
  108. f.write(name)
  109. real_size = ((f.tell() - beginoffset + 8) & ~7)
  110. f.write(b'\0' * ((beginoffset + real_size) - f.tell()))
  111. def read_index(f):
  112. """Read an index file, yielding the individual entries."""
  113. header = f.read(4)
  114. if header != b'DIRC':
  115. raise AssertionError("Invalid index file header: %r" % header)
  116. (version, num_entries) = struct.unpack(b'>LL', f.read(4 * 2))
  117. assert version in (1, 2)
  118. for i in range(num_entries):
  119. yield read_cache_entry(f)
  120. def read_index_dict(f):
  121. """Read an index file and return it as a dictionary.
  122. :param f: File object to read from
  123. """
  124. ret = {}
  125. for x in read_index(f):
  126. ret[x[0]] = IndexEntry(*x[1:])
  127. return ret
  128. def write_index(f, entries):
  129. """Write an index file.
  130. :param f: File-like object to write to
  131. :param entries: Iterable over the entries to write
  132. """
  133. f.write(b'DIRC')
  134. f.write(struct.pack(b'>LL', 2, len(entries)))
  135. for x in entries:
  136. write_cache_entry(f, x)
  137. def write_index_dict(f, entries):
  138. """Write an index file based on the contents of a dictionary.
  139. """
  140. entries_list = []
  141. for name in sorted(entries):
  142. entries_list.append((name,) + tuple(entries[name]))
  143. write_index(f, entries_list)
  144. def cleanup_mode(mode):
  145. """Cleanup a mode value.
  146. This will return a mode that can be stored in a tree object.
  147. :param mode: Mode to clean up.
  148. """
  149. if stat.S_ISLNK(mode):
  150. return stat.S_IFLNK
  151. elif stat.S_ISDIR(mode):
  152. return stat.S_IFDIR
  153. elif S_ISGITLINK(mode):
  154. return S_IFGITLINK
  155. ret = stat.S_IFREG | 0o644
  156. ret |= (mode & 0o111)
  157. return ret
  158. class Index(object):
  159. """A Git Index file."""
  160. def __init__(self, filename):
  161. """Open an index file.
  162. :param filename: Path to the index file
  163. """
  164. self._filename = filename
  165. self.clear()
  166. self.read()
  167. def __repr__(self):
  168. return "%s(%r)" % (self.__class__.__name__, self._filename)
  169. def write(self):
  170. """Write current contents of index to disk."""
  171. f = GitFile(self._filename, 'wb')
  172. try:
  173. f = SHA1Writer(f)
  174. write_index_dict(f, self._byname)
  175. finally:
  176. f.close()
  177. def read(self):
  178. """Read current contents of index from disk."""
  179. if not os.path.exists(self._filename):
  180. return
  181. f = GitFile(self._filename, 'rb')
  182. try:
  183. f = SHA1Reader(f)
  184. for x in read_index(f):
  185. self[x[0]] = IndexEntry(*x[1:])
  186. # FIXME: Additional data?
  187. f.read(os.path.getsize(self._filename)-f.tell()-20)
  188. f.check_sha()
  189. finally:
  190. f.close()
  191. def __len__(self):
  192. """Number of entries in this index file."""
  193. return len(self._byname)
  194. def __getitem__(self, name):
  195. """Retrieve entry by relative path.
  196. :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  197. """
  198. return self._byname[name]
  199. def __iter__(self):
  200. """Iterate over the paths in this index."""
  201. return iter(self._byname)
  202. def get_sha1(self, path):
  203. """Return the (git object) SHA1 for the object at a path."""
  204. return self[path].sha
  205. def get_mode(self, path):
  206. """Return the POSIX file mode for the object at a path."""
  207. return self[path].mode
  208. def iterblobs(self):
  209. """Iterate over path, sha, mode tuples for use with commit_tree."""
  210. for path in self:
  211. entry = self[path]
  212. yield path, entry.sha, cleanup_mode(entry.mode)
  213. def clear(self):
  214. """Remove all contents from this index."""
  215. self._byname = {}
  216. def __setitem__(self, name, x):
  217. assert isinstance(name, bytes)
  218. assert len(x) == 10
  219. # Remove the old entry if any
  220. self._byname[name] = x
  221. def __delitem__(self, name):
  222. assert isinstance(name, bytes)
  223. del self._byname[name]
  224. def iteritems(self):
  225. return iteritems(self._byname)
  226. def update(self, entries):
  227. for name, value in iteritems(entries):
  228. self[name] = value
  229. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  230. """Find the differences between the contents of this index and a tree.
  231. :param object_store: Object store to use for retrieving tree contents
  232. :param tree: SHA1 of the root tree
  233. :param want_unchanged: Whether unchanged files should be reported
  234. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
  235. """
  236. def lookup_entry(path):
  237. entry = self[path]
  238. return entry.sha, entry.mode
  239. for (name, mode, sha) in changes_from_tree(self._byname.keys(),
  240. lookup_entry, object_store, tree,
  241. want_unchanged=want_unchanged):
  242. yield (name, mode, sha)
  243. def commit(self, object_store):
  244. """Create a new tree from an index.
  245. :param object_store: Object store to save the tree in
  246. :return: Root tree SHA
  247. """
  248. return commit_tree(object_store, self.iterblobs())
  249. def commit_tree(object_store, blobs):
  250. """Commit a new tree.
  251. :param object_store: Object store to add trees to
  252. :param blobs: Iterable over blob path, sha, mode entries
  253. :return: SHA1 of the created tree.
  254. """
  255. trees = {b'': {}}
  256. def add_tree(path):
  257. if path in trees:
  258. return trees[path]
  259. dirname, basename = pathsplit(path)
  260. t = add_tree(dirname)
  261. assert isinstance(basename, bytes)
  262. newtree = {}
  263. t[basename] = newtree
  264. trees[path] = newtree
  265. return newtree
  266. for path, sha, mode in blobs:
  267. tree_path, basename = pathsplit(path)
  268. tree = add_tree(tree_path)
  269. tree[basename] = (mode, sha)
  270. def build_tree(path):
  271. tree = Tree()
  272. for basename, entry in iteritems(trees[path]):
  273. if isinstance(entry, dict):
  274. mode = stat.S_IFDIR
  275. sha = build_tree(pathjoin(path, basename))
  276. else:
  277. (mode, sha) = entry
  278. tree.add(basename, mode, sha)
  279. object_store.add_object(tree)
  280. return tree.id
  281. return build_tree(b'')
  282. def commit_index(object_store, index):
  283. """Create a new tree from an index.
  284. :param object_store: Object store to save the tree in
  285. :param index: Index file
  286. :note: This function is deprecated, use index.commit() instead.
  287. :return: Root tree sha.
  288. """
  289. return commit_tree(object_store, index.iterblobs())
  290. def changes_from_tree(names, lookup_entry, object_store, tree,
  291. want_unchanged=False):
  292. """Find the differences between the contents of a tree and
  293. a working copy.
  294. :param names: Iterable of names in the working copy
  295. :param lookup_entry: Function to lookup an entry in the working copy
  296. :param object_store: Object store to use for retrieving tree contents
  297. :param tree: SHA1 of the root tree, or None for an empty tree
  298. :param want_unchanged: Whether unchanged files should be reported
  299. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  300. (oldsha, newsha)
  301. """
  302. other_names = set(names)
  303. if tree is not None:
  304. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  305. try:
  306. (other_sha, other_mode) = lookup_entry(name)
  307. except KeyError:
  308. # Was removed
  309. yield ((name, None), (mode, None), (sha, None))
  310. else:
  311. other_names.remove(name)
  312. if (want_unchanged or other_sha != sha or other_mode != mode):
  313. yield ((name, name), (mode, other_mode), (sha, other_sha))
  314. # Mention added files
  315. for name in other_names:
  316. (other_sha, other_mode) = lookup_entry(name)
  317. yield ((None, name), (None, other_mode), (None, other_sha))
  318. def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
  319. """Create a new index entry from a stat value.
  320. :param stat_val: POSIX stat_result instance
  321. :param hex_sha: Hex sha of the object
  322. :param flags: Index flags
  323. """
  324. if mode is None:
  325. mode = cleanup_mode(stat_val.st_mode)
  326. return (stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev,
  327. stat_val.st_ino, mode, stat_val.st_uid,
  328. stat_val.st_gid, stat_val.st_size, hex_sha, flags)
  329. def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
  330. """Build a file or symlink on disk based on a Git object.
  331. :param obj: The git object
  332. :param mode: File mode
  333. :param target_path: Path to write to
  334. :param honor_filemode: An optional flag to honor core.filemode setting in
  335. config file, default is core.filemode=True, change executable bit
  336. """
  337. if stat.S_ISLNK(mode):
  338. # FIXME: This will fail on Windows. What should we do instead?
  339. src_path = blob.as_raw_string()
  340. try:
  341. os.symlink(src_path, target_path)
  342. except OSError as e:
  343. if e.errno == errno.EEXIST:
  344. os.unlink(target_path)
  345. os.symlink(src_path, target_path)
  346. else:
  347. raise
  348. else:
  349. with open(target_path, 'wb') as f:
  350. # Write out file
  351. f.write(blob.as_raw_string())
  352. if honor_filemode:
  353. os.chmod(target_path, mode)
  354. INVALID_DOTNAMES = (".git", ".", "..", "")
  355. def validate_path_element_default(element):
  356. return element.lower() not in INVALID_DOTNAMES
  357. def validate_path_element_ntfs(element):
  358. stripped = element.rstrip(". ").lower()
  359. if stripped in INVALID_DOTNAMES:
  360. return False
  361. if stripped == "git~1":
  362. return False
  363. return True
  364. def validate_path(path, element_validator=validate_path_element_default):
  365. """Default path validator that just checks for .git/."""
  366. parts = path.split(b"/")
  367. for p in parts:
  368. if not element_validator(p):
  369. return False
  370. else:
  371. return True
  372. def build_index_from_tree(prefix, index_path, object_store, tree_id,
  373. honor_filemode=True,
  374. validate_path_element=validate_path_element_default):
  375. """Generate and materialize index from a tree
  376. :param tree_id: Tree to materialize
  377. :param prefix: Target dir for materialized index files
  378. :param index_path: Target path for generated index
  379. :param object_store: Non-empty object store holding tree contents
  380. :param honor_filemode: An optional flag to honor core.filemode setting in
  381. config file, default is core.filemode=True, change executable bit
  382. :param validate_path_element: Function to validate path elements to check out;
  383. default just refuses .git and .. directories.
  384. :note:: existing index is wiped and contents are not merged
  385. in a working dir. Suiteable only for fresh clones.
  386. """
  387. index = Index(index_path)
  388. for entry in object_store.iter_tree_contents(tree_id):
  389. if not validate_path(entry.path):
  390. continue
  391. full_path = os.path.join(prefix, entry.path.decode(sys.getfilesystemencoding()))
  392. if not os.path.exists(os.path.dirname(full_path)):
  393. os.makedirs(os.path.dirname(full_path))
  394. # FIXME: Merge new index into working tree
  395. obj = object_store[entry.sha]
  396. build_file_from_blob(obj, entry.mode, full_path,
  397. honor_filemode=honor_filemode)
  398. # Add file to index
  399. st = os.lstat(full_path)
  400. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  401. index.write()
  402. def blob_from_path_and_stat(path, st):
  403. """Create a blob from a path and a stat object.
  404. :param path: Full path to file
  405. :param st: A stat object
  406. :return: A `Blob` object
  407. """
  408. blob = Blob()
  409. if not stat.S_ISLNK(st.st_mode):
  410. with open(path, 'rb') as f:
  411. blob.data = f.read()
  412. else:
  413. blob.data = os.readlink(path).encode(sys.getfilesystemencoding())
  414. return blob
  415. def get_unstaged_changes(index, path):
  416. """Walk through an index and check for differences against working tree.
  417. :param index: index to check
  418. :param path: path in which to find files
  419. :return: iterator over paths with unstaged changes
  420. """
  421. # For each entry in the index check the sha1 & ensure not staged
  422. for name, entry in index.iteritems():
  423. fp = os.path.join(path, name)
  424. blob = blob_from_path_and_stat(fp, os.lstat(fp))
  425. if blob.id != entry.sha:
  426. yield name