index.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) any later version of the license.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Parser for the git index file format."""
  19. import collections
  20. import errno
  21. import os
  22. import stat
  23. import struct
  24. from dulwich.file import GitFile
  25. from dulwich.objects import (
  26. Blob,
  27. S_IFGITLINK,
  28. S_ISGITLINK,
  29. Tree,
  30. hex_to_sha,
  31. sha_to_hex,
  32. )
  33. from dulwich.pack import (
  34. SHA1Reader,
  35. SHA1Writer,
  36. )
  37. IndexEntry = collections.namedtuple(
  38. 'IndexEntry', [
  39. 'ctime', 'mtime', 'dev', 'ino', 'mode', 'uid', 'gid', 'size', 'sha',
  40. 'flags'])
  41. def pathsplit(path):
  42. """Split a /-delimited path into a directory part and a basename.
  43. :param path: The path to split.
  44. :return: Tuple with directory name and basename
  45. """
  46. try:
  47. (dirname, basename) = path.rsplit("/", 1)
  48. except ValueError:
  49. return ("", path)
  50. else:
  51. return (dirname, basename)
  52. def pathjoin(*args):
  53. """Join a /-delimited path.
  54. """
  55. return "/".join([p for p in args if p])
  56. def read_cache_time(f):
  57. """Read a cache time.
  58. :param f: File-like object to read from
  59. :return: Tuple with seconds and nanoseconds
  60. """
  61. return struct.unpack(">LL", f.read(8))
  62. def write_cache_time(f, t):
  63. """Write a cache time.
  64. :param f: File-like object to write to
  65. :param t: Time to write (as int, float or tuple with secs and nsecs)
  66. """
  67. if isinstance(t, int):
  68. t = (t, 0)
  69. elif isinstance(t, float):
  70. (secs, nsecs) = divmod(t, 1.0)
  71. t = (int(secs), int(nsecs * 1000000000))
  72. elif not isinstance(t, tuple):
  73. raise TypeError(t)
  74. f.write(struct.pack(">LL", *t))
  75. def read_cache_entry(f):
  76. """Read an entry from a cache file.
  77. :param f: File-like object to read from
  78. :return: tuple with: device, inode, mode, uid, gid, size, sha, flags
  79. """
  80. beginoffset = f.tell()
  81. ctime = read_cache_time(f)
  82. mtime = read_cache_time(f)
  83. (dev, ino, mode, uid, gid, size, sha, flags, ) = \
  84. struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  85. name = f.read((flags & 0x0fff))
  86. # Padding:
  87. real_size = ((f.tell() - beginoffset + 8) & ~7)
  88. f.read((beginoffset + real_size) - f.tell())
  89. return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
  90. sha_to_hex(sha), flags & ~0x0fff)
  91. def write_cache_entry(f, entry):
  92. """Write an index entry to a file.
  93. :param f: File object
  94. :param entry: Entry to write, tuple with:
  95. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  96. """
  97. beginoffset = f.tell()
  98. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
  99. write_cache_time(f, ctime)
  100. write_cache_time(f, mtime)
  101. flags = len(name) | (flags &~ 0x0fff)
  102. f.write(struct.pack(">LLLLLL20sH", dev & 0xFFFFFFFF, ino & 0xFFFFFFFF, mode, uid, gid, size, hex_to_sha(sha), flags))
  103. f.write(name)
  104. real_size = ((f.tell() - beginoffset + 8) & ~7)
  105. f.write("\0" * ((beginoffset + real_size) - f.tell()))
  106. def read_index(f):
  107. """Read an index file, yielding the individual entries."""
  108. header = f.read(4)
  109. if header != "DIRC":
  110. raise AssertionError("Invalid index file header: %r" % header)
  111. (version, num_entries) = struct.unpack(">LL", f.read(4 * 2))
  112. assert version in (1, 2)
  113. for i in range(num_entries):
  114. yield read_cache_entry(f)
  115. def read_index_dict(f):
  116. """Read an index file and return it as a dictionary.
  117. :param f: File object to read from
  118. """
  119. ret = {}
  120. for x in read_index(f):
  121. ret[x[0]] = IndexEntry(*x[1:])
  122. return ret
  123. def write_index(f, entries):
  124. """Write an index file.
  125. :param f: File-like object to write to
  126. :param entries: Iterable over the entries to write
  127. """
  128. f.write("DIRC")
  129. f.write(struct.pack(">LL", 2, len(entries)))
  130. for x in entries:
  131. write_cache_entry(f, x)
  132. def write_index_dict(f, entries):
  133. """Write an index file based on the contents of a dictionary.
  134. """
  135. entries_list = []
  136. for name in sorted(entries):
  137. entries_list.append((name,) + tuple(entries[name]))
  138. write_index(f, entries_list)
  139. def cleanup_mode(mode):
  140. """Cleanup a mode value.
  141. This will return a mode that can be stored in a tree object.
  142. :param mode: Mode to clean up.
  143. """
  144. if stat.S_ISLNK(mode):
  145. return stat.S_IFLNK
  146. elif stat.S_ISDIR(mode):
  147. return stat.S_IFDIR
  148. elif S_ISGITLINK(mode):
  149. return S_IFGITLINK
  150. ret = stat.S_IFREG | 0o644
  151. ret |= (mode & 0o111)
  152. return ret
  153. class Index(object):
  154. """A Git Index file."""
  155. def __init__(self, filename):
  156. """Open an index file.
  157. :param filename: Path to the index file
  158. """
  159. self._filename = filename
  160. self.clear()
  161. self.read()
  162. def __repr__(self):
  163. return "%s(%r)" % (self.__class__.__name__, self._filename)
  164. def write(self):
  165. """Write current contents of index to disk."""
  166. f = GitFile(self._filename, 'wb')
  167. try:
  168. f = SHA1Writer(f)
  169. write_index_dict(f, self._byname)
  170. finally:
  171. f.close()
  172. def read(self):
  173. """Read current contents of index from disk."""
  174. if not os.path.exists(self._filename):
  175. return
  176. f = GitFile(self._filename, 'rb')
  177. try:
  178. f = SHA1Reader(f)
  179. for x in read_index(f):
  180. self[x[0]] = IndexEntry(*x[1:])
  181. # FIXME: Additional data?
  182. f.read(os.path.getsize(self._filename)-f.tell()-20)
  183. f.check_sha()
  184. finally:
  185. f.close()
  186. def __len__(self):
  187. """Number of entries in this index file."""
  188. return len(self._byname)
  189. def __getitem__(self, name):
  190. """Retrieve entry by relative path.
  191. :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  192. """
  193. return self._byname[name]
  194. def __iter__(self):
  195. """Iterate over the paths in this index."""
  196. return iter(self._byname)
  197. def get_sha1(self, path):
  198. """Return the (git object) SHA1 for the object at a path."""
  199. return self[path].sha
  200. def get_mode(self, path):
  201. """Return the POSIX file mode for the object at a path."""
  202. return self[path].mode
  203. def iterblobs(self):
  204. """Iterate over path, sha, mode tuples for use with commit_tree."""
  205. for path in self:
  206. entry = self[path]
  207. yield path, entry.sha, cleanup_mode(entry.mode)
  208. def clear(self):
  209. """Remove all contents from this index."""
  210. self._byname = {}
  211. def __setitem__(self, name, x):
  212. assert isinstance(name, str)
  213. assert len(x) == 10
  214. # Remove the old entry if any
  215. self._byname[name] = x
  216. def __delitem__(self, name):
  217. assert isinstance(name, str)
  218. del self._byname[name]
  219. def iteritems(self):
  220. return self._byname.iteritems()
  221. def update(self, entries):
  222. for name, value in entries.iteritems():
  223. self[name] = value
  224. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  225. """Find the differences between the contents of this index and a tree.
  226. :param object_store: Object store to use for retrieving tree contents
  227. :param tree: SHA1 of the root tree
  228. :param want_unchanged: Whether unchanged files should be reported
  229. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
  230. """
  231. def lookup_entry(path):
  232. entry = self[path]
  233. return entry.sha, entry.mode
  234. for (name, mode, sha) in changes_from_tree(self._byname.keys(),
  235. lookup_entry, object_store, tree,
  236. want_unchanged=want_unchanged):
  237. yield (name, mode, sha)
  238. def commit(self, object_store):
  239. """Create a new tree from an index.
  240. :param object_store: Object store to save the tree in
  241. :return: Root tree SHA
  242. """
  243. return commit_tree(object_store, self.iterblobs())
  244. def commit_tree(object_store, blobs):
  245. """Commit a new tree.
  246. :param object_store: Object store to add trees to
  247. :param blobs: Iterable over blob path, sha, mode entries
  248. :return: SHA1 of the created tree.
  249. """
  250. trees = {"": {}}
  251. def add_tree(path):
  252. if path in trees:
  253. return trees[path]
  254. dirname, basename = pathsplit(path)
  255. t = add_tree(dirname)
  256. assert isinstance(basename, str)
  257. newtree = {}
  258. t[basename] = newtree
  259. trees[path] = newtree
  260. return newtree
  261. for path, sha, mode in blobs:
  262. tree_path, basename = pathsplit(path)
  263. tree = add_tree(tree_path)
  264. tree[basename] = (mode, sha)
  265. def build_tree(path):
  266. tree = Tree()
  267. for basename, entry in trees[path].iteritems():
  268. if isinstance(entry, dict):
  269. mode = stat.S_IFDIR
  270. sha = build_tree(pathjoin(path, basename))
  271. else:
  272. (mode, sha) = entry
  273. tree.add(basename, mode, sha)
  274. object_store.add_object(tree)
  275. return tree.id
  276. return build_tree("")
  277. def commit_index(object_store, index):
  278. """Create a new tree from an index.
  279. :param object_store: Object store to save the tree in
  280. :param index: Index file
  281. :note: This function is deprecated, use index.commit() instead.
  282. :return: Root tree sha.
  283. """
  284. return commit_tree(object_store, index.iterblobs())
  285. def changes_from_tree(names, lookup_entry, object_store, tree,
  286. want_unchanged=False):
  287. """Find the differences between the contents of a tree and
  288. a working copy.
  289. :param names: Iterable of names in the working copy
  290. :param lookup_entry: Function to lookup an entry in the working copy
  291. :param object_store: Object store to use for retrieving tree contents
  292. :param tree: SHA1 of the root tree, or None for an empty tree
  293. :param want_unchanged: Whether unchanged files should be reported
  294. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  295. (oldsha, newsha)
  296. """
  297. other_names = set(names)
  298. if tree is not None:
  299. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  300. try:
  301. (other_sha, other_mode) = lookup_entry(name)
  302. except KeyError:
  303. # Was removed
  304. yield ((name, None), (mode, None), (sha, None))
  305. else:
  306. other_names.remove(name)
  307. if (want_unchanged or other_sha != sha or other_mode != mode):
  308. yield ((name, name), (mode, other_mode), (sha, other_sha))
  309. # Mention added files
  310. for name in other_names:
  311. (other_sha, other_mode) = lookup_entry(name)
  312. yield ((None, name), (None, other_mode), (None, other_sha))
  313. def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
  314. """Create a new index entry from a stat value.
  315. :param stat_val: POSIX stat_result instance
  316. :param hex_sha: Hex sha of the object
  317. :param flags: Index flags
  318. """
  319. if mode is None:
  320. mode = cleanup_mode(stat_val.st_mode)
  321. return (stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev,
  322. stat_val.st_ino, mode, stat_val.st_uid,
  323. stat_val.st_gid, stat_val.st_size, hex_sha, flags)
  324. def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
  325. """Build a file or symlink on disk based on a Git object.
  326. :param obj: The git object
  327. :param mode: File mode
  328. :param target_path: Path to write to
  329. :param honor_filemode: An optional flag to honor core.filemode setting in
  330. config file, default is core.filemode=True, change executable bit
  331. """
  332. if stat.S_ISLNK(mode):
  333. # FIXME: This will fail on Windows. What should we do instead?
  334. src_path = blob.as_raw_string()
  335. try:
  336. os.symlink(src_path, target_path)
  337. except OSError as e:
  338. if e.errno == errno.EEXIST:
  339. os.unlink(target_path)
  340. os.symlink(src_path, target_path)
  341. else:
  342. raise
  343. else:
  344. with open(target_path, 'wb') as f:
  345. # Write out file
  346. f.write(blob.as_raw_string())
  347. if honor_filemode:
  348. os.chmod(target_path, mode)
  349. def build_index_from_tree(prefix, index_path, object_store, tree_id,
  350. honor_filemode=True):
  351. """Generate and materialize index from a tree
  352. :param tree_id: Tree to materialize
  353. :param prefix: Target dir for materialized index files
  354. :param index_path: Target path for generated index
  355. :param object_store: Non-empty object store holding tree contents
  356. :param honor_filemode: An optional flag to honor core.filemode setting in
  357. config file, default is core.filemode=True, change executable bit
  358. :note:: existing index is wiped and contents are not merged
  359. in a working dir. Suiteable only for fresh clones.
  360. """
  361. index = Index(index_path)
  362. for entry in object_store.iter_tree_contents(tree_id):
  363. full_path = os.path.join(prefix, entry.path)
  364. if not os.path.exists(os.path.dirname(full_path)):
  365. os.makedirs(os.path.dirname(full_path))
  366. # FIXME: Merge new index into working tree
  367. obj = object_store[entry.sha]
  368. build_file_from_blob(obj, entry.mode, full_path,
  369. honor_filemode=honor_filemode)
  370. # Add file to index
  371. st = os.lstat(full_path)
  372. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  373. index.write()
  374. def blob_from_path_and_stat(path, st):
  375. """Create a blob from a path and a stat object.
  376. :param path: Full path to file
  377. :param st: A stat object
  378. :return: A `Blob` object
  379. """
  380. blob = Blob()
  381. if not stat.S_ISLNK(st.st_mode):
  382. with open(path, 'rb') as f:
  383. blob.data = f.read()
  384. else:
  385. blob.data = os.readlink(path)
  386. return blob
  387. def get_unstaged_changes(index, path):
  388. """Walk through an index and check for differences against working tree.
  389. :param index: index to check
  390. :param path: path in which to find files
  391. :return: iterator over paths with unstaged changes
  392. """
  393. # For each entry in the index check the sha1 & ensure not staged
  394. for name, entry in index.iteritems():
  395. fp = os.path.join(path, name)
  396. blob = blob_from_path_and_stat(fp, os.lstat(fp))
  397. if blob.id != entry.sha:
  398. yield name