index.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) any later version of the license.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Parser for the git index file format."""
  19. import errno
  20. import os
  21. import stat
  22. import struct
  23. from dulwich.file import GitFile
  24. from dulwich.objects import (
  25. S_IFGITLINK,
  26. S_ISGITLINK,
  27. Tree,
  28. hex_to_sha,
  29. sha_to_hex,
  30. )
  31. from dulwich.pack import (
  32. SHA1Reader,
  33. SHA1Writer,
  34. )
  35. def pathsplit(path):
  36. """Split a /-delimited path into a directory part and a basename.
  37. :param path: The path to split.
  38. :return: Tuple with directory name and basename
  39. """
  40. try:
  41. (dirname, basename) = path.rsplit("/", 1)
  42. except ValueError:
  43. return ("", path)
  44. else:
  45. return (dirname, basename)
  46. def pathjoin(*args):
  47. """Join a /-delimited path.
  48. """
  49. return "/".join([p for p in args if p])
  50. def read_cache_time(f):
  51. """Read a cache time.
  52. :param f: File-like object to read from
  53. :return: Tuple with seconds and nanoseconds
  54. """
  55. return struct.unpack(">LL", f.read(8))
  56. def write_cache_time(f, t):
  57. """Write a cache time.
  58. :param f: File-like object to write to
  59. :param t: Time to write (as int, float or tuple with secs and nsecs)
  60. """
  61. if isinstance(t, int):
  62. t = (t, 0)
  63. elif isinstance(t, float):
  64. (secs, nsecs) = divmod(t, 1.0)
  65. t = (int(secs), int(nsecs * 1000000000))
  66. elif not isinstance(t, tuple):
  67. raise TypeError(t)
  68. f.write(struct.pack(">LL", *t))
  69. def read_cache_entry(f):
  70. """Read an entry from a cache file.
  71. :param f: File-like object to read from
  72. :return: tuple with: device, inode, mode, uid, gid, size, sha, flags
  73. """
  74. beginoffset = f.tell()
  75. ctime = read_cache_time(f)
  76. mtime = read_cache_time(f)
  77. (dev, ino, mode, uid, gid, size, sha, flags, ) = \
  78. struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  79. name = f.read((flags & 0x0fff))
  80. # Padding:
  81. real_size = ((f.tell() - beginoffset + 8) & ~7)
  82. data = f.read((beginoffset + real_size) - f.tell())
  83. return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
  84. sha_to_hex(sha), flags & ~0x0fff)
  85. def write_cache_entry(f, entry):
  86. """Write an index entry to a file.
  87. :param f: File object
  88. :param entry: Entry to write, tuple with:
  89. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  90. """
  91. beginoffset = f.tell()
  92. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
  93. write_cache_time(f, ctime)
  94. write_cache_time(f, mtime)
  95. flags = len(name) | (flags &~ 0x0fff)
  96. f.write(struct.pack(">LLLLLL20sH", dev, ino, mode, uid, gid, size, hex_to_sha(sha), flags))
  97. f.write(name)
  98. real_size = ((f.tell() - beginoffset + 8) & ~7)
  99. f.write("\0" * ((beginoffset + real_size) - f.tell()))
  100. def read_index(f):
  101. """Read an index file, yielding the individual entries."""
  102. header = f.read(4)
  103. if header != "DIRC":
  104. raise AssertionError("Invalid index file header: %r" % header)
  105. (version, num_entries) = struct.unpack(">LL", f.read(4 * 2))
  106. assert version in (1, 2)
  107. for i in range(num_entries):
  108. yield read_cache_entry(f)
  109. def read_index_dict(f):
  110. """Read an index file and return it as a dictionary.
  111. :param f: File object to read from
  112. """
  113. ret = {}
  114. for x in read_index(f):
  115. ret[x[0]] = tuple(x[1:])
  116. return ret
  117. def write_index(f, entries):
  118. """Write an index file.
  119. :param f: File-like object to write to
  120. :param entries: Iterable over the entries to write
  121. """
  122. f.write("DIRC")
  123. f.write(struct.pack(">LL", 2, len(entries)))
  124. for x in entries:
  125. write_cache_entry(f, x)
  126. def write_index_dict(f, entries):
  127. """Write an index file based on the contents of a dictionary.
  128. """
  129. entries_list = []
  130. for name in sorted(entries):
  131. entries_list.append((name,) + tuple(entries[name]))
  132. write_index(f, entries_list)
  133. def cleanup_mode(mode):
  134. """Cleanup a mode value.
  135. This will return a mode that can be stored in a tree object.
  136. :param mode: Mode to clean up.
  137. """
  138. if stat.S_ISLNK(mode):
  139. return stat.S_IFLNK
  140. elif stat.S_ISDIR(mode):
  141. return stat.S_IFDIR
  142. elif S_ISGITLINK(mode):
  143. return S_IFGITLINK
  144. ret = stat.S_IFREG | 0644
  145. ret |= (mode & 0111)
  146. return ret
  147. class Index(object):
  148. """A Git Index file."""
  149. def __init__(self, filename):
  150. """Open an index file.
  151. :param filename: Path to the index file
  152. """
  153. self._filename = filename
  154. self.clear()
  155. self.read()
  156. def __repr__(self):
  157. return "%s(%r)" % (self.__class__.__name__, self._filename)
  158. def write(self):
  159. """Write current contents of index to disk."""
  160. f = GitFile(self._filename, 'wb')
  161. try:
  162. f = SHA1Writer(f)
  163. write_index_dict(f, self._byname)
  164. finally:
  165. f.close()
  166. def read(self):
  167. """Read current contents of index from disk."""
  168. if not os.path.exists(self._filename):
  169. return
  170. f = GitFile(self._filename, 'rb')
  171. try:
  172. f = SHA1Reader(f)
  173. for x in read_index(f):
  174. self[x[0]] = tuple(x[1:])
  175. # FIXME: Additional data?
  176. f.read(os.path.getsize(self._filename)-f.tell()-20)
  177. f.check_sha()
  178. finally:
  179. f.close()
  180. def __len__(self):
  181. """Number of entries in this index file."""
  182. return len(self._byname)
  183. def __getitem__(self, name):
  184. """Retrieve entry by relative path.
  185. :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  186. """
  187. return self._byname[name]
  188. def __iter__(self):
  189. """Iterate over the paths in this index."""
  190. return iter(self._byname)
  191. def get_sha1(self, path):
  192. """Return the (git object) SHA1 for the object at a path."""
  193. return self[path][-2]
  194. def get_mode(self, path):
  195. """Return the POSIX file mode for the object at a path."""
  196. return self[path][-6]
  197. def iterblobs(self):
  198. """Iterate over path, sha, mode tuples for use with commit_tree."""
  199. for path in self:
  200. entry = self[path]
  201. yield path, entry[-2], cleanup_mode(entry[-6])
  202. def clear(self):
  203. """Remove all contents from this index."""
  204. self._byname = {}
  205. def __setitem__(self, name, x):
  206. assert isinstance(name, str)
  207. assert len(x) == 10
  208. # Remove the old entry if any
  209. self._byname[name] = x
  210. def __delitem__(self, name):
  211. assert isinstance(name, str)
  212. del self._byname[name]
  213. def iteritems(self):
  214. return self._byname.iteritems()
  215. def update(self, entries):
  216. for name, value in entries.iteritems():
  217. self[name] = value
  218. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  219. """Find the differences between the contents of this index and a tree.
  220. :param object_store: Object store to use for retrieving tree contents
  221. :param tree: SHA1 of the root tree
  222. :param want_unchanged: Whether unchanged files should be reported
  223. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
  224. """
  225. def lookup_entry(path):
  226. entry = self[path]
  227. return entry[-2], entry[-6]
  228. for (name, mode, sha) in changes_from_tree(self._byname.keys(),
  229. lookup_entry, object_store, tree,
  230. want_unchanged=want_unchanged):
  231. yield (name, mode, sha)
  232. def commit(self, object_store):
  233. """Create a new tree from an index.
  234. :param object_store: Object store to save the tree in
  235. :return: Root tree SHA
  236. """
  237. return commit_tree(object_store, self.iterblobs())
  238. def commit_tree(object_store, blobs):
  239. """Commit a new tree.
  240. :param object_store: Object store to add trees to
  241. :param blobs: Iterable over blob path, sha, mode entries
  242. :return: SHA1 of the created tree.
  243. """
  244. trees = {"": {}}
  245. def add_tree(path):
  246. if path in trees:
  247. return trees[path]
  248. dirname, basename = pathsplit(path)
  249. t = add_tree(dirname)
  250. assert isinstance(basename, str)
  251. newtree = {}
  252. t[basename] = newtree
  253. trees[path] = newtree
  254. return newtree
  255. for path, sha, mode in blobs:
  256. tree_path, basename = pathsplit(path)
  257. tree = add_tree(tree_path)
  258. tree[basename] = (mode, sha)
  259. def build_tree(path):
  260. tree = Tree()
  261. for basename, entry in trees[path].iteritems():
  262. if type(entry) == dict:
  263. mode = stat.S_IFDIR
  264. sha = build_tree(pathjoin(path, basename))
  265. else:
  266. (mode, sha) = entry
  267. tree.add(basename, mode, sha)
  268. object_store.add_object(tree)
  269. return tree.id
  270. return build_tree("")
  271. def commit_index(object_store, index):
  272. """Create a new tree from an index.
  273. :param object_store: Object store to save the tree in
  274. :param index: Index file
  275. :note: This function is deprecated, use index.commit() instead.
  276. :return: Root tree sha.
  277. """
  278. return commit_tree(object_store, index.iterblobs())
  279. def changes_from_tree(names, lookup_entry, object_store, tree,
  280. want_unchanged=False):
  281. """Find the differences between the contents of a tree and
  282. a working copy.
  283. :param names: Iterable of names in the working copy
  284. :param lookup_entry: Function to lookup an entry in the working copy
  285. :param object_store: Object store to use for retrieving tree contents
  286. :param tree: SHA1 of the root tree, or None for an empty tree
  287. :param want_unchanged: Whether unchanged files should be reported
  288. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
  289. (oldsha, newsha)
  290. """
  291. other_names = set(names)
  292. if tree is not None:
  293. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  294. try:
  295. (other_sha, other_mode) = lookup_entry(name)
  296. except KeyError:
  297. # Was removed
  298. yield ((name, None), (mode, None), (sha, None))
  299. else:
  300. other_names.remove(name)
  301. if (want_unchanged or other_sha != sha or other_mode != mode):
  302. yield ((name, name), (mode, other_mode), (sha, other_sha))
  303. # Mention added files
  304. for name in other_names:
  305. (other_sha, other_mode) = lookup_entry(name)
  306. yield ((None, name), (None, other_mode), (None, other_sha))
  307. def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
  308. """Create a new index entry from a stat value.
  309. :param stat_val: POSIX stat_result instance
  310. :param hex_sha: Hex sha of the object
  311. :param flags: Index flags
  312. """
  313. if mode is None:
  314. mode = cleanup_mode(stat_val.st_mode)
  315. return (stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev,
  316. stat_val.st_ino, mode, stat_val.st_uid,
  317. stat_val.st_gid, stat_val.st_size, hex_sha, flags)
  318. def build_index_from_tree(prefix, index_path, object_store, tree_id,
  319. honor_filemode=True):
  320. """Generate and materialize index from a tree
  321. :param tree_id: Tree to materialize
  322. :param prefix: Target dir for materialized index files
  323. :param index_path: Target path for generated index
  324. :param object_store: Non-empty object store holding tree contents
  325. :param honor_filemode: An optional flag to honor core.filemode setting in
  326. config file, default is core.filemode=True, change executable bit
  327. :note:: existing index is wiped and contents are not merged
  328. in a working dir. Suiteable only for fresh clones.
  329. """
  330. index = Index(index_path)
  331. for entry in object_store.iter_tree_contents(tree_id):
  332. full_path = os.path.join(prefix, entry.path)
  333. if not os.path.exists(os.path.dirname(full_path)):
  334. os.makedirs(os.path.dirname(full_path))
  335. # FIXME: Merge new index into working tree
  336. if stat.S_ISLNK(entry.mode):
  337. # FIXME: This will fail on Windows. What should we do instead?
  338. src_path = object_store[entry.sha].as_raw_string()
  339. try:
  340. os.symlink(src_path, full_path)
  341. except OSError, e:
  342. if e.errno == errno.EEXIST:
  343. os.unlink(full_path)
  344. os.symlink(src_path, full_path)
  345. else:
  346. raise
  347. else:
  348. f = open(full_path, 'wb')
  349. try:
  350. # Write out file
  351. f.write(object_store[entry.sha].as_raw_string())
  352. finally:
  353. f.close()
  354. if honor_filemode:
  355. os.chmod(full_path, entry.mode)
  356. # Add file to index
  357. st = os.lstat(full_path)
  358. index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
  359. index.write()