2
0

index.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. # index.py -- File parser/writer for the git index file
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; version 2
  6. # of the License or (at your option) any later version of the license.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  16. # MA 02110-1301, USA.
  17. """Parser for the git index file format."""
  18. import os
  19. import stat
  20. import struct
  21. from dulwich.file import GitFile
  22. from dulwich.objects import (
  23. S_IFGITLINK,
  24. S_ISGITLINK,
  25. Tree,
  26. hex_to_sha,
  27. sha_to_hex,
  28. )
  29. from dulwich.pack import (
  30. SHA1Reader,
  31. SHA1Writer,
  32. )
  33. def pathsplit(path):
  34. """Split a /-delimited path into a directory part and a basename.
  35. :param path: The path to split.
  36. :return: Tuple with directory name and basename
  37. """
  38. try:
  39. (dirname, basename) = path.rsplit("/", 1)
  40. except ValueError:
  41. return ("", path)
  42. else:
  43. return (dirname, basename)
  44. def pathjoin(*args):
  45. """Join a /-delimited path.
  46. """
  47. return "/".join([p for p in args if p])
  48. def read_cache_time(f):
  49. """Read a cache time.
  50. :param f: File-like object to read from
  51. :return: Tuple with seconds and nanoseconds
  52. """
  53. return struct.unpack(">LL", f.read(8))
  54. def write_cache_time(f, t):
  55. """Write a cache time.
  56. :param f: File-like object to write to
  57. :param t: Time to write (as int, float or tuple with secs and nsecs)
  58. """
  59. if isinstance(t, int):
  60. t = (t, 0)
  61. elif isinstance(t, float):
  62. (secs, nsecs) = divmod(t, 1.0)
  63. t = (int(secs), int(nsecs * 1000000000))
  64. elif not isinstance(t, tuple):
  65. raise TypeError(t)
  66. f.write(struct.pack(">LL", *t))
  67. def read_cache_entry(f):
  68. """Read an entry from a cache file.
  69. :param f: File-like object to read from
  70. :return: tuple with: device, inode, mode, uid, gid, size, sha, flags
  71. """
  72. beginoffset = f.tell()
  73. ctime = read_cache_time(f)
  74. mtime = read_cache_time(f)
  75. (dev, ino, mode, uid, gid, size, sha, flags, ) = \
  76. struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
  77. name = f.read((flags & 0x0fff))
  78. # Padding:
  79. real_size = ((f.tell() - beginoffset + 8) & ~7)
  80. data = f.read((beginoffset + real_size) - f.tell())
  81. return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
  82. sha_to_hex(sha), flags & ~0x0fff)
  83. def write_cache_entry(f, entry):
  84. """Write an index entry to a file.
  85. :param f: File object
  86. :param entry: Entry to write, tuple with:
  87. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  88. """
  89. beginoffset = f.tell()
  90. (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
  91. write_cache_time(f, ctime)
  92. write_cache_time(f, mtime)
  93. flags = len(name) | (flags &~ 0x0fff)
  94. f.write(struct.pack(">LLLLLL20sH", dev, ino, mode, uid, gid, size, hex_to_sha(sha), flags))
  95. f.write(name)
  96. real_size = ((f.tell() - beginoffset + 8) & ~7)
  97. f.write("\0" * ((beginoffset + real_size) - f.tell()))
  98. def read_index(f):
  99. """Read an index file, yielding the individual entries."""
  100. header = f.read(4)
  101. if header != "DIRC":
  102. raise AssertionError("Invalid index file header: %r" % header)
  103. (version, num_entries) = struct.unpack(">LL", f.read(4 * 2))
  104. assert version in (1, 2)
  105. for i in range(num_entries):
  106. yield read_cache_entry(f)
  107. def read_index_dict(f):
  108. """Read an index file and return it as a dictionary.
  109. :param f: File object to read from
  110. """
  111. ret = {}
  112. for x in read_index(f):
  113. ret[x[0]] = tuple(x[1:])
  114. return ret
  115. def write_index(f, entries):
  116. """Write an index file.
  117. :param f: File-like object to write to
  118. :param entries: Iterable over the entries to write
  119. """
  120. f.write("DIRC")
  121. f.write(struct.pack(">LL", 2, len(entries)))
  122. for x in entries:
  123. write_cache_entry(f, x)
  124. def write_index_dict(f, entries):
  125. """Write an index file based on the contents of a dictionary.
  126. """
  127. entries_list = []
  128. for name in sorted(entries):
  129. entries_list.append((name,) + tuple(entries[name]))
  130. write_index(f, entries_list)
  131. def cleanup_mode(mode):
  132. """Cleanup a mode value.
  133. This will return a mode that can be stored in a tree object.
  134. :param mode: Mode to clean up.
  135. """
  136. if stat.S_ISLNK(mode):
  137. return stat.S_IFLNK
  138. elif stat.S_ISDIR(mode):
  139. return stat.S_IFDIR
  140. elif S_ISGITLINK(mode):
  141. return S_IFGITLINK
  142. ret = stat.S_IFREG | 0644
  143. ret |= (mode & 0111)
  144. return ret
  145. class Index(object):
  146. """A Git Index file."""
  147. def __init__(self, filename):
  148. """Open an index file.
  149. :param filename: Path to the index file
  150. """
  151. self._filename = filename
  152. self.clear()
  153. self.read()
  154. def write(self):
  155. """Write current contents of index to disk."""
  156. f = GitFile(self._filename, 'wb')
  157. try:
  158. f = SHA1Writer(f)
  159. write_index_dict(f, self._byname)
  160. finally:
  161. f.close()
  162. def read(self):
  163. """Read current contents of index from disk."""
  164. if not os.path.exists(self._filename):
  165. return
  166. f = GitFile(self._filename, 'rb')
  167. try:
  168. f = SHA1Reader(f)
  169. for x in read_index(f):
  170. self[x[0]] = tuple(x[1:])
  171. # FIXME: Additional data?
  172. f.read(os.path.getsize(self._filename)-f.tell()-20)
  173. f.check_sha()
  174. finally:
  175. f.close()
  176. def __len__(self):
  177. """Number of entries in this index file."""
  178. return len(self._byname)
  179. def __getitem__(self, name):
  180. """Retrieve entry by relative path.
  181. :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
  182. """
  183. return self._byname[name]
  184. def __iter__(self):
  185. """Iterate over the paths in this index."""
  186. return iter(self._byname)
  187. def get_sha1(self, path):
  188. """Return the (git object) SHA1 for the object at a path."""
  189. return self[path][-2]
  190. def get_mode(self, path):
  191. """Return the POSIX file mode for the object at a path."""
  192. return self[path][-6]
  193. def iterblobs(self):
  194. """Iterate over path, sha, mode tuples for use with commit_tree."""
  195. for path in self:
  196. entry = self[path]
  197. yield path, entry[-2], cleanup_mode(entry[-6])
  198. def clear(self):
  199. """Remove all contents from this index."""
  200. self._byname = {}
  201. def __setitem__(self, name, x):
  202. assert isinstance(name, str)
  203. assert len(x) == 10
  204. # Remove the old entry if any
  205. self._byname[name] = x
  206. def __delitem__(self, name):
  207. assert isinstance(name, str)
  208. del self._byname[name]
  209. def iteritems(self):
  210. return self._byname.iteritems()
  211. def update(self, entries):
  212. for name, value in entries.iteritems():
  213. self[name] = value
  214. def changes_from_tree(self, object_store, tree, want_unchanged=False):
  215. """Find the differences between the contents of this index and a tree.
  216. :param object_store: Object store to use for retrieving tree contents
  217. :param tree: SHA1 of the root tree
  218. :param want_unchanged: Whether unchanged files should be reported
  219. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
  220. """
  221. mine = set(self._byname.keys())
  222. for (name, mode, sha) in object_store.iter_tree_contents(tree):
  223. if name in mine:
  224. if (want_unchanged or self.get_sha1(name) != sha or
  225. self.get_mode(name) != mode):
  226. yield ((name, name), (mode, self.get_mode(name)), (sha, self.get_sha1(name)))
  227. mine.remove(name)
  228. else:
  229. # Was removed
  230. yield ((name, None), (mode, None), (sha, None))
  231. # Mention added files
  232. for name in mine:
  233. yield ((None, name), (None, self.get_mode(name)), (None, self.get_sha1(name)))
  234. def commit(self, object_store):
  235. """Create a new tree from an index.
  236. :param object_store: Object store to save the tree in
  237. :return: Root tree SHA
  238. """
  239. return commit_tree(object_store, self.iterblobs())
  240. def commit_tree(object_store, blobs):
  241. """Commit a new tree.
  242. :param object_store: Object store to add trees to
  243. :param blobs: Iterable over blob path, sha, mode entries
  244. :return: SHA1 of the created tree.
  245. """
  246. trees = {"": {}}
  247. def add_tree(path):
  248. if path in trees:
  249. return trees[path]
  250. dirname, basename = pathsplit(path)
  251. t = add_tree(dirname)
  252. assert isinstance(basename, str)
  253. newtree = {}
  254. t[basename] = newtree
  255. trees[path] = newtree
  256. return newtree
  257. for path, sha, mode in blobs:
  258. tree_path, basename = pathsplit(path)
  259. tree = add_tree(tree_path)
  260. tree[basename] = (mode, sha)
  261. def build_tree(path):
  262. tree = Tree()
  263. for basename, entry in trees[path].iteritems():
  264. if type(entry) == dict:
  265. mode = stat.S_IFDIR
  266. sha = build_tree(pathjoin(path, basename))
  267. else:
  268. (mode, sha) = entry
  269. tree.add(mode, basename, sha)
  270. object_store.add_object(tree)
  271. return tree.id
  272. return build_tree("")
  273. def commit_index(object_store, index):
  274. """Create a new tree from an index.
  275. :param object_store: Object store to save the tree in
  276. :param index: Index file
  277. :note: This function is deprecated, use index.commit() instead.
  278. :return: Root tree sha.
  279. """
  280. return commit_tree(object_store, index.iterblobs())