repo.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
# repo.py -- For dealing with git repositories.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. import os
  20. from commit import Commit
  21. from errors import (
  22. MissingCommitError,
  23. NotBlobError,
  24. NotCommitError,
  25. NotGitRepository,
  26. NotTreeError,
  27. )
  28. from objects import (
  29. ShaFile,
  30. Commit,
  31. Tree,
  32. Blob,
  33. )
  34. from pack import (
  35. iter_sha1,
  36. load_packs,
  37. write_pack_index_v2,
  38. PackData,
  39. )
  40. import tempfile
# Name of the object directory, joined onto the repository control directory.
OBJECTDIR = 'objects'
# Name of the pack directory, joined onto the object directory.
PACKDIR = 'pack'
# Prefix marking the contents of a ref file as a symbolic ref; the text
# after it is the name of the ref that is looked up instead.
SYMREF = 'ref: '
  44. class Tag(object):
  45. def __init__(self, name, ref):
  46. self.name = name
  47. self.ref = ref
  48. class Repo(object):
  49. ref_locs = ['', 'refs', 'refs/tags', 'refs/heads', 'refs/remotes']
  50. def __init__(self, root):
  51. if os.path.isdir(os.path.join(root, ".git", "objects")):
  52. self.bare = False
  53. self._controldir = os.path.join(root, ".git")
  54. elif os.path.isdir(os.path.join(root, "objects")):
  55. self.bare = True
  56. self._controldir = root
  57. else:
  58. raise NotGitRepository(root)
  59. self.path = root
  60. self.tags = [Tag(name, ref) for name, ref in self.get_tags().items()]
  61. self._object_store = None
  62. def controldir(self):
  63. return self._controldir
  64. def find_missing_objects(self, determine_wants, graph_walker, progress):
  65. """Fetch the missing objects required for a set of revisions.
  66. :param determine_wants: Function that takes a dictionary with heads
  67. and returns the list of heads to fetch.
  68. :param graph_walker: Object that can iterate over the list of revisions
  69. to fetch and has an "ack" method that will be called to acknowledge
  70. that a revision is present.
  71. :param progress: Simple progress function that will be called with
  72. updated progress strings.
  73. """
  74. wants = determine_wants(self.get_refs())
  75. commits_to_send = set(wants)
  76. sha_done = set()
  77. ref = graph_walker.next()
  78. while ref:
  79. sha_done.add(ref)
  80. if ref in self.object_store:
  81. graph_walker.ack(ref)
  82. ref = graph_walker.next()
  83. while commits_to_send:
  84. sha = commits_to_send.pop()
  85. if sha in sha_done:
  86. continue
  87. c = self.commit(sha)
  88. assert isinstance(c, Commit)
  89. sha_done.add(sha)
  90. commits_to_send.update([p for p in c.parents if not p in sha_done])
  91. def parse_tree(tree, sha_done):
  92. for mode, name, x in tree.entries():
  93. if not x in sha_done:
  94. try:
  95. t = self.tree(x)
  96. sha_done.add(x)
  97. parse_tree(t, sha_done)
  98. except:
  99. sha_done.add(x)
  100. treesha = c.tree
  101. if treesha not in sha_done:
  102. t = self.tree(treesha)
  103. sha_done.add(treesha)
  104. parse_tree(t, sha_done)
  105. progress("counting objects: %d\r" % len(sha_done))
  106. return sha_done
  107. def fetch_objects(self, determine_wants, graph_walker, progress):
  108. """Fetch the missing objects required for a set of revisions.
  109. :param determine_wants: Function that takes a dictionary with heads
  110. and returns the list of heads to fetch.
  111. :param graph_walker: Object that can iterate over the list of revisions
  112. to fetch and has an "ack" method that will be called to acknowledge
  113. that a revision is present.
  114. :param progress: Simple progress function that will be called with
  115. updated progress strings.
  116. """
  117. shas = self.find_missing_objects(determine_wants, graph_walker, progress)
  118. for sha in shas:
  119. yield self.get_object(sha)
  120. def object_dir(self):
  121. return os.path.join(self.controldir(), OBJECTDIR)
  122. @property
  123. def object_store(self):
  124. if self._object_store is None:
  125. self._object_store = ObjectStore(self.object_dir())
  126. return self._object_store
  127. def pack_dir(self):
  128. return os.path.join(self.object_dir(), PACKDIR)
  129. def _get_ref(self, file):
  130. f = open(file, 'rb')
  131. try:
  132. contents = f.read()
  133. if contents.startswith(SYMREF):
  134. ref = contents[len(SYMREF):]
  135. if ref[-1] == '\n':
  136. ref = ref[:-1]
  137. return self.ref(ref)
  138. assert len(contents) == 41, 'Invalid ref in %s' % file
  139. return contents[:-1]
  140. finally:
  141. f.close()
  142. def ref(self, name):
  143. for dir in self.ref_locs:
  144. file = os.path.join(self.controldir(), dir, name)
  145. if os.path.exists(file):
  146. return self._get_ref(file)
  147. def get_refs(self):
  148. ret = {}
  149. if self.head():
  150. ret['HEAD'] = self.head()
  151. for dir in ["refs/heads", "refs/tags"]:
  152. for name in os.listdir(os.path.join(self.controldir(), dir)):
  153. path = os.path.join(self.controldir(), dir, name)
  154. if os.path.isfile(path):
  155. ret["/".join([dir, name])] = self._get_ref(path)
  156. return ret
  157. def set_ref(self, name, value):
  158. file = os.path.join(self.controldir(), name)
  159. open(file, 'w').write(value+"\n")
  160. def remove_ref(self, name):
  161. file = os.path.join(self.controldir(), name)
  162. if os.path.exists(file):
  163. os.remove(file)
  164. return
  165. def get_tags(self):
  166. ret = {}
  167. for root, dirs, files in os.walk(os.path.join(self.controldir(), 'refs', 'tags')):
  168. for name in files:
  169. ret[name] = self._get_ref(os.path.join(root, name))
  170. return ret
  171. def heads(self):
  172. ret = {}
  173. for root, dirs, files in os.walk(os.path.join(self.controldir(), 'refs', 'heads')):
  174. for name in files:
  175. ret[name] = self._get_ref(os.path.join(root, name))
  176. return ret
  177. def head(self):
  178. return self.ref('HEAD')
  179. def _get_object(self, sha, cls):
  180. assert len(sha) in (20, 40)
  181. ret = self.get_object(sha)
  182. if ret._type != cls._type:
  183. if cls is Commit:
  184. raise NotCommitError(ret)
  185. elif cls is Blob:
  186. raise NotBlobError(ret)
  187. elif cls is Tree:
  188. raise NotTreeError(ret)
  189. else:
  190. raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
  191. return ret
  192. def get_object(self, sha):
  193. return self.object_store[sha]
  194. def get_parents(self, sha):
  195. return self.commit(sha).parents
  196. def commit(self, sha):
  197. return self._get_object(sha, Commit)
  198. def tree(self, sha):
  199. return self._get_object(sha, Tree)
  200. def get_blob(self, sha):
  201. return self._get_object(sha, Blob)
  202. def revision_history(self, head):
  203. """Returns a list of the commits reachable from head.
  204. Returns a list of commit objects. the first of which will be the commit
  205. of head, then following theat will be the parents.
  206. Raises NotCommitError if any no commits are referenced, including if the
  207. head parameter isn't the sha of a commit.
  208. XXX: work out how to handle merges.
  209. """
  210. # We build the list backwards, as parents are more likely to be older
  211. # than children
  212. pending_commits = [head]
  213. history = []
  214. while pending_commits != []:
  215. head = pending_commits.pop(0)
  216. try:
  217. commit = self.commit(head)
  218. except KeyError:
  219. raise MissingCommitError(head)
  220. if commit in history:
  221. continue
  222. i = 0
  223. for known_commit in history:
  224. if known_commit.commit_time > commit.commit_time:
  225. break
  226. i += 1
  227. history.insert(i, commit)
  228. parents = commit.parents
  229. pending_commits += parents
  230. history.reverse()
  231. return history
  232. def __repr__(self):
  233. return "<Repo at %r>" % self.path
  234. @classmethod
  235. def init_bare(cls, path, mkdir=True):
  236. for d in [["objects"],
  237. ["objects", "info"],
  238. ["objects", "pack"],
  239. ["branches"],
  240. ["refs"],
  241. ["refs", "tags"],
  242. ["refs", "heads"],
  243. ["hooks"],
  244. ["info"]]:
  245. os.mkdir(os.path.join(path, *d))
  246. open(os.path.join(path, 'HEAD'), 'w').write("ref: refs/heads/master\n")
  247. open(os.path.join(path, 'description'), 'w').write("Unnamed repository")
  248. open(os.path.join(path, 'info', 'excludes'), 'w').write("")
  249. create = init_bare
  250. class ObjectStore(object):
  251. def __init__(self, path):
  252. self.path = path
  253. self._packs = None
  254. def pack_dir(self):
  255. return os.path.join(self.path, PACKDIR)
  256. def __contains__(self, sha):
  257. # TODO: This can be more efficient
  258. try:
  259. self[sha]
  260. return True
  261. except KeyError:
  262. return False
  263. @property
  264. def packs(self):
  265. """List with pack objects."""
  266. if self._packs is None:
  267. self._packs = list(load_packs(self.pack_dir()))
  268. return self._packs
  269. def _get_shafile(self, sha):
  270. dir = sha[:2]
  271. file = sha[2:]
  272. # Check from object dir
  273. path = os.path.join(self.path, dir, file)
  274. if os.path.exists(path):
  275. return ShaFile.from_file(path)
  276. return None
  277. def get_raw(self, sha):
  278. """Obtain the raw text for an object.
  279. :param sha: Sha for the object.
  280. :return: tuple with object type and object contents.
  281. """
  282. for pack in self.packs:
  283. if sha in pack:
  284. return pack.get_raw(sha, self.get_raw)
  285. # FIXME: Are pack deltas ever against on-disk shafiles ?
  286. ret = self._get_shafile(sha)
  287. if ret is not None:
  288. return ret.as_raw_string()
  289. raise KeyError(sha)
  290. def __getitem__(self, sha):
  291. assert len(sha) == 40, "Incorrect length sha: %s" % str(sha)
  292. ret = self._get_shafile(sha)
  293. if ret is not None:
  294. return ret
  295. # Check from packs
  296. type, uncomp = self.get_raw(sha)
  297. return ShaFile.from_raw_string(type, uncomp)
  298. def move_in_pack(self, path):
  299. """Move a specific file containing a pack into the pack directory.
  300. :note: The file should be on the same file system as the
  301. packs directory.
  302. :param path: Path to the pack file.
  303. """
  304. p = PackData(path)
  305. entries = p.sorted_entries(self.get_raw)
  306. basename = os.path.join(self.pack_dir(),
  307. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  308. write_pack_index_v2(basename+".idx", entries, p.calculate_checksum())
  309. os.rename(path, basename + ".pack")
  310. def add_pack(self):
  311. """Add a new pack to this object store.
  312. :return: Fileobject to write to and a commit function to
  313. call when the pack is finished.
  314. """
  315. fd, path = tempfile.mkstemp(dir=self.pack_dir(), suffix=".pack")
  316. f = os.fdopen(fd, 'w')
  317. def commit():
  318. if os.path.getsize(path) > 0:
  319. self.move_in_pack(path)
  320. return f, commit