repo.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
# repo.py -- For dealing with git repositories.
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your option) any later version of
# the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
  20. import os
  21. import stat
  22. from dulwich.errors import (
  23. MissingCommitError,
  24. NotBlobError,
  25. NotCommitError,
  26. NotGitRepository,
  27. NotTreeError,
  28. )
  29. from dulwich.object_store import (
  30. ObjectStore,
  31. )
  32. from dulwich.objects import (
  33. Blob,
  34. Commit,
  35. ShaFile,
  36. Tag,
  37. Tree,
  38. )
  39. OBJECTDIR = 'objects'
  40. SYMREF = 'ref: '
  41. REFSDIR = 'refs'
  42. INDEX_FILENAME = "index"
  43. class Tags(object):
  44. """Tags container."""
  45. def __init__(self, tagdir, tags):
  46. self.tagdir = tagdir
  47. self.tags = tags
  48. def __getitem__(self, name):
  49. return self.tags[name]
  50. def __setitem__(self, name, ref):
  51. self.tags[name] = ref
  52. f = open(os.path.join(self.tagdir, name), 'wb')
  53. try:
  54. f.write("%s\n" % ref)
  55. finally:
  56. f.close()
  57. def __len__(self):
  58. return len(self.tags)
  59. def iteritems(self):
  60. for k in self.tags:
  61. yield k, self[k]
  62. def read_packed_refs(f):
  63. """Read a packed refs file.
  64. Yields tuples with ref names and SHA1s.
  65. :param f: file-like object to read from
  66. """
  67. l = f.readline()
  68. for l in f.readlines():
  69. if l[0] == "#":
  70. # Comment
  71. continue
  72. if l[0] == "^":
  73. # FIXME: Return somehow
  74. continue
  75. yield tuple(l.rstrip("\n").split(" ", 2))
  76. class MissingObjectFinder(object):
  77. """Find the objects missing from another git repository.
  78. :param object_store: Object store containing at least all objects to be
  79. sent
  80. :param wants: SHA1s of commits to send
  81. :param graph_walker: graph walker object used to see what the remote
  82. repo has and misses
  83. :param progress: Optional function to report progress to.
  84. """
  85. def __init__(self, object_store, wants, graph_walker, progress=None):
  86. self.sha_done = set()
  87. self.objects_to_send = set([(w, None) for w in wants])
  88. self.object_store = object_store
  89. if progress is None:
  90. self.progress = lambda x: None
  91. else:
  92. self.progress = progress
  93. ref = graph_walker.next()
  94. while ref:
  95. if ref in self.object_store:
  96. graph_walker.ack(ref)
  97. ref = graph_walker.next()
  98. def add_todo(self, entries):
  99. self.objects_to_send.update([e for e in entries if not e in self.sha_done])
  100. def parse_tree(self, tree):
  101. self.add_todo([(sha, name) for (mode, name, sha) in tree.entries()])
  102. def parse_commit(self, commit):
  103. self.add_todo([(commit.tree, "")])
  104. self.add_todo([(p, None) for p in commit.parents])
  105. def parse_tag(self, tag):
  106. self.add_todo([(tag.object[1], None)])
  107. def next(self):
  108. if not self.objects_to_send:
  109. return None
  110. (sha, name) = self.objects_to_send.pop()
  111. o = self.object_store[sha]
  112. if isinstance(o, Commit):
  113. self.parse_commit(o)
  114. elif isinstance(o, Tree):
  115. self.parse_tree(o)
  116. elif isinstance(o, Tag):
  117. self.parse_tag(o)
  118. self.sha_done.add((sha, name))
  119. self.progress("counting objects: %d\r" % len(self.sha_done))
  120. return (sha, name)
  121. class Repo(object):
  122. """A local git repository."""
  123. ref_locs = ['', REFSDIR, 'refs/tags', 'refs/heads', 'refs/remotes']
  124. def __init__(self, root):
  125. if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
  126. self.bare = False
  127. self._controldir = os.path.join(root, ".git")
  128. elif os.path.isdir(os.path.join(root, OBJECTDIR)):
  129. self.bare = True
  130. self._controldir = root
  131. else:
  132. raise NotGitRepository(root)
  133. self.path = root
  134. self.tags = Tags(self.tagdir(), self.get_tags())
  135. self._object_store = None
  136. def controldir(self):
  137. """Return the path of the control directory."""
  138. return self._controldir
  139. def index_path(self):
  140. return os.path.join(self.controldir(), INDEX_FILENAME)
  141. def open_index(self):
  142. """Open the index for this repository."""
  143. from dulwich.index import Index
  144. return Index(self.index_path())
  145. def has_index(self):
  146. """Check if an index is present."""
  147. return os.path.exists(self.index_path())
  148. def find_missing_objects(self, determine_wants, graph_walker, progress):
  149. """Find the missing objects required for a set of revisions.
  150. :param determine_wants: Function that takes a dictionary with heads
  151. and returns the list of heads to fetch.
  152. :param graph_walker: Object that can iterate over the list of revisions
  153. to fetch and has an "ack" method that will be called to acknowledge
  154. that a revision is present.
  155. :param progress: Simple progress function that will be called with
  156. updated progress strings.
  157. """
  158. wants = determine_wants(self.get_refs())
  159. return iter(MissingObjectFinder(self.object_store, wants, graph_walker,
  160. progress).next, None)
  161. def fetch_objects(self, determine_wants, graph_walker, progress):
  162. """Fetch the missing objects required for a set of revisions.
  163. :param determine_wants: Function that takes a dictionary with heads
  164. and returns the list of heads to fetch.
  165. :param graph_walker: Object that can iterate over the list of revisions
  166. to fetch and has an "ack" method that will be called to acknowledge
  167. that a revision is present.
  168. :param progress: Simple progress function that will be called with
  169. updated progress strings.
  170. :return: tuple with number of objects, iterator over objects
  171. """
  172. return self.object_store.iter_shas(
  173. self.find_missing_objects(determine_wants, graph_walker, progress))
  174. def object_dir(self):
  175. return os.path.join(self.controldir(), OBJECTDIR)
  176. @property
  177. def object_store(self):
  178. if self._object_store is None:
  179. self._object_store = ObjectStore(self.object_dir())
  180. return self._object_store
  181. def pack_dir(self):
  182. return os.path.join(self.object_dir(), PACKDIR)
  183. def _get_ref(self, file):
  184. f = open(file, 'rb')
  185. try:
  186. contents = f.read()
  187. if contents.startswith(SYMREF):
  188. ref = contents[len(SYMREF):]
  189. if ref[-1] == '\n':
  190. ref = ref[:-1]
  191. return self.ref(ref)
  192. assert len(contents) == 41, 'Invalid ref in %s' % file
  193. return contents[:-1]
  194. finally:
  195. f.close()
  196. def ref(self, name):
  197. """Return the SHA1 a ref is pointing to."""
  198. for dir in self.ref_locs:
  199. file = os.path.join(self.controldir(), dir, name)
  200. if os.path.exists(file):
  201. return self._get_ref(file)
  202. packed_refs = self.get_packed_refs()
  203. if name in packed_refs:
  204. return packed_refs[name]
  205. def get_refs(self):
  206. ret = {}
  207. if self.head():
  208. ret['HEAD'] = self.head()
  209. for dir in ["refs/heads", "refs/tags"]:
  210. for name in os.listdir(os.path.join(self.controldir(), dir)):
  211. path = os.path.join(self.controldir(), dir, name)
  212. if os.path.isfile(path):
  213. ret["/".join([dir, name])] = self._get_ref(path)
  214. ret.update(self.get_packed_refs())
  215. return ret
  216. def get_packed_refs(self):
  217. path = os.path.join(self.controldir(), 'packed-refs')
  218. if not os.path.exists(path):
  219. return {}
  220. ret = {}
  221. f = open(path, 'r')
  222. try:
  223. for entry in read_packed_refs(f):
  224. ret[entry[1]] = entry[0]
  225. return ret
  226. finally:
  227. f.close()
  228. def set_ref(self, name, value):
  229. file = os.path.join(self.controldir(), name)
  230. dirpath = os.path.dirname(file)
  231. if not os.path.exists(dirpath):
  232. os.makedirs(dirpath)
  233. f = open(file, 'w')
  234. try:
  235. f.write(value+"\n")
  236. finally:
  237. f.close()
  238. def remove_ref(self, name):
  239. file = os.path.join(self.controldir(), name)
  240. if os.path.exists(file):
  241. os.remove(file)
  242. def tagdir(self):
  243. """Tag directory."""
  244. return os.path.join(self.controldir(), REFSDIR, 'tags')
  245. def get_tags(self):
  246. ret = {}
  247. for root, dirs, files in os.walk(self.tagdir()):
  248. for name in files:
  249. ret[name] = self._get_ref(os.path.join(root, name))
  250. return ret
  251. def heads(self):
  252. ret = {}
  253. for root, dirs, files in os.walk(os.path.join(self.controldir(), REFSDIR, 'heads')):
  254. for name in files:
  255. ret[name] = self._get_ref(os.path.join(root, name))
  256. return ret
  257. def head(self):
  258. return self.ref('HEAD')
  259. def _get_object(self, sha, cls):
  260. assert len(sha) in (20, 40)
  261. ret = self.get_object(sha)
  262. if ret._type != cls._type:
  263. if cls is Commit:
  264. raise NotCommitError(ret)
  265. elif cls is Blob:
  266. raise NotBlobError(ret)
  267. elif cls is Tree:
  268. raise NotTreeError(ret)
  269. else:
  270. raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
  271. return ret
  272. def get_object(self, sha):
  273. return self.object_store[sha]
  274. def get_parents(self, sha):
  275. return self.commit(sha).parents
  276. def commit(self, sha):
  277. return self._get_object(sha, Commit)
  278. def tree(self, sha):
  279. return self._get_object(sha, Tree)
  280. def tag(self, sha):
  281. return self._get_object(sha, Tag)
  282. def get_blob(self, sha):
  283. return self._get_object(sha, Blob)
  284. def revision_history(self, head):
  285. """Returns a list of the commits reachable from head.
  286. Returns a list of commit objects. the first of which will be the commit
  287. of head, then following theat will be the parents.
  288. Raises NotCommitError if any no commits are referenced, including if the
  289. head parameter isn't the sha of a commit.
  290. XXX: work out how to handle merges.
  291. """
  292. # We build the list backwards, as parents are more likely to be older
  293. # than children
  294. pending_commits = [head]
  295. history = []
  296. while pending_commits != []:
  297. head = pending_commits.pop(0)
  298. try:
  299. commit = self.commit(head)
  300. except KeyError:
  301. raise MissingCommitError(head)
  302. if commit in history:
  303. continue
  304. i = 0
  305. for known_commit in history:
  306. if known_commit.commit_time > commit.commit_time:
  307. break
  308. i += 1
  309. history.insert(i, commit)
  310. parents = commit.parents
  311. pending_commits += parents
  312. history.reverse()
  313. return history
  314. def __repr__(self):
  315. return "<Repo at %r>" % self.path
  316. @classmethod
  317. def init(cls, path, mkdir=True):
  318. controldir = os.path.join(path, ".git")
  319. os.mkdir(controldir)
  320. cls.init_bare(controldir)
  321. @classmethod
  322. def init_bare(cls, path, mkdir=True):
  323. for d in [[OBJECTDIR],
  324. [OBJECTDIR, "info"],
  325. [OBJECTDIR, "pack"],
  326. ["branches"],
  327. [REFSDIR],
  328. ["refs", "tags"],
  329. ["refs", "heads"],
  330. ["hooks"],
  331. ["info"]]:
  332. os.mkdir(os.path.join(path, *d))
  333. open(os.path.join(path, 'HEAD'), 'w').write("ref: refs/heads/master\n")
  334. open(os.path.join(path, 'description'), 'w').write("Unnamed repository")
  335. open(os.path.join(path, 'info', 'excludes'), 'w').write("")
  336. create = init_bare