repo.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
# repo.py -- For dealing with git repositories.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) any later version of
  9. # the License.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19. # MA 02110-1301, USA.
  20. """Repository access."""
  21. import os
  22. import stat
  23. from dulwich.errors import (
  24. MissingCommitError,
  25. NotBlobError,
  26. NotCommitError,
  27. NotGitRepository,
  28. NotTreeError,
  29. )
  30. from dulwich.file import GitFile
  31. from dulwich.object_store import (
  32. DiskObjectStore,
  33. )
  34. from dulwich.objects import (
  35. Blob,
  36. Commit,
  37. ShaFile,
  38. Tag,
  39. Tree,
  40. )
# Name of the object database directory inside the control directory.
OBJECTDIR = 'objects'
# Prefix marking a ref's contents as a symbolic ref to another ref
# (checked by follow_ref).
SYMREF = 'ref: '
# Name of the directory holding refs inside the control directory.
REFSDIR = 'refs'
# Subdirectories of REFSDIR that Repo.init_bare creates.
REFSDIR_TAGS = 'tags'
REFSDIR_HEADS = 'heads'
# File name of the index inside the control directory (see Repo.index_path).
INDEX_FILENAME = "index"
  47. def follow_ref(container, name):
  48. """Follow a ref back to a SHA1.
  49. :param container: Ref container to use for looking up refs.
  50. :param name: Name of the original ref.
  51. """
  52. contents = container[name]
  53. if contents.startswith(SYMREF):
  54. ref = contents[len(SYMREF):]
  55. if ref[-1] == '\n':
  56. ref = ref[:-1]
  57. return follow_ref(container, ref)
  58. assert len(contents) == 40, 'Invalid ref in %s' % name
  59. return contents
  60. class RefsContainer(object):
  61. """A container for refs."""
  62. def as_dict(self, base):
  63. """Return the contents of this ref container under base as a dict."""
  64. raise NotImplementedError(self.as_dict)
  65. def follow(self, name):
  66. """Follow a ref name back to a SHA1.
  67. :param name: Name of the ref
  68. """
  69. return follow_ref(self, name)
  70. def set_ref(self, name, other):
  71. """Make a ref point at another ref.
  72. :param name: Name of the ref to set
  73. :param other: Name of the ref to point at
  74. """
  75. self[name] = "ref: %s\n" % other
  76. def import_refs(self, base, other):
  77. for name, value in other.iteritems():
  78. self["%s/%s" % (base, name)] = value
  79. class DiskRefsContainer(RefsContainer):
  80. """Refs container that reads refs from disk."""
  81. def __init__(self, path):
  82. self.path = path
  83. def __repr__(self):
  84. return "%s(%r)" % (self.__class__.__name__, self.path)
  85. def keys(self, base=None):
  86. """Refs present in this container."""
  87. return list(self.iterkeys(base))
  88. def iterkeys(self, base=None):
  89. if base is not None:
  90. return self.itersubkeys(base)
  91. else:
  92. return self.iterallkeys()
  93. def itersubkeys(self, base):
  94. path = self.refpath(base)
  95. for root, dirs, files in os.walk(path):
  96. dir = root[len(path):].strip("/").replace(os.path.sep, "/")
  97. for filename in files:
  98. yield ("%s/%s" % (dir, filename)).strip("/")
  99. def iterallkeys(self):
  100. if os.path.exists(self.refpath("HEAD")):
  101. yield "HEAD"
  102. path = self.refpath("")
  103. for root, dirs, files in os.walk(self.refpath("refs")):
  104. dir = root[len(path):].strip("/").replace(os.path.sep, "/")
  105. for filename in files:
  106. yield ("%s/%s" % (dir, filename)).strip("/")
  107. def as_dict(self, base=None, follow=True):
  108. """Return the contents of this container as a dictionary.
  109. """
  110. ret = {}
  111. if base is None:
  112. keys = self.iterkeys()
  113. base = ""
  114. else:
  115. keys = self.itersubkeys(base)
  116. for key in keys:
  117. if follow:
  118. try:
  119. ret[key] = self.follow(("%s/%s" % (base, key)).strip("/"))
  120. except KeyError:
  121. continue # Unable to resolve
  122. else:
  123. ret[key] = self[("%s/%s" % (base, key)).strip("/")]
  124. return ret
  125. def refpath(self, name):
  126. """Return the disk path of a ref.
  127. """
  128. if os.path.sep != "/":
  129. name = name.replace("/", os.path.sep)
  130. return os.path.join(self.path, name)
  131. def __getitem__(self, name):
  132. file = self.refpath(name)
  133. if not os.path.exists(file):
  134. raise KeyError(name)
  135. f = GitFile(file, 'rb')
  136. try:
  137. return f.read().strip("\n")
  138. finally:
  139. f.close()
  140. def __setitem__(self, name, ref):
  141. file = self.refpath(name)
  142. dirpath = os.path.dirname(file)
  143. if not os.path.exists(dirpath):
  144. os.makedirs(dirpath)
  145. f = GitFile(file, 'wb')
  146. try:
  147. f.write(ref+"\n")
  148. finally:
  149. f.close()
  150. def __delitem__(self, name):
  151. file = self.refpath(name)
  152. if os.path.exists(file):
  153. os.remove(file)
  154. def read_packed_refs(f):
  155. """Read a packed refs file.
  156. Yields tuples with ref names and SHA1s.
  157. :param f: file-like object to read from
  158. """
  159. l = f.readline()
  160. for l in f.readlines():
  161. if l[0] == "#":
  162. # Comment
  163. continue
  164. if l[0] == "^":
  165. # FIXME: Return somehow
  166. continue
  167. yield tuple(l.rstrip("\n").split(" ", 2))
  168. class BaseRepo(object):
  169. """Base class for a git repository.
  170. :ivar object_store: Dictionary-like object for accessing
  171. the objects
  172. :ivar refs: Dictionary-like object with the refs in this repository
  173. """
  174. def __init__(self, object_store, refs):
  175. self.object_store = object_store
  176. self.refs = refs
  177. def fetch(self, target, determine_wants=None, progress=None):
  178. """Fetch objects into another repository.
  179. :param target: The target repository
  180. :param determine_wants: Optional function to determine what refs to
  181. fetch.
  182. :param progress: Optional progress function
  183. """
  184. if determine_wants is None:
  185. determine_wants = lambda heads: heads.values()
  186. target.object_store.add_objects(
  187. self.fetch_objects(determine_wants, target.get_graph_walker(),
  188. progress))
  189. return self.get_refs()
  190. def fetch_objects(self, determine_wants, graph_walker, progress):
  191. """Fetch the missing objects required for a set of revisions.
  192. :param determine_wants: Function that takes a dictionary with heads
  193. and returns the list of heads to fetch.
  194. :param graph_walker: Object that can iterate over the list of revisions
  195. to fetch and has an "ack" method that will be called to acknowledge
  196. that a revision is present.
  197. :param progress: Simple progress function that will be called with
  198. updated progress strings.
  199. :return: iterator over objects, with __len__ implemented
  200. """
  201. wants = determine_wants(self.get_refs())
  202. haves = self.object_store.find_common_revisions(graph_walker)
  203. return self.object_store.iter_shas(
  204. self.object_store.find_missing_objects(haves, wants, progress))
  205. def get_graph_walker(self, heads=None):
  206. if heads is None:
  207. heads = self.refs.as_dict('refs/heads').values()
  208. return self.object_store.get_graph_walker(heads)
  209. def ref(self, name):
  210. """Return the SHA1 a ref is pointing to."""
  211. raise NotImplementedError(self.refs)
  212. def get_refs(self):
  213. """Get dictionary with all refs."""
  214. raise NotImplementedError(self.get_refs)
  215. def head(self):
  216. """Return the SHA1 pointed at by HEAD."""
  217. return self.refs.follow('HEAD')
  218. def _get_object(self, sha, cls):
  219. assert len(sha) in (20, 40)
  220. ret = self.get_object(sha)
  221. if ret._type != cls._type:
  222. if cls is Commit:
  223. raise NotCommitError(ret)
  224. elif cls is Blob:
  225. raise NotBlobError(ret)
  226. elif cls is Tree:
  227. raise NotTreeError(ret)
  228. else:
  229. raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
  230. return ret
  231. def get_object(self, sha):
  232. return self.object_store[sha]
  233. def get_parents(self, sha):
  234. return self.commit(sha).parents
  235. def get_config(self):
  236. from configobj import ConfigObj
  237. return ConfigObj(os.path.join(self._controldir, 'config'))
  238. def commit(self, sha):
  239. return self._get_object(sha, Commit)
  240. def tree(self, sha):
  241. return self._get_object(sha, Tree)
  242. def tag(self, sha):
  243. return self._get_object(sha, Tag)
  244. def get_blob(self, sha):
  245. return self._get_object(sha, Blob)
  246. def revision_history(self, head):
  247. """Returns a list of the commits reachable from head.
  248. Returns a list of commit objects. the first of which will be the commit
  249. of head, then following theat will be the parents.
  250. Raises NotCommitError if any no commits are referenced, including if the
  251. head parameter isn't the sha of a commit.
  252. XXX: work out how to handle merges.
  253. """
  254. # We build the list backwards, as parents are more likely to be older
  255. # than children
  256. pending_commits = [head]
  257. history = []
  258. while pending_commits != []:
  259. head = pending_commits.pop(0)
  260. try:
  261. commit = self.commit(head)
  262. except KeyError:
  263. raise MissingCommitError(head)
  264. if commit in history:
  265. continue
  266. i = 0
  267. for known_commit in history:
  268. if known_commit.commit_time > commit.commit_time:
  269. break
  270. i += 1
  271. history.insert(i, commit)
  272. parents = commit.parents
  273. pending_commits += parents
  274. history.reverse()
  275. return history
  276. def __getitem__(self, name):
  277. if len(name) in (20, 40):
  278. return self.object_store[name]
  279. return self.object_store[self.refs[name]]
  280. def __setitem__(self, name, value):
  281. if name.startswith("refs/") or name == "HEAD":
  282. if isinstance(value, ShaFile):
  283. self.refs[name] = value.id
  284. elif isinstance(value, str):
  285. self.refs[name] = value
  286. else:
  287. raise TypeError(value)
  288. raise ValueError(name)
  289. def __delitem__(self, name):
  290. if name.startswith("refs") or name == "HEAD":
  291. del self.refs[name]
  292. raise ValueError(name)
  293. class Repo(BaseRepo):
  294. """A git repository backed by local disk."""
  295. def __init__(self, root):
  296. if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
  297. self.bare = False
  298. self._controldir = os.path.join(root, ".git")
  299. elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
  300. os.path.isdir(os.path.join(root, REFSDIR))):
  301. self.bare = True
  302. self._controldir = root
  303. else:
  304. raise NotGitRepository(root)
  305. self.path = root
  306. object_store = DiskObjectStore(
  307. os.path.join(self.controldir(), OBJECTDIR))
  308. refs = DiskRefsContainer(self.controldir())
  309. BaseRepo.__init__(self, object_store, refs)
  310. def controldir(self):
  311. """Return the path of the control directory."""
  312. return self._controldir
  313. def index_path(self):
  314. """Return path to the index file."""
  315. return os.path.join(self.controldir(), INDEX_FILENAME)
  316. def open_index(self):
  317. """Open the index for this repository."""
  318. from dulwich.index import Index
  319. return Index(self.index_path())
  320. def has_index(self):
  321. """Check if an index is present."""
  322. return os.path.exists(self.index_path())
  323. def ref(self, name):
  324. """Return the SHA1 a ref is pointing to."""
  325. try:
  326. return self.refs.follow(name)
  327. except KeyError:
  328. return self.get_packed_refs()[name]
  329. def get_refs(self):
  330. """Get dictionary with all refs."""
  331. # TODO: move to base class after merging RefsContainer changes
  332. ret = {}
  333. try:
  334. if self.head():
  335. ret['HEAD'] = self.head()
  336. except KeyError:
  337. pass
  338. ret.update(self.refs.as_dict())
  339. ret.update(self.get_packed_refs())
  340. return ret
  341. def get_packed_refs(self):
  342. """Get contents of the packed-refs file.
  343. :return: Dictionary mapping ref names to SHA1s
  344. :note: Will return an empty dictionary when no packed-refs file is
  345. present.
  346. """
  347. # TODO: move to base class after merging RefsContainer changes
  348. path = os.path.join(self.controldir(), 'packed-refs')
  349. if not os.path.exists(path):
  350. return {}
  351. ret = {}
  352. f = open(path, 'rb')
  353. try:
  354. for entry in read_packed_refs(f):
  355. ret[entry[1]] = entry[0]
  356. return ret
  357. finally:
  358. f.close()
  359. def __repr__(self):
  360. return "<Repo at %r>" % self.path
  361. def do_commit(self, committer, message,
  362. author=None, commit_timestamp=None,
  363. commit_timezone=None, author_timestamp=None,
  364. author_timezone=None, tree=None):
  365. """Create a new commit.
  366. :param committer: Committer fullname
  367. :param message: Commit message
  368. :param author: Author fullname (defaults to committer)
  369. :param commit_timestamp: Commit timestamp (defaults to now)
  370. :param commit_timezone: Commit timestamp timezone (defaults to GMT)
  371. :param author_timestamp: Author timestamp (defaults to commit timestamp)
  372. :param author_timezone: Author timestamp timezone
  373. (defaults to commit timestamp timezone)
  374. :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
  375. :return: New commit SHA1
  376. """
  377. from dulwich.index import commit_index
  378. import time
  379. index = self.open_index()
  380. c = Commit()
  381. if tree is None:
  382. c.tree = commit_index(self.object_store, index)
  383. else:
  384. c.tree = tree
  385. c.committer = committer
  386. if commit_timestamp is None:
  387. commit_timestamp = time.time()
  388. c.commit_time = int(commit_timestamp)
  389. if commit_timezone is None:
  390. commit_timezone = 0
  391. c.commit_timezone = commit_timezone
  392. if author is None:
  393. author = committer
  394. c.author = author
  395. if author_timestamp is None:
  396. author_timestamp = commit_timestamp
  397. c.author_time = int(author_timestamp)
  398. if author_timezone is None:
  399. author_timezone = commit_timezone
  400. c.author_timezone = author_timezone
  401. c.message = message
  402. self.object_store.add_object(c)
  403. self.refs["HEAD"] = c.id
  404. return c.id
  405. @classmethod
  406. def init(cls, path, mkdir=True):
  407. controldir = os.path.join(path, ".git")
  408. os.mkdir(controldir)
  409. cls.init_bare(controldir)
  410. return cls(path)
  411. @classmethod
  412. def init_bare(cls, path, mkdir=True):
  413. for d in [[OBJECTDIR],
  414. [OBJECTDIR, "info"],
  415. [OBJECTDIR, "pack"],
  416. ["branches"],
  417. [REFSDIR],
  418. [REFSDIR, REFSDIR_TAGS],
  419. [REFSDIR, REFSDIR_HEADS],
  420. ["hooks"],
  421. ["info"]]:
  422. os.mkdir(os.path.join(path, *d))
  423. ret = cls(path)
  424. ret.refs.set_ref("HEAD", "refs/heads/master")
  425. f = GitFile(os.path.join(path, 'description'), 'wb')
  426. try:
  427. f.write("Unnamed repository")
  428. finally:
  429. f.close()
  430. f = GitFile(os.path.join(path, 'config'), 'wb')
  431. try:
  432. f.write("""[core]
  433. repositoryformatversion = 0
  434. filemode = true
  435. bare = false
  436. logallrefupdates = true
  437. """)
  438. finally:
  439. f.close()
  440. f = GitFile(os.path.join(path, 'info', 'excludes'), 'wb')
  441. f.close()
  442. return ret
  443. create = init_bare