repo.py 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278
  1. # repo.py -- For dealing with git repositories.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) any later version of
  9. # the License.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19. # MA 02110-1301, USA.
  20. """Repository access."""
  21. from cStringIO import StringIO
  22. import errno
  23. import os
  24. from dulwich.errors import (
  25. NoIndexPresent,
  26. NotBlobError,
  27. NotCommitError,
  28. NotGitRepository,
  29. NotTreeError,
  30. NotTagError,
  31. PackedRefsException,
  32. CommitError,
  33. RefFormatError,
  34. )
  35. from dulwich.file import (
  36. ensure_dir_exists,
  37. GitFile,
  38. )
  39. from dulwich.object_store import (
  40. DiskObjectStore,
  41. MemoryObjectStore,
  42. )
  43. from dulwich.objects import (
  44. Blob,
  45. Commit,
  46. ShaFile,
  47. Tag,
  48. Tree,
  49. hex_to_sha,
  50. )
  51. from dulwich.walk import (
  52. Walker,
  53. )
  54. import warnings
  55. OBJECTDIR = 'objects'
  56. SYMREF = 'ref: '
  57. REFSDIR = 'refs'
  58. REFSDIR_TAGS = 'tags'
  59. REFSDIR_HEADS = 'heads'
  60. INDEX_FILENAME = "index"
  61. BASE_DIRECTORIES = [
  62. ["branches"],
  63. [REFSDIR],
  64. [REFSDIR, REFSDIR_TAGS],
  65. [REFSDIR, REFSDIR_HEADS],
  66. ["hooks"],
  67. ["info"]
  68. ]
  69. def read_info_refs(f):
  70. ret = {}
  71. for l in f.readlines():
  72. (sha, name) = l.rstrip("\r\n").split("\t", 1)
  73. ret[name] = sha
  74. return ret
  75. def check_ref_format(refname):
  76. """Check if a refname is correctly formatted.
  77. Implements all the same rules as git-check-ref-format[1].
  78. [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
  79. :param refname: The refname to check
  80. :return: True if refname is valid, False otherwise
  81. """
  82. # These could be combined into one big expression, but are listed separately
  83. # to parallel [1].
  84. if '/.' in refname or refname.startswith('.'):
  85. return False
  86. if '/' not in refname:
  87. return False
  88. if '..' in refname:
  89. return False
  90. for c in refname:
  91. if ord(c) < 040 or c in '\177 ~^:?*[':
  92. return False
  93. if refname[-1] in '/.':
  94. return False
  95. if refname.endswith('.lock'):
  96. return False
  97. if '@{' in refname:
  98. return False
  99. if '\\' in refname:
  100. return False
  101. return True
  102. class RefsContainer(object):
  103. """A container for refs."""
  104. def set_ref(self, name, other):
  105. warnings.warn("RefsContainer.set_ref() is deprecated."
  106. "Use set_symblic_ref instead.",
  107. category=DeprecationWarning, stacklevel=2)
  108. return self.set_symbolic_ref(name, other)
  109. def set_symbolic_ref(self, name, other):
  110. """Make a ref point at another ref.
  111. :param name: Name of the ref to set
  112. :param other: Name of the ref to point at
  113. """
  114. raise NotImplementedError(self.set_symbolic_ref)
  115. def get_packed_refs(self):
  116. """Get contents of the packed-refs file.
  117. :return: Dictionary mapping ref names to SHA1s
  118. :note: Will return an empty dictionary when no packed-refs file is
  119. present.
  120. """
  121. raise NotImplementedError(self.get_packed_refs)
  122. def get_peeled(self, name):
  123. """Return the cached peeled value of a ref, if available.
  124. :param name: Name of the ref to peel
  125. :return: The peeled value of the ref. If the ref is known not point to a
  126. tag, this will be the SHA the ref refers to. If the ref may point to
  127. a tag, but no cached information is available, None is returned.
  128. """
  129. return None
  130. def import_refs(self, base, other):
  131. for name, value in other.iteritems():
  132. self["%s/%s" % (base, name)] = value
  133. def allkeys(self):
  134. """All refs present in this container."""
  135. raise NotImplementedError(self.allkeys)
  136. def keys(self, base=None):
  137. """Refs present in this container.
  138. :param base: An optional base to return refs under.
  139. :return: An unsorted set of valid refs in this container, including
  140. packed refs.
  141. """
  142. if base is not None:
  143. return self.subkeys(base)
  144. else:
  145. return self.allkeys()
  146. def subkeys(self, base):
  147. """Refs present in this container under a base.
  148. :param base: The base to return refs under.
  149. :return: A set of valid refs in this container under the base; the base
  150. prefix is stripped from the ref names returned.
  151. """
  152. keys = set()
  153. base_len = len(base) + 1
  154. for refname in self.allkeys():
  155. if refname.startswith(base):
  156. keys.add(refname[base_len:])
  157. return keys
  158. def as_dict(self, base=None):
  159. """Return the contents of this container as a dictionary.
  160. """
  161. ret = {}
  162. keys = self.keys(base)
  163. if base is None:
  164. base = ""
  165. for key in keys:
  166. try:
  167. ret[key] = self[("%s/%s" % (base, key)).strip("/")]
  168. except KeyError:
  169. continue # Unable to resolve
  170. return ret
  171. def _check_refname(self, name):
  172. """Ensure a refname is valid and lives in refs or is HEAD.
  173. HEAD is not a valid refname according to git-check-ref-format, but this
  174. class needs to be able to touch HEAD. Also, check_ref_format expects
  175. refnames without the leading 'refs/', but this class requires that
  176. so it cannot touch anything outside the refs dir (or HEAD).
  177. :param name: The name of the reference.
  178. :raises KeyError: if a refname is not HEAD or is otherwise not valid.
  179. """
  180. if name == 'HEAD':
  181. return
  182. if not name.startswith('refs/') or not check_ref_format(name[5:]):
  183. raise RefFormatError(name)
  184. def read_ref(self, refname):
  185. """Read a reference without following any references.
  186. :param refname: The name of the reference
  187. :return: The contents of the ref file, or None if it does
  188. not exist.
  189. """
  190. contents = self.read_loose_ref(refname)
  191. if not contents:
  192. contents = self.get_packed_refs().get(refname, None)
  193. return contents
  194. def read_loose_ref(self, name):
  195. """Read a loose reference and return its contents.
  196. :param name: the refname to read
  197. :return: The contents of the ref file, or None if it does
  198. not exist.
  199. """
  200. raise NotImplementedError(self.read_loose_ref)
  201. def _follow(self, name):
  202. """Follow a reference name.
  203. :return: a tuple of (refname, sha), where refname is the name of the
  204. last reference in the symbolic reference chain
  205. """
  206. contents = SYMREF + name
  207. depth = 0
  208. while contents.startswith(SYMREF):
  209. refname = contents[len(SYMREF):]
  210. contents = self.read_ref(refname)
  211. if not contents:
  212. break
  213. depth += 1
  214. if depth > 5:
  215. raise KeyError(name)
  216. return refname, contents
  217. def __contains__(self, refname):
  218. if self.read_ref(refname):
  219. return True
  220. return False
  221. def __getitem__(self, name):
  222. """Get the SHA1 for a reference name.
  223. This method follows all symbolic references.
  224. """
  225. _, sha = self._follow(name)
  226. if sha is None:
  227. raise KeyError(name)
  228. return sha
  229. def set_if_equals(self, name, old_ref, new_ref):
  230. """Set a refname to new_ref only if it currently equals old_ref.
  231. This method follows all symbolic references if applicable for the
  232. subclass, and can be used to perform an atomic compare-and-swap
  233. operation.
  234. :param name: The refname to set.
  235. :param old_ref: The old sha the refname must refer to, or None to set
  236. unconditionally.
  237. :param new_ref: The new sha the refname will refer to.
  238. :return: True if the set was successful, False otherwise.
  239. """
  240. raise NotImplementedError(self.set_if_equals)
  241. def add_if_new(self, name, ref):
  242. """Add a new reference only if it does not already exist."""
  243. raise NotImplementedError(self.add_if_new)
  244. def __setitem__(self, name, ref):
  245. """Set a reference name to point to the given SHA1.
  246. This method follows all symbolic references if applicable for the
  247. subclass.
  248. :note: This method unconditionally overwrites the contents of a
  249. reference. To update atomically only if the reference has not
  250. changed, use set_if_equals().
  251. :param name: The refname to set.
  252. :param ref: The new sha the refname will refer to.
  253. """
  254. self.set_if_equals(name, None, ref)
  255. def remove_if_equals(self, name, old_ref):
  256. """Remove a refname only if it currently equals old_ref.
  257. This method does not follow symbolic references, even if applicable for
  258. the subclass. It can be used to perform an atomic compare-and-delete
  259. operation.
  260. :param name: The refname to delete.
  261. :param old_ref: The old sha the refname must refer to, or None to delete
  262. unconditionally.
  263. :return: True if the delete was successful, False otherwise.
  264. """
  265. raise NotImplementedError(self.remove_if_equals)
  266. def __delitem__(self, name):
  267. """Remove a refname.
  268. This method does not follow symbolic references, even if applicable for
  269. the subclass.
  270. :note: This method unconditionally deletes the contents of a reference.
  271. To delete atomically only if the reference has not changed, use
  272. remove_if_equals().
  273. :param name: The refname to delete.
  274. """
  275. self.remove_if_equals(name, None)
  276. class DictRefsContainer(RefsContainer):
  277. """RefsContainer backed by a simple dict.
  278. This container does not support symbolic or packed references and is not
  279. threadsafe.
  280. """
  281. def __init__(self, refs):
  282. self._refs = refs
  283. self._peeled = {}
  284. def allkeys(self):
  285. return self._refs.keys()
  286. def read_loose_ref(self, name):
  287. return self._refs.get(name, None)
  288. def get_packed_refs(self):
  289. return {}
  290. def set_symbolic_ref(self, name, other):
  291. self._refs[name] = SYMREF + other
  292. def set_if_equals(self, name, old_ref, new_ref):
  293. if old_ref is not None and self._refs.get(name, None) != old_ref:
  294. return False
  295. realname, _ = self._follow(name)
  296. self._check_refname(realname)
  297. self._refs[realname] = new_ref
  298. return True
  299. def add_if_new(self, name, ref):
  300. if name in self._refs:
  301. return False
  302. self._refs[name] = ref
  303. return True
  304. def remove_if_equals(self, name, old_ref):
  305. if old_ref is not None and self._refs.get(name, None) != old_ref:
  306. return False
  307. del self._refs[name]
  308. return True
  309. def get_peeled(self, name):
  310. return self._peeled.get(name)
  311. def _update(self, refs):
  312. """Update multiple refs; intended only for testing."""
  313. # TODO(dborowitz): replace this with a public function that uses
  314. # set_if_equal.
  315. self._refs.update(refs)
  316. def _update_peeled(self, peeled):
  317. """Update cached peeled refs; intended only for testing."""
  318. self._peeled.update(peeled)
  319. class DiskRefsContainer(RefsContainer):
  320. """Refs container that reads refs from disk."""
  321. def __init__(self, path):
  322. self.path = path
  323. self._packed_refs = None
  324. self._peeled_refs = None
  325. def __repr__(self):
  326. return "%s(%r)" % (self.__class__.__name__, self.path)
  327. def subkeys(self, base):
  328. keys = set()
  329. path = self.refpath(base)
  330. for root, dirs, files in os.walk(path):
  331. dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
  332. for filename in files:
  333. refname = ("%s/%s" % (dir, filename)).strip("/")
  334. # check_ref_format requires at least one /, so we prepend the
  335. # base before calling it.
  336. if check_ref_format("%s/%s" % (base, refname)):
  337. keys.add(refname)
  338. for key in self.get_packed_refs():
  339. if key.startswith(base):
  340. keys.add(key[len(base):].strip("/"))
  341. return keys
  342. def allkeys(self):
  343. keys = set()
  344. if os.path.exists(self.refpath("HEAD")):
  345. keys.add("HEAD")
  346. path = self.refpath("")
  347. for root, dirs, files in os.walk(self.refpath("refs")):
  348. dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
  349. for filename in files:
  350. refname = ("%s/%s" % (dir, filename)).strip("/")
  351. if check_ref_format(refname):
  352. keys.add(refname)
  353. keys.update(self.get_packed_refs())
  354. return keys
  355. def refpath(self, name):
  356. """Return the disk path of a ref.
  357. """
  358. if os.path.sep != "/":
  359. name = name.replace("/", os.path.sep)
  360. return os.path.join(self.path, name)
  361. def get_packed_refs(self):
  362. """Get contents of the packed-refs file.
  363. :return: Dictionary mapping ref names to SHA1s
  364. :note: Will return an empty dictionary when no packed-refs file is
  365. present.
  366. """
  367. # TODO: invalidate the cache on repacking
  368. if self._packed_refs is None:
  369. # set both to empty because we want _peeled_refs to be
  370. # None if and only if _packed_refs is also None.
  371. self._packed_refs = {}
  372. self._peeled_refs = {}
  373. path = os.path.join(self.path, 'packed-refs')
  374. try:
  375. f = GitFile(path, 'rb')
  376. except IOError, e:
  377. if e.errno == errno.ENOENT:
  378. return {}
  379. raise
  380. try:
  381. first_line = iter(f).next().rstrip()
  382. if (first_line.startswith("# pack-refs") and " peeled" in
  383. first_line):
  384. for sha, name, peeled in read_packed_refs_with_peeled(f):
  385. self._packed_refs[name] = sha
  386. if peeled:
  387. self._peeled_refs[name] = peeled
  388. else:
  389. f.seek(0)
  390. for sha, name in read_packed_refs(f):
  391. self._packed_refs[name] = sha
  392. finally:
  393. f.close()
  394. return self._packed_refs
  395. def get_peeled(self, name):
  396. """Return the cached peeled value of a ref, if available.
  397. :param name: Name of the ref to peel
  398. :return: The peeled value of the ref. If the ref is known not point to a
  399. tag, this will be the SHA the ref refers to. If the ref may point to
  400. a tag, but no cached information is available, None is returned.
  401. """
  402. self.get_packed_refs()
  403. if self._peeled_refs is None or name not in self._packed_refs:
  404. # No cache: no peeled refs were read, or this ref is loose
  405. return None
  406. if name in self._peeled_refs:
  407. return self._peeled_refs[name]
  408. else:
  409. # Known not peelable
  410. return self[name]
  411. def read_loose_ref(self, name):
  412. """Read a reference file and return its contents.
  413. If the reference file a symbolic reference, only read the first line of
  414. the file. Otherwise, only read the first 40 bytes.
  415. :param name: the refname to read, relative to refpath
  416. :return: The contents of the ref file, or None if the file does not
  417. exist.
  418. :raises IOError: if any other error occurs
  419. """
  420. filename = self.refpath(name)
  421. try:
  422. f = GitFile(filename, 'rb')
  423. try:
  424. header = f.read(len(SYMREF))
  425. if header == SYMREF:
  426. # Read only the first line
  427. return header + iter(f).next().rstrip("\r\n")
  428. else:
  429. # Read only the first 40 bytes
  430. return header + f.read(40-len(SYMREF))
  431. finally:
  432. f.close()
  433. except IOError, e:
  434. if e.errno == errno.ENOENT:
  435. return None
  436. raise
  437. def _remove_packed_ref(self, name):
  438. if self._packed_refs is None:
  439. return
  440. filename = os.path.join(self.path, 'packed-refs')
  441. # reread cached refs from disk, while holding the lock
  442. f = GitFile(filename, 'wb')
  443. try:
  444. self._packed_refs = None
  445. self.get_packed_refs()
  446. if name not in self._packed_refs:
  447. return
  448. del self._packed_refs[name]
  449. if name in self._peeled_refs:
  450. del self._peeled_refs[name]
  451. write_packed_refs(f, self._packed_refs, self._peeled_refs)
  452. f.close()
  453. finally:
  454. f.abort()
  455. def set_symbolic_ref(self, name, other):
  456. """Make a ref point at another ref.
  457. :param name: Name of the ref to set
  458. :param other: Name of the ref to point at
  459. """
  460. self._check_refname(name)
  461. self._check_refname(other)
  462. filename = self.refpath(name)
  463. try:
  464. f = GitFile(filename, 'wb')
  465. try:
  466. f.write(SYMREF + other + '\n')
  467. except (IOError, OSError):
  468. f.abort()
  469. raise
  470. finally:
  471. f.close()
  472. def set_if_equals(self, name, old_ref, new_ref):
  473. """Set a refname to new_ref only if it currently equals old_ref.
  474. This method follows all symbolic references, and can be used to perform
  475. an atomic compare-and-swap operation.
  476. :param name: The refname to set.
  477. :param old_ref: The old sha the refname must refer to, or None to set
  478. unconditionally.
  479. :param new_ref: The new sha the refname will refer to.
  480. :return: True if the set was successful, False otherwise.
  481. """
  482. self._check_refname(name)
  483. try:
  484. realname, _ = self._follow(name)
  485. except KeyError:
  486. realname = name
  487. filename = self.refpath(realname)
  488. ensure_dir_exists(os.path.dirname(filename))
  489. f = GitFile(filename, 'wb')
  490. try:
  491. if old_ref is not None:
  492. try:
  493. # read again while holding the lock
  494. orig_ref = self.read_loose_ref(realname)
  495. if orig_ref is None:
  496. orig_ref = self.get_packed_refs().get(realname, None)
  497. if orig_ref != old_ref:
  498. f.abort()
  499. return False
  500. except (OSError, IOError):
  501. f.abort()
  502. raise
  503. try:
  504. f.write(new_ref+"\n")
  505. except (OSError, IOError):
  506. f.abort()
  507. raise
  508. finally:
  509. f.close()
  510. return True
  511. def add_if_new(self, name, ref):
  512. """Add a new reference only if it does not already exist.
  513. This method follows symrefs, and only ensures that the last ref in the
  514. chain does not exist.
  515. :param name: The refname to set.
  516. :param ref: The new sha the refname will refer to.
  517. :return: True if the add was successful, False otherwise.
  518. """
  519. try:
  520. realname, contents = self._follow(name)
  521. if contents is not None:
  522. return False
  523. except KeyError:
  524. realname = name
  525. self._check_refname(realname)
  526. filename = self.refpath(realname)
  527. ensure_dir_exists(os.path.dirname(filename))
  528. f = GitFile(filename, 'wb')
  529. try:
  530. if os.path.exists(filename) or name in self.get_packed_refs():
  531. f.abort()
  532. return False
  533. try:
  534. f.write(ref+"\n")
  535. except (OSError, IOError):
  536. f.abort()
  537. raise
  538. finally:
  539. f.close()
  540. return True
  541. def remove_if_equals(self, name, old_ref):
  542. """Remove a refname only if it currently equals old_ref.
  543. This method does not follow symbolic references. It can be used to
  544. perform an atomic compare-and-delete operation.
  545. :param name: The refname to delete.
  546. :param old_ref: The old sha the refname must refer to, or None to delete
  547. unconditionally.
  548. :return: True if the delete was successful, False otherwise.
  549. """
  550. self._check_refname(name)
  551. filename = self.refpath(name)
  552. ensure_dir_exists(os.path.dirname(filename))
  553. f = GitFile(filename, 'wb')
  554. try:
  555. if old_ref is not None:
  556. orig_ref = self.read_loose_ref(name)
  557. if orig_ref is None:
  558. orig_ref = self.get_packed_refs().get(name, None)
  559. if orig_ref != old_ref:
  560. return False
  561. # may only be packed
  562. try:
  563. os.remove(filename)
  564. except OSError, e:
  565. if e.errno != errno.ENOENT:
  566. raise
  567. self._remove_packed_ref(name)
  568. finally:
  569. # never write, we just wanted the lock
  570. f.abort()
  571. return True
  572. def _split_ref_line(line):
  573. """Split a single ref line into a tuple of SHA1 and name."""
  574. fields = line.rstrip("\n").split(" ")
  575. if len(fields) != 2:
  576. raise PackedRefsException("invalid ref line '%s'" % line)
  577. sha, name = fields
  578. try:
  579. hex_to_sha(sha)
  580. except (AssertionError, TypeError), e:
  581. raise PackedRefsException(e)
  582. if not check_ref_format(name):
  583. raise PackedRefsException("invalid ref name '%s'" % name)
  584. return (sha, name)
  585. def read_packed_refs(f):
  586. """Read a packed refs file.
  587. :param f: file-like object to read from
  588. :return: Iterator over tuples with SHA1s and ref names.
  589. """
  590. for l in f:
  591. if l[0] == "#":
  592. # Comment
  593. continue
  594. if l[0] == "^":
  595. raise PackedRefsException(
  596. "found peeled ref in packed-refs without peeled")
  597. yield _split_ref_line(l)
  598. def read_packed_refs_with_peeled(f):
  599. """Read a packed refs file including peeled refs.
  600. Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
  601. with ref names, SHA1s, and peeled SHA1s (or None).
  602. :param f: file-like object to read from, seek'ed to the second line
  603. """
  604. last = None
  605. for l in f:
  606. if l[0] == "#":
  607. continue
  608. l = l.rstrip("\r\n")
  609. if l[0] == "^":
  610. if not last:
  611. raise PackedRefsException("unexpected peeled ref line")
  612. try:
  613. hex_to_sha(l[1:])
  614. except (AssertionError, TypeError), e:
  615. raise PackedRefsException(e)
  616. sha, name = _split_ref_line(last)
  617. last = None
  618. yield (sha, name, l[1:])
  619. else:
  620. if last:
  621. sha, name = _split_ref_line(last)
  622. yield (sha, name, None)
  623. last = l
  624. if last:
  625. sha, name = _split_ref_line(last)
  626. yield (sha, name, None)
  627. def write_packed_refs(f, packed_refs, peeled_refs=None):
  628. """Write a packed refs file.
  629. :param f: empty file-like object to write to
  630. :param packed_refs: dict of refname to sha of packed refs to write
  631. :param peeled_refs: dict of refname to peeled value of sha
  632. """
  633. if peeled_refs is None:
  634. peeled_refs = {}
  635. else:
  636. f.write('# pack-refs with: peeled\n')
  637. for refname in sorted(packed_refs.iterkeys()):
  638. f.write('%s %s\n' % (packed_refs[refname], refname))
  639. if refname in peeled_refs:
  640. f.write('^%s\n' % peeled_refs[refname])
  641. class BaseRepo(object):
  642. """Base class for a git repository.
  643. :ivar object_store: Dictionary-like object for accessing
  644. the objects
  645. :ivar refs: Dictionary-like object with the refs in this repository
  646. """
  647. def __init__(self, object_store, refs):
  648. self.object_store = object_store
  649. self.refs = refs
  650. def _init_files(self, bare):
  651. """Initialize a default set of named files."""
  652. self._put_named_file('description', "Unnamed repository")
  653. self._put_named_file('config', ('[core]\n'
  654. 'repositoryformatversion = 0\n'
  655. 'filemode = true\n'
  656. 'bare = ' + str(bare).lower() + '\n'
  657. 'logallrefupdates = true\n'))
  658. self._put_named_file(os.path.join('info', 'exclude'), '')
  659. def get_named_file(self, path):
  660. """Get a file from the control dir with a specific name.
  661. Although the filename should be interpreted as a filename relative to
  662. the control dir in a disk-based Repo, the object returned need not be
  663. pointing to a file in that location.
  664. :param path: The path to the file, relative to the control dir.
  665. :return: An open file object, or None if the file does not exist.
  666. """
  667. raise NotImplementedError(self.get_named_file)
  668. def _put_named_file(self, path, contents):
  669. """Write a file to the control dir with the given name and contents.
  670. :param path: The path to the file, relative to the control dir.
  671. :param contents: A string to write to the file.
  672. """
  673. raise NotImplementedError(self._put_named_file)
  674. def open_index(self):
  675. """Open the index for this repository.
  676. :raises NoIndexPresent: If no index is present
  677. :return: Index instance
  678. """
  679. raise NotImplementedError(self.open_index)
  680. def fetch(self, target, determine_wants=None, progress=None):
  681. """Fetch objects into another repository.
  682. :param target: The target repository
  683. :param determine_wants: Optional function to determine what refs to
  684. fetch.
  685. :param progress: Optional progress function
  686. """
  687. if determine_wants is None:
  688. determine_wants = lambda heads: heads.values()
  689. target.object_store.add_objects(
  690. self.fetch_objects(determine_wants, target.get_graph_walker(),
  691. progress))
  692. return self.get_refs()
  693. def fetch_objects(self, determine_wants, graph_walker, progress,
  694. get_tagged=None):
  695. """Fetch the missing objects required for a set of revisions.
  696. :param determine_wants: Function that takes a dictionary with heads
  697. and returns the list of heads to fetch.
  698. :param graph_walker: Object that can iterate over the list of revisions
  699. to fetch and has an "ack" method that will be called to acknowledge
  700. that a revision is present.
  701. :param progress: Simple progress function that will be called with
  702. updated progress strings.
  703. :param get_tagged: Function that returns a dict of pointed-to sha -> tag
  704. sha for including tags.
  705. :return: iterator over objects, with __len__ implemented
  706. """
  707. wants = determine_wants(self.get_refs())
  708. if wants is None:
  709. # TODO(dborowitz): find a way to short-circuit that doesn't change
  710. # this interface.
  711. return None
  712. haves = self.object_store.find_common_revisions(graph_walker)
  713. return self.object_store.iter_shas(
  714. self.object_store.find_missing_objects(haves, wants, progress,
  715. get_tagged))
  716. def get_graph_walker(self, heads=None):
  717. if heads is None:
  718. heads = self.refs.as_dict('refs/heads').values()
  719. return self.object_store.get_graph_walker(heads)
  720. def ref(self, name):
  721. """Return the SHA1 a ref is pointing to."""
  722. return self.refs[name]
  723. def get_refs(self):
  724. """Get dictionary with all refs."""
  725. return self.refs.as_dict()
  726. def head(self):
  727. """Return the SHA1 pointed at by HEAD."""
  728. return self.refs['HEAD']
  729. def _get_object(self, sha, cls):
  730. assert len(sha) in (20, 40)
  731. ret = self.get_object(sha)
  732. if not isinstance(ret, cls):
  733. if cls is Commit:
  734. raise NotCommitError(ret)
  735. elif cls is Blob:
  736. raise NotBlobError(ret)
  737. elif cls is Tree:
  738. raise NotTreeError(ret)
  739. elif cls is Tag:
  740. raise NotTagError(ret)
  741. else:
  742. raise Exception("Type invalid: %r != %r" % (
  743. ret.type_name, cls.type_name))
  744. return ret
  745. def get_object(self, sha):
  746. return self.object_store[sha]
  747. def get_parents(self, sha):
  748. return self.commit(sha).parents
  749. def get_config(self):
  750. import ConfigParser
  751. p = ConfigParser.RawConfigParser()
  752. p.read(os.path.join(self._controldir, 'config'))
  753. return dict((section, dict(p.items(section)))
  754. for section in p.sections())
  755. def commit(self, sha):
  756. """Retrieve the commit with a particular SHA.
  757. :param sha: SHA of the commit to retrieve
  758. :raise NotCommitError: If the SHA provided doesn't point at a Commit
  759. :raise KeyError: If the SHA provided didn't exist
  760. :return: A `Commit` object
  761. """
  762. warnings.warn("Repo.commit(sha) is deprecated. Use Repo[sha] instead.",
  763. category=DeprecationWarning, stacklevel=2)
  764. return self._get_object(sha, Commit)
  765. def tree(self, sha):
  766. """Retrieve the tree with a particular SHA.
  767. :param sha: SHA of the tree to retrieve
  768. :raise NotTreeError: If the SHA provided doesn't point at a Tree
  769. :raise KeyError: If the SHA provided didn't exist
  770. :return: A `Tree` object
  771. """
  772. warnings.warn("Repo.tree(sha) is deprecated. Use Repo[sha] instead.",
  773. category=DeprecationWarning, stacklevel=2)
  774. return self._get_object(sha, Tree)
  775. def tag(self, sha):
  776. """Retrieve the tag with a particular SHA.
  777. :param sha: SHA of the tag to retrieve
  778. :raise NotTagError: If the SHA provided doesn't point at a Tag
  779. :raise KeyError: If the SHA provided didn't exist
  780. :return: A `Tag` object
  781. """
  782. warnings.warn("Repo.tag(sha) is deprecated. Use Repo[sha] instead.",
  783. category=DeprecationWarning, stacklevel=2)
  784. return self._get_object(sha, Tag)
  785. def get_blob(self, sha):
  786. """Retrieve the blob with a particular SHA.
  787. :param sha: SHA of the blob to retrieve
  788. :raise NotBlobError: If the SHA provided doesn't point at a Blob
  789. :raise KeyError: If the SHA provided didn't exist
  790. :return: A `Blob` object
  791. """
  792. warnings.warn("Repo.get_blob(sha) is deprecated. Use Repo[sha] "
  793. "instead.", category=DeprecationWarning, stacklevel=2)
  794. return self._get_object(sha, Blob)
  795. def get_peeled(self, ref):
  796. """Get the peeled value of a ref.
  797. :param ref: The refname to peel.
  798. :return: The fully-peeled SHA1 of a tag object, after peeling all
  799. intermediate tags; if the original ref does not point to a tag, this
  800. will equal the original SHA1.
  801. """
  802. cached = self.refs.get_peeled(ref)
  803. if cached is not None:
  804. return cached
  805. return self.object_store.peel_sha(self.refs[ref]).id
  806. def revision_history(self, head):
  807. """Returns a list of the commits reachable from head.
  808. :param head: The SHA of the head to list revision history for.
  809. :return: A list of commit objects reachable from head, starting with
  810. head itself, in descending commit time order.
  811. :raise MissingCommitError: if any missing commits are referenced,
  812. including if the head parameter isn't the SHA of a commit.
  813. """
  814. # TODO(dborowitz): Expose more of the Walker functionality here or in a
  815. # separate Repo/BaseObjectStore method.
  816. return [e.commit for e in Walker(self.object_store, [head])]
  817. def __getitem__(self, name):
  818. if len(name) in (20, 40):
  819. try:
  820. return self.object_store[name]
  821. except KeyError:
  822. pass
  823. try:
  824. return self.object_store[self.refs[name]]
  825. except RefFormatError:
  826. raise KeyError(name)
  827. def __contains__(self, name):
  828. if len(name) in (20, 40):
  829. return name in self.object_store or name in self.refs
  830. else:
  831. return name in self.refs
  832. def __setitem__(self, name, value):
  833. if name.startswith("refs/") or name == "HEAD":
  834. if isinstance(value, ShaFile):
  835. self.refs[name] = value.id
  836. elif isinstance(value, str):
  837. self.refs[name] = value
  838. else:
  839. raise TypeError(value)
  840. else:
  841. raise ValueError(name)
  842. def __delitem__(self, name):
  843. if name.startswith("refs") or name == "HEAD":
  844. del self.refs[name]
  845. else:
  846. raise ValueError(name)
  847. def do_commit(self, message, committer=None,
  848. author=None, commit_timestamp=None,
  849. commit_timezone=None, author_timestamp=None,
  850. author_timezone=None, tree=None, encoding=None,
  851. ref='HEAD'):
  852. """Create a new commit.
  853. :param message: Commit message
  854. :param committer: Committer fullname
  855. :param author: Author fullname (defaults to committer)
  856. :param commit_timestamp: Commit timestamp (defaults to now)
  857. :param commit_timezone: Commit timestamp timezone (defaults to GMT)
  858. :param author_timestamp: Author timestamp (defaults to commit timestamp)
  859. :param author_timezone: Author timestamp timezone
  860. (defaults to commit timestamp timezone)
  861. :param tree: SHA1 of the tree root to use (if not specified the
  862. current index will be committed).
  863. :param encoding: Encoding
  864. :param ref: Ref to commit to
  865. :return: New commit SHA1
  866. """
  867. import time
  868. c = Commit()
  869. if tree is None:
  870. index = self.open_index()
  871. c.tree = index.commit(self.object_store)
  872. else:
  873. if len(tree) != 40:
  874. raise ValueError("tree must be a 40-byte hex sha string")
  875. c.tree = tree
  876. # TODO: Allow username to be missing, and get it from .git/config
  877. if committer is None:
  878. raise ValueError("committer not set")
  879. c.committer = committer
  880. if commit_timestamp is None:
  881. commit_timestamp = time.time()
  882. c.commit_time = int(commit_timestamp)
  883. if commit_timezone is None:
  884. # FIXME: Use current user timezone rather than UTC
  885. commit_timezone = 0
  886. c.commit_timezone = commit_timezone
  887. if author is None:
  888. author = committer
  889. c.author = author
  890. if author_timestamp is None:
  891. author_timestamp = commit_timestamp
  892. c.author_time = int(author_timestamp)
  893. if author_timezone is None:
  894. author_timezone = commit_timezone
  895. c.author_timezone = author_timezone
  896. if encoding is not None:
  897. c.encoding = encoding
  898. c.message = message
  899. try:
  900. old_head = self.refs[ref]
  901. c.parents = [old_head]
  902. self.object_store.add_object(c)
  903. ok = self.refs.set_if_equals(ref, old_head, c.id)
  904. except KeyError:
  905. c.parents = []
  906. self.object_store.add_object(c)
  907. ok = self.refs.add_if_new(ref, c.id)
  908. if not ok:
  909. # Fail if the atomic compare-and-swap failed, leaving the commit and
  910. # all its objects as garbage.
  911. raise CommitError("%s changed during commit" % (ref,))
  912. return c.id
  913. class Repo(BaseRepo):
  914. """A git repository backed by local disk."""
  915. def __init__(self, root):
  916. if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
  917. self.bare = False
  918. self._controldir = os.path.join(root, ".git")
  919. elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
  920. os.path.isdir(os.path.join(root, REFSDIR))):
  921. self.bare = True
  922. self._controldir = root
  923. else:
  924. raise NotGitRepository(root)
  925. self.path = root
  926. object_store = DiskObjectStore(os.path.join(self.controldir(),
  927. OBJECTDIR))
  928. refs = DiskRefsContainer(self.controldir())
  929. BaseRepo.__init__(self, object_store, refs)
  930. def controldir(self):
  931. """Return the path of the control directory."""
  932. return self._controldir
  933. def _put_named_file(self, path, contents):
  934. """Write a file to the control dir with the given name and contents.
  935. :param path: The path to the file, relative to the control dir.
  936. :param contents: A string to write to the file.
  937. """
  938. path = path.lstrip(os.path.sep)
  939. f = GitFile(os.path.join(self.controldir(), path), 'wb')
  940. try:
  941. f.write(contents)
  942. finally:
  943. f.close()
  944. def get_named_file(self, path):
  945. """Get a file from the control dir with a specific name.
  946. Although the filename should be interpreted as a filename relative to
  947. the control dir in a disk-based Repo, the object returned need not be
  948. pointing to a file in that location.
  949. :param path: The path to the file, relative to the control dir.
  950. :return: An open file object, or None if the file does not exist.
  951. """
  952. # TODO(dborowitz): sanitize filenames, since this is used directly by
  953. # the dumb web serving code.
  954. path = path.lstrip(os.path.sep)
  955. try:
  956. return open(os.path.join(self.controldir(), path), 'rb')
  957. except (IOError, OSError), e:
  958. if e.errno == errno.ENOENT:
  959. return None
  960. raise
  961. def index_path(self):
  962. """Return path to the index file."""
  963. return os.path.join(self.controldir(), INDEX_FILENAME)
  964. def open_index(self):
  965. """Open the index for this repository."""
  966. from dulwich.index import Index
  967. if not self.has_index():
  968. raise NoIndexPresent()
  969. return Index(self.index_path())
  970. def has_index(self):
  971. """Check if an index is present."""
  972. # Bare repos must never have index files; non-bare repos may have a
  973. # missing index file, which is treated as empty.
  974. return not self.bare
  975. def stage(self, paths):
  976. """Stage a set of paths.
  977. :param paths: List of paths, relative to the repository path
  978. """
  979. from dulwich.index import cleanup_mode
  980. index = self.open_index()
  981. for path in paths:
  982. full_path = os.path.join(self.path, path)
  983. blob = Blob()
  984. try:
  985. st = os.stat(full_path)
  986. except OSError:
  987. # File no longer exists
  988. try:
  989. del index[path]
  990. except KeyError:
  991. pass # Doesn't exist in the index either
  992. else:
  993. f = open(full_path, 'rb')
  994. try:
  995. blob.data = f.read()
  996. finally:
  997. f.close()
  998. self.object_store.add_object(blob)
  999. # XXX: Cleanup some of the other file properties as well?
  1000. index[path] = (st.st_ctime, st.st_mtime, st.st_dev, st.st_ino,
  1001. cleanup_mode(st.st_mode), st.st_uid, st.st_gid, st.st_size,
  1002. blob.id, 0)
  1003. index.write()
  1004. def clone(self, target_path, mkdir=True, bare=False, origin="origin"):
  1005. """Clone this repository.
  1006. :param target_path: Target path
  1007. :param mkdir: Create the target directory
  1008. :param bare: Whether to create a bare repository
  1009. :return: Created repository
  1010. """
  1011. if not bare:
  1012. target = self.init(target_path, mkdir=mkdir)
  1013. else:
  1014. target = self.init_bare(target_path)
  1015. self.fetch(target)
  1016. target.refs.import_refs(
  1017. 'refs/remotes/'+origin, self.refs.as_dict('refs/heads'))
  1018. target.refs.import_refs(
  1019. 'refs/tags', self.refs.as_dict('refs/tags'))
  1020. try:
  1021. target.refs.add_if_new(
  1022. 'refs/heads/master',
  1023. self.refs['refs/heads/master'])
  1024. except KeyError:
  1025. pass
  1026. return target
  1027. def __repr__(self):
  1028. return "<Repo at %r>" % self.path
  1029. @classmethod
  1030. def _init_maybe_bare(cls, path, bare):
  1031. for d in BASE_DIRECTORIES:
  1032. os.mkdir(os.path.join(path, *d))
  1033. DiskObjectStore.init(os.path.join(path, OBJECTDIR))
  1034. ret = cls(path)
  1035. ret.refs.set_symbolic_ref("HEAD", "refs/heads/master")
  1036. ret._init_files(bare)
  1037. return ret
  1038. @classmethod
  1039. def init(cls, path, mkdir=False):
  1040. if mkdir:
  1041. os.mkdir(path)
  1042. controldir = os.path.join(path, ".git")
  1043. os.mkdir(controldir)
  1044. cls._init_maybe_bare(controldir, False)
  1045. return cls(path)
  1046. @classmethod
  1047. def init_bare(cls, path):
  1048. return cls._init_maybe_bare(path, True)
  1049. create = init_bare
  1050. class MemoryRepo(BaseRepo):
  1051. """Repo that stores refs, objects, and named files in memory.
  1052. MemoryRepos are always bare: they have no working tree and no index, since
  1053. those have a stronger dependency on the filesystem.
  1054. """
  1055. def __init__(self):
  1056. BaseRepo.__init__(self, MemoryObjectStore(), DictRefsContainer({}))
  1057. self._named_files = {}
  1058. self.bare = True
  1059. def _put_named_file(self, path, contents):
  1060. """Write a file to the control dir with the given name and contents.
  1061. :param path: The path to the file, relative to the control dir.
  1062. :param contents: A string to write to the file.
  1063. """
  1064. self._named_files[path] = contents
  1065. def get_named_file(self, path):
  1066. """Get a file from the control dir with a specific name.
  1067. Although the filename should be interpreted as a filename relative to
  1068. the control dir in a disk-baked Repo, the object returned need not be
  1069. pointing to a file in that location.
  1070. :param path: The path to the file, relative to the control dir.
  1071. :return: An open file object, or None if the file does not exist.
  1072. """
  1073. contents = self._named_files.get(path, None)
  1074. if contents is None:
  1075. return None
  1076. return StringIO(contents)
  1077. def open_index(self):
  1078. """Fail to open index for this repo, since it is bare."""
  1079. raise NoIndexPresent()
  1080. @classmethod
  1081. def init_bare(cls, objects, refs):
  1082. ret = cls()
  1083. for obj in objects:
  1084. ret.object_store.add_object(obj)
  1085. for refname, sha in refs.iteritems():
  1086. ret.refs[refname] = sha
  1087. ret._init_files(bare=True)
  1088. return ret