2
0

repo.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254
  1. # repo.py -- For dealing with git repositories.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) any later version of
  9. # the License.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19. # MA 02110-1301, USA.
  20. """Repository access."""
  21. from cStringIO import StringIO
  22. import errno
  23. import os
  24. from dulwich.errors import (
  25. MissingCommitError,
  26. NoIndexPresent,
  27. NotBlobError,
  28. NotCommitError,
  29. NotGitRepository,
  30. NotTreeError,
  31. NotTagError,
  32. PackedRefsException,
  33. CommitError,
  34. RefFormatError,
  35. )
  36. from dulwich.file import (
  37. ensure_dir_exists,
  38. GitFile,
  39. )
  40. from dulwich.object_store import (
  41. DiskObjectStore,
  42. MemoryObjectStore,
  43. )
  44. from dulwich.objects import (
  45. Blob,
  46. Commit,
  47. ShaFile,
  48. Tag,
  49. Tree,
  50. hex_to_sha,
  51. )
  52. from dulwich.walk import (
  53. Walker,
  54. )
  55. import warnings
  56. OBJECTDIR = 'objects'
  57. SYMREF = 'ref: '
  58. REFSDIR = 'refs'
  59. REFSDIR_TAGS = 'tags'
  60. REFSDIR_HEADS = 'heads'
  61. INDEX_FILENAME = "index"
  62. BASE_DIRECTORIES = [
  63. ["branches"],
  64. [REFSDIR],
  65. [REFSDIR, REFSDIR_TAGS],
  66. [REFSDIR, REFSDIR_HEADS],
  67. ["hooks"],
  68. ["info"]
  69. ]
  70. def read_info_refs(f):
  71. ret = {}
  72. for l in f.readlines():
  73. (sha, name) = l.rstrip("\r\n").split("\t", 1)
  74. ret[name] = sha
  75. return ret
  76. def check_ref_format(refname):
  77. """Check if a refname is correctly formatted.
  78. Implements all the same rules as git-check-ref-format[1].
  79. [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
  80. :param refname: The refname to check
  81. :return: True if refname is valid, False otherwise
  82. """
  83. # These could be combined into one big expression, but are listed separately
  84. # to parallel [1].
  85. if '/.' in refname or refname.startswith('.'):
  86. return False
  87. if '/' not in refname:
  88. return False
  89. if '..' in refname:
  90. return False
  91. for c in refname:
  92. if ord(c) < 040 or c in '\177 ~^:?*[':
  93. return False
  94. if refname[-1] in '/.':
  95. return False
  96. if refname.endswith('.lock'):
  97. return False
  98. if '@{' in refname:
  99. return False
  100. if '\\' in refname:
  101. return False
  102. return True
  103. class RefsContainer(object):
  104. """A container for refs."""
  105. def set_ref(self, name, other):
  106. warnings.warn("RefsContainer.set_ref() is deprecated."
  107. "Use set_symblic_ref instead.",
  108. category=DeprecationWarning, stacklevel=2)
  109. return self.set_symbolic_ref(name, other)
  110. def set_symbolic_ref(self, name, other):
  111. """Make a ref point at another ref.
  112. :param name: Name of the ref to set
  113. :param other: Name of the ref to point at
  114. """
  115. raise NotImplementedError(self.set_symbolic_ref)
  116. def get_packed_refs(self):
  117. """Get contents of the packed-refs file.
  118. :return: Dictionary mapping ref names to SHA1s
  119. :note: Will return an empty dictionary when no packed-refs file is
  120. present.
  121. """
  122. raise NotImplementedError(self.get_packed_refs)
  123. def get_peeled(self, name):
  124. """Return the cached peeled value of a ref, if available.
  125. :param name: Name of the ref to peel
  126. :return: The peeled value of the ref. If the ref is known not point to a
  127. tag, this will be the SHA the ref refers to. If the ref may point to
  128. a tag, but no cached information is available, None is returned.
  129. """
  130. return None
  131. def import_refs(self, base, other):
  132. for name, value in other.iteritems():
  133. self["%s/%s" % (base, name)] = value
  134. def allkeys(self):
  135. """All refs present in this container."""
  136. raise NotImplementedError(self.allkeys)
  137. def keys(self, base=None):
  138. """Refs present in this container.
  139. :param base: An optional base to return refs under.
  140. :return: An unsorted set of valid refs in this container, including
  141. packed refs.
  142. """
  143. if base is not None:
  144. return self.subkeys(base)
  145. else:
  146. return self.allkeys()
  147. def subkeys(self, base):
  148. """Refs present in this container under a base.
  149. :param base: The base to return refs under.
  150. :return: A set of valid refs in this container under the base; the base
  151. prefix is stripped from the ref names returned.
  152. """
  153. keys = set()
  154. base_len = len(base) + 1
  155. for refname in self.allkeys():
  156. if refname.startswith(base):
  157. keys.add(refname[base_len:])
  158. return keys
  159. def as_dict(self, base=None):
  160. """Return the contents of this container as a dictionary.
  161. """
  162. ret = {}
  163. keys = self.keys(base)
  164. if base is None:
  165. base = ""
  166. for key in keys:
  167. try:
  168. ret[key] = self[("%s/%s" % (base, key)).strip("/")]
  169. except KeyError:
  170. continue # Unable to resolve
  171. return ret
  172. def _check_refname(self, name):
  173. """Ensure a refname is valid and lives in refs or is HEAD.
  174. HEAD is not a valid refname according to git-check-ref-format, but this
  175. class needs to be able to touch HEAD. Also, check_ref_format expects
  176. refnames without the leading 'refs/', but this class requires that
  177. so it cannot touch anything outside the refs dir (or HEAD).
  178. :param name: The name of the reference.
  179. :raises KeyError: if a refname is not HEAD or is otherwise not valid.
  180. """
  181. if name == 'HEAD':
  182. return
  183. if not name.startswith('refs/') or not check_ref_format(name[5:]):
  184. raise RefFormatError(name)
  185. def read_ref(self, refname):
  186. """Read a reference without following any references.
  187. :param refname: The name of the reference
  188. :return: The contents of the ref file, or None if it does
  189. not exist.
  190. """
  191. contents = self.read_loose_ref(refname)
  192. if not contents:
  193. contents = self.get_packed_refs().get(refname, None)
  194. return contents
  195. def read_loose_ref(self, name):
  196. """Read a loose reference and return its contents.
  197. :param name: the refname to read
  198. :return: The contents of the ref file, or None if it does
  199. not exist.
  200. """
  201. raise NotImplementedError(self.read_loose_ref)
  202. def _follow(self, name):
  203. """Follow a reference name.
  204. :return: a tuple of (refname, sha), where refname is the name of the
  205. last reference in the symbolic reference chain
  206. """
  207. contents = SYMREF + name
  208. depth = 0
  209. while contents.startswith(SYMREF):
  210. refname = contents[len(SYMREF):]
  211. contents = self.read_ref(refname)
  212. if not contents:
  213. break
  214. depth += 1
  215. if depth > 5:
  216. raise KeyError(name)
  217. return refname, contents
  218. def __contains__(self, refname):
  219. if self.read_ref(refname):
  220. return True
  221. return False
  222. def __getitem__(self, name):
  223. """Get the SHA1 for a reference name.
  224. This method follows all symbolic references.
  225. """
  226. _, sha = self._follow(name)
  227. if sha is None:
  228. raise KeyError(name)
  229. return sha
  230. def set_if_equals(self, name, old_ref, new_ref):
  231. """Set a refname to new_ref only if it currently equals old_ref.
  232. This method follows all symbolic references if applicable for the
  233. subclass, and can be used to perform an atomic compare-and-swap
  234. operation.
  235. :param name: The refname to set.
  236. :param old_ref: The old sha the refname must refer to, or None to set
  237. unconditionally.
  238. :param new_ref: The new sha the refname will refer to.
  239. :return: True if the set was successful, False otherwise.
  240. """
  241. raise NotImplementedError(self.set_if_equals)
  242. def add_if_new(self, name, ref):
  243. """Add a new reference only if it does not already exist."""
  244. raise NotImplementedError(self.add_if_new)
  245. def __setitem__(self, name, ref):
  246. """Set a reference name to point to the given SHA1.
  247. This method follows all symbolic references if applicable for the
  248. subclass.
  249. :note: This method unconditionally overwrites the contents of a
  250. reference. To update atomically only if the reference has not
  251. changed, use set_if_equals().
  252. :param name: The refname to set.
  253. :param ref: The new sha the refname will refer to.
  254. """
  255. self.set_if_equals(name, None, ref)
  256. def remove_if_equals(self, name, old_ref):
  257. """Remove a refname only if it currently equals old_ref.
  258. This method does not follow symbolic references, even if applicable for
  259. the subclass. It can be used to perform an atomic compare-and-delete
  260. operation.
  261. :param name: The refname to delete.
  262. :param old_ref: The old sha the refname must refer to, or None to delete
  263. unconditionally.
  264. :return: True if the delete was successful, False otherwise.
  265. """
  266. raise NotImplementedError(self.remove_if_equals)
  267. def __delitem__(self, name):
  268. """Remove a refname.
  269. This method does not follow symbolic references, even if applicable for
  270. the subclass.
  271. :note: This method unconditionally deletes the contents of a reference.
  272. To delete atomically only if the reference has not changed, use
  273. remove_if_equals().
  274. :param name: The refname to delete.
  275. """
  276. self.remove_if_equals(name, None)
  277. class DictRefsContainer(RefsContainer):
  278. """RefsContainer backed by a simple dict.
  279. This container does not support symbolic or packed references and is not
  280. threadsafe.
  281. """
  282. def __init__(self, refs):
  283. self._refs = refs
  284. self._peeled = {}
  285. def allkeys(self):
  286. return self._refs.keys()
  287. def read_loose_ref(self, name):
  288. return self._refs.get(name, None)
  289. def get_packed_refs(self):
  290. return {}
  291. def set_symbolic_ref(self, name, other):
  292. self._refs[name] = SYMREF + other
  293. def set_if_equals(self, name, old_ref, new_ref):
  294. if old_ref is not None and self._refs.get(name, None) != old_ref:
  295. return False
  296. realname, _ = self._follow(name)
  297. self._check_refname(realname)
  298. self._refs[realname] = new_ref
  299. return True
  300. def add_if_new(self, name, ref):
  301. if name in self._refs:
  302. return False
  303. self._refs[name] = ref
  304. return True
  305. def remove_if_equals(self, name, old_ref):
  306. if old_ref is not None and self._refs.get(name, None) != old_ref:
  307. return False
  308. del self._refs[name]
  309. return True
  310. def get_peeled(self, name):
  311. return self._peeled.get(name)
  312. def _update(self, refs):
  313. """Update multiple refs; intended only for testing."""
  314. # TODO(dborowitz): replace this with a public function that uses
  315. # set_if_equal.
  316. self._refs.update(refs)
  317. def _update_peeled(self, peeled):
  318. """Update cached peeled refs; intended only for testing."""
  319. self._peeled.update(peeled)
  320. class DiskRefsContainer(RefsContainer):
  321. """Refs container that reads refs from disk."""
  322. def __init__(self, path):
  323. self.path = path
  324. self._packed_refs = None
  325. self._peeled_refs = None
  326. def __repr__(self):
  327. return "%s(%r)" % (self.__class__.__name__, self.path)
  328. def subkeys(self, base):
  329. keys = set()
  330. path = self.refpath(base)
  331. for root, dirs, files in os.walk(path):
  332. dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
  333. for filename in files:
  334. refname = ("%s/%s" % (dir, filename)).strip("/")
  335. # check_ref_format requires at least one /, so we prepend the
  336. # base before calling it.
  337. if check_ref_format("%s/%s" % (base, refname)):
  338. keys.add(refname)
  339. for key in self.get_packed_refs():
  340. if key.startswith(base):
  341. keys.add(key[len(base):].strip("/"))
  342. return keys
  343. def allkeys(self):
  344. keys = set()
  345. if os.path.exists(self.refpath("HEAD")):
  346. keys.add("HEAD")
  347. path = self.refpath("")
  348. for root, dirs, files in os.walk(self.refpath("refs")):
  349. dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
  350. for filename in files:
  351. refname = ("%s/%s" % (dir, filename)).strip("/")
  352. if check_ref_format(refname):
  353. keys.add(refname)
  354. keys.update(self.get_packed_refs())
  355. return keys
  356. def refpath(self, name):
  357. """Return the disk path of a ref.
  358. """
  359. if os.path.sep != "/":
  360. name = name.replace("/", os.path.sep)
  361. return os.path.join(self.path, name)
  362. def get_packed_refs(self):
  363. """Get contents of the packed-refs file.
  364. :return: Dictionary mapping ref names to SHA1s
  365. :note: Will return an empty dictionary when no packed-refs file is
  366. present.
  367. """
  368. # TODO: invalidate the cache on repacking
  369. if self._packed_refs is None:
  370. # set both to empty because we want _peeled_refs to be
  371. # None if and only if _packed_refs is also None.
  372. self._packed_refs = {}
  373. self._peeled_refs = {}
  374. path = os.path.join(self.path, 'packed-refs')
  375. try:
  376. f = GitFile(path, 'rb')
  377. except IOError, e:
  378. if e.errno == errno.ENOENT:
  379. return {}
  380. raise
  381. try:
  382. first_line = iter(f).next().rstrip()
  383. if (first_line.startswith("# pack-refs") and " peeled" in
  384. first_line):
  385. for sha, name, peeled in read_packed_refs_with_peeled(f):
  386. self._packed_refs[name] = sha
  387. if peeled:
  388. self._peeled_refs[name] = peeled
  389. else:
  390. f.seek(0)
  391. for sha, name in read_packed_refs(f):
  392. self._packed_refs[name] = sha
  393. finally:
  394. f.close()
  395. return self._packed_refs
  396. def get_peeled(self, name):
  397. """Return the cached peeled value of a ref, if available.
  398. :param name: Name of the ref to peel
  399. :return: The peeled value of the ref. If the ref is known not point to a
  400. tag, this will be the SHA the ref refers to. If the ref may point to
  401. a tag, but no cached information is available, None is returned.
  402. """
  403. self.get_packed_refs()
  404. if self._peeled_refs is None or name not in self._packed_refs:
  405. # No cache: no peeled refs were read, or this ref is loose
  406. return None
  407. if name in self._peeled_refs:
  408. return self._peeled_refs[name]
  409. else:
  410. # Known not peelable
  411. return self[name]
  412. def read_loose_ref(self, name):
  413. """Read a reference file and return its contents.
  414. If the reference file a symbolic reference, only read the first line of
  415. the file. Otherwise, only read the first 40 bytes.
  416. :param name: the refname to read, relative to refpath
  417. :return: The contents of the ref file, or None if the file does not
  418. exist.
  419. :raises IOError: if any other error occurs
  420. """
  421. filename = self.refpath(name)
  422. try:
  423. f = GitFile(filename, 'rb')
  424. try:
  425. header = f.read(len(SYMREF))
  426. if header == SYMREF:
  427. # Read only the first line
  428. return header + iter(f).next().rstrip("\r\n")
  429. else:
  430. # Read only the first 40 bytes
  431. return header + f.read(40-len(SYMREF))
  432. finally:
  433. f.close()
  434. except IOError, e:
  435. if e.errno == errno.ENOENT:
  436. return None
  437. raise
  438. def _remove_packed_ref(self, name):
  439. if self._packed_refs is None:
  440. return
  441. filename = os.path.join(self.path, 'packed-refs')
  442. # reread cached refs from disk, while holding the lock
  443. f = GitFile(filename, 'wb')
  444. try:
  445. self._packed_refs = None
  446. self.get_packed_refs()
  447. if name not in self._packed_refs:
  448. return
  449. del self._packed_refs[name]
  450. if name in self._peeled_refs:
  451. del self._peeled_refs[name]
  452. write_packed_refs(f, self._packed_refs, self._peeled_refs)
  453. f.close()
  454. finally:
  455. f.abort()
  456. def set_symbolic_ref(self, name, other):
  457. """Make a ref point at another ref.
  458. :param name: Name of the ref to set
  459. :param other: Name of the ref to point at
  460. """
  461. self._check_refname(name)
  462. self._check_refname(other)
  463. filename = self.refpath(name)
  464. try:
  465. f = GitFile(filename, 'wb')
  466. try:
  467. f.write(SYMREF + other + '\n')
  468. except (IOError, OSError):
  469. f.abort()
  470. raise
  471. finally:
  472. f.close()
  473. def set_if_equals(self, name, old_ref, new_ref):
  474. """Set a refname to new_ref only if it currently equals old_ref.
  475. This method follows all symbolic references, and can be used to perform
  476. an atomic compare-and-swap operation.
  477. :param name: The refname to set.
  478. :param old_ref: The old sha the refname must refer to, or None to set
  479. unconditionally.
  480. :param new_ref: The new sha the refname will refer to.
  481. :return: True if the set was successful, False otherwise.
  482. """
  483. self._check_refname(name)
  484. try:
  485. realname, _ = self._follow(name)
  486. except KeyError:
  487. realname = name
  488. filename = self.refpath(realname)
  489. ensure_dir_exists(os.path.dirname(filename))
  490. f = GitFile(filename, 'wb')
  491. try:
  492. if old_ref is not None:
  493. try:
  494. # read again while holding the lock
  495. orig_ref = self.read_loose_ref(realname)
  496. if orig_ref is None:
  497. orig_ref = self.get_packed_refs().get(realname, None)
  498. if orig_ref != old_ref:
  499. f.abort()
  500. return False
  501. except (OSError, IOError):
  502. f.abort()
  503. raise
  504. try:
  505. f.write(new_ref+"\n")
  506. except (OSError, IOError):
  507. f.abort()
  508. raise
  509. finally:
  510. f.close()
  511. return True
  512. def add_if_new(self, name, ref):
  513. """Add a new reference only if it does not already exist.
  514. This method follows symrefs, and only ensures that the last ref in the
  515. chain does not exist.
  516. :param name: The refname to set.
  517. :param ref: The new sha the refname will refer to.
  518. :return: True if the add was successful, False otherwise.
  519. """
  520. try:
  521. realname, contents = self._follow(name)
  522. if contents is not None:
  523. return False
  524. except KeyError:
  525. realname = name
  526. self._check_refname(realname)
  527. filename = self.refpath(realname)
  528. ensure_dir_exists(os.path.dirname(filename))
  529. f = GitFile(filename, 'wb')
  530. try:
  531. if os.path.exists(filename) or name in self.get_packed_refs():
  532. f.abort()
  533. return False
  534. try:
  535. f.write(ref+"\n")
  536. except (OSError, IOError):
  537. f.abort()
  538. raise
  539. finally:
  540. f.close()
  541. return True
  542. def remove_if_equals(self, name, old_ref):
  543. """Remove a refname only if it currently equals old_ref.
  544. This method does not follow symbolic references. It can be used to
  545. perform an atomic compare-and-delete operation.
  546. :param name: The refname to delete.
  547. :param old_ref: The old sha the refname must refer to, or None to delete
  548. unconditionally.
  549. :return: True if the delete was successful, False otherwise.
  550. """
  551. self._check_refname(name)
  552. filename = self.refpath(name)
  553. ensure_dir_exists(os.path.dirname(filename))
  554. f = GitFile(filename, 'wb')
  555. try:
  556. if old_ref is not None:
  557. orig_ref = self.read_loose_ref(name)
  558. if orig_ref is None:
  559. orig_ref = self.get_packed_refs().get(name, None)
  560. if orig_ref != old_ref:
  561. return False
  562. # may only be packed
  563. try:
  564. os.remove(filename)
  565. except OSError, e:
  566. if e.errno != errno.ENOENT:
  567. raise
  568. self._remove_packed_ref(name)
  569. finally:
  570. # never write, we just wanted the lock
  571. f.abort()
  572. return True
  573. def _split_ref_line(line):
  574. """Split a single ref line into a tuple of SHA1 and name."""
  575. fields = line.rstrip("\n").split(" ")
  576. if len(fields) != 2:
  577. raise PackedRefsException("invalid ref line '%s'" % line)
  578. sha, name = fields
  579. try:
  580. hex_to_sha(sha)
  581. except (AssertionError, TypeError), e:
  582. raise PackedRefsException(e)
  583. if not check_ref_format(name):
  584. raise PackedRefsException("invalid ref name '%s'" % name)
  585. return (sha, name)
  586. def read_packed_refs(f):
  587. """Read a packed refs file.
  588. :param f: file-like object to read from
  589. :return: Iterator over tuples with SHA1s and ref names.
  590. """
  591. for l in f:
  592. if l[0] == "#":
  593. # Comment
  594. continue
  595. if l[0] == "^":
  596. raise PackedRefsException(
  597. "found peeled ref in packed-refs without peeled")
  598. yield _split_ref_line(l)
  599. def read_packed_refs_with_peeled(f):
  600. """Read a packed refs file including peeled refs.
  601. Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
  602. with ref names, SHA1s, and peeled SHA1s (or None).
  603. :param f: file-like object to read from, seek'ed to the second line
  604. """
  605. last = None
  606. for l in f:
  607. if l[0] == "#":
  608. continue
  609. l = l.rstrip("\r\n")
  610. if l[0] == "^":
  611. if not last:
  612. raise PackedRefsException("unexpected peeled ref line")
  613. try:
  614. hex_to_sha(l[1:])
  615. except (AssertionError, TypeError), e:
  616. raise PackedRefsException(e)
  617. sha, name = _split_ref_line(last)
  618. last = None
  619. yield (sha, name, l[1:])
  620. else:
  621. if last:
  622. sha, name = _split_ref_line(last)
  623. yield (sha, name, None)
  624. last = l
  625. if last:
  626. sha, name = _split_ref_line(last)
  627. yield (sha, name, None)
  628. def write_packed_refs(f, packed_refs, peeled_refs=None):
  629. """Write a packed refs file.
  630. :param f: empty file-like object to write to
  631. :param packed_refs: dict of refname to sha of packed refs to write
  632. :param peeled_refs: dict of refname to peeled value of sha
  633. """
  634. if peeled_refs is None:
  635. peeled_refs = {}
  636. else:
  637. f.write('# pack-refs with: peeled\n')
  638. for refname in sorted(packed_refs.iterkeys()):
  639. f.write('%s %s\n' % (packed_refs[refname], refname))
  640. if refname in peeled_refs:
  641. f.write('^%s\n' % peeled_refs[refname])
  642. class BaseRepo(object):
  643. """Base class for a git repository.
  644. :ivar object_store: Dictionary-like object for accessing
  645. the objects
  646. :ivar refs: Dictionary-like object with the refs in this repository
  647. """
  648. def __init__(self, object_store, refs):
  649. self.object_store = object_store
  650. self.refs = refs
  651. def _init_files(self, bare):
  652. """Initialize a default set of named files."""
  653. self._put_named_file('description', "Unnamed repository")
  654. self._put_named_file('config', ('[core]\n'
  655. 'repositoryformatversion = 0\n'
  656. 'filemode = true\n'
  657. 'bare = ' + str(bare).lower() + '\n'
  658. 'logallrefupdates = true\n'))
  659. self._put_named_file(os.path.join('info', 'exclude'), '')
  660. def get_named_file(self, path):
  661. """Get a file from the control dir with a specific name.
  662. Although the filename should be interpreted as a filename relative to
  663. the control dir in a disk-based Repo, the object returned need not be
  664. pointing to a file in that location.
  665. :param path: The path to the file, relative to the control dir.
  666. :return: An open file object, or None if the file does not exist.
  667. """
  668. raise NotImplementedError(self.get_named_file)
  669. def _put_named_file(self, path, contents):
  670. """Write a file to the control dir with the given name and contents.
  671. :param path: The path to the file, relative to the control dir.
  672. :param contents: A string to write to the file.
  673. """
  674. raise NotImplementedError(self._put_named_file)
  675. def open_index(self):
  676. """Open the index for this repository.
  677. :raises NoIndexPresent: If no index is present
  678. :return: Index instance
  679. """
  680. raise NotImplementedError(self.open_index)
  681. def fetch(self, target, determine_wants=None, progress=None):
  682. """Fetch objects into another repository.
  683. :param target: The target repository
  684. :param determine_wants: Optional function to determine what refs to
  685. fetch.
  686. :param progress: Optional progress function
  687. """
  688. if determine_wants is None:
  689. determine_wants = lambda heads: heads.values()
  690. target.object_store.add_objects(
  691. self.fetch_objects(determine_wants, target.get_graph_walker(),
  692. progress))
  693. return self.get_refs()
  694. def fetch_objects(self, determine_wants, graph_walker, progress,
  695. get_tagged=None):
  696. """Fetch the missing objects required for a set of revisions.
  697. :param determine_wants: Function that takes a dictionary with heads
  698. and returns the list of heads to fetch.
  699. :param graph_walker: Object that can iterate over the list of revisions
  700. to fetch and has an "ack" method that will be called to acknowledge
  701. that a revision is present.
  702. :param progress: Simple progress function that will be called with
  703. updated progress strings.
  704. :param get_tagged: Function that returns a dict of pointed-to sha -> tag
  705. sha for including tags.
  706. :return: iterator over objects, with __len__ implemented
  707. """
  708. wants = determine_wants(self.get_refs())
  709. if wants is None:
  710. # TODO(dborowitz): find a way to short-circuit that doesn't change
  711. # this interface.
  712. return None
  713. haves = self.object_store.find_common_revisions(graph_walker)
  714. return self.object_store.iter_shas(
  715. self.object_store.find_missing_objects(haves, wants, progress,
  716. get_tagged))
  717. def get_graph_walker(self, heads=None):
  718. if heads is None:
  719. heads = self.refs.as_dict('refs/heads').values()
  720. return self.object_store.get_graph_walker(heads)
  721. def ref(self, name):
  722. """Return the SHA1 a ref is pointing to."""
  723. return self.refs[name]
  724. def get_refs(self):
  725. """Get dictionary with all refs."""
  726. return self.refs.as_dict()
  727. def head(self):
  728. """Return the SHA1 pointed at by HEAD."""
  729. return self.refs['HEAD']
  730. def _get_object(self, sha, cls):
  731. assert len(sha) in (20, 40)
  732. ret = self.get_object(sha)
  733. if not isinstance(ret, cls):
  734. if cls is Commit:
  735. raise NotCommitError(ret)
  736. elif cls is Blob:
  737. raise NotBlobError(ret)
  738. elif cls is Tree:
  739. raise NotTreeError(ret)
  740. elif cls is Tag:
  741. raise NotTagError(ret)
  742. else:
  743. raise Exception("Type invalid: %r != %r" % (
  744. ret.type_name, cls.type_name))
  745. return ret
  746. def get_object(self, sha):
  747. return self.object_store[sha]
  748. def get_parents(self, sha):
  749. return self.commit(sha).parents
  def get_config(self):
      """Read the ``config`` file in the control directory.

      The file at ``os.path.join(self._controldir, 'config')`` is parsed
      with ``ConfigParser.RawConfigParser``.

      :return: A dict mapping each section name to a dict of that
          section's items
      """
      import ConfigParser
      parser = ConfigParser.RawConfigParser()
      parser.read(os.path.join(self._controldir, 'config'))
      config = {}
      for section in parser.sections():
          config[section] = dict(parser.items(section))
      return config
  def commit(self, sha):
      """Look up a commit by SHA (deprecated; use ``Repo[sha]``).

      Emits a DeprecationWarning before doing the lookup.

      :param sha: SHA of the commit to retrieve
      :raise NotCommitError: If the SHA provided doesn't point at a Commit
      :raise KeyError: If the SHA provided didn't exist
      :return: A `Commit` object
      """
      deprecation = "Repo.commit(sha) is deprecated. Use Repo[sha] instead."
      # stacklevel=2 attributes the warning to the caller of this method.
      warnings.warn(deprecation, category=DeprecationWarning, stacklevel=2)
      return self._get_object(sha, Commit)
  def tree(self, sha):
      """Look up a tree by SHA (deprecated; use ``Repo[sha]``).

      Emits a DeprecationWarning before doing the lookup.

      :param sha: SHA of the tree to retrieve
      :raise NotTreeError: If the SHA provided doesn't point at a Tree
      :raise KeyError: If the SHA provided didn't exist
      :return: A `Tree` object
      """
      deprecation = "Repo.tree(sha) is deprecated. Use Repo[sha] instead."
      # stacklevel=2 attributes the warning to the caller of this method.
      warnings.warn(deprecation, category=DeprecationWarning, stacklevel=2)
      return self._get_object(sha, Tree)
  def tag(self, sha):
      """Look up a tag by SHA (deprecated; use ``Repo[sha]``).

      Emits a DeprecationWarning before doing the lookup.

      :param sha: SHA of the tag to retrieve
      :raise NotTagError: If the SHA provided doesn't point at a Tag
      :raise KeyError: If the SHA provided didn't exist
      :return: A `Tag` object
      """
      deprecation = "Repo.tag(sha) is deprecated. Use Repo[sha] instead."
      # stacklevel=2 attributes the warning to the caller of this method.
      warnings.warn(deprecation, category=DeprecationWarning, stacklevel=2)
      return self._get_object(sha, Tag)
  def get_blob(self, sha):
      """Look up a blob by SHA (deprecated; use ``Repo[sha]``).

      Emits a DeprecationWarning before doing the lookup.

      :param sha: SHA of the blob to retrieve
      :raise NotBlobError: If the SHA provided doesn't point at a Blob
      :raise KeyError: If the SHA provided didn't exist
      :return: A `Blob` object
      """
      deprecation = ("Repo.get_blob(sha) is deprecated. Use Repo[sha] "
                     "instead.")
      # stacklevel=2 attributes the warning to the caller of this method.
      warnings.warn(deprecation, category=DeprecationWarning, stacklevel=2)
      return self._get_object(sha, Blob)
  def get_peeled(self, ref):
      """Return the fully-peeled SHA1 for a ref.

      The refs container is consulted first; only when it has no peeled
      value (returns None) is the ref's target peeled through the object
      store.

      :param ref: The refname to peel.
      :return: The fully-peeled SHA1 of a tag object, after peeling all
          intermediate tags; if the original ref does not point to a tag,
          this will equal the original SHA1.
      """
      peeled = self.refs.get_peeled(ref)
      if peeled is None:
          peeled = self.object_store.peel_sha(self.refs[ref]).id
      return peeled
  def revision_history(self, head):
      """Collect the commits reachable from ``head``.

      :param head: The SHA of the head to list revision history for.
      :return: A list of commit objects reachable from head, starting with
          head itself, in descending commit time order.
      :raise MissingCommitError: if any missing commits are referenced,
          including if the head parameter isn't the SHA of a commit.
      """
      # TODO(dborowitz): Expose more of the Walker functionality here or in a
      # separate Repo/BaseObjectStore method.
      history = []
      for entry in Walker(self.object_store, [head]):
          history.append(entry.commit)
      return history
  def __getitem__(self, name):
      """Retrieve an object by SHA or by ref name.

      A name of length 20 or 40 is first tried as a key in the object
      store. If that lookup fails, or the name has another length, the
      name is resolved through ``self.refs`` and the resulting SHA is
      looked up in the object store.

      :param name: An object SHA or a ref name
      :raise KeyError: If a RefFormatError occurs while resolving the name
      :return: The object found in the object store
      """
      looks_like_sha = len(name) in (20, 40)
      if looks_like_sha:
          try:
              return self.object_store[name]
          except KeyError:
              # Not a stored object; fall through and try it as a ref.
              pass
      try:
          target = self.refs[name]
          return self.object_store[target]
      except RefFormatError:
          raise KeyError(name)
  828. def __contains__(self, name):
  829. if len(name) in (20, 40):
  830. return name in self.object_store or name in self.refs
  831. else:
  832. return name in self.refs
  def __setitem__(self, name, value):
      """Point a ref at a new value.

      :param name: Ref name; must be ``"HEAD"`` or start with ``"refs/"``
      :param value: Either a ShaFile, whose ``id`` is stored, or a str,
          which is stored as-is
      :raise ValueError: If ``name`` is not an accepted ref name
      :raise TypeError: If ``value`` is neither a ShaFile nor a str
      """
      if not (name.startswith("refs/") or name == "HEAD"):
          raise ValueError(name)
      if isinstance(value, ShaFile):
          sha = value.id
      elif isinstance(value, str):
          sha = value
      else:
          raise TypeError(value)
      self.refs[name] = sha
  843. def __delitem__(self, name):
  844. if name.startswith("refs") or name == "HEAD":
  845. del self.refs[name]
  846. else:
  847. raise ValueError(name)
  def do_commit(self, message, committer=None,
                author=None, commit_timestamp=None,
                commit_timezone=None, author_timestamp=None,
                author_timezone=None, tree=None, encoding=None,
                ref='HEAD'):
      """Create a new commit.

      :param message: Commit message
      :param committer: Committer fullname
      :param author: Author fullname (defaults to committer)
      :param commit_timestamp: Commit timestamp (defaults to now)
      :param commit_timezone: Commit timestamp timezone (defaults to GMT)
      :param author_timestamp: Author timestamp (defaults to commit timestamp)
      :param author_timezone: Author timestamp timezone
          (defaults to commit timestamp timezone)
      :param tree: SHA1 of the tree root to use (if not specified the
          current index will be committed).
      :param encoding: Encoding
      :param ref: Ref to commit to
      :raise ValueError: If ``tree`` is given but is not 40 characters
          long, or if ``committer`` is None
      :raise CommitError: If ``ref`` changed while the commit was made
      :return: New commit SHA1
      """
      import time
      c = Commit()
      if tree is None:
          index = self.open_index()
          c.tree = index.commit(self.object_store)
      else:
          if len(tree) != 40:
              raise ValueError("tree must be a 40-byte hex sha string")
          c.tree = tree
      # TODO: Allow username to be missing, and get it from .git/config
      if committer is None:
          raise ValueError("committer not set")
      c.committer = committer
      if commit_timestamp is None:
          commit_timestamp = time.time()
      c.commit_time = int(commit_timestamp)
      if commit_timezone is None:
          # FIXME: Use current user timezone rather than UTC
          commit_timezone = 0
      c.commit_timezone = commit_timezone
      if author is None:
          author = committer
      c.author = author
      if author_timestamp is None:
          author_timestamp = commit_timestamp
      c.author_time = int(author_timestamp)
      if author_timezone is None:
          author_timezone = commit_timezone
      c.author_timezone = author_timezone
      if encoding is not None:
          c.encoding = encoding
      c.message = message
      # Only the ref lookup decides between "first commit" and "child
      # commit". Keeping the rest out of the try block means a KeyError
      # raised while storing the object or updating the ref is not mistaken
      # for a missing ref (which would write a parentless commit).
      try:
          old_head = self.refs[ref]
      except KeyError:
          # The ref does not exist yet: create a root commit.
          c.parents = []
          self.object_store.add_object(c)
          ok = self.refs.add_if_new(ref, c.id)
      else:
          c.parents = [old_head]
          self.object_store.add_object(c)
          ok = self.refs.set_if_equals(ref, old_head, c.id)
      if not ok:
          # Fail if the atomic compare-and-swap failed, leaving the commit and
          # all its objects as garbage.
          raise CommitError("%s changed during commit" % (ref,))
      return c.id
  914. class Repo(BaseRepo):
  915. """A git repository backed by local disk."""
  916. def __init__(self, root):
  917. if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
  918. self.bare = False
  919. self._controldir = os.path.join(root, ".git")
  920. elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
  921. os.path.isdir(os.path.join(root, REFSDIR))):
  922. self.bare = True
  923. self._controldir = root
  924. else:
  925. raise NotGitRepository(root)
  926. self.path = root
  927. object_store = DiskObjectStore(os.path.join(self.controldir(),
  928. OBJECTDIR))
  929. refs = DiskRefsContainer(self.controldir())
  930. BaseRepo.__init__(self, object_store, refs)
  931. def controldir(self):
  932. """Return the path of the control directory."""
  933. return self._controldir
  934. def _put_named_file(self, path, contents):
  935. """Write a file to the control dir with the given name and contents.
  936. :param path: The path to the file, relative to the control dir.
  937. :param contents: A string to write to the file.
  938. """
  939. path = path.lstrip(os.path.sep)
  940. f = GitFile(os.path.join(self.controldir(), path), 'wb')
  941. try:
  942. f.write(contents)
  943. finally:
  944. f.close()
  945. def get_named_file(self, path):
  946. """Get a file from the control dir with a specific name.
  947. Although the filename should be interpreted as a filename relative to
  948. the control dir in a disk-based Repo, the object returned need not be
  949. pointing to a file in that location.
  950. :param path: The path to the file, relative to the control dir.
  951. :return: An open file object, or None if the file does not exist.
  952. """
  953. # TODO(dborowitz): sanitize filenames, since this is used directly by
  954. # the dumb web serving code.
  955. path = path.lstrip(os.path.sep)
  956. try:
  957. return open(os.path.join(self.controldir(), path), 'rb')
  958. except (IOError, OSError), e:
  959. if e.errno == errno.ENOENT:
  960. return None
  961. raise
  962. def index_path(self):
  963. """Return path to the index file."""
  964. return os.path.join(self.controldir(), INDEX_FILENAME)
  965. def open_index(self):
  966. """Open the index for this repository."""
  967. from dulwich.index import Index
  968. if not self.has_index():
  969. raise NoIndexPresent()
  970. return Index(self.index_path())
  971. def has_index(self):
  972. """Check if an index is present."""
  973. # Bare repos must never have index files; non-bare repos may have a
  974. # missing index file, which is treated as empty.
  975. return not self.bare
  976. def stage(self, paths):
  977. """Stage a set of paths.
  978. :param paths: List of paths, relative to the repository path
  979. """
  980. from dulwich.index import cleanup_mode
  981. index = self.open_index()
  982. for path in paths:
  983. full_path = os.path.join(self.path, path)
  984. blob = Blob()
  985. try:
  986. st = os.stat(full_path)
  987. except OSError:
  988. # File no longer exists
  989. try:
  990. del index[path]
  991. except KeyError:
  992. pass # Doesn't exist in the index either
  993. else:
  994. f = open(full_path, 'rb')
  995. try:
  996. blob.data = f.read()
  997. finally:
  998. f.close()
  999. self.object_store.add_object(blob)
  1000. # XXX: Cleanup some of the other file properties as well?
  1001. index[path] = (st.st_ctime, st.st_mtime, st.st_dev, st.st_ino,
  1002. cleanup_mode(st.st_mode), st.st_uid, st.st_gid, st.st_size,
  1003. blob.id, 0)
  1004. index.write()
  1005. def __repr__(self):
  1006. return "<Repo at %r>" % self.path
  1007. @classmethod
  1008. def _init_maybe_bare(cls, path, bare):
  1009. for d in BASE_DIRECTORIES:
  1010. os.mkdir(os.path.join(path, *d))
  1011. DiskObjectStore.init(os.path.join(path, OBJECTDIR))
  1012. ret = cls(path)
  1013. ret.refs.set_symbolic_ref("HEAD", "refs/heads/master")
  1014. ret._init_files(bare)
  1015. return ret
  1016. @classmethod
  1017. def init(cls, path, mkdir=False):
  1018. if mkdir:
  1019. os.mkdir(path)
  1020. controldir = os.path.join(path, ".git")
  1021. os.mkdir(controldir)
  1022. cls._init_maybe_bare(controldir, False)
  1023. return cls(path)
  1024. @classmethod
  1025. def init_bare(cls, path):
  1026. return cls._init_maybe_bare(path, True)
  1027. create = init_bare
  class MemoryRepo(BaseRepo):
      """Repo that stores refs, objects, and named files in memory.

      MemoryRepos are always bare: they have no working tree and no index,
      since those have a stronger dependency on the filesystem.
      """

      def __init__(self):
          """Create an empty in-memory repository."""
          BaseRepo.__init__(self, MemoryObjectStore(), DictRefsContainer({}))
          self._named_files = {}
          self.bare = True

      def _put_named_file(self, path, contents):
          """Write a file to the control dir with the given name and contents.

          :param path: The path to the file, relative to the control dir.
          :param contents: A string to write to the file.
          """
          named_files = self._named_files
          named_files[path] = contents

      def get_named_file(self, path):
          """Get a file from the control dir with a specific name.

          Although the filename should be interpreted as a filename relative
          to the control dir in a disk-based Repo, the object returned need
          not be pointing to a file in that location.

          :param path: The path to the file, relative to the control dir.
          :return: An open file object, or None if the file does not exist.
          """
          stored = self._named_files.get(path, None)
          if stored is not None:
              return StringIO(stored)
          return None

      def open_index(self):
          """Fail to open index for this repo, since it is bare."""
          raise NoIndexPresent()

      @classmethod
      def init_bare(cls, objects, refs):
          """Build an in-memory repository from existing objects and refs.

          :param objects: Iterable of objects to add to the object store
          :param refs: Dict of ref names to SHAs to set
          :return: The new repository
          """
          repo = cls()
          store = repo.object_store
          for obj in objects:
              store.add_object(obj)
          for refname, sha in refs.iteritems():
              repo.refs[refname] = sha
          repo._init_files(bare=True)
          return repo