repo.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026
  1. # repo.py -- For dealing with git repositories.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) any later version of
  9. # the License.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19. # MA 02110-1301, USA.
  20. """Repository access.
  21. This module contains the base class for git repositories
  22. (BaseRepo) and an implementation which uses a repository on
  23. local disk (Repo).
  24. """
  25. from io import BytesIO
  26. import errno
  27. import os
  28. import sys
  29. from dulwich.errors import (
  30. NoIndexPresent,
  31. NotBlobError,
  32. NotCommitError,
  33. NotGitRepository,
  34. NotTreeError,
  35. NotTagError,
  36. CommitError,
  37. RefFormatError,
  38. HookError,
  39. )
  40. from dulwich.file import (
  41. GitFile,
  42. )
  43. from dulwich.object_store import (
  44. DiskObjectStore,
  45. MemoryObjectStore,
  46. ObjectStoreGraphWalker,
  47. )
  48. from dulwich.objects import (
  49. check_hexsha,
  50. Blob,
  51. Commit,
  52. ShaFile,
  53. Tag,
  54. Tree,
  55. )
  56. from dulwich.hooks import (
  57. PreCommitShellHook,
  58. PostCommitShellHook,
  59. CommitMsgShellHook,
  60. )
  61. from dulwich.refs import (
  62. check_ref_format,
  63. RefsContainer,
  64. DictRefsContainer,
  65. InfoRefsContainer,
  66. DiskRefsContainer,
  67. read_packed_refs,
  68. read_packed_refs_with_peeled,
  69. write_packed_refs,
  70. SYMREF,
  71. )
  72. import warnings
  # Name of the control directory inside a non-bare working tree.
  CONTROLDIR = '.git'
  # Sub-directory of the control directory that holds the object store.
  OBJECTDIR = 'objects'
  # Sub-directory of the control directory that holds the refs, and the
  # names of its tag and branch sub-directories.
  REFSDIR = 'refs'
  REFSDIR_TAGS = 'tags'
  REFSDIR_HEADS = 'heads'
  # File name of the index, relative to the control directory.
  INDEX_FILENAME = "index"

  # Directories created below the control directory when a repository is
  # initialised; every entry is a list of path components.
  BASE_DIRECTORIES = [
      ["branches"],
      [REFSDIR],
      [REFSDIR, REFSDIR_TAGS],
      [REFSDIR, REFSDIR_HEADS],
      ["hooks"],
      ["info"],
  ]
  def parse_graftpoints(graftpoints):
      """Convert a list of graftpoints into a dict

      :param graftpoints: Iterator of graftpoint lines

      Each line is formatted as:
          <commit sha1> <parent sha1> [<parent sha1>]*

      Lines that are empty or contain only whitespace are skipped instead
      of raising ``IndexError``.

      Resulting dictionary is:
          <commit sha1>: [<parent sha1>*]

      Every sha on a line is validated with ``check_hexsha``.

      https://git.wiki.kernel.org/index.php/GraftPoint
      """
      grafts = {}
      for line in graftpoints:
          # Splitting on arbitrary whitespace yields the commit followed by
          # its (possibly empty) list of parents.
          fields = line.split()
          if not fields:
              # Blank line: nothing to record.
              continue
          for sha in fields:
              check_hexsha(sha, 'Invalid graftpoint')
          grafts[fields[0]] = fields[1:]
      return grafts
  def serialize_graftpoints(graftpoints):
      """Convert a dictionary of grafts into a bytestring

      The graft dictionary is:
          <commit sha1>: [<parent sha1>*]

      Each line of the result is formatted as:
          <commit sha1> <parent sha1> [<parent sha1>]*

      A commit without parents is written on a line of its own. Lines are
      joined with a newline; no trailing newline is appended.

      https://git.wiki.kernel.org/index.php/GraftPoint
      """
      lines = [
          commit + b' ' + b' '.join(parents) if parents else commit
          for commit, parents in graftpoints.items()
      ]
      return b'\n'.join(lines)
  class BaseRepo(object):
      """Base class for a git repository.

      :ivar object_store: Dictionary-like object for accessing
          the objects
      :ivar refs: Dictionary-like object with the refs in this
          repository
      :ivar hooks: Dictionary mapping a hook name (such as 'pre-commit')
          to a hook object; ``do_commit`` calls ``execute`` on them
      """

      def __init__(self, object_store, refs):
          """Open a repository.

          This shouldn't be called directly, but rather through one of the
          subclasses, such as MemoryRepo or Repo.

          :param object_store: Object store to use
          :param refs: Refs container to use
          """
          self.object_store = object_store
          self.refs = refs
          # Maps a commit sha to the parent list that replaces its real
          # parents (see get_parents).
          self._graftpoints = {}
          self.hooks = {}

      def _init_files(self, bare):
          """Initialize a default set of named files.

          Writes ``description``, ``config`` and ``info/exclude`` through
          ``_put_named_file``.

          :param bare: Value stored under ``core.bare`` in the new config
          """
          from dulwich.config import ConfigFile
          self._put_named_file('description', b"Unnamed repository")
          cf = ConfigFile()
          cf.set(b"core", b"repositoryformatversion", b"0")
          cf.set(b"core", b"filemode", b"true")
          cf.set(b"core", b"bare", bare)
          cf.set(b"core", b"logallrefupdates", True)
          # Serialise the config into memory so it can be stored like any
          # other named file.
          f = BytesIO()
          cf.write_to_file(f)
          self._put_named_file('config', f.getvalue())
          self._put_named_file(os.path.join('info', 'exclude'), b'')

      def get_named_file(self, path):
          """Get a file from the control dir with a specific name.

          Although the filename should be interpreted as a filename relative
          to the control dir in a disk-based Repo, the object returned need
          not be pointing to a file in that location.

          :param path: The path to the file, relative to the control dir.
          :return: An open file object, or None if the file does not exist.
          """
          raise NotImplementedError(self.get_named_file)

      def _put_named_file(self, path, contents):
          """Write a file to the control dir with the given name and contents.

          :param path: The path to the file, relative to the control dir.
          :param contents: A string to write to the file.
          """
          raise NotImplementedError(self._put_named_file)

      def open_index(self):
          """Open the index for this repository.

          :raise NoIndexPresent: If no index is present
          :return: The matching `Index`
          """
          raise NotImplementedError(self.open_index)

      def fetch(self, target, determine_wants=None, progress=None):
          """Fetch objects into another repository.

          :param target: The target repository
          :param determine_wants: Optional function to determine what refs
              to fetch. Defaults to the target object store's
              ``determine_wants_all``.
          :param progress: Optional progress function
          :return: The local refs
          """
          if determine_wants is None:
              determine_wants = target.object_store.determine_wants_all
          missing = self.fetch_objects(
              determine_wants, target.get_graph_walker(), progress)
          target.object_store.add_objects(missing)
          return self.get_refs()

      def fetch_objects(self, determine_wants, graph_walker, progress,
                        get_tagged=None):
          """Fetch the missing objects required for a set of revisions.

          :param determine_wants: Function that takes a dictionary with
              heads and returns the list of heads to fetch.
          :param graph_walker: Object that can iterate over the list of
              revisions to fetch and has an "ack" method that will be called
              to acknowledge that a revision is present.
          :param progress: Simple progress function that will be called with
              updated progress strings.
          :param get_tagged: Function that returns a dict of pointed-to sha
              -> tag sha for including tags.
          :raise TypeError: if ``determine_wants`` does not return a list
          :return: iterator over objects, with __len__ implemented. When
              nothing is wanted the result is ``[]``, or ``None`` if the
              graph walker carries shallow/unshallow information.
          """
          wants = determine_wants(self.get_refs())
          if not isinstance(wants, list):
              raise TypeError("determine_wants() did not return a list")

          shallows = getattr(graph_walker, 'shallow', frozenset())
          unshallows = getattr(graph_walker, 'unshallow', frozenset())

          if not wants:
              # TODO(dborowitz): find a way to short-circuit that doesn't
              # change this interface.
              if shallows or unshallows:
                  # Do not send a pack in shallow short-circuit path
                  return None
              return []

          # If the graph walker is set up with an implementation that can
          # ACK/NAK to the wire, it will write data to the client through
          # this call as a side-effect.
          haves = self.object_store.find_common_revisions(graph_walker)

          # Deal with shallow requests separately because the haves do
          # not reflect what objects are missing
          if shallows or unshallows:
              # TODO: filter the haves commits from iter_shas.
              # the specific commits aren't missing.
              haves = []

          def get_parents(commit):
              # History is cut off at shallow commits.
              if commit.id in shallows:
                  return []
              return self.get_parents(commit.id, commit)

          return self.object_store.iter_shas(
              self.object_store.find_missing_objects(
                  haves, wants, progress,
                  get_tagged,
                  get_parents=get_parents))

      def get_graph_walker(self, heads=None):
          """Retrieve a graph walker.

          A graph walker is used by a remote repository (or proxy)
          to find out which objects are present in this repository.

          :param heads: Repository heads to use (optional); defaults to the
              values of all refs below ``refs/heads``
          :return: A graph walker object
          """
          if heads is None:
              heads = self.refs.as_dict(b'refs/heads').values()
          return ObjectStoreGraphWalker(heads, self.get_parents)

      def get_refs(self):
          """Get dictionary with all refs.

          :return: A ``dict`` mapping ref names to SHA1s
          """
          return self.refs.as_dict()

      def head(self):
          """Return the SHA1 pointed at by HEAD."""
          return self.refs[b'HEAD']

      def _get_object(self, sha, cls):
          """Retrieve an object and check that it is an instance of ``cls``.

          :param sha: SHA of the object; its length must be 20 or 40
          :param cls: Expected class (Commit, Blob, Tree or Tag)
          :raise NotCommitError, NotBlobError, NotTreeError, NotTagError:
              when the object is not an instance of the matching class
          :return: The object
          """
          assert len(sha) in (20, 40)
          ret = self.get_object(sha)
          if isinstance(ret, cls):
              return ret
          if cls is Commit:
              raise NotCommitError(ret)
          elif cls is Blob:
              raise NotBlobError(ret)
          elif cls is Tree:
              raise NotTreeError(ret)
          elif cls is Tag:
              raise NotTagError(ret)
          else:
              raise Exception("Type invalid: %r != %r" % (
                  ret.type_name, cls.type_name))

      def get_object(self, sha):
          """Retrieve the object with the specified SHA.

          :param sha: SHA to retrieve
          :return: A ShaFile object
          :raise KeyError: when the object can not be found
          """
          return self.object_store[sha]

      def get_parents(self, sha, commit=None):
          """Retrieve the parents of a specific commit.

          If the specific commit is a graftpoint, the graft parents
          will be returned instead.

          :param sha: SHA of the commit for which to retrieve the parents
          :param commit: Optional commit matching the sha; looked up via
              ``self[sha]`` when omitted
          :return: List of parents
          """
          try:
              return self._graftpoints[sha]
          except KeyError:
              if commit is None:
                  commit = self[sha]
              return commit.parents

      def get_config(self):
          """Retrieve the config object.

          :return: `ConfigFile` object for the ``.git/config`` file.
          """
          raise NotImplementedError(self.get_config)

      def get_description(self):
          """Retrieve the description for this repository.

          :return: String with the description of the repository
              as set by the user.
          """
          raise NotImplementedError(self.get_description)

      def set_description(self, description):
          """Set the description for this repository.

          :param description: Text to set as description for this
              repository.
          """
          raise NotImplementedError(self.set_description)

      def get_config_stack(self):
          """Return a config stack for this repository.

          This stack accesses the configuration for both this repository
          itself (.git/config) and the global configuration, which usually
          lives in ~/.gitconfig.

          :return: `Config` instance for this repository
          """
          from dulwich.config import StackedConfig
          backends = [self.get_config()] + StackedConfig.default_backends()
          # Writes go to the repository's own config, the first backend.
          return StackedConfig(backends, writable=backends[0])

      def get_peeled(self, ref):
          """Get the peeled value of a ref.

          :param ref: The refname to peel.
          :return: The fully-peeled SHA1 of a tag object, after peeling all
              intermediate tags; if the original ref does not point to a
              tag, this will equal the original SHA1.
          """
          cached = self.refs.get_peeled(ref)
          if cached is not None:
              return cached
          return self.object_store.peel_sha(self.refs[ref]).id

      def get_walker(self, include=None, *args, **kwargs):
          """Obtain a walker for this repository.

          :param include: Iterable of SHAs of commits to include along with
              their ancestors. Defaults to [HEAD]. A single SHA given as a
              bytestring (or ``str``) is wrapped in a list.
          :param exclude: Iterable of SHAs of commits to exclude along with
              their ancestors, overriding includes.
          :param order: ORDER_* constant specifying the order of results.
              Anything other than ORDER_DATE may result in O(n) memory
              usage.
          :param reverse: If True, reverse the order of output, requiring
              O(n) memory.
          :param max_entries: The maximum number of entries to yield, or
              None for no limit.
          :param paths: Iterable of file or subtree paths to show entries
              for.
          :param rename_detector: diff.RenameDetector object for detecting
              renames.
          :param follow: If True, follow path across renames/copies. Forces
              a default rename_detector.
          :param since: Timestamp to list commits after.
          :param until: Timestamp to list commits before.
          :param queue_cls: A class to use for a queue of commits,
              supporting the iterator protocol. The constructor takes a
              single argument, the Walker.
          :return: A `Walker` object
          """
          from dulwich.walk import Walker
          if include is None:
              include = [self.head()]
          # SHAs are bytestrings; checking only ``str`` would leave a lone
          # bytes SHA unwrapped on Python 3 and iterate it byte by byte.
          if isinstance(include, (bytes, str)):
              include = [include]

          # Route parent lookups through get_parents so graftpoints apply.
          kwargs['get_parents'] = (
              lambda commit: self.get_parents(commit.id, commit))

          return Walker(self.object_store, include, *args, **kwargs)

      def __getitem__(self, name):
          """Retrieve a Git object by SHA1 or ref.

          :param name: A Git object SHA1 or a ref name
          :return: A `ShaFile` object, such as a Commit or Blob
          :raise TypeError: when ``name`` is not a bytestring
          :raise KeyError: when the specified ref or object does not exist
          """
          if not isinstance(name, bytes):
              raise TypeError("'name' must be bytestring, not %.80s" %
                              type(name).__name__)
          if len(name) in (20, 40):
              # Looks like a SHA: try the object store first and fall back
              # to treating the name as a ref.
              try:
                  return self.object_store[name]
              except (KeyError, ValueError):
                  pass
          try:
              return self.object_store[self.refs[name]]
          except RefFormatError:
              raise KeyError(name)

      def __contains__(self, name):
          """Check if a specific Git object or ref is present.

          :param name: Git object SHA1 or ref name
          """
          if len(name) in (20, 40):
              return name in self.object_store or name in self.refs
          else:
              return name in self.refs

      def __setitem__(self, name, value):
          """Set a ref.

          :param name: ref name; must be ``HEAD`` or start with ``refs/``
          :param value: Ref value - either a ShaFile object, or a hex sha
          :raise ValueError: when ``name`` is not an accepted ref name
          :raise TypeError: when ``value`` is neither a ShaFile nor bytes
          """
          if not (name.startswith(b"refs/") or name == b'HEAD'):
              raise ValueError(name)
          if isinstance(value, ShaFile):
              self.refs[name] = value.id
          elif isinstance(value, bytes):
              self.refs[name] = value
          else:
              raise TypeError(value)

      def __delitem__(self, name):
          """Remove a ref.

          :param name: Name of the ref to remove; must be ``HEAD`` or start
              with ``refs/``
          :raise ValueError: when ``name`` is not an accepted ref name
          """
          if name.startswith(b"refs/") or name == b"HEAD":
              del self.refs[name]
          else:
              raise ValueError(name)

      def _get_user_identity(self):
          """Determine the identity to use for new commits.

          :return: ``<user.name> <<user.email>>`` built from the config
              stack
          """
          config = self.get_config_stack()
          return (config.get((b"user", ), b"name") + b" <" +
                  config.get((b"user", ), b"email") + b">")

      def _add_graftpoints(self, updated_graftpoints):
          """Add or modify graftpoints

          :param updated_graftpoints: Dict of commit shas to list of parent
              shas
          """
          # Simple validation; nothing is stored unless every sha passes.
          for commit, parents in updated_graftpoints.items():
              for sha in [commit] + parents:
                  check_hexsha(sha, 'Invalid graftpoint')

          self._graftpoints.update(updated_graftpoints)

      def _remove_graftpoints(self, to_remove=None):
          """Remove graftpoints

          :param to_remove: List of commit shas; omitted or None removes
              nothing
          :raise KeyError: when a sha is not a registered graftpoint
          """
          # ``None`` replaces the former mutable ``[]`` default.
          for sha in to_remove or ():
              del self._graftpoints[sha]

      def do_commit(self, message=None, committer=None,
                    author=None, commit_timestamp=None,
                    commit_timezone=None, author_timestamp=None,
                    author_timezone=None, tree=None, encoding=None,
                    ref=b'HEAD', merge_heads=None):
          """Create a new commit.

          :param message: Commit message
          :param committer: Committer fullname
          :param author: Author fullname (defaults to committer)
          :param commit_timestamp: Commit timestamp (defaults to now)
          :param commit_timezone: Commit timestamp timezone (defaults to GMT)
          :param author_timestamp: Author timestamp (defaults to commit
              timestamp)
          :param author_timezone: Author timestamp timezone
              (defaults to commit timestamp timezone)
          :param tree: SHA1 of the tree root to use (if not specified the
              current index will be committed).
          :param encoding: Encoding
          :param ref: Optional ref to commit to (defaults to current
              branch); None creates a dangling commit
          :param merge_heads: Merge heads (defaults to .git/MERGE_HEADS)
          :raise ValueError: when ``tree`` is not 40 characters long or no
              message is given
          :raise CommitError: when the pre-commit or commit-msg hook raises
              HookError, or when ``ref`` changed during the commit
          :return: New commit SHA1
          """
          import time
          c = Commit()
          if tree is None:
              index = self.open_index()
              c.tree = index.commit(self.object_store)
          else:
              if len(tree) != 40:
                  raise ValueError("tree must be a 40-byte hex sha string")
              c.tree = tree

          try:
              self.hooks['pre-commit'].execute()
          except HookError as e:
              raise CommitError(e)
          except KeyError:  # no hook defined, silent fallthrough
              pass

          if merge_heads is None:
              # FIXME: Read merge heads from .git/MERGE_HEADS
              merge_heads = []
          if committer is None:
              # FIXME: Support GIT_COMMITTER_NAME/GIT_COMMITTER_EMAIL
              # environment variables
              committer = self._get_user_identity()
          c.committer = committer
          if commit_timestamp is None:
              # FIXME: Support GIT_COMMITTER_DATE environment variable
              commit_timestamp = time.time()
          c.commit_time = int(commit_timestamp)
          if commit_timezone is None:
              # FIXME: Use current user timezone rather than UTC
              commit_timezone = 0
          c.commit_timezone = commit_timezone
          if author is None:
              # FIXME: Support GIT_AUTHOR_NAME/GIT_AUTHOR_EMAIL environment
              # variables
              author = committer
          c.author = author
          if author_timestamp is None:
              # FIXME: Support GIT_AUTHOR_DATE environment variable
              author_timestamp = commit_timestamp
          c.author_time = int(author_timestamp)
          if author_timezone is None:
              author_timezone = commit_timezone
          c.author_timezone = author_timezone
          if encoding is not None:
              c.encoding = encoding
          if message is None:
              # FIXME: Try to read commit message from .git/MERGE_MSG
              raise ValueError("No commit message specified")

          try:
              c.message = self.hooks['commit-msg'].execute(message)
              if c.message is None:
                  c.message = message
          except HookError as e:
              raise CommitError(e)
          except KeyError:  # no hook defined, message not modified
              c.message = message

          if ref is None:
              # Create a dangling commit
              c.parents = merge_heads
              self.object_store.add_object(c)
          else:
              try:
                  old_head = self.refs[ref]
                  c.parents = [old_head] + merge_heads
                  self.object_store.add_object(c)
                  ok = self.refs.set_if_equals(ref, old_head, c.id)
              except KeyError:
                  # The ref does not exist yet: this is its first commit.
                  c.parents = merge_heads
                  self.object_store.add_object(c)
                  ok = self.refs.add_if_new(ref, c.id)
              if not ok:
                  # Fail if the atomic compare-and-swap failed, leaving the
                  # commit and all its objects as garbage.
                  raise CommitError("%s changed during commit" % (ref,))

          try:
              self.hooks['post-commit'].execute()
          except HookError as e:  # silent failure
              warnings.warn("post-commit hook failed: %s" % e, UserWarning)
          except KeyError:  # no hook defined, silent fallthrough
              pass

          return c.id
  def read_gitfile(f):
      """Read a ``.git`` file.

      The contents of the file must start with "gitdir: ".

      :param f: File-like object to read from
      :raise ValueError: when the contents do not start with "gitdir: "
      :return: The path following the prefix, with trailing newlines
          removed
      """
      prefix = "gitdir: "
      contents = f.read()
      if contents.startswith(prefix):
          return contents[len(prefix):].rstrip("\n")
      raise ValueError("Expected file to start with 'gitdir: '")
  537. class Repo(BaseRepo):
  538. """A git repository backed by local disk.
  539. To open an existing repository, call the contructor with
  540. the path of the repository.
  541. To create a new repository, use the Repo.init class method.
  542. """
  543. def __init__(self, root):
  544. hidden_path = os.path.join(root, CONTROLDIR)
  545. if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)):
  546. self.bare = False
  547. self._controldir = hidden_path
  548. elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
  549. os.path.isdir(os.path.join(root, REFSDIR))):
  550. self.bare = True
  551. self._controldir = root
  552. elif os.path.isfile(hidden_path):
  553. self.bare = False
  554. with open(hidden_path, 'r') as f:
  555. path = read_gitfile(f)
  556. self.bare = False
  557. self._controldir = os.path.join(root, path)
  558. else:
  559. raise NotGitRepository(
  560. "No git repository was found at %(path)s" % dict(path=root)
  561. )
  562. self.path = root
  563. object_store = DiskObjectStore(os.path.join(self.controldir(),
  564. OBJECTDIR))
  565. refs = DiskRefsContainer(self.controldir())
  566. BaseRepo.__init__(self, object_store, refs)
  567. self._graftpoints = {}
  568. graft_file = self.get_named_file(os.path.join("info", "grafts"))
  569. if graft_file:
  570. with graft_file:
  571. self._graftpoints.update(parse_graftpoints(graft_file))
  572. graft_file = self.get_named_file("shallow")
  573. if graft_file:
  574. with graft_file:
  575. self._graftpoints.update(parse_graftpoints(graft_file))
  576. self.hooks['pre-commit'] = PreCommitShellHook(self.controldir())
  577. self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir())
  578. self.hooks['post-commit'] = PostCommitShellHook(self.controldir())
  579. @classmethod
  580. def discover(cls, start='.'):
  581. """Iterate parent directories to discover a repository
  582. Return a Repo object for the first parent directory that looks like a
  583. Git repository.
  584. :param start: The directory to start discovery from (defaults to '.')
  585. """
  586. remaining = True
  587. path = os.path.abspath(start)
  588. while remaining:
  589. try:
  590. return cls(path)
  591. except NotGitRepository:
  592. path, remaining = os.path.split(path)
  593. raise NotGitRepository(
  594. "No git repository was found at %(path)s" % dict(path=start)
  595. )
  596. def controldir(self):
  597. """Return the path of the control directory."""
  598. return self._controldir
  599. def _put_named_file(self, path, contents):
  600. """Write a file to the control dir with the given name and contents.
  601. :param path: The path to the file, relative to the control dir.
  602. :param contents: A string to write to the file.
  603. """
  604. path = path.lstrip(os.path.sep)
  605. with GitFile(os.path.join(self.controldir(), path), 'wb') as f:
  606. f.write(contents)
  607. def get_named_file(self, path):
  608. """Get a file from the control dir with a specific name.
  609. Although the filename should be interpreted as a filename relative to
  610. the control dir in a disk-based Repo, the object returned need not be
  611. pointing to a file in that location.
  612. :param path: The path to the file, relative to the control dir.
  613. :return: An open file object, or None if the file does not exist.
  614. """
  615. # TODO(dborowitz): sanitize filenames, since this is used directly by
  616. # the dumb web serving code.
  617. path = path.lstrip(os.path.sep)
  618. try:
  619. return open(os.path.join(self.controldir(), path), 'rb')
  620. except (IOError, OSError) as e:
  621. if e.errno == errno.ENOENT:
  622. return None
  623. raise
  624. def index_path(self):
  625. """Return path to the index file."""
  626. return os.path.join(self.controldir(), INDEX_FILENAME)
  627. def open_index(self):
  628. """Open the index for this repository.
  629. :raise NoIndexPresent: If no index is present
  630. :return: The matching `Index`
  631. """
  632. from dulwich.index import Index
  633. if not self.has_index():
  634. raise NoIndexPresent()
  635. return Index(self.index_path())
  636. def has_index(self):
  637. """Check if an index is present."""
  638. # Bare repos must never have index files; non-bare repos may have a
  639. # missing index file, which is treated as empty.
  640. return not self.bare
  641. def stage(self, fs_paths):
  642. """Stage a set of paths.
  643. :param fs_paths: List of paths, relative to the repository path
  644. """
  645. root_path_bytes = self.path.encode(sys.getfilesystemencoding())
  646. if not isinstance(fs_paths, list):
  647. fs_paths = [fs_paths]
  648. from dulwich.index import (
  649. blob_from_path_and_stat,
  650. index_entry_from_stat,
  651. _fs_to_tree_path,
  652. )
  653. index = self.open_index()
  654. for fs_path in fs_paths:
  655. if not isinstance(fs_path, bytes):
  656. fs_path = fs_path.encode(sys.getfilesystemencoding())
  657. tree_path = _fs_to_tree_path(fs_path)
  658. full_path = os.path.join(root_path_bytes, fs_path)
  659. try:
  660. st = os.lstat(full_path)
  661. except OSError:
  662. # File no longer exists
  663. try:
  664. del index[tree_path]
  665. except KeyError:
  666. pass # already removed
  667. else:
  668. blob = blob_from_path_and_stat(full_path, st)
  669. self.object_store.add_object(blob)
  670. index[tree_path] = index_entry_from_stat(st, blob.id, 0)
  671. index.write()
  672. def clone(self, target_path, mkdir=True, bare=False,
  673. origin=b"origin"):
  674. """Clone this repository.
  675. :param target_path: Target path
  676. :param mkdir: Create the target directory
  677. :param bare: Whether to create a bare repository
  678. :param origin: Base name for refs in target repository
  679. cloned from this repository
  680. :return: Created repository as `Repo`
  681. """
  682. if not bare:
  683. target = self.init(target_path, mkdir=mkdir)
  684. else:
  685. target = self.init_bare(target_path)
  686. self.fetch(target)
  687. target.refs.import_refs(
  688. b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'))
  689. target.refs.import_refs(
  690. b'refs/tags', self.refs.as_dict(b'refs/tags'))
  691. try:
  692. target.refs.add_if_new(
  693. b'refs/heads/master',
  694. self.refs[b'refs/heads/master'])
  695. except KeyError:
  696. pass
  697. # Update target head
  698. head, head_sha = self.refs._follow(b'HEAD')
  699. if head is not None and head_sha is not None:
  700. target.refs.set_symbolic_ref(b'HEAD', head)
  701. target[b'HEAD'] = head_sha
  702. if not bare:
  703. # Checkout HEAD to target dir
  704. target.reset_index()
  705. return target
  706. def reset_index(self, tree=None):
  707. """Reset the index back to a specific tree.
  708. :param tree: Tree SHA to reset to, None for current HEAD tree.
  709. """
  710. from dulwich.index import (
  711. build_index_from_tree,
  712. validate_path_element_default,
  713. validate_path_element_ntfs,
  714. )
  715. if tree is None:
  716. tree = self[b'HEAD'].tree
  717. config = self.get_config()
  718. honor_filemode = config.get_boolean('core', 'filemode', os.name != "nt")
  719. if config.get_boolean('core', 'core.protectNTFS', os.name == "nt"):
  720. validate_path_element = validate_path_element_ntfs
  721. else:
  722. validate_path_element = validate_path_element_default
  723. return build_index_from_tree(self.path, self.index_path(),
  724. self.object_store, tree, honor_filemode=honor_filemode,
  725. validate_path_element=validate_path_element)
  def get_config(self):
      """Retrieve the config object.

      When the file does not exist (``ENOENT``) an empty ``ConfigFile`` is
      returned whose ``path`` attribute is set to the expected location;
      any other I/O error is re-raised.

      :return: `ConfigFile` object for the ``config`` file in the control
          directory.
      """
      from dulwich.config import ConfigFile
      config_path = os.path.join(self._controldir, 'config')
      try:
          loaded = ConfigFile.from_path(config_path)
      except (IOError, OSError) as err:
          if err.errno != errno.ENOENT:
              raise
      else:
          return loaded
      # No config file on disk yet: hand back an empty one bound to the path.
      empty = ConfigFile()
      empty.path = config_path
      return empty
  def get_description(self):
      """Retrieve the description of this repository.

      The ``description`` file in the control directory is read in binary
      mode.  A missing file (``ENOENT``) yields None; any other I/O error
      is re-raised.

      :return: The contents of the description file, or None.
      """
      description_path = os.path.join(self._controldir, 'description')
      try:
          with GitFile(description_path, 'rb') as stream:
              contents = stream.read()
      except (IOError, OSError) as err:
          if err.errno == errno.ENOENT:
              return None
          raise
      return contents
  def __repr__(self):
      """Return a debugging representation that shows ``self.path``."""
      template = "<Repo at %r>"
      return template % self.path
  def set_description(self, description):
      """Set the description for this repository.

      The text is stored through ``_put_named_file`` under the name
      ``description``.

      :param description: Text to set as description for this repository.
      """
      filename = 'description'
      self._put_named_file(filename, description)
  @classmethod
  def _init_maybe_bare(cls, path, bare):
      """Create the repository layout in ``path`` and open it.

      Every entry of ``BASE_DIRECTORIES`` is created below ``path``, the
      object store is initialised in ``OBJECTDIR``, ``HEAD`` is made a
      symbolic ref to ``refs/heads/master`` and ``_init_files`` is run.

      :param path: Directory that receives the repository layout
      :param bare: Passed through to ``_init_files``
      :return: The newly opened ``cls`` instance
      """
      for parts in BASE_DIRECTORIES:
          directory = os.path.join(path, *parts)
          os.mkdir(directory)
      objects_path = os.path.join(path, OBJECTDIR)
      DiskObjectStore.init(objects_path)
      repo = cls(path)
      repo.refs.set_symbolic_ref(b'HEAD', b"refs/heads/master")
      repo._init_files(bare)
      return repo
  @classmethod
  def init(cls, path, mkdir=False):
      """Create a new repository.

      A control directory named ``CONTROLDIR`` is created inside ``path``
      and initialised as a non-bare repository.

      :param path: Path in which to create the repository
      :param mkdir: Whether to create the directory
      :return: `Repo` instance
      """
      if mkdir:
          os.mkdir(path)
      control_path = os.path.join(path, CONTROLDIR)
      os.mkdir(control_path)
      # The layout lives in the control directory; the returned repository
      # is opened on the enclosing ``path``.
      cls._init_maybe_bare(control_path, False)
      return cls(path)
  @classmethod
  def init_bare(cls, path):
      """Create a new bare repository.

      ``path`` should already exist and be an empty directory.

      :param path: Path to create bare repository in
      :return: a `Repo` instance
      """
      repo = cls._init_maybe_bare(path, True)
      return repo

  # ``create`` is another name for ``init_bare``.
  create = init_bare
  def close(self):
      """Close any files opened by this repository.

      This delegates to the ``close`` method of the object store.
      """
      store = self.object_store
      store.close()
  class MemoryRepo(BaseRepo):
      """Repository that keeps refs, objects, and named files in memory.

      A MemoryRepo is always bare: it has no working tree and no index,
      since those have a stronger dependency on the filesystem.
      """

      def __init__(self):
          """Create an empty in-memory repository with an empty config."""
          from dulwich.config import ConfigFile
          object_store = MemoryObjectStore()
          refs_container = DictRefsContainer({})
          BaseRepo.__init__(self, object_store, refs_container)
          self._named_files = {}
          self.bare = True
          self._config = ConfigFile()

      def _put_named_file(self, path, contents):
          """Store the contents of a control-dir file under its name.

          :param path: The path to the file, relative to the control dir.
          :param contents: A string to write to the file.
          """
          self._named_files[path] = contents

      def get_named_file(self, path):
          """Get a file from the control dir with a specific name.

          Although the filename should be interpreted as a filename relative
          to the control dir in a disk-backed Repo, the object returned need
          not be pointing to a file in that location.

          :param path: The path to the file, relative to the control dir.
          :return: An open file object, or None if the file does not exist.
          """
          stored = self._named_files.get(path)
          return None if stored is None else BytesIO(stored)

      def open_index(self):
          """Fail to open index for this repo, since it is bare.

          :raise NoIndexPresent: Raised when no index is present
          """
          raise NoIndexPresent()

      def get_config(self):
          """Retrieve the config object.

          :return: The `ConfigFile` object created for this repository.
          """
          return self._config

      def get_description(self):
          """Retrieve the repository description.

          :return: Always None, meaning no description.
          """
          return None

      @classmethod
      def init_bare(cls, objects, refs):
          """Create a new bare repository in memory.

          :param objects: Objects for the new repository,
              as iterable
          :param refs: Refs as dictionary, mapping names
              to object SHA1s
          :return: The populated repository instance
          """
          repo = cls()
          add_object = repo.object_store.add_object
          for item in objects:
              add_object(item)
          for name, target in refs.items():
              repo.refs[name] = target
          repo._init_files(bare=True)
          return repo