repo.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009
  1. # repo.py -- For dealing with git repositories.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) any later version of
  9. # the License.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19. # MA 02110-1301, USA.
  20. """Repository access.
  21. This module contains the base class for git repositories
  22. (BaseRepo) and an implementation which uses a repository on
  23. local disk (Repo).
  24. """
  25. from io import BytesIO
  26. import errno
  27. import os
  28. import sys
  29. from dulwich.errors import (
  30. NoIndexPresent,
  31. NotBlobError,
  32. NotCommitError,
  33. NotGitRepository,
  34. NotTreeError,
  35. NotTagError,
  36. CommitError,
  37. RefFormatError,
  38. HookError,
  39. )
  40. from dulwich.file import (
  41. GitFile,
  42. )
  43. from dulwich.object_store import (
  44. DiskObjectStore,
  45. MemoryObjectStore,
  46. ObjectStoreGraphWalker,
  47. )
  48. from dulwich.objects import (
  49. check_hexsha,
  50. Blob,
  51. Commit,
  52. ShaFile,
  53. Tag,
  54. Tree,
  55. )
  56. from dulwich.hooks import (
  57. PreCommitShellHook,
  58. PostCommitShellHook,
  59. CommitMsgShellHook,
  60. )
  61. from dulwich.refs import (
  62. check_ref_format,
  63. RefsContainer,
  64. DictRefsContainer,
  65. InfoRefsContainer,
  66. DiskRefsContainer,
  67. read_packed_refs,
  68. read_packed_refs_with_peeled,
  69. write_packed_refs,
  70. SYMREF,
  71. )
  72. import warnings
# Path components used when locating files and directories that belong to
# a repository's control directory.
OBJECTDIR = 'objects'
REFSDIR = 'refs'
REFSDIR_TAGS = 'tags'
REFSDIR_HEADS = 'heads'
INDEX_FILENAME = "index"

# Directories created for a new repository by Repo._init_maybe_bare.  Each
# entry is a list of path components that is joined onto the repository's
# control directory path.
BASE_DIRECTORIES = [
    ["branches"],
    [REFSDIR],
    [REFSDIR, REFSDIR_TAGS],
    [REFSDIR, REFSDIR_HEADS],
    ["hooks"],
    ["info"]
]
  86. def parse_graftpoints(graftpoints):
  87. """Convert a list of graftpoints into a dict
  88. :param graftpoints: Iterator of graftpoint lines
  89. Each line is formatted as:
  90. <commit sha1> <parent sha1> [<parent sha1>]*
  91. Resulting dictionary is:
  92. <commit sha1>: [<parent sha1>*]
  93. https://git.wiki.kernel.org/index.php/GraftPoint
  94. """
  95. grafts = {}
  96. for l in graftpoints:
  97. raw_graft = l.split(None, 1)
  98. commit = raw_graft[0]
  99. if len(raw_graft) == 2:
  100. parents = raw_graft[1].split()
  101. else:
  102. parents = []
  103. for sha in [commit] + parents:
  104. check_hexsha(sha, 'Invalid graftpoint')
  105. grafts[commit] = parents
  106. return grafts
  107. def serialize_graftpoints(graftpoints):
  108. """Convert a dictionary of grafts into string
  109. The graft dictionary is:
  110. <commit sha1>: [<parent sha1>*]
  111. Each line is formatted as:
  112. <commit sha1> <parent sha1> [<parent sha1>]*
  113. https://git.wiki.kernel.org/index.php/GraftPoint
  114. """
  115. graft_lines = []
  116. for commit, parents in graftpoints.items():
  117. if parents:
  118. graft_lines.append(commit + b' ' + b' '.join(parents))
  119. else:
  120. graft_lines.append(commit)
  121. return b'\n'.join(graft_lines)
  122. class BaseRepo(object):
  123. """Base class for a git repository.
  124. :ivar object_store: Dictionary-like object for accessing
  125. the objects
  126. :ivar refs: Dictionary-like object with the refs in this
  127. repository
  128. """
  129. def __init__(self, object_store, refs):
  130. """Open a repository.
  131. This shouldn't be called directly, but rather through one of the
  132. base classes, such as MemoryRepo or Repo.
  133. :param object_store: Object store to use
  134. :param refs: Refs container to use
  135. """
  136. self.object_store = object_store
  137. self.refs = refs
  138. self._graftpoints = {}
  139. self.hooks = {}
  140. def _init_files(self, bare):
  141. """Initialize a default set of named files."""
  142. from dulwich.config import ConfigFile
  143. self._put_named_file('description', b"Unnamed repository")
  144. f = BytesIO()
  145. cf = ConfigFile()
  146. cf.set(b"core", b"repositoryformatversion", b"0")
  147. cf.set(b"core", b"filemode", b"true")
  148. cf.set(b"core", b"bare", bare)
  149. cf.set(b"core", b"logallrefupdates", True)
  150. cf.write_to_file(f)
  151. self._put_named_file('config', f.getvalue())
  152. self._put_named_file(os.path.join('info', 'exclude'), b'')
  153. def get_named_file(self, path):
  154. """Get a file from the control dir with a specific name.
  155. Although the filename should be interpreted as a filename relative to
  156. the control dir in a disk-based Repo, the object returned need not be
  157. pointing to a file in that location.
  158. :param path: The path to the file, relative to the control dir.
  159. :return: An open file object, or None if the file does not exist.
  160. """
  161. raise NotImplementedError(self.get_named_file)
  162. def _put_named_file(self, path, contents):
  163. """Write a file to the control dir with the given name and contents.
  164. :param path: The path to the file, relative to the control dir.
  165. :param contents: A string to write to the file.
  166. """
  167. raise NotImplementedError(self._put_named_file)
  168. def open_index(self):
  169. """Open the index for this repository.
  170. :raise NoIndexPresent: If no index is present
  171. :return: The matching `Index`
  172. """
  173. raise NotImplementedError(self.open_index)
  174. def fetch(self, target, determine_wants=None, progress=None):
  175. """Fetch objects into another repository.
  176. :param target: The target repository
  177. :param determine_wants: Optional function to determine what refs to
  178. fetch.
  179. :param progress: Optional progress function
  180. :return: The local refs
  181. """
  182. if determine_wants is None:
  183. determine_wants = target.object_store.determine_wants_all
  184. target.object_store.add_objects(
  185. self.fetch_objects(determine_wants, target.get_graph_walker(),
  186. progress))
  187. return self.get_refs()
  188. def fetch_objects(self, determine_wants, graph_walker, progress,
  189. get_tagged=None):
  190. """Fetch the missing objects required for a set of revisions.
  191. :param determine_wants: Function that takes a dictionary with heads
  192. and returns the list of heads to fetch.
  193. :param graph_walker: Object that can iterate over the list of revisions
  194. to fetch and has an "ack" method that will be called to acknowledge
  195. that a revision is present.
  196. :param progress: Simple progress function that will be called with
  197. updated progress strings.
  198. :param get_tagged: Function that returns a dict of pointed-to sha -> tag
  199. sha for including tags.
  200. :return: iterator over objects, with __len__ implemented
  201. """
  202. wants = determine_wants(self.get_refs())
  203. if not isinstance(wants, list):
  204. raise TypeError("determine_wants() did not return a list")
  205. shallows = getattr(graph_walker, 'shallow', frozenset())
  206. unshallows = getattr(graph_walker, 'unshallow', frozenset())
  207. if wants == []:
  208. # TODO(dborowitz): find a way to short-circuit that doesn't change
  209. # this interface.
  210. if shallows or unshallows:
  211. # Do not send a pack in shallow short-circuit path
  212. return None
  213. return []
  214. # If the graph walker is set up with an implementation that can
  215. # ACK/NAK to the wire, it will write data to the client through
  216. # this call as a side-effect.
  217. haves = self.object_store.find_common_revisions(graph_walker)
  218. # Deal with shallow requests separately because the haves do
  219. # not reflect what objects are missing
  220. if shallows or unshallows:
  221. haves = [] # TODO: filter the haves commits from iter_shas.
  222. # the specific commits aren't missing.
  223. def get_parents(commit):
  224. if commit.id in shallows:
  225. return []
  226. return self.get_parents(commit.id, commit)
  227. return self.object_store.iter_shas(
  228. self.object_store.find_missing_objects(
  229. haves, wants, progress,
  230. get_tagged,
  231. get_parents=get_parents))
  232. def get_graph_walker(self, heads=None):
  233. """Retrieve a graph walker.
  234. A graph walker is used by a remote repository (or proxy)
  235. to find out which objects are present in this repository.
  236. :param heads: Repository heads to use (optional)
  237. :return: A graph walker object
  238. """
  239. if heads is None:
  240. heads = self.refs.as_dict(b'refs/heads').values()
  241. return ObjectStoreGraphWalker(heads, self.get_parents)
  242. def get_refs(self):
  243. """Get dictionary with all refs.
  244. :return: A ``dict`` mapping ref names to SHA1s
  245. """
  246. return self.refs.as_dict()
  247. def head(self):
  248. """Return the SHA1 pointed at by HEAD."""
  249. return self.refs[b'HEAD']
  250. def _get_object(self, sha, cls):
  251. assert len(sha) in (20, 40)
  252. ret = self.get_object(sha)
  253. if not isinstance(ret, cls):
  254. if cls is Commit:
  255. raise NotCommitError(ret)
  256. elif cls is Blob:
  257. raise NotBlobError(ret)
  258. elif cls is Tree:
  259. raise NotTreeError(ret)
  260. elif cls is Tag:
  261. raise NotTagError(ret)
  262. else:
  263. raise Exception("Type invalid: %r != %r" % (
  264. ret.type_name, cls.type_name))
  265. return ret
  266. def get_object(self, sha):
  267. """Retrieve the object with the specified SHA.
  268. :param sha: SHA to retrieve
  269. :return: A ShaFile object
  270. :raise KeyError: when the object can not be found
  271. """
  272. return self.object_store[sha]
  273. def get_parents(self, sha, commit=None):
  274. """Retrieve the parents of a specific commit.
  275. If the specific commit is a graftpoint, the graft parents
  276. will be returned instead.
  277. :param sha: SHA of the commit for which to retrieve the parents
  278. :param commit: Optional commit matching the sha
  279. :return: List of parents
  280. """
  281. try:
  282. return self._graftpoints[sha]
  283. except KeyError:
  284. if commit is None:
  285. commit = self[sha]
  286. return commit.parents
  287. def get_config(self):
  288. """Retrieve the config object.
  289. :return: `ConfigFile` object for the ``.git/config`` file.
  290. """
  291. raise NotImplementedError(self.get_config)
  292. def get_description(self):
  293. """Retrieve the description for this repository.
  294. :return: String with the description of the repository
  295. as set by the user.
  296. """
  297. raise NotImplementedError(self.get_description)
  298. def set_description(self, description):
  299. """Set the description for this repository.
  300. :param description: Text to set as description for this repository.
  301. """
  302. raise NotImplementedError(self.set_description)
  303. def get_config_stack(self):
  304. """Return a config stack for this repository.
  305. This stack accesses the configuration for both this repository
  306. itself (.git/config) and the global configuration, which usually
  307. lives in ~/.gitconfig.
  308. :return: `Config` instance for this repository
  309. """
  310. from dulwich.config import StackedConfig
  311. backends = [self.get_config()] + StackedConfig.default_backends()
  312. return StackedConfig(backends, writable=backends[0])
  313. def get_peeled(self, ref):
  314. """Get the peeled value of a ref.
  315. :param ref: The refname to peel.
  316. :return: The fully-peeled SHA1 of a tag object, after peeling all
  317. intermediate tags; if the original ref does not point to a tag, this
  318. will equal the original SHA1.
  319. """
  320. cached = self.refs.get_peeled(ref)
  321. if cached is not None:
  322. return cached
  323. return self.object_store.peel_sha(self.refs[ref]).id
  324. def get_walker(self, include=None, *args, **kwargs):
  325. """Obtain a walker for this repository.
  326. :param include: Iterable of SHAs of commits to include along with their
  327. ancestors. Defaults to [HEAD]
  328. :param exclude: Iterable of SHAs of commits to exclude along with their
  329. ancestors, overriding includes.
  330. :param order: ORDER_* constant specifying the order of results. Anything
  331. other than ORDER_DATE may result in O(n) memory usage.
  332. :param reverse: If True, reverse the order of output, requiring O(n)
  333. memory.
  334. :param max_entries: The maximum number of entries to yield, or None for
  335. no limit.
  336. :param paths: Iterable of file or subtree paths to show entries for.
  337. :param rename_detector: diff.RenameDetector object for detecting
  338. renames.
  339. :param follow: If True, follow path across renames/copies. Forces a
  340. default rename_detector.
  341. :param since: Timestamp to list commits after.
  342. :param until: Timestamp to list commits before.
  343. :param queue_cls: A class to use for a queue of commits, supporting the
  344. iterator protocol. The constructor takes a single argument, the
  345. Walker.
  346. :return: A `Walker` object
  347. """
  348. from dulwich.walk import Walker
  349. if include is None:
  350. include = [self.head()]
  351. if isinstance(include, str):
  352. include = [include]
  353. kwargs['get_parents'] = lambda commit: self.get_parents(commit.id, commit)
  354. return Walker(self.object_store, include, *args, **kwargs)
  355. def __getitem__(self, name):
  356. """Retrieve a Git object by SHA1 or ref.
  357. :param name: A Git object SHA1 or a ref name
  358. :return: A `ShaFile` object, such as a Commit or Blob
  359. :raise KeyError: when the specified ref or object does not exist
  360. """
  361. if not isinstance(name, bytes):
  362. raise TypeError("'name' must be bytestring, not %.80s" %
  363. type(name).__name__)
  364. if len(name) in (20, 40):
  365. try:
  366. return self.object_store[name]
  367. except (KeyError, ValueError):
  368. pass
  369. try:
  370. return self.object_store[self.refs[name]]
  371. except RefFormatError:
  372. raise KeyError(name)
  373. def __contains__(self, name):
  374. """Check if a specific Git object or ref is present.
  375. :param name: Git object SHA1 or ref name
  376. """
  377. if len(name) in (20, 40):
  378. return name in self.object_store or name in self.refs
  379. else:
  380. return name in self.refs
  381. def __setitem__(self, name, value):
  382. """Set a ref.
  383. :param name: ref name
  384. :param value: Ref value - either a ShaFile object, or a hex sha
  385. """
  386. if name.startswith(b"refs/") or name == b'HEAD':
  387. if isinstance(value, ShaFile):
  388. self.refs[name] = value.id
  389. elif isinstance(value, bytes):
  390. self.refs[name] = value
  391. else:
  392. raise TypeError(value)
  393. else:
  394. raise ValueError(name)
  395. def __delitem__(self, name):
  396. """Remove a ref.
  397. :param name: Name of the ref to remove
  398. """
  399. if name.startswith(b"refs/") or name == b"HEAD":
  400. del self.refs[name]
  401. else:
  402. raise ValueError(name)
  403. def _get_user_identity(self):
  404. """Determine the identity to use for new commits.
  405. """
  406. config = self.get_config_stack()
  407. return (config.get((b"user", ), b"name") + b" <" +
  408. config.get((b"user", ), b"email") + b">")
  409. def _add_graftpoints(self, updated_graftpoints):
  410. """Add or modify graftpoints
  411. :param updated_graftpoints: Dict of commit shas to list of parent shas
  412. """
  413. # Simple validation
  414. for commit, parents in updated_graftpoints.items():
  415. for sha in [commit] + parents:
  416. check_hexsha(sha, 'Invalid graftpoint')
  417. self._graftpoints.update(updated_graftpoints)
  418. def _remove_graftpoints(self, to_remove=[]):
  419. """Remove graftpoints
  420. :param to_remove: List of commit shas
  421. """
  422. for sha in to_remove:
  423. del self._graftpoints[sha]
  424. def do_commit(self, message=None, committer=None,
  425. author=None, commit_timestamp=None,
  426. commit_timezone=None, author_timestamp=None,
  427. author_timezone=None, tree=None, encoding=None,
  428. ref=b'HEAD', merge_heads=None):
  429. """Create a new commit.
  430. :param message: Commit message
  431. :param committer: Committer fullname
  432. :param author: Author fullname (defaults to committer)
  433. :param commit_timestamp: Commit timestamp (defaults to now)
  434. :param commit_timezone: Commit timestamp timezone (defaults to GMT)
  435. :param author_timestamp: Author timestamp (defaults to commit timestamp)
  436. :param author_timezone: Author timestamp timezone
  437. (defaults to commit timestamp timezone)
  438. :param tree: SHA1 of the tree root to use (if not specified the
  439. current index will be committed).
  440. :param encoding: Encoding
  441. :param ref: Optional ref to commit to (defaults to current branch)
  442. :param merge_heads: Merge heads (defaults to .git/MERGE_HEADS)
  443. :return: New commit SHA1
  444. """
  445. import time
  446. c = Commit()
  447. if tree is None:
  448. index = self.open_index()
  449. c.tree = index.commit(self.object_store)
  450. else:
  451. if len(tree) != 40:
  452. raise ValueError("tree must be a 40-byte hex sha string")
  453. c.tree = tree
  454. try:
  455. self.hooks['pre-commit'].execute()
  456. except HookError as e:
  457. raise CommitError(e)
  458. except KeyError: # no hook defined, silent fallthrough
  459. pass
  460. if merge_heads is None:
  461. # FIXME: Read merge heads from .git/MERGE_HEADS
  462. merge_heads = []
  463. if committer is None:
  464. # FIXME: Support GIT_COMMITTER_NAME/GIT_COMMITTER_EMAIL environment
  465. # variables
  466. committer = self._get_user_identity()
  467. c.committer = committer
  468. if commit_timestamp is None:
  469. # FIXME: Support GIT_COMMITTER_DATE environment variable
  470. commit_timestamp = time.time()
  471. c.commit_time = int(commit_timestamp)
  472. if commit_timezone is None:
  473. # FIXME: Use current user timezone rather than UTC
  474. commit_timezone = 0
  475. c.commit_timezone = commit_timezone
  476. if author is None:
  477. # FIXME: Support GIT_AUTHOR_NAME/GIT_AUTHOR_EMAIL environment
  478. # variables
  479. author = committer
  480. c.author = author
  481. if author_timestamp is None:
  482. # FIXME: Support GIT_AUTHOR_DATE environment variable
  483. author_timestamp = commit_timestamp
  484. c.author_time = int(author_timestamp)
  485. if author_timezone is None:
  486. author_timezone = commit_timezone
  487. c.author_timezone = author_timezone
  488. if encoding is not None:
  489. c.encoding = encoding
  490. if message is None:
  491. # FIXME: Try to read commit message from .git/MERGE_MSG
  492. raise ValueError("No commit message specified")
  493. try:
  494. c.message = self.hooks['commit-msg'].execute(message)
  495. if c.message is None:
  496. c.message = message
  497. except HookError as e:
  498. raise CommitError(e)
  499. except KeyError: # no hook defined, message not modified
  500. c.message = message
  501. if ref is None:
  502. # Create a dangling commit
  503. c.parents = merge_heads
  504. self.object_store.add_object(c)
  505. else:
  506. try:
  507. old_head = self.refs[ref]
  508. c.parents = [old_head] + merge_heads
  509. self.object_store.add_object(c)
  510. ok = self.refs.set_if_equals(ref, old_head, c.id)
  511. except KeyError:
  512. c.parents = merge_heads
  513. self.object_store.add_object(c)
  514. ok = self.refs.add_if_new(ref, c.id)
  515. if not ok:
  516. # Fail if the atomic compare-and-swap failed, leaving the commit and
  517. # all its objects as garbage.
  518. raise CommitError("%s changed during commit" % (ref,))
  519. try:
  520. self.hooks['post-commit'].execute()
  521. except HookError as e: # silent failure
  522. warnings.warn("post-commit hook failed: %s" % e, UserWarning)
  523. except KeyError: # no hook defined, silent fallthrough
  524. pass
  525. return c.id
  526. class Repo(BaseRepo):
  527. """A git repository backed by local disk.
  528. To open an existing repository, call the contructor with
  529. the path of the repository.
  530. To create a new repository, use the Repo.init class method.
  531. """
  532. def __init__(self, root):
  533. if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
  534. self.bare = False
  535. self._controldir = os.path.join(root, ".git")
  536. elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
  537. os.path.isdir(os.path.join(root, REFSDIR))):
  538. self.bare = True
  539. self._controldir = root
  540. elif (os.path.isfile(os.path.join(root, ".git"))):
  541. import re
  542. with open(os.path.join(root, ".git"), 'r') as f:
  543. _, path = re.match('(gitdir: )(.+$)', f.read()).groups()
  544. self.bare = False
  545. self._controldir = os.path.join(root, path)
  546. else:
  547. raise NotGitRepository(
  548. "No git repository was found at %(path)s" % dict(path=root)
  549. )
  550. self.path = root
  551. object_store = DiskObjectStore(os.path.join(self.controldir(),
  552. OBJECTDIR))
  553. refs = DiskRefsContainer(self.controldir())
  554. BaseRepo.__init__(self, object_store, refs)
  555. self._graftpoints = {}
  556. graft_file = self.get_named_file(os.path.join("info", "grafts"))
  557. if graft_file:
  558. with graft_file:
  559. self._graftpoints.update(parse_graftpoints(graft_file))
  560. graft_file = self.get_named_file("shallow")
  561. if graft_file:
  562. with graft_file:
  563. self._graftpoints.update(parse_graftpoints(graft_file))
  564. self.hooks['pre-commit'] = PreCommitShellHook(self.controldir())
  565. self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir())
  566. self.hooks['post-commit'] = PostCommitShellHook(self.controldir())
  567. @classmethod
  568. def discover(cls, start='.'):
  569. """Iterate parent directories to discover a repository
  570. Return a Repo object for the first parent directory that looks like a
  571. Git repository.
  572. :param start: The directory to start discovery from (defaults to '.')
  573. """
  574. remaining = True
  575. path = os.path.abspath(start)
  576. while remaining:
  577. try:
  578. return cls(path)
  579. except NotGitRepository:
  580. path, remaining = os.path.split(path)
  581. raise NotGitRepository(
  582. "No git repository was found at %(path)s" % dict(path=start)
  583. )
  584. def controldir(self):
  585. """Return the path of the control directory."""
  586. return self._controldir
  587. def _put_named_file(self, path, contents):
  588. """Write a file to the control dir with the given name and contents.
  589. :param path: The path to the file, relative to the control dir.
  590. :param contents: A string to write to the file.
  591. """
  592. path = path.lstrip(os.path.sep)
  593. with GitFile(os.path.join(self.controldir(), path), 'wb') as f:
  594. f.write(contents)
  595. def get_named_file(self, path):
  596. """Get a file from the control dir with a specific name.
  597. Although the filename should be interpreted as a filename relative to
  598. the control dir in a disk-based Repo, the object returned need not be
  599. pointing to a file in that location.
  600. :param path: The path to the file, relative to the control dir.
  601. :return: An open file object, or None if the file does not exist.
  602. """
  603. # TODO(dborowitz): sanitize filenames, since this is used directly by
  604. # the dumb web serving code.
  605. path = path.lstrip(os.path.sep)
  606. try:
  607. return open(os.path.join(self.controldir(), path), 'rb')
  608. except (IOError, OSError) as e:
  609. if e.errno == errno.ENOENT:
  610. return None
  611. raise
  612. def index_path(self):
  613. """Return path to the index file."""
  614. return os.path.join(self.controldir(), INDEX_FILENAME)
  615. def open_index(self):
  616. """Open the index for this repository.
  617. :raise NoIndexPresent: If no index is present
  618. :return: The matching `Index`
  619. """
  620. from dulwich.index import Index
  621. if not self.has_index():
  622. raise NoIndexPresent()
  623. return Index(self.index_path())
  624. def has_index(self):
  625. """Check if an index is present."""
  626. # Bare repos must never have index files; non-bare repos may have a
  627. # missing index file, which is treated as empty.
  628. return not self.bare
  629. def stage(self, fs_paths):
  630. """Stage a set of paths.
  631. :param fs_paths: List of paths, relative to the repository path
  632. """
  633. root_path_bytes = self.path.encode(sys.getfilesystemencoding())
  634. if not isinstance(fs_paths, list):
  635. fs_paths = [fs_paths]
  636. from dulwich.index import (
  637. blob_from_path_and_stat,
  638. index_entry_from_stat,
  639. _fs_to_tree_path,
  640. )
  641. index = self.open_index()
  642. for fs_path in fs_paths:
  643. if not isinstance(fs_path, bytes):
  644. fs_path = fs_path.encode(sys.getfilesystemencoding())
  645. tree_path = _fs_to_tree_path(fs_path)
  646. full_path = os.path.join(root_path_bytes, fs_path)
  647. try:
  648. st = os.lstat(full_path)
  649. except OSError:
  650. # File no longer exists
  651. try:
  652. del index[tree_path]
  653. except KeyError:
  654. pass # already removed
  655. else:
  656. blob = blob_from_path_and_stat(full_path, st)
  657. self.object_store.add_object(blob)
  658. index[tree_path] = index_entry_from_stat(st, blob.id, 0)
  659. index.write()
  660. def clone(self, target_path, mkdir=True, bare=False,
  661. origin=b"origin"):
  662. """Clone this repository.
  663. :param target_path: Target path
  664. :param mkdir: Create the target directory
  665. :param bare: Whether to create a bare repository
  666. :param origin: Base name for refs in target repository
  667. cloned from this repository
  668. :return: Created repository as `Repo`
  669. """
  670. if not bare:
  671. target = self.init(target_path, mkdir=mkdir)
  672. else:
  673. target = self.init_bare(target_path)
  674. self.fetch(target)
  675. target.refs.import_refs(
  676. b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'))
  677. target.refs.import_refs(
  678. b'refs/tags', self.refs.as_dict(b'refs/tags'))
  679. try:
  680. target.refs.add_if_new(
  681. b'refs/heads/master',
  682. self.refs[b'refs/heads/master'])
  683. except KeyError:
  684. pass
  685. # Update target head
  686. head, head_sha = self.refs._follow(b'HEAD')
  687. if head is not None and head_sha is not None:
  688. target.refs.set_symbolic_ref(b'HEAD', head)
  689. target[b'HEAD'] = head_sha
  690. if not bare:
  691. # Checkout HEAD to target dir
  692. target.reset_index()
  693. return target
  694. def reset_index(self, tree=None):
  695. """Reset the index back to a specific tree.
  696. :param tree: Tree SHA to reset to, None for current HEAD tree.
  697. """
  698. from dulwich.index import (
  699. build_index_from_tree,
  700. validate_path_element_default,
  701. validate_path_element_ntfs,
  702. )
  703. if tree is None:
  704. tree = self[b'HEAD'].tree
  705. config = self.get_config()
  706. honor_filemode = config.get_boolean('core', 'filemode', os.name != "nt")
  707. if config.get_boolean('core', 'core.protectNTFS', os.name == "nt"):
  708. validate_path_element = validate_path_element_ntfs
  709. else:
  710. validate_path_element = validate_path_element_default
  711. return build_index_from_tree(self.path, self.index_path(),
  712. self.object_store, tree, honor_filemode=honor_filemode,
  713. validate_path_element=validate_path_element)
  714. def get_config(self):
  715. """Retrieve the config object.
  716. :return: `ConfigFile` object for the ``.git/config`` file.
  717. """
  718. from dulwich.config import ConfigFile
  719. path = os.path.join(self._controldir, 'config')
  720. try:
  721. return ConfigFile.from_path(path)
  722. except (IOError, OSError) as e:
  723. if e.errno != errno.ENOENT:
  724. raise
  725. ret = ConfigFile()
  726. ret.path = path
  727. return ret
  728. def get_description(self):
  729. """Retrieve the description of this repository.
  730. :return: A string describing the repository or None.
  731. """
  732. path = os.path.join(self._controldir, 'description')
  733. try:
  734. with GitFile(path, 'rb') as f:
  735. return f.read()
  736. except (IOError, OSError) as e:
  737. if e.errno != errno.ENOENT:
  738. raise
  739. return None
  740. def __repr__(self):
  741. return "<Repo at %r>" % self.path
  742. def set_description(self, description):
  743. """Set the description for this repository.
  744. :param description: Text to set as description for this repository.
  745. """
  746. self._put_named_file('description', description)
  747. @classmethod
  748. def _init_maybe_bare(cls, path, bare):
  749. for d in BASE_DIRECTORIES:
  750. os.mkdir(os.path.join(path, *d))
  751. DiskObjectStore.init(os.path.join(path, OBJECTDIR))
  752. ret = cls(path)
  753. ret.refs.set_symbolic_ref(b'HEAD', b"refs/heads/master")
  754. ret._init_files(bare)
  755. return ret
  756. @classmethod
  757. def init(cls, path, mkdir=False):
  758. """Create a new repository.
  759. :param path: Path in which to create the repository
  760. :param mkdir: Whether to create the directory
  761. :return: `Repo` instance
  762. """
  763. if mkdir:
  764. os.mkdir(path)
  765. controldir = os.path.join(path, ".git")
  766. os.mkdir(controldir)
  767. cls._init_maybe_bare(controldir, False)
  768. return cls(path)
  769. @classmethod
  770. def init_bare(cls, path):
  771. """Create a new bare repository.
  772. ``path`` should already exist and be an emty directory.
  773. :param path: Path to create bare repository in
  774. :return: a `Repo` instance
  775. """
  776. return cls._init_maybe_bare(path, True)
  777. create = init_bare
  778. def close(self):
  779. """Close any files opened by this repository."""
  780. self.object_store.close()
  781. class MemoryRepo(BaseRepo):
  782. """Repo that stores refs, objects, and named files in memory.
  783. MemoryRepos are always bare: they have no working tree and no index, since
  784. those have a stronger dependency on the filesystem.
  785. """
  786. def __init__(self):
  787. from dulwich.config import ConfigFile
  788. BaseRepo.__init__(self, MemoryObjectStore(), DictRefsContainer({}))
  789. self._named_files = {}
  790. self.bare = True
  791. self._config = ConfigFile()
  792. def _put_named_file(self, path, contents):
  793. """Write a file to the control dir with the given name and contents.
  794. :param path: The path to the file, relative to the control dir.
  795. :param contents: A string to write to the file.
  796. """
  797. self._named_files[path] = contents
  798. def get_named_file(self, path):
  799. """Get a file from the control dir with a specific name.
  800. Although the filename should be interpreted as a filename relative to
  801. the control dir in a disk-baked Repo, the object returned need not be
  802. pointing to a file in that location.
  803. :param path: The path to the file, relative to the control dir.
  804. :return: An open file object, or None if the file does not exist.
  805. """
  806. contents = self._named_files.get(path, None)
  807. if contents is None:
  808. return None
  809. return BytesIO(contents)
  810. def open_index(self):
  811. """Fail to open index for this repo, since it is bare.
  812. :raise NoIndexPresent: Raised when no index is present
  813. """
  814. raise NoIndexPresent()
  815. def get_config(self):
  816. """Retrieve the config object.
  817. :return: `ConfigFile` object.
  818. """
  819. return self._config
  820. def get_description(self):
  821. """Retrieve the repository description.
  822. This defaults to None, for no description.
  823. """
  824. return None
  825. @classmethod
  826. def init_bare(cls, objects, refs):
  827. """Create a new bare repository in memory.
  828. :param objects: Objects for the new repository,
  829. as iterable
  830. :param refs: Refs as dictionary, mapping names
  831. to object SHA1s
  832. """
  833. ret = cls()
  834. for obj in objects:
  835. ret.object_store.add_object(obj)
  836. for refname, sha in refs.items():
  837. ret.refs[refname] = sha
  838. ret._init_files(bare=True)
  839. return ret