walk.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. # walk.py -- General implementation of walking commits and their contents.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # or (at your option) any later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """General implementation of walking commits and their contents."""
  19. import heapq
  20. import itertools
  21. import os
  22. from dulwich.diff_tree import (
  23. RENAME_CHANGE_TYPES,
  24. tree_changes,
  25. tree_changes_for_merge,
  26. RenameDetector,
  27. )
  28. from dulwich.errors import (
  29. MissingCommitError,
  30. )
  31. ORDER_DATE = 'date'
  32. # Maximum number of commits to walk past a commit time boundary.
  33. _MAX_EXTRA_COMMITS = 5
  34. class WalkEntry(object):
  35. """Object encapsulating a single result from a walk."""
  36. def __init__(self, walker, commit):
  37. self.commit = commit
  38. self._store = walker.store
  39. self._changes = None
  40. self._rename_detector = walker.rename_detector
  41. def changes(self):
  42. """Get the tree changes for this entry.
  43. :return: For commits with up to one parent, a list of TreeChange
  44. objects; if the commit has no parents, these will be relative to the
  45. empty tree. For merge commits, a list of lists of TreeChange
  46. objects; see dulwich.diff.tree_changes_for_merge.
  47. """
  48. if self._changes is None:
  49. commit = self.commit
  50. if not commit.parents:
  51. changes_func = tree_changes
  52. parent = None
  53. elif len(commit.parents) == 1:
  54. changes_func = tree_changes
  55. parent = self._store[commit.parents[0]].tree
  56. else:
  57. changes_func = tree_changes_for_merge
  58. parent = [self._store[p].tree for p in commit.parents]
  59. self._changes = list(changes_func(
  60. self._store, parent, commit.tree,
  61. rename_detector=self._rename_detector))
  62. return self._changes
  63. def __repr__(self):
  64. return '<WalkEntry commit=%s, changes=%r>' % (
  65. self.commit.id, self.changes())
  66. class _CommitTimeQueue(object):
  67. """Priority queue of WalkEntry objects by commit time."""
  68. def __init__(self, walker):
  69. self._walker = walker
  70. self._store = walker.store
  71. self._excluded = walker.excluded
  72. self._pq = []
  73. self._pq_set = set()
  74. self._done = set()
  75. self._min_time = walker.since
  76. self._extra_commits_left = _MAX_EXTRA_COMMITS
  77. for commit_id in itertools.chain(walker.include, walker.excluded):
  78. self._push(commit_id)
  79. def _push(self, commit_id):
  80. try:
  81. commit = self._store[commit_id]
  82. except KeyError:
  83. raise MissingCommitError(commit_id)
  84. if commit_id not in self._pq_set and commit_id not in self._done:
  85. heapq.heappush(self._pq, (-commit.commit_time, commit))
  86. self._pq_set.add(commit_id)
  87. def next(self):
  88. while self._pq:
  89. _, commit = heapq.heappop(self._pq)
  90. sha = commit.id
  91. self._pq_set.remove(sha)
  92. if sha in self._done:
  93. continue
  94. is_excluded = sha in self._excluded
  95. if is_excluded:
  96. self._excluded.update(commit.parents)
  97. self._done.add(commit.id)
  98. if self._min_time is not None:
  99. if commit.commit_time < self._min_time:
  100. # We want to stop walking at min_time, but commits at the
  101. # boundary may be out of order with respect to their
  102. # parents. So we walk _MAX_EXTRA_COMMITS more commits once
  103. # we hit this boundary.
  104. self._extra_commits_left -= 1
  105. if not self._extra_commits_left:
  106. break
  107. else:
  108. # We're not at a boundary, so reset the counter.
  109. self._extra_commits_left = _MAX_EXTRA_COMMITS
  110. for parent_id in commit.parents:
  111. self._push(parent_id)
  112. if not is_excluded:
  113. return WalkEntry(self._walker, commit)
  114. return None
  115. class Walker(object):
  116. """Object for performing a walk of commits in a store.
  117. Walker objects are initialized with a store and other options and can then
  118. be treated as iterators of Commit objects.
  119. """
  120. def __init__(self, store, include, exclude=None, order=ORDER_DATE,
  121. reverse=False, max_entries=None, paths=None,
  122. rename_detector=None, follow=False, since=None, until=None,
  123. queue_cls=_CommitTimeQueue):
  124. """Constructor.
  125. :param store: ObjectStore instance for looking up objects.
  126. :param include: Iterable of SHAs of commits to include along with their
  127. ancestors.
  128. :param exclude: Iterable of SHAs of commits to exclude along with their
  129. ancestors, overriding includes.
  130. :param order: ORDER_* constant specifying the order of results. Anything
  131. other than ORDER_DATE may result in O(n) memory usage.
  132. :param reverse: If True, reverse the order of output, requiring O(n)
  133. memory.
  134. :param max_entries: The maximum number of entries to yield, or None for
  135. no limit.
  136. :param paths: Iterable of file or subtree paths to show entries for.
  137. :param rename_detector: diff.RenameDetector object for detecting
  138. renames.
  139. :param follow: If True, follow path across renames/copies. Forces a
  140. default rename_detector.
  141. :param since: Timestamp to list commits after.
  142. :param until: Timestamp to list commits before.
  143. :param queue_cls: A class to use for a queue of commits, supporting the
  144. iterator protocol. The constructor takes a single argument, the
  145. Walker.
  146. """
  147. if order not in (ORDER_DATE,):
  148. raise ValueError('Unknown walk order %s' % order)
  149. self.store = store
  150. self.include = include
  151. self.excluded = set(exclude or [])
  152. self.order = order
  153. self.reverse = reverse
  154. self.max_entries = max_entries
  155. self.paths = paths and set(paths) or None
  156. if follow and not rename_detector:
  157. rename_detector = RenameDetector(store)
  158. self.rename_detector = rename_detector
  159. self.follow = follow
  160. self.since = since
  161. self.until = until
  162. self._num_entries = 0
  163. self._queue = queue_cls(self)
  164. def _path_matches(self, changed_path):
  165. if changed_path is None:
  166. return False
  167. for followed_path in self.paths:
  168. if changed_path == followed_path:
  169. return True
  170. if (changed_path.startswith(followed_path) and
  171. changed_path[len(followed_path)] == '/'):
  172. return True
  173. return False
  174. def _change_matches(self, change):
  175. old_path = change.old.path
  176. new_path = change.new.path
  177. if self._path_matches(new_path):
  178. if self.follow and change.type in RENAME_CHANGE_TYPES:
  179. self.paths.add(old_path)
  180. self.paths.remove(new_path)
  181. return True
  182. elif self._path_matches(old_path):
  183. return True
  184. return False
  185. def _should_return(self, entry):
  186. """Determine if a walk entry should be returned..
  187. :param entry: The WalkEntry to consider.
  188. :return: True if the WalkEntry should be returned by this walk, or False
  189. otherwise (e.g. if it doesn't match any requested paths).
  190. """
  191. commit = entry.commit
  192. if self.since is not None and commit.commit_time < self.since:
  193. return False
  194. if self.until is not None and commit.commit_time > self.until:
  195. return False
  196. if self.paths is None:
  197. return True
  198. if len(commit.parents) > 1:
  199. for path_changes in entry.changes():
  200. # For merge commits, only include changes with conflicts for
  201. # this path. Since a rename conflict may include different
  202. # old.paths, we have to check all of them.
  203. for change in path_changes:
  204. if self._change_matches(change):
  205. return True
  206. else:
  207. for change in entry.changes():
  208. if self._change_matches(change):
  209. return True
  210. return None
  211. def _next(self):
  212. max_entries = self.max_entries
  213. while True:
  214. if max_entries is not None and self._num_entries >= max_entries:
  215. return None
  216. entry = self._queue.next()
  217. if entry is None:
  218. return None
  219. if self._should_return(entry):
  220. self._num_entries += 1
  221. return entry
  222. def __iter__(self):
  223. results = iter(self._next, None)
  224. if self.reverse:
  225. results = reversed(list(results))
  226. return iter(results)