walk.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. # walk.py -- General implementation of walking commits and their contents.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # or (at your option) any later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """General implementation of walking commits and their contents."""
  19. import heapq
  20. import itertools
  21. import os
  22. from dulwich.diff_tree import (
  23. RENAME_CHANGE_TYPES,
  24. tree_changes,
  25. tree_changes_for_merge,
  26. RenameDetector,
  27. )
  28. from dulwich.errors import (
  29. MissingCommitError,
  30. )
  31. ORDER_DATE = 'date'
  32. class WalkEntry(object):
  33. """Object encapsulating a single result from a walk."""
  34. def __init__(self, store, commit, rename_detector):
  35. self.commit = commit
  36. self._store = store
  37. self._changes = None
  38. self._rename_detector = rename_detector
  39. def changes(self):
  40. """Get the tree changes for this entry.
  41. :return: For commits with up to one parent, a list of TreeChange
  42. objects; if the commit has no parents, these will be relative to the
  43. empty tree. For merge commits, a list of lists of TreeChange
  44. objects; see dulwich.diff.tree_changes_for_merge.
  45. """
  46. if self._changes is None:
  47. commit = self.commit
  48. if not commit.parents:
  49. changes_func = tree_changes
  50. parent = None
  51. elif len(commit.parents) == 1:
  52. changes_func = tree_changes
  53. parent = self._store[commit.parents[0]].tree
  54. else:
  55. changes_func = tree_changes_for_merge
  56. parent = [self._store[p].tree for p in commit.parents]
  57. self._changes = list(changes_func(
  58. self._store, parent, commit.tree,
  59. rename_detector=self._rename_detector))
  60. return self._changes
  61. def __repr__(self):
  62. return '<WalkEntry commit=%s, changes=%r>' % (
  63. self.commit.id, self.changes())
  64. class Walker(object):
  65. """Object for performing a walk of commits in a store.
  66. Walker objects are initialized with a store and other options and can then
  67. be treated as iterators of Commit objects.
  68. """
  69. def __init__(self, store, include, exclude=None, order=ORDER_DATE,
  70. reverse=False, max_entries=None, paths=None,
  71. rename_detector=None, follow=False, since=None, until=None):
  72. """Constructor.
  73. :param store: ObjectStore instance for looking up objects.
  74. :param include: Iterable of SHAs of commits to include along with their
  75. ancestors.
  76. :param exclude: Iterable of SHAs of commits to exclude along with their
  77. ancestors, overriding includes.
  78. :param order: ORDER_* constant specifying the order of results. Anything
  79. other than ORDER_DATE may result in O(n) memory usage.
  80. :param reverse: If True, reverse the order of output, requiring O(n)
  81. memory.
  82. :param max_entries: The maximum number of entries to yield, or None for
  83. no limit.
  84. :param paths: Iterable of file or subtree paths to show entries for.
  85. :param rename_detector: diff.RenameDetector object for detecting
  86. renames.
  87. :param follow: If True, follow path across renames/copies. Forces a
  88. default rename_detector.
  89. :param since: Timestamp to list commits after.
  90. :param until: Timestamp to list commits before.
  91. """
  92. self._store = store
  93. if order not in (ORDER_DATE,):
  94. raise ValueError('Unknown walk order %s' % order)
  95. self._order = order
  96. self._reverse = reverse
  97. self._max_entries = max_entries
  98. self._num_entries = 0
  99. if follow and not rename_detector:
  100. rename_detector = RenameDetector(store)
  101. self._rename_detector = rename_detector
  102. exclude = exclude or []
  103. self._excluded = set(exclude)
  104. self._pq = []
  105. self._pq_set = set()
  106. self._done = set()
  107. self._paths = paths and set(paths) or None
  108. self._follow = follow
  109. self._since = since
  110. self._until = until
  111. for commit_id in itertools.chain(include, exclude):
  112. self._push(commit_id)
  113. def _push(self, commit_id):
  114. try:
  115. commit = self._store[commit_id]
  116. except KeyError:
  117. raise MissingCommitError(commit_id)
  118. if commit_id not in self._pq_set and commit_id not in self._done:
  119. heapq.heappush(self._pq, (-commit.commit_time, commit))
  120. self._pq_set.add(commit_id)
  121. def _pop(self):
  122. while self._pq:
  123. _, commit = heapq.heappop(self._pq)
  124. sha = commit.id
  125. self._pq_set.remove(sha)
  126. if sha in self._done:
  127. continue
  128. is_excluded = sha in self._excluded
  129. if is_excluded:
  130. self._excluded.update(commit.parents)
  131. self._done.add(commit.id)
  132. if self._since is None or commit.commit_time >= self._since:
  133. for parent_id in commit.parents:
  134. self._push(parent_id)
  135. if not is_excluded:
  136. return commit
  137. return None
  138. def _path_matches(self, changed_path):
  139. if changed_path is None:
  140. return False
  141. for followed_path in self._paths:
  142. if changed_path == followed_path:
  143. return True
  144. if (changed_path.startswith(followed_path) and
  145. changed_path[len(followed_path)] == '/'):
  146. return True
  147. return False
  148. def _change_matches(self, change):
  149. old_path = change.old.path
  150. new_path = change.new.path
  151. if self._path_matches(new_path):
  152. if self._follow and change.type in RENAME_CHANGE_TYPES:
  153. self._paths.add(old_path)
  154. self._paths.remove(new_path)
  155. return True
  156. elif self._path_matches(old_path):
  157. return True
  158. return False
  159. def _make_entry(self, commit):
  160. """Make a WalkEntry from a commit.
  161. :param commit: The commit for the WalkEntry.
  162. :return: A WalkEntry object, or None if no entry should be returned for
  163. this commit (e.g. if it doesn't match any requested paths).
  164. """
  165. if self._since is not None and commit.commit_time < self._since:
  166. return None
  167. if self._until is not None and commit.commit_time > self._until:
  168. return None
  169. entry = WalkEntry(self._store, commit, self._rename_detector)
  170. if self._paths is None:
  171. return entry
  172. if len(commit.parents) > 1:
  173. for path_changes in entry.changes():
  174. # For merge commits, only include changes with conflicts for
  175. # this path. Since a rename conflict may include different
  176. # old.paths, we have to check all of them.
  177. for change in path_changes:
  178. if self._change_matches(change):
  179. return entry
  180. else:
  181. for change in entry.changes():
  182. if self._change_matches(change):
  183. return entry
  184. return None
  185. def _next(self):
  186. max_entries = self._max_entries
  187. while True:
  188. if max_entries is not None and self._num_entries >= max_entries:
  189. return None
  190. commit = self._pop()
  191. if commit is None:
  192. return None
  193. entry = self._make_entry(commit)
  194. if entry:
  195. self._num_entries += 1
  196. return entry
  197. def __iter__(self):
  198. results = iter(self._next, None)
  199. if self._reverse:
  200. results = reversed(list(results))
  201. return iter(results)