walk.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # walk.py -- General implementation of walking commits and their contents.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # or (at your option) any later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """General implementation of walking commits and their contents."""
  19. import heapq
  20. import itertools
  21. from dulwich.diff_tree import (
  22. tree_changes,
  23. tree_changes_for_merge,
  24. )
  25. ORDER_DATE = 'date'
  26. class WalkEntry(object):
  27. """Object encapsulating a single result from a walk."""
  28. def __init__(self, store, commit):
  29. self.commit = commit
  30. self._store = store
  31. self._changes = None
  32. def changes(self):
  33. """Get the tree changes for this entry.
  34. :return: For commits with up to one parent, a list of TreeChange
  35. objects; if the commit has no parents, these will be relative to the
  36. empty tree. For merge commits, a list of lists of TreeChange
  37. objects; see dulwich.diff.tree_changes_for_merge.
  38. """
  39. if self._changes is None:
  40. commit = self.commit
  41. if not commit.parents:
  42. changes = list(tree_changes(self._store, None, commit.tree))
  43. elif len(commit.parents) == 1:
  44. ptree = self._store[commit.parents[0]].tree
  45. changes = list(tree_changes(self._store, ptree, commit.tree))
  46. else:
  47. ptrees = [self._store[p].tree for p in commit.parents]
  48. changes = list(tree_changes_for_merge(self._store, ptrees,
  49. commit.tree))
  50. self._changes = changes
  51. return self._changes
  52. def __repr__(self):
  53. return '<WalkEntry commit=%s, changes=%r>' % (
  54. self.commit.id, self.changes())
  55. class Walker(object):
  56. """Object for performing a walk of commits in a store.
  57. Walker objects are initialized with a store and other options and can then
  58. be treated as iterators of Commit objects.
  59. """
  60. def __init__(self, store, include, exclude=None, order=ORDER_DATE,
  61. reverse=False, max_entries=None):
  62. """Constructor.
  63. :param store: ObjectStore instance for looking up objects.
  64. :param include: Iterable of SHAs of commits to include along with their
  65. ancestors.
  66. :param exclude: Iterable of SHAs of commits to exclude along with their
  67. ancestors, overriding includes.
  68. :param order: ORDER_* constant specifying the order of results. Anything
  69. other than ORDER_DATE may result in O(n) memory usage.
  70. :param reverse: If True, reverse the order of output, requiring O(n)
  71. memory.
  72. :param max_entries: The maximum number of entries to yield, or None for
  73. no limit.
  74. """
  75. self._store = store
  76. if order not in (ORDER_DATE,):
  77. raise ValueError('Unknown walk order %s' % order)
  78. self._order = order
  79. self._reverse = reverse
  80. self._max_entries = max_entries
  81. exclude = exclude or []
  82. self._excluded = set(exclude)
  83. self._pq = []
  84. self._pq_set = set()
  85. self._done = set()
  86. for commit_id in itertools.chain(include, exclude):
  87. self._push(store[commit_id])
  88. def _push(self, commit):
  89. sha = commit.id
  90. if sha not in self._pq_set and sha not in self._done:
  91. heapq.heappush(self._pq, (-commit.commit_time, commit))
  92. self._pq_set.add(sha)
  93. def _pop(self):
  94. while self._pq:
  95. _, commit = heapq.heappop(self._pq)
  96. sha = commit.id
  97. self._pq_set.remove(sha)
  98. if sha in self._done:
  99. continue
  100. is_excluded = sha in self._excluded
  101. if is_excluded:
  102. self._excluded.update(commit.parents)
  103. self._done.add(commit.id)
  104. for parent_id in commit.parents:
  105. self._push(self._store[parent_id])
  106. if not is_excluded:
  107. return commit
  108. return None
  109. def _make_entry(self, commit):
  110. if commit is None:
  111. return None
  112. return WalkEntry(self._store, commit)
  113. def _next(self):
  114. limit = self._max_entries
  115. if limit is not None and len(self._done) >= limit:
  116. return None
  117. return self._make_entry(self._pop())
  118. def __iter__(self):
  119. results = iter(self._next, None)
  120. if self._reverse:
  121. results = reversed(list(results))
  122. return iter(results)