graph.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
  2. # Copyright (c) 2020 Kevin B. Hendricks, Stratford Ontario Canada
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as public by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. """Implementation of merge-base following the approach of git."""
  20. from heapq import heappop, heappush
  21. from .lru_cache import LRUCache
  22. # priority queue using builtin python minheap tools
  23. # why they do not have a builtin maxheap is simply ridiculous but
  24. # liveable with integer time stamps using negation
  25. class WorkList:
  26. def __init__(self):
  27. self.pq = []
  28. def add(self, item):
  29. dt, cmt = item
  30. heappush(self.pq, (-dt, cmt))
  31. def get(self):
  32. item = heappop(self.pq)
  33. if item:
  34. pr, cmt = item
  35. return -pr, cmt
  36. return None
  37. def iter(self):
  38. for pr, cmt in self.pq:
  39. yield (-pr, cmt)
  40. def _find_lcas(lookup_parents, c1, c2s, lookup_stamp, min_stamp=0):
  41. cands = []
  42. cstates = {}
  43. # Flags to Record State
  44. _ANC_OF_1 = 1 # ancestor of commit 1
  45. _ANC_OF_2 = 2 # ancestor of commit 2
  46. _DNC = 4 # Do Not Consider
  47. _LCA = 8 # potential LCA (Lowest Common Ancestor)
  48. def _has_candidates(wlst, cstates):
  49. for dt, cmt in wlst.iter():
  50. if cmt in cstates:
  51. if not ((cstates[cmt] & _DNC) == _DNC):
  52. return True
  53. return False
  54. # initialize the working list states with ancestry info
  55. # note possibility of c1 being one of c2s should be handled
  56. wlst = WorkList()
  57. cstates[c1] = _ANC_OF_1
  58. wlst.add((lookup_stamp(c1), c1))
  59. for c2 in c2s:
  60. cflags = cstates.get(c2, 0)
  61. cstates[c2] = cflags | _ANC_OF_2
  62. wlst.add((lookup_stamp(c2), c2))
  63. # loop while at least one working list commit is still viable (not marked as _DNC)
  64. # adding any parents to the list in a breadth first manner
  65. while _has_candidates(wlst, cstates):
  66. dt, cmt = wlst.get()
  67. # Look only at ANCESTRY and _DNC flags so that already
  68. # found _LCAs can still be marked _DNC by lower _LCAS
  69. cflags = cstates[cmt] & (_ANC_OF_1 | _ANC_OF_2 | _DNC)
  70. if cflags == (_ANC_OF_1 | _ANC_OF_2):
  71. # potential common ancestor if not already in candidates add it
  72. if not (cstates[cmt] & _LCA) == _LCA:
  73. cstates[cmt] = cstates[cmt] | _LCA
  74. cands.append((dt, cmt))
  75. # mark any parents of this node _DNC as all parents
  76. # would be one generation further removed common ancestors
  77. cflags = cflags | _DNC
  78. parents = lookup_parents(cmt)
  79. if parents:
  80. for pcmt in parents:
  81. pflags = cstates.get(pcmt, 0)
  82. # if this parent was already visited with no new ancestry/flag information
  83. # do not add it to the working list again
  84. if (pflags & cflags) == cflags:
  85. continue
  86. pdt = lookup_stamp(pcmt)
  87. if pdt < min_stamp:
  88. continue
  89. cstates[pcmt] = pflags | cflags
  90. wlst.add((pdt, pcmt))
  91. # walk final candidates removing any superseded by _DNC by later lower _LCAs
  92. # remove any duplicates and sort it so that earliest is first
  93. results = []
  94. for dt, cmt in cands:
  95. if not ((cstates[cmt] & _DNC) == _DNC) and (dt, cmt) not in results:
  96. results.append((dt, cmt))
  97. results.sort(key=lambda x: x[0])
  98. lcas = [cmt for dt, cmt in results]
  99. return lcas
  100. # actual git sorts these based on commit times
  101. def find_merge_base(repo, commit_ids):
  102. """Find lowest common ancestors of commit_ids[0] and *any* of commits_ids[1:].
  103. Args:
  104. repo: Repository object
  105. commit_ids: list of commit ids
  106. Returns:
  107. list of lowest common ancestor commit_ids
  108. """
  109. cmtcache = LRUCache(max_cache=128)
  110. parents_provider = repo.parents_provider()
  111. def lookup_stamp(cmtid):
  112. if cmtid not in cmtcache:
  113. cmtcache[cmtid] = repo.object_store[cmtid]
  114. return cmtcache[cmtid].commit_time
  115. def lookup_parents(cmtid):
  116. commit = None
  117. if cmtid in cmtcache:
  118. commit = cmtcache[cmtid]
  119. # must use parents provider to handle grafts and shallow
  120. return parents_provider.get_parents(cmtid, commit=commit)
  121. if not commit_ids:
  122. return []
  123. c1 = commit_ids[0]
  124. if not len(commit_ids) > 1:
  125. return [c1]
  126. c2s = commit_ids[1:]
  127. if c1 in c2s:
  128. return [c1]
  129. lcas = _find_lcas(lookup_parents, c1, c2s, lookup_stamp)
  130. return lcas
  131. def find_octopus_base(repo, commit_ids):
  132. """Find lowest common ancestors of *all* provided commit_ids.
  133. Args:
  134. repo: Repository
  135. commit_ids: list of commit ids
  136. Returns:
  137. list of lowest common ancestor commit_ids
  138. """
  139. cmtcache = LRUCache(max_cache=128)
  140. parents_provider = repo.parents_provider()
  141. def lookup_stamp(cmtid):
  142. if cmtid not in cmtcache:
  143. cmtcache[cmtid] = repo.object_store[cmtid]
  144. return cmtcache[cmtid].commit_time
  145. def lookup_parents(cmtid):
  146. commit = None
  147. if cmtid in cmtcache:
  148. commit = cmtcache[cmtid]
  149. # must use parents provider to handle grafts and shallow
  150. return parents_provider.get_parents(cmtid, commit=commit)
  151. if not commit_ids:
  152. return []
  153. if len(commit_ids) <= 2:
  154. return find_merge_base(repo, commit_ids)
  155. lcas = [commit_ids[0]]
  156. others = commit_ids[1:]
  157. for cmt in others:
  158. next_lcas = []
  159. for ca in lcas:
  160. res = _find_lcas(lookup_parents, cmt, [ca], lookup_stamp)
  161. next_lcas.extend(res)
  162. lcas = next_lcas[:]
  163. return lcas
  164. def can_fast_forward(repo, c1, c2):
  165. """Is it possible to fast-forward from c1 to c2?
  166. Args:
  167. repo: Repository to retrieve objects from
  168. c1: Commit id for first commit
  169. c2: Commit id for second commit
  170. """
  171. cmtcache = LRUCache(max_cache=128)
  172. parents_provider = repo.parents_provider()
  173. def lookup_stamp(cmtid):
  174. if cmtid not in cmtcache:
  175. cmtcache[cmtid] = repo.object_store[cmtid]
  176. return cmtcache[cmtid].commit_time
  177. def lookup_parents(cmtid):
  178. commit = None
  179. if cmtid in cmtcache:
  180. commit = cmtcache[cmtid]
  181. # must use parents provider to handle grafts and shallow
  182. return parents_provider.get_parents(cmtid, commit=commit)
  183. if c1 == c2:
  184. return True
  185. # Algorithm: Find the common ancestor
  186. min_stamp = lookup_stamp(c1)
  187. lcas = _find_lcas(lookup_parents, c1, [c2], lookup_stamp, min_stamp=min_stamp)
  188. return lcas == [c1]