graph.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
  2. # Copyright (c) 2020 Kevin B. Hendricks, Stratford Ontario Canada
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as public by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. """Implementation of merge-base following the approach of git."""
  21. from collections.abc import Iterator
  22. from heapq import heappop, heappush
  23. from typing import TYPE_CHECKING, Any, Callable, Generic, Optional, TypeVar
  24. if TYPE_CHECKING:
  25. from .repo import BaseRepo
  26. from .lru_cache import LRUCache
  27. from .objects import ObjectID
  28. T = TypeVar("T")
  29. # priority queue using builtin python minheap tools
  30. # why they do not have a builtin maxheap is simply ridiculous but
  31. # liveable with integer time stamps using negation
  32. class WorkList(Generic[T]):
  33. def __init__(self) -> None:
  34. self.pq: list[tuple[int, T]] = []
  35. def add(self, item: tuple[int, T]) -> None:
  36. dt, cmt = item
  37. heappush(self.pq, (-dt, cmt))
  38. def get(self) -> Optional[tuple[int, T]]:
  39. item = heappop(self.pq)
  40. if item:
  41. pr, cmt = item
  42. return -pr, cmt
  43. return None
  44. def iter(self) -> Iterator[tuple[int, T]]:
  45. for pr, cmt in self.pq:
  46. yield (-pr, cmt)
  47. def _find_lcas(
  48. lookup_parents: Callable[[ObjectID], list[ObjectID]],
  49. c1: ObjectID,
  50. c2s: list[ObjectID],
  51. lookup_stamp: Callable[[ObjectID], int],
  52. min_stamp: int = 0,
  53. shallows: Optional[set[ObjectID]] = None,
  54. ) -> list[ObjectID]:
  55. cands = []
  56. cstates = {}
  57. # Flags to Record State
  58. _ANC_OF_1 = 1 # ancestor of commit 1
  59. _ANC_OF_2 = 2 # ancestor of commit 2
  60. _DNC = 4 # Do Not Consider
  61. _LCA = 8 # potential LCA (Lowest Common Ancestor)
  62. def _has_candidates(wlst: WorkList[ObjectID], cstates: dict[ObjectID, int]) -> bool:
  63. for dt, cmt in wlst.iter():
  64. if cmt in cstates:
  65. if not ((cstates[cmt] & _DNC) == _DNC):
  66. return True
  67. return False
  68. # initialize the working list states with ancestry info
  69. # note possibility of c1 being one of c2s should be handled
  70. wlst: WorkList[bytes] = WorkList()
  71. cstates[c1] = _ANC_OF_1
  72. try:
  73. wlst.add((lookup_stamp(c1), c1))
  74. except KeyError:
  75. # If c1 doesn't exist and we have shallow commits, it might be a missing parent
  76. if shallows is None or not shallows:
  77. raise
  78. # For missing commits in shallow repos, use a minimal timestamp
  79. wlst.add((0, c1))
  80. for c2 in c2s:
  81. cflags = cstates.get(c2, 0)
  82. cstates[c2] = cflags | _ANC_OF_2
  83. try:
  84. wlst.add((lookup_stamp(c2), c2))
  85. except KeyError:
  86. # If c2 doesn't exist and we have shallow commits, it might be a missing parent
  87. if shallows is None or not shallows:
  88. raise
  89. # For missing commits in shallow repos, use a minimal timestamp
  90. wlst.add((0, c2))
  91. # loop while at least one working list commit is still viable (not marked as _DNC)
  92. # adding any parents to the list in a breadth first manner
  93. while _has_candidates(wlst, cstates):
  94. result = wlst.get()
  95. if result is None:
  96. break
  97. dt, cmt = result
  98. # Look only at ANCESTRY and _DNC flags so that already
  99. # found _LCAs can still be marked _DNC by lower _LCAS
  100. cflags = cstates[cmt] & (_ANC_OF_1 | _ANC_OF_2 | _DNC)
  101. if cflags == (_ANC_OF_1 | _ANC_OF_2):
  102. # potential common ancestor if not already in candidates add it
  103. if not (cstates[cmt] & _LCA) == _LCA:
  104. cstates[cmt] = cstates[cmt] | _LCA
  105. cands.append((dt, cmt))
  106. # mark any parents of this node _DNC as all parents
  107. # would be one generation further removed common ancestors
  108. cflags = cflags | _DNC
  109. try:
  110. parents = lookup_parents(cmt)
  111. except KeyError:
  112. # If we can't get parents in a shallow repo, skip this node
  113. # This is safer than pretending it has no parents
  114. if shallows is not None and shallows:
  115. continue
  116. raise
  117. if parents:
  118. for pcmt in parents:
  119. pflags = cstates.get(pcmt, 0)
  120. # if this parent was already visited with no new ancestry/flag information
  121. # do not add it to the working list again
  122. if (pflags & cflags) == cflags:
  123. continue
  124. try:
  125. pdt = lookup_stamp(pcmt)
  126. except KeyError:
  127. # Parent doesn't exist - if we're in a shallow repo, skip it
  128. if shallows is not None and shallows:
  129. continue
  130. raise
  131. if pdt < min_stamp:
  132. continue
  133. cstates[pcmt] = pflags | cflags
  134. wlst.add((pdt, pcmt))
  135. # walk final candidates removing any superseded by _DNC by later lower _LCAs
  136. # remove any duplicates and sort it so that earliest is first
  137. results = []
  138. for dt, cmt in cands:
  139. if not ((cstates[cmt] & _DNC) == _DNC) and (dt, cmt) not in results:
  140. results.append((dt, cmt))
  141. results.sort(key=lambda x: x[0])
  142. lcas = [cmt for dt, cmt in results]
  143. return lcas
  144. # actual git sorts these based on commit times
  145. def find_merge_base(repo: "BaseRepo", commit_ids: list[ObjectID]) -> list[ObjectID]:
  146. """Find lowest common ancestors of commit_ids[0] and *any* of commits_ids[1:].
  147. Args:
  148. repo: Repository object
  149. commit_ids: list of commit ids
  150. Returns:
  151. list of lowest common ancestor commit_ids
  152. """
  153. cmtcache: LRUCache[ObjectID, Any] = LRUCache(max_cache=128)
  154. parents_provider = repo.parents_provider()
  155. def lookup_stamp(cmtid: ObjectID) -> int:
  156. if cmtid not in cmtcache:
  157. cmtcache[cmtid] = repo.object_store[cmtid]
  158. return cmtcache[cmtid].commit_time
  159. def lookup_parents(cmtid: ObjectID) -> list[ObjectID]:
  160. commit = None
  161. if cmtid in cmtcache:
  162. commit = cmtcache[cmtid]
  163. # must use parents provider to handle grafts and shallow
  164. return parents_provider.get_parents(cmtid, commit=commit)
  165. if not commit_ids:
  166. return []
  167. c1 = commit_ids[0]
  168. if not len(commit_ids) > 1:
  169. return [c1]
  170. c2s = commit_ids[1:]
  171. if c1 in c2s:
  172. return [c1]
  173. lcas = _find_lcas(
  174. lookup_parents, c1, c2s, lookup_stamp, shallows=parents_provider.shallows
  175. )
  176. return lcas
  177. def find_octopus_base(repo: "BaseRepo", commit_ids: list[ObjectID]) -> list[ObjectID]:
  178. """Find lowest common ancestors of *all* provided commit_ids.
  179. Args:
  180. repo: Repository
  181. commit_ids: list of commit ids
  182. Returns:
  183. list of lowest common ancestor commit_ids
  184. """
  185. cmtcache: LRUCache[ObjectID, Any] = LRUCache(max_cache=128)
  186. parents_provider = repo.parents_provider()
  187. def lookup_stamp(cmtid: ObjectID) -> int:
  188. if cmtid not in cmtcache:
  189. cmtcache[cmtid] = repo.object_store[cmtid]
  190. return cmtcache[cmtid].commit_time
  191. def lookup_parents(cmtid: ObjectID) -> list[ObjectID]:
  192. commit = None
  193. if cmtid in cmtcache:
  194. commit = cmtcache[cmtid]
  195. # must use parents provider to handle grafts and shallow
  196. return parents_provider.get_parents(cmtid, commit=commit)
  197. if not commit_ids:
  198. return []
  199. if len(commit_ids) <= 2:
  200. return find_merge_base(repo, commit_ids)
  201. lcas = [commit_ids[0]]
  202. others = commit_ids[1:]
  203. for cmt in others:
  204. next_lcas = []
  205. for ca in lcas:
  206. res = _find_lcas(
  207. lookup_parents,
  208. cmt,
  209. [ca],
  210. lookup_stamp,
  211. shallows=parents_provider.shallows,
  212. )
  213. next_lcas.extend(res)
  214. lcas = next_lcas[:]
  215. return lcas
  216. def can_fast_forward(repo: "BaseRepo", c1: bytes, c2: bytes) -> bool:
  217. """Is it possible to fast-forward from c1 to c2?
  218. Args:
  219. repo: Repository to retrieve objects from
  220. c1: Commit id for first commit
  221. c2: Commit id for second commit
  222. """
  223. cmtcache: LRUCache[ObjectID, Any] = LRUCache(max_cache=128)
  224. parents_provider = repo.parents_provider()
  225. def lookup_stamp(cmtid: ObjectID) -> int:
  226. if cmtid not in cmtcache:
  227. cmtcache[cmtid] = repo.object_store[cmtid]
  228. return cmtcache[cmtid].commit_time
  229. def lookup_parents(cmtid: ObjectID) -> list[ObjectID]:
  230. commit = None
  231. if cmtid in cmtcache:
  232. commit = cmtcache[cmtid]
  233. # must use parents provider to handle grafts and shallow
  234. return parents_provider.get_parents(cmtid, commit=commit)
  235. if c1 == c2:
  236. return True
  237. # Algorithm: Find the common ancestor
  238. try:
  239. min_stamp = lookup_stamp(c1)
  240. except KeyError:
  241. # If c1 doesn't exist in the object store, we can't determine fast-forward
  242. # This can happen in shallow clones where c1 is a missing parent
  243. # Check if any shallow commits have c1 as a parent
  244. if parents_provider.shallows:
  245. # We're in a shallow repository and c1 doesn't exist
  246. # We can't determine if fast-forward is possible
  247. return False
  248. raise
  249. lcas = _find_lcas(
  250. lookup_parents,
  251. c1,
  252. [c2],
  253. lookup_stamp,
  254. min_stamp=min_stamp,
  255. shallows=parents_provider.shallows,
  256. )
  257. return lcas == [c1]