bisect.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. # bisect.py -- Git bisect algorithm implementation
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Git bisect implementation."""
  21. import os
  22. from collections.abc import Sequence, Set
  23. from typing import Optional
  24. from dulwich.object_store import peel_sha
  25. from dulwich.objects import Commit
  26. from dulwich.repo import Repo
  27. class BisectState:
  28. """Manages the state of a bisect session."""
  29. def __init__(self, repo: Repo) -> None:
  30. """Initialize BisectState.
  31. Args:
  32. repo: Repository to perform bisect on
  33. """
  34. self.repo = repo
  35. self._bisect_dir = os.path.join(repo.controldir(), "BISECT_START")
  36. @property
  37. def is_active(self) -> bool:
  38. """Check if a bisect session is active."""
  39. return os.path.exists(self._bisect_dir)
  40. def start(
  41. self,
  42. bad: Optional[bytes] = None,
  43. good: Optional[Sequence[bytes]] = None,
  44. paths: Optional[Sequence[bytes]] = None,
  45. no_checkout: bool = False,
  46. term_bad: str = "bad",
  47. term_good: str = "good",
  48. ) -> None:
  49. """Start a new bisect session.
  50. Args:
  51. bad: The bad commit SHA (defaults to HEAD)
  52. good: List of good commit SHAs
  53. paths: Optional paths to limit bisect to
  54. no_checkout: If True, don't checkout commits during bisect
  55. term_bad: Term to use for bad commits (default: "bad")
  56. term_good: Term to use for good commits (default: "good")
  57. """
  58. if self.is_active:
  59. raise ValueError("Bisect session already in progress")
  60. # Create bisect state directory
  61. bisect_refs_dir = os.path.join(self.repo.controldir(), "refs", "bisect")
  62. os.makedirs(bisect_refs_dir, exist_ok=True)
  63. # Store current branch/commit
  64. try:
  65. ref_chain, sha = self.repo.refs.follow(b"HEAD")
  66. if sha is None:
  67. # No HEAD exists
  68. raise ValueError("Cannot start bisect: repository has no HEAD")
  69. # Use the first non-HEAD ref in the chain, or the SHA itself
  70. if len(ref_chain) > 1:
  71. current_branch = ref_chain[1] # The actual branch ref
  72. else:
  73. current_branch = sha # Detached HEAD
  74. except KeyError:
  75. # Detached HEAD
  76. try:
  77. current_branch = self.repo.head()
  78. except KeyError:
  79. # No HEAD exists - can't start bisect
  80. raise ValueError("Cannot start bisect: repository has no HEAD")
  81. # Write BISECT_START
  82. with open(self._bisect_dir, "wb") as f:
  83. f.write(current_branch)
  84. # Write BISECT_TERMS
  85. terms_file = os.path.join(self.repo.controldir(), "BISECT_TERMS")
  86. with open(terms_file, "w") as f:
  87. f.write(f"{term_bad}\n{term_good}\n")
  88. # Write BISECT_NAMES (paths)
  89. names_file = os.path.join(self.repo.controldir(), "BISECT_NAMES")
  90. with open(names_file, "w") as f:
  91. if paths:
  92. f.write(
  93. "\n".join(path.decode("utf-8", "replace") for path in paths) + "\n"
  94. )
  95. else:
  96. f.write("\n")
  97. # Initialize BISECT_LOG
  98. log_file = os.path.join(self.repo.controldir(), "BISECT_LOG")
  99. with open(log_file, "w") as f:
  100. f.write("git bisect start\n")
  101. f.write("# status: waiting for both good and bad commits\n")
  102. # Mark bad commit if provided
  103. if bad is not None:
  104. self.mark_bad(bad)
  105. # Mark good commits if provided
  106. if good:
  107. for g in good:
  108. self.mark_good(g)
  109. def mark_bad(self, rev: Optional[bytes] = None) -> Optional[bytes]:
  110. """Mark a commit as bad.
  111. Args:
  112. rev: Commit SHA to mark as bad (defaults to HEAD)
  113. Returns:
  114. The SHA of the next commit to test, or None if bisect is complete
  115. """
  116. if not self.is_active:
  117. raise ValueError("No bisect session in progress")
  118. if rev is None:
  119. rev = self.repo.head()
  120. else:
  121. rev = peel_sha(self.repo.object_store, rev)[1].id
  122. # Write bad ref
  123. bad_ref_path = os.path.join(self.repo.controldir(), "refs", "bisect", "bad")
  124. with open(bad_ref_path, "wb") as f:
  125. f.write(rev + b"\n")
  126. # Update log
  127. self._append_to_log(
  128. f"# bad: [{rev.decode('ascii')}] {self._get_commit_subject(rev)}"
  129. )
  130. self._append_to_log(f"git bisect bad {rev.decode('ascii')}")
  131. return self._find_next_commit()
  132. def mark_good(self, rev: Optional[bytes] = None) -> Optional[bytes]:
  133. """Mark a commit as good.
  134. Args:
  135. rev: Commit SHA to mark as good (defaults to HEAD)
  136. Returns:
  137. The SHA of the next commit to test, or None if bisect is complete
  138. """
  139. if not self.is_active:
  140. raise ValueError("No bisect session in progress")
  141. if rev is None:
  142. rev = self.repo.head()
  143. else:
  144. rev = peel_sha(self.repo.object_store, rev)[1].id
  145. # Write good ref
  146. good_ref_path = os.path.join(
  147. self.repo.controldir(), "refs", "bisect", f"good-{rev.decode('ascii')}"
  148. )
  149. with open(good_ref_path, "wb") as f:
  150. f.write(rev + b"\n")
  151. # Update log
  152. self._append_to_log(
  153. f"# good: [{rev.decode('ascii')}] {self._get_commit_subject(rev)}"
  154. )
  155. self._append_to_log(f"git bisect good {rev.decode('ascii')}")
  156. return self._find_next_commit()
  157. def skip(self, revs: Optional[Sequence[bytes]] = None) -> Optional[bytes]:
  158. """Skip one or more commits.
  159. Args:
  160. revs: List of commits to skip (defaults to [HEAD])
  161. Returns:
  162. The SHA of the next commit to test, or None if bisect is complete
  163. """
  164. if not self.is_active:
  165. raise ValueError("No bisect session in progress")
  166. if revs is None:
  167. revs = [self.repo.head()]
  168. for rev in revs:
  169. rev = peel_sha(self.repo.object_store, rev)[1].id
  170. skip_ref_path = os.path.join(
  171. self.repo.controldir(), "refs", "bisect", f"skip-{rev.decode('ascii')}"
  172. )
  173. with open(skip_ref_path, "wb") as f:
  174. f.write(rev + b"\n")
  175. self._append_to_log(f"git bisect skip {rev.decode('ascii')}")
  176. return self._find_next_commit()
  177. def reset(self, commit: Optional[bytes] = None) -> None:
  178. """Reset bisect state and return to original branch/commit.
  179. Args:
  180. commit: Optional commit to reset to (defaults to original branch/commit)
  181. """
  182. if not self.is_active:
  183. raise ValueError("No bisect session in progress")
  184. # Read original branch/commit
  185. with open(self._bisect_dir, "rb") as f:
  186. original = f.read().strip()
  187. # Clean up bisect files
  188. for filename in [
  189. "BISECT_START",
  190. "BISECT_TERMS",
  191. "BISECT_NAMES",
  192. "BISECT_LOG",
  193. "BISECT_EXPECTED_REV",
  194. "BISECT_ANCESTORS_OK",
  195. ]:
  196. filepath = os.path.join(self.repo.controldir(), filename)
  197. if os.path.exists(filepath):
  198. os.remove(filepath)
  199. # Clean up refs/bisect directory
  200. bisect_refs_dir = os.path.join(self.repo.controldir(), "refs", "bisect")
  201. if os.path.exists(bisect_refs_dir):
  202. for filename in os.listdir(bisect_refs_dir):
  203. os.remove(os.path.join(bisect_refs_dir, filename))
  204. os.rmdir(bisect_refs_dir)
  205. # Reset to target commit/branch
  206. if commit is None:
  207. if original.startswith(b"refs/"):
  208. # It's a branch reference - need to create a symbolic ref
  209. self.repo.refs.set_symbolic_ref(b"HEAD", original)
  210. else:
  211. # It's a commit SHA
  212. self.repo.refs[b"HEAD"] = original
  213. else:
  214. commit = peel_sha(self.repo.object_store, commit)[1].id
  215. self.repo.refs[b"HEAD"] = commit
  216. def get_log(self) -> str:
  217. """Get the bisect log."""
  218. if not self.is_active:
  219. raise ValueError("No bisect session in progress")
  220. log_file = os.path.join(self.repo.controldir(), "BISECT_LOG")
  221. with open(log_file) as f:
  222. return f.read()
  223. def replay(self, log_content: str) -> None:
  224. """Replay a bisect log.
  225. Args:
  226. log_content: The bisect log content to replay
  227. """
  228. # Parse and execute commands from log
  229. for line in log_content.splitlines():
  230. line = line.strip()
  231. if line.startswith("#") or not line:
  232. continue
  233. parts = line.split()
  234. if len(parts) < 3 or parts[0] != "git" or parts[1] != "bisect":
  235. continue
  236. cmd = parts[2]
  237. args = parts[3:] if len(parts) > 3 else []
  238. if cmd == "start":
  239. self.start()
  240. elif cmd == "bad":
  241. rev = args[0].encode("ascii") if args else None
  242. self.mark_bad(rev)
  243. elif cmd == "good":
  244. rev = args[0].encode("ascii") if args else None
  245. self.mark_good(rev)
  246. elif cmd == "skip":
  247. revs = [arg.encode("ascii") for arg in args] if args else None
  248. self.skip(revs)
  249. def _find_next_commit(self) -> Optional[bytes]:
  250. """Find the next commit to test using binary search.
  251. Returns:
  252. The SHA of the next commit to test, or None if bisect is complete
  253. """
  254. # Get bad commit
  255. bad_ref_path = os.path.join(self.repo.controldir(), "refs", "bisect", "bad")
  256. if not os.path.exists(bad_ref_path):
  257. self._append_to_log("# status: waiting for both good and bad commits")
  258. return None
  259. with open(bad_ref_path, "rb") as f:
  260. bad_sha = f.read().strip()
  261. # Get all good commits
  262. good_shas = []
  263. bisect_refs_dir = os.path.join(self.repo.controldir(), "refs", "bisect")
  264. for filename in os.listdir(bisect_refs_dir):
  265. if filename.startswith("good-"):
  266. with open(os.path.join(bisect_refs_dir, filename), "rb") as f:
  267. good_shas.append(f.read().strip())
  268. if not good_shas:
  269. self._append_to_log(
  270. "# status: waiting for good commit(s), bad commit known"
  271. )
  272. return None
  273. # Get skip commits
  274. skip_shas = set()
  275. for filename in os.listdir(bisect_refs_dir):
  276. if filename.startswith("skip-"):
  277. with open(os.path.join(bisect_refs_dir, filename), "rb") as f:
  278. skip_shas.add(f.read().strip())
  279. # Find commits between good and bad
  280. candidates = self._find_bisect_candidates(bad_sha, good_shas, skip_shas)
  281. if not candidates:
  282. # Bisect complete - the first bad commit is found
  283. self._append_to_log(
  284. f"# first bad commit: [{bad_sha.decode('ascii')}] "
  285. f"{self._get_commit_subject(bad_sha)}"
  286. )
  287. return None
  288. # Find midpoint
  289. mid_idx = len(candidates) // 2
  290. next_commit = candidates[mid_idx]
  291. # Write BISECT_EXPECTED_REV
  292. expected_file = os.path.join(self.repo.controldir(), "BISECT_EXPECTED_REV")
  293. with open(expected_file, "wb") as f:
  294. f.write(next_commit + b"\n")
  295. # Update status in log
  296. steps_remaining = self._estimate_steps(len(candidates))
  297. self._append_to_log(
  298. f"Bisecting: {len(candidates) - 1} revisions left to test after this "
  299. f"(roughly {steps_remaining} steps)"
  300. )
  301. self._append_to_log(
  302. f"[{next_commit.decode('ascii')}] {self._get_commit_subject(next_commit)}"
  303. )
  304. return next_commit
  305. def _find_bisect_candidates(
  306. self, bad_sha: bytes, good_shas: Sequence[bytes], skip_shas: Set[bytes]
  307. ) -> list[bytes]:
  308. """Find all commits between good and bad commits.
  309. Args:
  310. bad_sha: The bad commit SHA
  311. good_shas: List of good commit SHAs
  312. skip_shas: Set of commits to skip
  313. Returns:
  314. List of candidate commit SHAs in topological order
  315. """
  316. # Use git's graph walking to find commits
  317. # This is a simplified version - a full implementation would need
  318. # to handle merge commits properly
  319. candidates = []
  320. visited = set(good_shas)
  321. queue = [bad_sha]
  322. while queue:
  323. sha = queue.pop(0)
  324. if sha in visited or sha in skip_shas:
  325. continue
  326. visited.add(sha)
  327. commit = self.repo.object_store[sha]
  328. # Don't include good commits
  329. if sha not in good_shas:
  330. candidates.append(sha)
  331. # Add parents to queue
  332. if isinstance(commit, Commit):
  333. for parent in commit.parents:
  334. if parent not in visited:
  335. queue.append(parent)
  336. # Remove the bad commit itself
  337. if bad_sha in candidates:
  338. candidates.remove(bad_sha)
  339. return candidates
  340. def _get_commit_subject(self, sha: bytes) -> str:
  341. """Get the subject line of a commit message."""
  342. obj = self.repo.object_store[sha]
  343. if isinstance(obj, Commit):
  344. message = obj.message.decode("utf-8", errors="replace")
  345. lines = message.split("\n")
  346. return lines[0] if lines else ""
  347. return ""
  348. def _append_to_log(self, line: str) -> None:
  349. """Append a line to the bisect log."""
  350. log_file = os.path.join(self.repo.controldir(), "BISECT_LOG")
  351. with open(log_file, "a") as f:
  352. f.write(line + "\n")
  353. def _estimate_steps(self, num_candidates: int) -> int:
  354. """Estimate the number of steps remaining in bisect."""
  355. if num_candidates <= 1:
  356. return 0
  357. steps = 0
  358. while num_candidates > 1:
  359. num_candidates //= 2
  360. steps += 1
  361. return steps