2
0

bisect.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. # bisect.py -- Git bisect algorithm implementation
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Git bisect implementation."""
  21. import os
  22. from typing import Optional
  23. from dulwich.object_store import peel_sha
  24. from dulwich.objects import Commit
  25. from dulwich.repo import Repo
  26. class BisectState:
  27. """Manages the state of a bisect session."""
  28. def __init__(self, repo: Repo) -> None:
  29. self.repo = repo
  30. self._bisect_dir = os.path.join(repo.controldir(), "BISECT_START")
  31. @property
  32. def is_active(self) -> bool:
  33. """Check if a bisect session is active."""
  34. return os.path.exists(self._bisect_dir)
  35. def start(
  36. self,
  37. bad: Optional[bytes] = None,
  38. good: Optional[list[bytes]] = None,
  39. paths: Optional[list[str]] = None,
  40. no_checkout: bool = False,
  41. term_bad: str = "bad",
  42. term_good: str = "good",
  43. ) -> None:
  44. """Start a new bisect session.
  45. Args:
  46. bad: The bad commit SHA (defaults to HEAD)
  47. good: List of good commit SHAs
  48. paths: Optional paths to limit bisect to
  49. no_checkout: If True, don't checkout commits during bisect
  50. term_bad: Term to use for bad commits (default: "bad")
  51. term_good: Term to use for good commits (default: "good")
  52. """
  53. if self.is_active:
  54. raise ValueError("Bisect session already in progress")
  55. # Create bisect state directory
  56. bisect_refs_dir = os.path.join(self.repo.controldir(), "refs", "bisect")
  57. os.makedirs(bisect_refs_dir, exist_ok=True)
  58. # Store current branch/commit
  59. try:
  60. ref_chain, sha = self.repo.refs.follow(b"HEAD")
  61. if sha is None:
  62. # No HEAD exists
  63. raise ValueError("Cannot start bisect: repository has no HEAD")
  64. # Use the first non-HEAD ref in the chain, or the SHA itself
  65. if len(ref_chain) > 1:
  66. current_branch = ref_chain[1] # The actual branch ref
  67. else:
  68. current_branch = sha # Detached HEAD
  69. except KeyError:
  70. # Detached HEAD
  71. try:
  72. current_branch = self.repo.head()
  73. except KeyError:
  74. # No HEAD exists - can't start bisect
  75. raise ValueError("Cannot start bisect: repository has no HEAD")
  76. # Write BISECT_START
  77. with open(self._bisect_dir, "wb") as f:
  78. f.write(current_branch)
  79. # Write BISECT_TERMS
  80. terms_file = os.path.join(self.repo.controldir(), "BISECT_TERMS")
  81. with open(terms_file, "w") as f:
  82. f.write(f"{term_bad}\n{term_good}\n")
  83. # Write BISECT_NAMES (paths)
  84. names_file = os.path.join(self.repo.controldir(), "BISECT_NAMES")
  85. with open(names_file, "w") as f:
  86. if paths:
  87. f.write("\n".join(paths) + "\n")
  88. else:
  89. f.write("\n")
  90. # Initialize BISECT_LOG
  91. log_file = os.path.join(self.repo.controldir(), "BISECT_LOG")
  92. with open(log_file, "w") as f:
  93. f.write("git bisect start\n")
  94. f.write("# status: waiting for both good and bad commits\n")
  95. # Mark bad commit if provided
  96. if bad is not None:
  97. self.mark_bad(bad)
  98. # Mark good commits if provided
  99. if good:
  100. for g in good:
  101. self.mark_good(g)
  102. def mark_bad(self, rev: Optional[bytes] = None) -> Optional[bytes]:
  103. """Mark a commit as bad.
  104. Args:
  105. rev: Commit SHA to mark as bad (defaults to HEAD)
  106. Returns:
  107. The SHA of the next commit to test, or None if bisect is complete
  108. """
  109. if not self.is_active:
  110. raise ValueError("No bisect session in progress")
  111. if rev is None:
  112. rev = self.repo.head()
  113. else:
  114. rev = peel_sha(self.repo.object_store, rev)[1].id
  115. # Write bad ref
  116. bad_ref_path = os.path.join(self.repo.controldir(), "refs", "bisect", "bad")
  117. with open(bad_ref_path, "wb") as f:
  118. f.write(rev + b"\n")
  119. # Update log
  120. self._append_to_log(
  121. f"# bad: [{rev.decode('ascii')}] {self._get_commit_subject(rev)}"
  122. )
  123. self._append_to_log(f"git bisect bad {rev.decode('ascii')}")
  124. return self._find_next_commit()
  125. def mark_good(self, rev: Optional[bytes] = None) -> Optional[bytes]:
  126. """Mark a commit as good.
  127. Args:
  128. rev: Commit SHA to mark as good (defaults to HEAD)
  129. Returns:
  130. The SHA of the next commit to test, or None if bisect is complete
  131. """
  132. if not self.is_active:
  133. raise ValueError("No bisect session in progress")
  134. if rev is None:
  135. rev = self.repo.head()
  136. else:
  137. rev = peel_sha(self.repo.object_store, rev)[1].id
  138. # Write good ref
  139. good_ref_path = os.path.join(
  140. self.repo.controldir(), "refs", "bisect", f"good-{rev.decode('ascii')}"
  141. )
  142. with open(good_ref_path, "wb") as f:
  143. f.write(rev + b"\n")
  144. # Update log
  145. self._append_to_log(
  146. f"# good: [{rev.decode('ascii')}] {self._get_commit_subject(rev)}"
  147. )
  148. self._append_to_log(f"git bisect good {rev.decode('ascii')}")
  149. return self._find_next_commit()
  150. def skip(self, revs: Optional[list[bytes]] = None) -> Optional[bytes]:
  151. """Skip one or more commits.
  152. Args:
  153. revs: List of commits to skip (defaults to [HEAD])
  154. Returns:
  155. The SHA of the next commit to test, or None if bisect is complete
  156. """
  157. if not self.is_active:
  158. raise ValueError("No bisect session in progress")
  159. if revs is None:
  160. revs = [self.repo.head()]
  161. for rev in revs:
  162. rev = peel_sha(self.repo.object_store, rev)[1].id
  163. skip_ref_path = os.path.join(
  164. self.repo.controldir(), "refs", "bisect", f"skip-{rev.decode('ascii')}"
  165. )
  166. with open(skip_ref_path, "wb") as f:
  167. f.write(rev + b"\n")
  168. self._append_to_log(f"git bisect skip {rev.decode('ascii')}")
  169. return self._find_next_commit()
  170. def reset(self, commit: Optional[bytes] = None) -> None:
  171. """Reset bisect state and return to original branch/commit.
  172. Args:
  173. commit: Optional commit to reset to (defaults to original branch/commit)
  174. """
  175. if not self.is_active:
  176. raise ValueError("No bisect session in progress")
  177. # Read original branch/commit
  178. with open(self._bisect_dir, "rb") as f:
  179. original = f.read().strip()
  180. # Clean up bisect files
  181. for filename in [
  182. "BISECT_START",
  183. "BISECT_TERMS",
  184. "BISECT_NAMES",
  185. "BISECT_LOG",
  186. "BISECT_EXPECTED_REV",
  187. "BISECT_ANCESTORS_OK",
  188. ]:
  189. filepath = os.path.join(self.repo.controldir(), filename)
  190. if os.path.exists(filepath):
  191. os.remove(filepath)
  192. # Clean up refs/bisect directory
  193. bisect_refs_dir = os.path.join(self.repo.controldir(), "refs", "bisect")
  194. if os.path.exists(bisect_refs_dir):
  195. for filename in os.listdir(bisect_refs_dir):
  196. os.remove(os.path.join(bisect_refs_dir, filename))
  197. os.rmdir(bisect_refs_dir)
  198. # Reset to target commit/branch
  199. if commit is None:
  200. if original.startswith(b"refs/"):
  201. # It's a branch reference - need to create a symbolic ref
  202. self.repo.refs.set_symbolic_ref(b"HEAD", original)
  203. else:
  204. # It's a commit SHA
  205. self.repo.refs[b"HEAD"] = original
  206. else:
  207. commit = peel_sha(self.repo.object_store, commit)[1].id
  208. self.repo.refs[b"HEAD"] = commit
  209. def get_log(self) -> str:
  210. """Get the bisect log."""
  211. if not self.is_active:
  212. raise ValueError("No bisect session in progress")
  213. log_file = os.path.join(self.repo.controldir(), "BISECT_LOG")
  214. with open(log_file) as f:
  215. return f.read()
  216. def replay(self, log_content: str) -> None:
  217. """Replay a bisect log.
  218. Args:
  219. log_content: The bisect log content to replay
  220. """
  221. # Parse and execute commands from log
  222. for line in log_content.splitlines():
  223. line = line.strip()
  224. if line.startswith("#") or not line:
  225. continue
  226. parts = line.split()
  227. if len(parts) < 3 or parts[0] != "git" or parts[1] != "bisect":
  228. continue
  229. cmd = parts[2]
  230. args = parts[3:] if len(parts) > 3 else []
  231. if cmd == "start":
  232. self.start()
  233. elif cmd == "bad":
  234. rev = args[0].encode("ascii") if args else None
  235. self.mark_bad(rev)
  236. elif cmd == "good":
  237. rev = args[0].encode("ascii") if args else None
  238. self.mark_good(rev)
  239. elif cmd == "skip":
  240. revs = [arg.encode("ascii") for arg in args] if args else None
  241. self.skip(revs)
  242. def _find_next_commit(self) -> Optional[bytes]:
  243. """Find the next commit to test using binary search.
  244. Returns:
  245. The SHA of the next commit to test, or None if bisect is complete
  246. """
  247. # Get bad commit
  248. bad_ref_path = os.path.join(self.repo.controldir(), "refs", "bisect", "bad")
  249. if not os.path.exists(bad_ref_path):
  250. self._append_to_log("# status: waiting for both good and bad commits")
  251. return None
  252. with open(bad_ref_path, "rb") as f:
  253. bad_sha = f.read().strip()
  254. # Get all good commits
  255. good_shas = []
  256. bisect_refs_dir = os.path.join(self.repo.controldir(), "refs", "bisect")
  257. for filename in os.listdir(bisect_refs_dir):
  258. if filename.startswith("good-"):
  259. with open(os.path.join(bisect_refs_dir, filename), "rb") as f:
  260. good_shas.append(f.read().strip())
  261. if not good_shas:
  262. self._append_to_log(
  263. "# status: waiting for good commit(s), bad commit known"
  264. )
  265. return None
  266. # Get skip commits
  267. skip_shas = set()
  268. for filename in os.listdir(bisect_refs_dir):
  269. if filename.startswith("skip-"):
  270. with open(os.path.join(bisect_refs_dir, filename), "rb") as f:
  271. skip_shas.add(f.read().strip())
  272. # Find commits between good and bad
  273. candidates = self._find_bisect_candidates(bad_sha, good_shas, skip_shas)
  274. if not candidates:
  275. # Bisect complete - the first bad commit is found
  276. self._append_to_log(
  277. f"# first bad commit: [{bad_sha.decode('ascii')}] "
  278. f"{self._get_commit_subject(bad_sha)}"
  279. )
  280. return None
  281. # Find midpoint
  282. mid_idx = len(candidates) // 2
  283. next_commit = candidates[mid_idx]
  284. # Write BISECT_EXPECTED_REV
  285. expected_file = os.path.join(self.repo.controldir(), "BISECT_EXPECTED_REV")
  286. with open(expected_file, "wb") as f:
  287. f.write(next_commit + b"\n")
  288. # Update status in log
  289. steps_remaining = self._estimate_steps(len(candidates))
  290. self._append_to_log(
  291. f"Bisecting: {len(candidates) - 1} revisions left to test after this "
  292. f"(roughly {steps_remaining} steps)"
  293. )
  294. self._append_to_log(
  295. f"[{next_commit.decode('ascii')}] {self._get_commit_subject(next_commit)}"
  296. )
  297. return next_commit
  298. def _find_bisect_candidates(
  299. self, bad_sha: bytes, good_shas: list[bytes], skip_shas: set
  300. ) -> list[bytes]:
  301. """Find all commits between good and bad commits.
  302. Args:
  303. bad_sha: The bad commit SHA
  304. good_shas: List of good commit SHAs
  305. skip_shas: Set of commits to skip
  306. Returns:
  307. List of candidate commit SHAs in topological order
  308. """
  309. # Use git's graph walking to find commits
  310. # This is a simplified version - a full implementation would need
  311. # to handle merge commits properly
  312. candidates = []
  313. visited = set(good_shas)
  314. queue = [bad_sha]
  315. while queue:
  316. sha = queue.pop(0)
  317. if sha in visited or sha in skip_shas:
  318. continue
  319. visited.add(sha)
  320. commit = self.repo.object_store[sha]
  321. # Don't include good commits
  322. if sha not in good_shas:
  323. candidates.append(sha)
  324. # Add parents to queue
  325. if isinstance(commit, Commit):
  326. for parent in commit.parents:
  327. if parent not in visited:
  328. queue.append(parent)
  329. # Remove the bad commit itself
  330. if bad_sha in candidates:
  331. candidates.remove(bad_sha)
  332. return candidates
  333. def _get_commit_subject(self, sha: bytes) -> str:
  334. """Get the subject line of a commit message."""
  335. obj = self.repo.object_store[sha]
  336. if isinstance(obj, Commit):
  337. message = obj.message.decode("utf-8", errors="replace")
  338. return message.split("\n")[0]
  339. return ""
  340. def _append_to_log(self, line: str) -> None:
  341. """Append a line to the bisect log."""
  342. log_file = os.path.join(self.repo.controldir(), "BISECT_LOG")
  343. with open(log_file, "a") as f:
  344. f.write(line + "\n")
  345. def _estimate_steps(self, num_candidates: int) -> int:
  346. """Estimate the number of steps remaining in bisect."""
  347. if num_candidates <= 1:
  348. return 0
  349. steps = 0
  350. while num_candidates > 1:
  351. num_candidates //= 2
  352. steps += 1
  353. return steps