test_commit_graph.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
# test_commit_graph.py -- Compatibility tests for commit graph functionality
# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
  9. """Compatibility tests for Git commit graph functionality.
  10. These tests verify that dulwich's commit graph implementation behaves
  11. identically to C Git's implementation.
  12. """
  13. import os
  14. import shutil
  15. import tempfile
  16. from dulwich.commit_graph import find_commit_graph_file, read_commit_graph
  17. from dulwich.graph import can_fast_forward, find_merge_base
  18. from dulwich.repo import Repo
  19. from .utils import CompatTestCase, run_git_or_fail
  20. class CommitGraphCompatTests(CompatTestCase):
  21. """Compatibility tests for commit graph functionality."""
  22. # Commit graph was introduced in Git 2.18.0
  23. min_git_version = (2, 18, 0)
  24. def setUp(self):
  25. super().setUp()
  26. self.test_dir = tempfile.mkdtemp()
  27. self.repo_path = os.path.join(self.test_dir, "test-repo")
  28. # Set up git identity to avoid committer identity errors
  29. self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
  30. self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
  31. self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
  32. self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")
  33. def tearDown(self):
  34. shutil.rmtree(self.test_dir)
  35. def create_test_repo_with_history(self):
  36. """Create a test repository with some commit history."""
  37. # Initialize repository
  38. run_git_or_fail(["init"], cwd=self.test_dir)
  39. os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)
  40. work_dir = os.path.join(self.test_dir, "work")
  41. os.makedirs(work_dir)
  42. # Create .git file pointing to our repo
  43. with open(os.path.join(work_dir, ".git"), "w") as f:
  44. f.write(f"gitdir: {self.repo_path}\n")
  45. # Create some commits
  46. commits = []
  47. for i in range(5):
  48. filename = f"file{i}.txt"
  49. with open(os.path.join(work_dir, filename), "w") as f:
  50. f.write(f"Content {i}\n")
  51. run_git_or_fail(["add", filename], cwd=work_dir)
  52. run_git_or_fail(
  53. [
  54. "commit",
  55. "-m",
  56. f"Commit {i}",
  57. "--author",
  58. "Test Author <test@example.com>",
  59. "--date",
  60. f"2024-01-0{i + 1} 12:00:00 +0000",
  61. ],
  62. cwd=work_dir,
  63. )
  64. # Get the commit SHA
  65. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  66. commits.append(result.strip())
  67. # Create a branch and merge
  68. run_git_or_fail(["checkout", "-b", "feature"], cwd=work_dir)
  69. with open(os.path.join(work_dir, "feature.txt"), "w") as f:
  70. f.write("Feature content\n")
  71. run_git_or_fail(["add", "feature.txt"], cwd=work_dir)
  72. run_git_or_fail(
  73. [
  74. "commit",
  75. "-m",
  76. "Feature commit",
  77. "--author",
  78. "Test Author <test@example.com>",
  79. "--date",
  80. "2024-01-06 12:00:00 +0000",
  81. ],
  82. cwd=work_dir,
  83. )
  84. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  85. feature_commit = result.strip()
  86. # Merge back to master
  87. run_git_or_fail(["checkout", "master"], cwd=work_dir)
  88. run_git_or_fail(
  89. ["merge", "feature", "--no-ff", "-m", "Merge feature"], cwd=work_dir
  90. )
  91. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  92. merge_commit = result.strip()
  93. commits.extend([feature_commit, merge_commit])
  94. return commits, work_dir
  95. def test_commit_graph_generation_and_reading(self):
  96. """Test that dulwich can read commit graphs generated by C Git."""
  97. commits, work_dir = self.create_test_repo_with_history()
  98. # Generate commit graph with C Git
  99. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  100. # Verify commit graph file exists
  101. graph_file = find_commit_graph_file(self.repo_path)
  102. self.assertIsNotNone(graph_file, "Commit graph file should exist")
  103. # Read with dulwich
  104. commit_graph = read_commit_graph(graph_file)
  105. self.assertIsNotNone(commit_graph, "Should be able to read commit graph")
  106. # Verify we have the expected number of commits
  107. self.assertGreater(len(commit_graph), 0, "Commit graph should contain commits")
  108. # Open the repository with dulwich
  109. repo = Repo(self.repo_path)
  110. # Verify that all commits in the graph are accessible
  111. for entry in commit_graph:
  112. # entry.commit_id is hex ObjectID
  113. commit_obj = repo.object_store[entry.commit_id]
  114. self.assertIsNotNone(
  115. commit_obj, f"Commit {entry.commit_id.decode()} should be accessible"
  116. )
  117. # Verify tree ID matches
  118. self.assertEqual(
  119. entry.tree_id,
  120. commit_obj.tree,
  121. f"Tree ID mismatch for commit {entry.commit_id.decode()}",
  122. )
  123. # Verify parent information
  124. self.assertEqual(
  125. entry.parents,
  126. list(commit_obj.parents),
  127. f"Parent mismatch for commit {entry.commit_id.decode()}",
  128. )
  129. def test_merge_base_with_commit_graph(self):
  130. """Test that merge-base calculations work the same with and without commit graph."""
  131. commits, work_dir = self.create_test_repo_with_history()
  132. repo = Repo(self.repo_path)
  133. # Get some commit IDs for testing
  134. main_head = repo.refs[b"refs/heads/master"]
  135. feature_head = repo.refs[b"refs/heads/feature"]
  136. # Calculate merge base without commit graph
  137. merge_base_no_graph = find_merge_base(repo, [main_head, feature_head])
  138. # Generate commit graph
  139. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  140. # Force reload of repository to pick up commit graph
  141. repo = Repo(self.repo_path)
  142. # Calculate merge base with commit graph
  143. merge_base_with_graph = find_merge_base(repo, [main_head, feature_head])
  144. # Results should be identical
  145. self.assertEqual(
  146. merge_base_no_graph,
  147. merge_base_with_graph,
  148. "Merge base should be same with and without commit graph",
  149. )
  150. # Compare with C Git's result
  151. git_result = run_git_or_fail(
  152. ["merge-base", main_head.decode(), feature_head.decode()], cwd=work_dir
  153. )
  154. git_merge_base = [git_result.strip()]
  155. self.assertEqual(
  156. merge_base_with_graph,
  157. git_merge_base,
  158. "Dulwich merge base should match C Git result",
  159. )
  160. def test_fast_forward_with_commit_graph(self):
  161. """Test that fast-forward detection works the same with and without commit graph."""
  162. commits, work_dir = self.create_test_repo_with_history()
  163. repo = Repo(self.repo_path)
  164. # Test with a simple fast-forward case (older commit to newer commit)
  165. commit1 = commits[1] # Second commit
  166. commit2 = commits[3] # Fourth commit
  167. # Check without commit graph
  168. can_ff_no_graph = can_fast_forward(repo, commit1, commit2)
  169. # Generate commit graph
  170. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  171. # Force reload
  172. repo = Repo(self.repo_path)
  173. # Check with commit graph
  174. can_ff_with_graph = can_fast_forward(repo, commit1, commit2)
  175. # Results should be identical
  176. self.assertEqual(
  177. can_ff_no_graph,
  178. can_ff_with_graph,
  179. "Fast-forward detection should be same with and without commit graph",
  180. )
  181. # Compare with C Git (check if commit1 is ancestor of commit2)
  182. from .utils import run_git
  183. returncode, stdout, stderr = run_git(
  184. ["merge-base", "--is-ancestor", commit1.decode(), commit2.decode()],
  185. cwd=work_dir,
  186. capture_stdout=True,
  187. capture_stderr=True,
  188. )
  189. git_can_ff = returncode == 0
  190. self.assertEqual(
  191. can_ff_with_graph,
  192. git_can_ff,
  193. "Dulwich fast-forward detection should match C Git",
  194. )
  195. def test_generation_numbers_consistency(self):
  196. """Test that generation numbers are consistent with Git's topology."""
  197. commits, work_dir = self.create_test_repo_with_history()
  198. # Generate commit graph
  199. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  200. # Read with dulwich
  201. graph_file = find_commit_graph_file(self.repo_path)
  202. commit_graph = read_commit_graph(graph_file)
  203. repo = Repo(self.repo_path)
  204. # Build a map of commit to generation number
  205. generation_map = {}
  206. for entry in commit_graph:
  207. generation_map[entry.commit_id] = entry.generation
  208. # Verify generation number properties:
  209. # 1. Root commits have generation 1
  210. # 2. For any commit, generation > max(parent_generations)
  211. for entry in commit_graph:
  212. repo.object_store[entry.commit_id]
  213. if not entry.parents:
  214. # Root commit should have generation 1
  215. self.assertGreaterEqual(
  216. entry.generation,
  217. 1,
  218. f"Root commit {entry.commit_id.decode()} should have generation >= 1",
  219. )
  220. else:
  221. # Non-root commit should have generation > max parent generation
  222. max_parent_gen = 0
  223. for parent_id in entry.parents:
  224. if parent_id in generation_map:
  225. max_parent_gen = max(max_parent_gen, generation_map[parent_id])
  226. if max_parent_gen > 0: # Only check if we have parent generation info
  227. self.assertGreater(
  228. entry.generation,
  229. max_parent_gen,
  230. f"Commit {entry.commit_id.decode()} generation should be > max parent generation",
  231. )
  232. def test_commit_graph_with_different_options(self):
  233. """Test commit graph generation with different C Git options."""
  234. commits, work_dir = self.create_test_repo_with_history()
  235. # Test different generation strategies
  236. strategies = [
  237. ["--reachable"],
  238. ["--stdin-commits"],
  239. ["--append"],
  240. ]
  241. for i, strategy in enumerate(strategies):
  242. with self.subTest(strategy=strategy):
  243. # Clean up any existing commit graph
  244. graph_path = os.path.join(
  245. self.repo_path, "objects", "info", "commit-graph"
  246. )
  247. if os.path.exists(graph_path):
  248. os.remove(graph_path)
  249. if strategy == ["--stdin-commits"]:
  250. # For stdin-commits, we need to provide commit IDs
  251. process_input = b"\n".join(commits[:3]) + b"\n" # First 3 commits
  252. run_git_or_fail(
  253. ["commit-graph", "write", *strategy],
  254. cwd=work_dir,
  255. input=process_input,
  256. )
  257. elif strategy == ["--append"]:
  258. # First create a base graph
  259. run_git_or_fail(
  260. ["commit-graph", "write", "--reachable"], cwd=work_dir
  261. )
  262. # Then append (this should work even if nothing new to add)
  263. run_git_or_fail(["commit-graph", "write", *strategy], cwd=work_dir)
  264. else:
  265. run_git_or_fail(["commit-graph", "write", *strategy], cwd=work_dir)
  266. # Verify dulwich can read the generated graph
  267. graph_file = find_commit_graph_file(self.repo_path)
  268. if graph_file: # Some strategies might not generate a graph
  269. commit_graph = read_commit_graph(graph_file)
  270. self.assertIsNotNone(
  271. commit_graph,
  272. f"Should be able to read commit graph generated with {strategy}",
  273. )
  274. def test_commit_graph_incremental_update(self):
  275. """Test that dulwich can read incrementally updated commit graphs."""
  276. commits, work_dir = self.create_test_repo_with_history()
  277. # Create initial commit graph
  278. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  279. # Read initial graph
  280. graph_file = find_commit_graph_file(self.repo_path)
  281. initial_graph = read_commit_graph(graph_file)
  282. initial_count = len(initial_graph)
  283. # Add another commit
  284. with open(os.path.join(work_dir, "new_file.txt"), "w") as f:
  285. f.write("New content\n")
  286. run_git_or_fail(["add", "new_file.txt"], cwd=work_dir)
  287. run_git_or_fail(
  288. [
  289. "commit",
  290. "-m",
  291. "New commit",
  292. "--author",
  293. "Test Author <test@example.com>",
  294. "--date",
  295. "2024-01-08 12:00:00 +0000",
  296. ],
  297. cwd=work_dir,
  298. )
  299. # Update commit graph
  300. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  301. # Read updated graph
  302. updated_graph = read_commit_graph(graph_file)
  303. # Should have one more commit
  304. self.assertEqual(
  305. len(updated_graph),
  306. initial_count + 1,
  307. "Updated commit graph should have one more commit",
  308. )
  309. # Verify all original commits are still present
  310. initial_commit_ids = {entry.commit_id for entry in initial_graph}
  311. updated_commit_ids = {entry.commit_id for entry in updated_graph}
  312. self.assertTrue(
  313. initial_commit_ids.issubset(updated_commit_ids),
  314. "All original commits should still be in updated graph",
  315. )
  316. def test_commit_graph_with_tags_and_refs(self):
  317. """Test commit graph behavior with various refs and tags."""
  318. commits, work_dir = self.create_test_repo_with_history()
  319. # Create some tags
  320. run_git_or_fail(["tag", "v1.0", commits[2].decode()], cwd=work_dir)
  321. run_git_or_fail(
  322. ["tag", "-a", "v2.0", commits[4].decode(), "-m", "Version 2.0"],
  323. cwd=work_dir,
  324. )
  325. # Generate commit graph
  326. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  327. # Verify dulwich can read the graph
  328. graph_file = find_commit_graph_file(self.repo_path)
  329. commit_graph = read_commit_graph(graph_file)
  330. repo = Repo(self.repo_path)
  331. # Verify tagged commits are in the graph
  332. tagged_commits = [commits[2], commits[4]]
  333. graph_commit_ids = {entry.commit_id for entry in commit_graph}
  334. for tagged_commit in tagged_commits:
  335. self.assertIn(
  336. tagged_commit,
  337. graph_commit_ids,
  338. f"Tagged commit {tagged_commit.decode()} should be in commit graph",
  339. )
  340. # Test merge base with tagged commits
  341. merge_base = find_merge_base(repo, [commits[0], commits[4]])
  342. self.assertEqual(
  343. merge_base,
  344. [commits[0]],
  345. "Merge base calculation should work with tagged commits",
  346. )
  347. def test_empty_repository_commit_graph(self):
  348. """Test commit graph behavior with empty repository."""
  349. # Create empty repository
  350. run_git_or_fail(["init", "--bare"], cwd=self.test_dir)
  351. empty_repo_path = os.path.join(self.test_dir, ".git")
  352. # Try to write commit graph (should succeed but create empty graph)
  353. try:
  354. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=self.test_dir)
  355. except Exception:
  356. # Some Git versions might fail on empty repos, which is fine
  357. pass
  358. # Check if commit graph file exists
  359. graph_file = find_commit_graph_file(empty_repo_path)
  360. if graph_file:
  361. # If it exists, dulwich should be able to read it
  362. commit_graph = read_commit_graph(graph_file)
  363. self.assertIsNotNone(
  364. commit_graph, "Should be able to read empty commit graph"
  365. )
  366. self.assertEqual(
  367. len(commit_graph), 0, "Empty repository should have empty commit graph"
  368. )