test_commit_graph.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. # test_commit_graph.py -- Compatibility tests for commit graph functionality
  2. # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. """Compatibility tests for Git commit graph functionality.
  10. These tests verify that dulwich's commit graph implementation behaves
  11. identically to C Git's implementation.
  12. """
  13. import os
  14. import tempfile
  15. from dulwich.commit_graph import find_commit_graph_file, read_commit_graph
  16. from dulwich.graph import can_fast_forward, find_merge_base
  17. from dulwich.repo import Repo
  18. from .utils import CompatTestCase, run_git_or_fail
  19. class CommitGraphCompatTests(CompatTestCase):
  20. """Compatibility tests for commit graph functionality."""
  21. # Commit graph was introduced in Git 2.18.0
  22. min_git_version = (2, 18, 0)
  23. def setUp(self):
  24. super().setUp()
  25. self.test_dir = tempfile.mkdtemp()
  26. self.repo_path = os.path.join(self.test_dir, "test-repo")
  27. # Set up git identity to avoid committer identity errors
  28. self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
  29. self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
  30. self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
  31. self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")
  32. def tearDown(self):
  33. from .utils import rmtree_ro
  34. rmtree_ro(self.test_dir)
  35. def create_test_repo_with_history(self):
  36. """Create a test repository with some commit history."""
  37. # Initialize repository
  38. run_git_or_fail(["init"], cwd=self.test_dir)
  39. os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)
  40. work_dir = os.path.join(self.test_dir, "work")
  41. os.makedirs(work_dir)
  42. # Create .git file pointing to our repo
  43. with open(os.path.join(work_dir, ".git"), "w") as f:
  44. f.write(f"gitdir: {self.repo_path}\n")
  45. # Create some commits
  46. commits = []
  47. for i in range(5):
  48. filename = f"file{i}.txt"
  49. with open(os.path.join(work_dir, filename), "w") as f:
  50. f.write(f"Content {i}\n")
  51. run_git_or_fail(["add", filename], cwd=work_dir)
  52. run_git_or_fail(
  53. [
  54. "commit",
  55. "-m",
  56. f"Commit {i}",
  57. "--author",
  58. "Test Author <test@example.com>",
  59. "--date",
  60. f"2024-01-0{i + 1} 12:00:00 +0000",
  61. ],
  62. cwd=work_dir,
  63. )
  64. # Get the commit SHA
  65. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  66. commits.append(result.strip())
  67. # Create a branch and merge
  68. run_git_or_fail(["checkout", "-b", "feature"], cwd=work_dir)
  69. with open(os.path.join(work_dir, "feature.txt"), "w") as f:
  70. f.write("Feature content\n")
  71. run_git_or_fail(["add", "feature.txt"], cwd=work_dir)
  72. run_git_or_fail(
  73. [
  74. "commit",
  75. "-m",
  76. "Feature commit",
  77. "--author",
  78. "Test Author <test@example.com>",
  79. "--date",
  80. "2024-01-06 12:00:00 +0000",
  81. ],
  82. cwd=work_dir,
  83. )
  84. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  85. feature_commit = result.strip()
  86. # Merge back to master
  87. run_git_or_fail(["checkout", "master"], cwd=work_dir)
  88. run_git_or_fail(
  89. ["merge", "feature", "--no-ff", "-m", "Merge feature"], cwd=work_dir
  90. )
  91. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  92. merge_commit = result.strip()
  93. commits.extend([feature_commit, merge_commit])
  94. return commits, work_dir
  95. def test_commit_graph_generation_and_reading(self):
  96. """Test that dulwich can read commit graphs generated by C Git."""
  97. commits, work_dir = self.create_test_repo_with_history()
  98. # Generate commit graph with C Git
  99. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  100. # Verify commit graph file exists
  101. graph_file = find_commit_graph_file(self.repo_path)
  102. self.assertIsNotNone(graph_file, "Commit graph file should exist")
  103. # Read with dulwich
  104. commit_graph = read_commit_graph(graph_file)
  105. self.assertIsNotNone(commit_graph, "Should be able to read commit graph")
  106. # Verify we have the expected number of commits
  107. self.assertGreater(len(commit_graph), 0, "Commit graph should contain commits")
  108. # Open the repository with dulwich
  109. repo = Repo(self.repo_path)
  110. # Verify that all commits in the graph are accessible
  111. for entry in commit_graph:
  112. # entry.commit_id is hex ObjectID
  113. commit_obj = repo.object_store[entry.commit_id]
  114. self.assertIsNotNone(
  115. commit_obj, f"Commit {entry.commit_id.decode()} should be accessible"
  116. )
  117. # Verify tree ID matches
  118. self.assertEqual(
  119. entry.tree_id,
  120. commit_obj.tree,
  121. f"Tree ID mismatch for commit {entry.commit_id.decode()}",
  122. )
  123. # Verify parent information
  124. self.assertEqual(
  125. entry.parents,
  126. list(commit_obj.parents),
  127. f"Parent mismatch for commit {entry.commit_id.decode()}",
  128. )
  129. def test_merge_base_with_commit_graph(self):
  130. """Test that merge-base calculations work the same with and without commit graph."""
  131. commits, work_dir = self.create_test_repo_with_history()
  132. repo = Repo(self.repo_path)
  133. # Get some commit IDs for testing
  134. main_head = repo.refs[b"refs/heads/master"]
  135. feature_head = repo.refs[b"refs/heads/feature"]
  136. # Calculate merge base without commit graph
  137. merge_base_no_graph = find_merge_base(repo, [main_head, feature_head])
  138. # Generate commit graph
  139. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  140. # Force reload of repository to pick up commit graph
  141. repo = Repo(self.repo_path)
  142. # Calculate merge base with commit graph
  143. merge_base_with_graph = find_merge_base(repo, [main_head, feature_head])
  144. # Results should be identical
  145. self.assertEqual(
  146. merge_base_no_graph,
  147. merge_base_with_graph,
  148. "Merge base should be same with and without commit graph",
  149. )
  150. # Compare with C Git's result
  151. git_result = run_git_or_fail(
  152. ["merge-base", main_head.decode(), feature_head.decode()], cwd=work_dir
  153. )
  154. git_merge_base = [git_result.strip()]
  155. self.assertEqual(
  156. merge_base_with_graph,
  157. git_merge_base,
  158. "Dulwich merge base should match C Git result",
  159. )
  160. def test_fast_forward_with_commit_graph(self):
  161. """Test that fast-forward detection works the same with and without commit graph."""
  162. commits, work_dir = self.create_test_repo_with_history()
  163. repo = Repo(self.repo_path)
  164. # Test with a simple fast-forward case (older commit to newer commit)
  165. commit1 = commits[1] # Second commit
  166. commit2 = commits[3] # Fourth commit
  167. # Check without commit graph
  168. can_ff_no_graph = can_fast_forward(repo, commit1, commit2)
  169. # Generate commit graph
  170. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  171. # Force reload
  172. repo = Repo(self.repo_path)
  173. # Check with commit graph
  174. can_ff_with_graph = can_fast_forward(repo, commit1, commit2)
  175. # Results should be identical
  176. self.assertEqual(
  177. can_ff_no_graph,
  178. can_ff_with_graph,
  179. "Fast-forward detection should be same with and without commit graph",
  180. )
  181. # Compare with C Git (check if commit1 is ancestor of commit2)
  182. from .utils import run_git
  183. returncode, stdout, stderr = run_git(
  184. ["merge-base", "--is-ancestor", commit1.decode(), commit2.decode()],
  185. cwd=work_dir,
  186. capture_stdout=True,
  187. capture_stderr=True,
  188. )
  189. git_can_ff = returncode == 0
  190. self.assertEqual(
  191. can_ff_with_graph,
  192. git_can_ff,
  193. "Dulwich fast-forward detection should match C Git",
  194. )
  195. def test_generation_numbers_consistency(self):
  196. """Test that generation numbers are consistent with Git's topology."""
  197. commits, work_dir = self.create_test_repo_with_history()
  198. # Generate commit graph
  199. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  200. # Read with dulwich
  201. graph_file = find_commit_graph_file(self.repo_path)
  202. commit_graph = read_commit_graph(graph_file)
  203. repo = Repo(self.repo_path)
  204. # Build a map of commit to generation number
  205. generation_map = {}
  206. for entry in commit_graph:
  207. generation_map[entry.commit_id] = entry.generation
  208. # Verify generation number properties:
  209. # 1. Root commits have generation 1
  210. # 2. For any commit, generation > max(parent_generations)
  211. for entry in commit_graph:
  212. repo.object_store[entry.commit_id]
  213. if not entry.parents:
  214. # Root commit should have generation 1
  215. self.assertGreaterEqual(
  216. entry.generation,
  217. 1,
  218. f"Root commit {entry.commit_id.decode()} should have generation >= 1",
  219. )
  220. else:
  221. # Non-root commit should have generation > max parent generation
  222. max_parent_gen = 0
  223. for parent_id in entry.parents:
  224. if parent_id in generation_map:
  225. max_parent_gen = max(max_parent_gen, generation_map[parent_id])
  226. if max_parent_gen > 0: # Only check if we have parent generation info
  227. self.assertGreater(
  228. entry.generation,
  229. max_parent_gen,
  230. f"Commit {entry.commit_id.decode()} generation should be > max parent generation",
  231. )
  232. def test_commit_graph_with_different_options(self):
  233. """Test commit graph generation with different C Git options."""
  234. commits, work_dir = self.create_test_repo_with_history()
  235. # Test different generation strategies
  236. strategies = [
  237. ["--reachable"],
  238. ["--stdin-commits"],
  239. ["--append"],
  240. ]
  241. for i, strategy in enumerate(strategies):
  242. with self.subTest(strategy=strategy):
  243. # Clean up any existing commit graph
  244. graph_path = os.path.join(
  245. self.repo_path, "objects", "info", "commit-graph"
  246. )
  247. if os.path.exists(graph_path):
  248. try:
  249. os.remove(graph_path)
  250. except PermissionError:
  251. # On Windows, handle read-only files
  252. from .utils import remove_ro
  253. remove_ro(graph_path)
  254. if strategy == ["--stdin-commits"]:
  255. # For stdin-commits, we need to provide commit IDs
  256. process_input = b"\n".join(commits[:3]) + b"\n" # First 3 commits
  257. run_git_or_fail(
  258. ["commit-graph", "write", *strategy],
  259. cwd=work_dir,
  260. input=process_input,
  261. )
  262. elif strategy == ["--append"]:
  263. # First create a base graph
  264. run_git_or_fail(
  265. ["commit-graph", "write", "--reachable"], cwd=work_dir
  266. )
  267. # Then append (this should work even if nothing new to add)
  268. run_git_or_fail(["commit-graph", "write", *strategy], cwd=work_dir)
  269. else:
  270. run_git_or_fail(["commit-graph", "write", *strategy], cwd=work_dir)
  271. # Verify dulwich can read the generated graph
  272. graph_file = find_commit_graph_file(self.repo_path)
  273. if graph_file: # Some strategies might not generate a graph
  274. commit_graph = read_commit_graph(graph_file)
  275. self.assertIsNotNone(
  276. commit_graph,
  277. f"Should be able to read commit graph generated with {strategy}",
  278. )
  279. def test_commit_graph_incremental_update(self):
  280. """Test that dulwich can read incrementally updated commit graphs."""
  281. commits, work_dir = self.create_test_repo_with_history()
  282. # Create initial commit graph
  283. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  284. # Read initial graph
  285. graph_file = find_commit_graph_file(self.repo_path)
  286. initial_graph = read_commit_graph(graph_file)
  287. initial_count = len(initial_graph)
  288. # Add another commit
  289. with open(os.path.join(work_dir, "new_file.txt"), "w") as f:
  290. f.write("New content\n")
  291. run_git_or_fail(["add", "new_file.txt"], cwd=work_dir)
  292. run_git_or_fail(
  293. [
  294. "commit",
  295. "-m",
  296. "New commit",
  297. "--author",
  298. "Test Author <test@example.com>",
  299. "--date",
  300. "2024-01-08 12:00:00 +0000",
  301. ],
  302. cwd=work_dir,
  303. )
  304. # Update commit graph
  305. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  306. # Read updated graph
  307. updated_graph = read_commit_graph(graph_file)
  308. # Should have one more commit
  309. self.assertEqual(
  310. len(updated_graph),
  311. initial_count + 1,
  312. "Updated commit graph should have one more commit",
  313. )
  314. # Verify all original commits are still present
  315. initial_commit_ids = {entry.commit_id for entry in initial_graph}
  316. updated_commit_ids = {entry.commit_id for entry in updated_graph}
  317. self.assertTrue(
  318. initial_commit_ids.issubset(updated_commit_ids),
  319. "All original commits should still be in updated graph",
  320. )
  321. def test_commit_graph_with_tags_and_refs(self):
  322. """Test commit graph behavior with various refs and tags."""
  323. commits, work_dir = self.create_test_repo_with_history()
  324. # Create some tags
  325. run_git_or_fail(["tag", "v1.0", commits[2].decode()], cwd=work_dir)
  326. run_git_or_fail(
  327. ["tag", "-a", "v2.0", commits[4].decode(), "-m", "Version 2.0"],
  328. cwd=work_dir,
  329. )
  330. # Generate commit graph
  331. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
  332. # Verify dulwich can read the graph
  333. graph_file = find_commit_graph_file(self.repo_path)
  334. commit_graph = read_commit_graph(graph_file)
  335. repo = Repo(self.repo_path)
  336. # Verify tagged commits are in the graph
  337. tagged_commits = [commits[2], commits[4]]
  338. graph_commit_ids = {entry.commit_id for entry in commit_graph}
  339. for tagged_commit in tagged_commits:
  340. self.assertIn(
  341. tagged_commit,
  342. graph_commit_ids,
  343. f"Tagged commit {tagged_commit.decode()} should be in commit graph",
  344. )
  345. # Test merge base with tagged commits
  346. merge_base = find_merge_base(repo, [commits[0], commits[4]])
  347. self.assertEqual(
  348. merge_base,
  349. [commits[0]],
  350. "Merge base calculation should work with tagged commits",
  351. )
  352. def test_empty_repository_commit_graph(self):
  353. """Test commit graph behavior with empty repository."""
  354. # Create empty repository
  355. run_git_or_fail(["init", "--bare"], cwd=self.test_dir)
  356. empty_repo_path = os.path.join(self.test_dir, ".git")
  357. # Try to write commit graph (should succeed but create empty graph)
  358. try:
  359. run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=self.test_dir)
  360. except AssertionError:
  361. # Some Git versions might fail on empty repos, which is fine
  362. pass
  363. # Check if commit graph file exists
  364. graph_file = find_commit_graph_file(empty_repo_path)
  365. if graph_file:
  366. # If it exists, dulwich should be able to read it
  367. commit_graph = read_commit_graph(graph_file)
  368. self.assertIsNotNone(
  369. commit_graph, "Should be able to read empty commit graph"
  370. )
  371. self.assertEqual(
  372. len(commit_graph), 0, "Empty repository should have empty commit graph"
  373. )