test_commit_graph.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. # test_commit_graph.py -- Tests for commit graph functionality
  2. # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. """Tests for Git commit graph functionality."""
  10. import io
  11. import os
  12. import struct
  13. import tempfile
  14. import unittest
  15. from dulwich.commit_graph import (
  16. CHUNK_COMMIT_DATA,
  17. CHUNK_OID_FANOUT,
  18. CHUNK_OID_LOOKUP,
  19. COMMIT_GRAPH_SIGNATURE,
  20. COMMIT_GRAPH_VERSION,
  21. HASH_VERSION_SHA1,
  22. CommitGraph,
  23. CommitGraphChunk,
  24. CommitGraphEntry,
  25. find_commit_graph_file,
  26. generate_commit_graph,
  27. get_reachable_commits,
  28. read_commit_graph,
  29. )
  30. from dulwich.object_format import SHA1
  31. class CommitGraphEntryTests(unittest.TestCase):
  32. """Tests for CommitGraphEntry."""
  33. def test_init(self) -> None:
  34. commit_id = b"a" * 40
  35. tree_id = b"b" * 40
  36. parents = [b"c" * 40, b"d" * 40]
  37. generation = 42
  38. commit_time = 1234567890
  39. entry = CommitGraphEntry(commit_id, tree_id, parents, generation, commit_time)
  40. self.assertEqual(entry.commit_id, commit_id)
  41. self.assertEqual(entry.tree_id, tree_id)
  42. self.assertEqual(entry.parents, parents)
  43. self.assertEqual(entry.generation, generation)
  44. self.assertEqual(entry.commit_time, commit_time)
  45. def test_repr(self) -> None:
  46. entry = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
  47. repr_str = repr(entry)
  48. self.assertIn("CommitGraphEntry", repr_str)
  49. self.assertIn("generation=1", repr_str)
  50. class CommitGraphChunkTests(unittest.TestCase):
  51. """Tests for CommitGraphChunk."""
  52. def test_init(self) -> None:
  53. chunk = CommitGraphChunk(b"TEST", b"test data")
  54. self.assertEqual(chunk.chunk_id, b"TEST")
  55. self.assertEqual(chunk.data, b"test data")
  56. def test_repr(self) -> None:
  57. chunk = CommitGraphChunk(b"TEST", b"x" * 100)
  58. repr_str = repr(chunk)
  59. self.assertIn("CommitGraphChunk", repr_str)
  60. self.assertIn("size=100", repr_str)
  61. class CommitGraphTests(unittest.TestCase):
  62. """Tests for CommitGraph."""
  63. def test_init(self) -> None:
  64. graph = CommitGraph(object_format=SHA1)
  65. self.assertEqual(graph.hash_version, HASH_VERSION_SHA1)
  66. self.assertEqual(len(graph.entries), 0)
  67. self.assertEqual(len(graph.chunks), 0)
  68. def test_len(self) -> None:
  69. graph = CommitGraph(object_format=SHA1)
  70. self.assertEqual(len(graph), 0)
  71. # Add a dummy entry
  72. entry = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
  73. graph.entries.append(entry)
  74. self.assertEqual(len(graph), 1)
  75. def test_iter(self) -> None:
  76. graph = CommitGraph(object_format=SHA1)
  77. entry1 = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
  78. entry2 = CommitGraphEntry(b"c" * 40, b"d" * 40, [], 2, 2000)
  79. graph.entries.extend([entry1, entry2])
  80. entries = list(graph)
  81. self.assertEqual(len(entries), 2)
  82. self.assertEqual(entries[0], entry1)
  83. self.assertEqual(entries[1], entry2)
  84. def test_get_entry_by_oid_missing(self) -> None:
  85. graph = CommitGraph(object_format=SHA1)
  86. result = graph.get_entry_by_oid(b"f" * 40)
  87. self.assertIsNone(result)
  88. def test_get_generation_number_missing(self) -> None:
  89. graph = CommitGraph(object_format=SHA1)
  90. result = graph.get_generation_number(b"f" * 40)
  91. self.assertIsNone(result)
  92. def test_get_parents_missing(self) -> None:
  93. graph = CommitGraph(object_format=SHA1)
  94. result = graph.get_parents(b"f" * 40)
  95. self.assertIsNone(result)
  96. def test_from_invalid_signature(self) -> None:
  97. data = b"XXXX" + b"\\x00" * 100
  98. f = io.BytesIO(data)
  99. try:
  100. with self.assertRaises(ValueError) as cm:
  101. CommitGraph.from_file(f)
  102. self.assertIn("Invalid commit graph signature", str(cm.exception))
  103. finally:
  104. f.close()
  105. def test_from_invalid_version(self) -> None:
  106. data = COMMIT_GRAPH_SIGNATURE + struct.pack(">B", 99) + b"\\x00" * 100
  107. f = io.BytesIO(data)
  108. try:
  109. with self.assertRaises(ValueError) as cm:
  110. CommitGraph.from_file(f)
  111. self.assertIn("Unsupported commit graph version", str(cm.exception))
  112. finally:
  113. f.close()
  114. def test_from_invalid_hash_version(self) -> None:
  115. data = (
  116. COMMIT_GRAPH_SIGNATURE
  117. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  118. + struct.pack(">B", 99) # Invalid hash version
  119. + b"\\x00" * 100
  120. )
  121. f = io.BytesIO(data)
  122. try:
  123. with self.assertRaises(ValueError) as cm:
  124. CommitGraph.from_file(f)
  125. self.assertIn("Unsupported hash version", str(cm.exception))
  126. finally:
  127. f.close()
  128. def create_minimal_commit_graph_data(self) -> bytes:
  129. """Create minimal valid commit graph data for testing."""
  130. # Create the data in order and calculate offsets properly
  131. # Header: signature + version + hash_version + num_chunks + base_graph_count
  132. header = (
  133. COMMIT_GRAPH_SIGNATURE
  134. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  135. + struct.pack(">B", HASH_VERSION_SHA1)
  136. + struct.pack(">B", 3) # 3 chunks
  137. + struct.pack(">B", 0)
  138. ) # 0 base graphs
  139. # Table of contents: 4 entries (3 chunks + terminator) = 4 * 12 = 48 bytes
  140. toc_size = 4 * 12
  141. # Calculate chunk offsets from start of file
  142. header_size = 8
  143. chunk1_offset = header_size + toc_size # OID Fanout
  144. chunk2_offset = chunk1_offset + 256 * 4 # OID Lookup (after fanout)
  145. chunk3_offset = chunk2_offset + 20 # Commit Data (after 1 commit * 20 bytes)
  146. terminator_offset = (
  147. chunk3_offset + 36
  148. ) # After commit data (1 commit * 36 bytes)
  149. # Build table of contents
  150. toc = (
  151. CHUNK_OID_FANOUT
  152. + struct.pack(">Q", chunk1_offset)
  153. + CHUNK_OID_LOOKUP
  154. + struct.pack(">Q", chunk2_offset)
  155. + CHUNK_COMMIT_DATA
  156. + struct.pack(">Q", chunk3_offset)
  157. + b"\x00\x00\x00\x00"
  158. + struct.pack(">Q", terminator_offset)
  159. )
  160. # OID Fanout chunk (256 * 4 bytes)
  161. fanout = b""
  162. for i in range(256):
  163. if i < 0xAA: # Our test commit starts with 0xaa
  164. fanout += struct.pack(">L", 0)
  165. else:
  166. fanout += struct.pack(">L", 1) # 1 commit total
  167. # OID Lookup chunk (1 commit = 20 bytes)
  168. commit_oid = b"\xaa" + b"\x00" * 19
  169. oid_lookup = commit_oid
  170. # Commit Data chunk (1 commit = 20 + 16 = 36 bytes)
  171. tree_oid = b"\xbb" + b"\x00" * 19
  172. parent1_pos = 0x70000000 # GRAPH_PARENT_MISSING
  173. parent2_pos = 0x70000000 # GRAPH_PARENT_MISSING
  174. generation = 1
  175. commit_time = 1234567890
  176. gen_and_time = (generation << 2) | (commit_time >> 32)
  177. commit_data = (
  178. tree_oid
  179. + struct.pack(">LL", parent1_pos, parent2_pos)
  180. + struct.pack(">LL", gen_and_time, commit_time & 0xFFFFFFFF)
  181. )
  182. return header + toc + fanout + oid_lookup + commit_data
  183. def test_from_minimal_valid_file(self) -> None:
  184. """Test parsing a minimal but valid commit graph file."""
  185. data = self.create_minimal_commit_graph_data()
  186. f = io.BytesIO(data)
  187. graph = CommitGraph.from_file(f)
  188. self.assertEqual(graph.hash_version, HASH_VERSION_SHA1)
  189. self.assertEqual(len(graph), 1)
  190. # Check the parsed entry
  191. entry = graph.entries[0]
  192. self.assertEqual(entry.commit_id, b"aa" + b"00" * 19)
  193. self.assertEqual(entry.tree_id, b"bb" + b"00" * 19)
  194. self.assertEqual(entry.parents, []) # No parents
  195. self.assertEqual(entry.generation, 1)
  196. self.assertEqual(entry.commit_time, 1234567890)
  197. # Test lookup methods
  198. commit_oid = b"aa" + b"00" * 19
  199. self.assertEqual(graph.get_generation_number(commit_oid), 1)
  200. self.assertEqual(graph.get_parents(commit_oid), [])
  201. self.assertIsNotNone(graph.get_entry_by_oid(commit_oid))
  202. def test_missing_required_chunks(self) -> None:
  203. """Test error handling for missing required chunks."""
  204. # Create data with header but no chunks
  205. header = (
  206. COMMIT_GRAPH_SIGNATURE
  207. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  208. + struct.pack(">B", HASH_VERSION_SHA1)
  209. + struct.pack(">B", 0) # 0 chunks
  210. + struct.pack(">B", 0)
  211. )
  212. # TOC with just terminator
  213. toc = b"\\x00\\x00\\x00\\x00" + struct.pack(">Q", 12)
  214. data = header + toc
  215. f = io.BytesIO(data)
  216. with self.assertRaises(ValueError) as cm:
  217. CommitGraph.from_file(f)
  218. self.assertIn("Missing required OID lookup chunk", str(cm.exception))
  219. def test_write_empty_graph_raises(self) -> None:
  220. """Test that writing empty graph raises ValueError."""
  221. graph = CommitGraph(object_format=SHA1)
  222. f = io.BytesIO()
  223. with self.assertRaises(ValueError):
  224. graph.write_to_file(f)
  225. def test_write_and_read_round_trip(self) -> None:
  226. """Test writing and reading a commit graph."""
  227. # Create a simple commit graph
  228. graph = CommitGraph(object_format=SHA1)
  229. entry = CommitGraphEntry(
  230. commit_id=b"aa" + b"00" * 19,
  231. tree_id=b"bb" + b"00" * 19,
  232. parents=[],
  233. generation=1,
  234. commit_time=1234567890,
  235. )
  236. graph.entries.append(entry)
  237. graph._oid_to_index = {bytes.fromhex(entry.commit_id.decode()): 0}
  238. # Write to bytes
  239. f = io.BytesIO()
  240. graph.write_to_file(f)
  241. # Read back
  242. f.seek(0)
  243. read_graph = CommitGraph.from_file(f)
  244. # Verify
  245. self.assertEqual(len(read_graph), 1)
  246. read_entry = read_graph.entries[0]
  247. self.assertEqual(read_entry.commit_id, entry.commit_id)
  248. self.assertEqual(read_entry.tree_id, entry.tree_id)
  249. self.assertEqual(read_entry.parents, entry.parents)
  250. self.assertEqual(read_entry.generation, entry.generation)
  251. self.assertEqual(read_entry.commit_time, entry.commit_time)
  252. class CommitGraphFileOperationsTests(unittest.TestCase):
  253. """Tests for commit graph file operations."""
  254. def setUp(self) -> None:
  255. self.tempdir = tempfile.mkdtemp()
  256. def tearDown(self) -> None:
  257. import shutil
  258. shutil.rmtree(self.tempdir, ignore_errors=True)
  259. def test_read_commit_graph_missing_file(self) -> None:
  260. """Test reading from non-existent file."""
  261. missing_path = os.path.join(self.tempdir, "missing.graph")
  262. result = read_commit_graph(missing_path)
  263. self.assertIsNone(result)
  264. def test_read_commit_graph_invalid_file(self) -> None:
  265. """Test reading from invalid file."""
  266. invalid_path = os.path.join(self.tempdir, "invalid.graph")
  267. with open(invalid_path, "wb") as f:
  268. f.write(b"invalid data")
  269. with self.assertRaises(ValueError):
  270. read_commit_graph(invalid_path)
  271. def test_find_commit_graph_file_missing(self) -> None:
  272. """Test finding commit graph file when it doesn't exist."""
  273. result = find_commit_graph_file(self.tempdir)
  274. self.assertIsNone(result)
  275. def test_find_commit_graph_file_standard_location(self) -> None:
  276. """Test finding commit graph file in standard location."""
  277. # Create .git/objects/info/commit-graph
  278. objects_dir = os.path.join(self.tempdir, "objects")
  279. info_dir = os.path.join(objects_dir, "info")
  280. os.makedirs(info_dir)
  281. graph_path = os.path.join(info_dir, "commit-graph")
  282. with open(graph_path, "wb") as f:
  283. f.write(b"dummy")
  284. result = find_commit_graph_file(self.tempdir)
  285. self.assertEqual(result, graph_path.encode())
  286. def test_find_commit_graph_file_chain_location(self) -> None:
  287. """Test finding commit graph file in chain location."""
  288. # Create .git/objects/info/commit-graphs/graph-{hash}.graph
  289. objects_dir = os.path.join(self.tempdir, "objects")
  290. info_dir = os.path.join(objects_dir, "info")
  291. graphs_dir = os.path.join(info_dir, "commit-graphs")
  292. os.makedirs(graphs_dir)
  293. graph_path = os.path.join(graphs_dir, "graph-abc123.graph")
  294. with open(graph_path, "wb") as f:
  295. f.write(b"dummy")
  296. result = find_commit_graph_file(self.tempdir)
  297. self.assertEqual(result, graph_path.encode())
  298. def test_find_commit_graph_file_prefers_standard(self) -> None:
  299. """Test that standard location is preferred over chain location."""
  300. # Create both locations
  301. objects_dir = os.path.join(self.tempdir, "objects")
  302. info_dir = os.path.join(objects_dir, "info")
  303. graphs_dir = os.path.join(info_dir, "commit-graphs")
  304. os.makedirs(info_dir)
  305. os.makedirs(graphs_dir)
  306. # Standard location
  307. standard_path = os.path.join(info_dir, "commit-graph")
  308. with open(standard_path, "wb") as f:
  309. f.write(b"standard")
  310. # Chain location
  311. chain_path = os.path.join(graphs_dir, "graph-abc123.graph")
  312. with open(chain_path, "wb") as f:
  313. f.write(b"chain")
  314. result = find_commit_graph_file(self.tempdir)
  315. self.assertEqual(result, standard_path.encode())
  316. class CommitGraphGenerationTests(unittest.TestCase):
  317. """Tests for commit graph generation functionality."""
  318. def setUp(self) -> None:
  319. self.tempdir = tempfile.mkdtemp()
  320. def tearDown(self) -> None:
  321. import shutil
  322. shutil.rmtree(self.tempdir, ignore_errors=True)
  323. def test_generate_commit_graph_empty(self) -> None:
  324. """Test generating commit graph with no commits."""
  325. from dulwich.object_store import MemoryObjectStore
  326. object_store = MemoryObjectStore()
  327. graph = generate_commit_graph(object_store, [])
  328. self.assertEqual(len(graph), 0)
  329. def test_generate_commit_graph_single_commit(self) -> None:
  330. """Test generating commit graph with single commit."""
  331. from dulwich.object_store import MemoryObjectStore
  332. from dulwich.objects import Commit, Tree
  333. object_store = MemoryObjectStore()
  334. # Create a tree and commit
  335. tree = Tree()
  336. object_store.add_object(tree)
  337. commit = Commit()
  338. commit.tree = tree.id
  339. commit.author = b"Test Author <test@example.com>"
  340. commit.committer = b"Test Author <test@example.com>"
  341. commit.commit_time = commit.author_time = 1234567890
  342. commit.commit_timezone = commit.author_timezone = 0
  343. commit.message = b"Test commit"
  344. object_store.add_object(commit)
  345. # Generate graph
  346. graph = generate_commit_graph(object_store, [commit.id])
  347. self.assertEqual(len(graph), 1)
  348. entry = graph.entries[0]
  349. self.assertEqual(entry.commit_id, commit.id)
  350. self.assertEqual(entry.tree_id, commit.tree)
  351. self.assertEqual(entry.parents, [])
  352. self.assertEqual(entry.generation, 1)
  353. self.assertEqual(entry.commit_time, 1234567890)
  354. def test_generate_commit_graph_oid_index_uses_binary_keys(self) -> None:
  355. """Test that generated commit graph _oid_to_index uses binary RawObjectID keys."""
  356. from dulwich.object_store import MemoryObjectStore
  357. from dulwich.objects import Commit, Tree, hex_to_sha
  358. object_store = MemoryObjectStore()
  359. # Create a tree and commit
  360. tree = Tree()
  361. object_store.add_object(tree)
  362. commit = Commit()
  363. commit.tree = tree.id
  364. commit.author = b"Test Author <test@example.com>"
  365. commit.committer = b"Test Author <test@example.com>"
  366. commit.commit_time = commit.author_time = 1234567890
  367. commit.commit_timezone = commit.author_timezone = 0
  368. commit.message = b"Test commit"
  369. object_store.add_object(commit)
  370. # Generate graph
  371. graph = generate_commit_graph(object_store, [commit.id])
  372. # Verify _oid_to_index uses binary keys (RawObjectID)
  373. # commit.id is hex (ObjectID), so we need to convert to binary for lookup
  374. binary_commit_id = hex_to_sha(commit.id)
  375. self.assertIn(binary_commit_id, graph._oid_to_index)
  376. self.assertEqual(graph._oid_to_index[binary_commit_id], 0)
  377. # Verify lookup with hex ObjectID works via get_entry_by_oid
  378. entry = graph.get_entry_by_oid(commit.id)
  379. self.assertIsNotNone(entry)
  380. assert entry is not None
  381. self.assertEqual(entry.commit_id, commit.id)
  382. # Verify lookup with binary RawObjectID also works
  383. entry_binary = graph.get_entry_by_oid(binary_commit_id)
  384. self.assertIsNotNone(entry_binary)
  385. assert entry_binary is not None
  386. self.assertEqual(entry_binary.commit_id, commit.id)
  387. def test_get_reachable_commits(self) -> None:
  388. """Test getting reachable commits."""
  389. from dulwich.object_store import MemoryObjectStore
  390. from dulwich.objects import Commit, Tree
  391. object_store = MemoryObjectStore()
  392. # Create tree
  393. tree = Tree()
  394. object_store.add_object(tree)
  395. # Create commit chain: commit1 -> commit2
  396. commit1 = Commit()
  397. commit1.tree = tree.id
  398. commit1.author = commit1.committer = b"Test <test@example.com>"
  399. commit1.commit_time = commit1.author_time = 1234567890
  400. commit1.commit_timezone = commit1.author_timezone = 0
  401. commit1.message = b"First commit"
  402. object_store.add_object(commit1)
  403. commit2 = Commit()
  404. commit2.tree = tree.id
  405. commit2.parents = [commit1.id]
  406. commit2.author = commit2.committer = b"Test <test@example.com>"
  407. commit2.commit_time = commit2.author_time = 1234567891
  408. commit2.commit_timezone = commit2.author_timezone = 0
  409. commit2.message = b"Second commit"
  410. object_store.add_object(commit2)
  411. # Get reachable commits from commit2
  412. reachable = get_reachable_commits(object_store, [commit2.id])
  413. # Should include both commits
  414. self.assertEqual(len(reachable), 2)
  415. self.assertIn(commit1.id, reachable)
  416. self.assertIn(commit2.id, reachable)
  417. def test_write_commit_graph_to_file(self) -> None:
  418. """Test writing commit graph to file."""
  419. from dulwich.object_store import DiskObjectStore
  420. from dulwich.objects import Commit, Tree
  421. # Create a disk object store
  422. object_store_path = os.path.join(self.tempdir, "objects")
  423. os.makedirs(object_store_path, exist_ok=True)
  424. object_store = DiskObjectStore(object_store_path)
  425. self.addCleanup(object_store.close)
  426. # Create a tree and commit
  427. tree = Tree()
  428. object_store.add_object(tree)
  429. commit = Commit()
  430. commit.tree = tree.id
  431. commit.author = b"Test Author <test@example.com>"
  432. commit.committer = b"Test Author <test@example.com>"
  433. commit.commit_time = commit.author_time = 1234567890
  434. commit.commit_timezone = commit.author_timezone = 0
  435. commit.message = b"Test commit"
  436. object_store.add_object(commit)
  437. # Write commit graph using ObjectStore method
  438. object_store.write_commit_graph([commit.id], reachable=False)
  439. # Verify file was created
  440. graph_path = os.path.join(object_store_path, "info", "commit-graph")
  441. self.assertTrue(os.path.exists(graph_path))
  442. # Read back and verify
  443. graph = read_commit_graph(graph_path)
  444. self.assertIsNotNone(graph)
  445. assert graph is not None # For mypy
  446. self.assertEqual(len(graph), 1)
  447. entry = graph.entries[0]
  448. self.assertEqual(entry.commit_id, commit.id)
  449. self.assertEqual(entry.tree_id, commit.tree)
  450. def test_object_store_commit_graph_methods(self) -> None:
  451. """Test ObjectStore commit graph methods."""
  452. from dulwich.object_store import DiskObjectStore
  453. from dulwich.objects import Commit, Tree
  454. # Create a disk object store
  455. object_store_path = os.path.join(self.tempdir, "objects")
  456. os.makedirs(object_store_path, exist_ok=True)
  457. object_store = DiskObjectStore(object_store_path)
  458. # Initially no commit graph
  459. self.assertIsNone(object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  460. # Create a tree and commit
  461. tree = Tree()
  462. object_store.add_object(tree)
  463. commit = Commit()
  464. commit.tree = tree.id
  465. commit.author = b"Test Author <test@example.com>"
  466. commit.committer = b"Test Author <test@example.com>"
  467. commit.commit_time = commit.author_time = 1234567890
  468. commit.commit_timezone = commit.author_timezone = 0
  469. commit.message = b"Test commit"
  470. object_store.add_object(commit)
  471. # Write commit graph (disable reachable to avoid traversal issue)
  472. object_store.write_commit_graph([commit.id], reachable=False)
  473. # Now should have commit graph
  474. self.assertIsNotNone(object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  475. # Test update (should still have commit graph)
  476. object_store.write_commit_graph()
  477. self.assertIsNot(None, object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  478. def test_parents_provider_commit_graph_integration(self) -> None:
  479. """Test that ParentsProvider uses commit graph when available."""
  480. from dulwich.object_store import DiskObjectStore
  481. from dulwich.objects import Commit, Tree
  482. from dulwich.repo import ParentsProvider
  483. # Create a disk object store
  484. object_store_path = os.path.join(self.tempdir, "objects")
  485. os.makedirs(object_store_path, exist_ok=True)
  486. object_store = DiskObjectStore(object_store_path)
  487. # Create a tree and two commits
  488. tree = Tree()
  489. object_store.add_object(tree)
  490. # First commit (no parents)
  491. commit1 = Commit()
  492. commit1.tree = tree.id
  493. commit1.author = commit1.committer = b"Test <test@example.com>"
  494. commit1.commit_time = commit1.author_time = 1234567890
  495. commit1.commit_timezone = commit1.author_timezone = 0
  496. commit1.message = b"First commit"
  497. object_store.add_object(commit1)
  498. # Second commit (child of first)
  499. commit2 = Commit()
  500. commit2.tree = tree.id
  501. commit2.parents = [commit1.id]
  502. commit2.author = commit2.committer = b"Test <test@example.com>"
  503. commit2.commit_time = commit2.author_time = 1234567891
  504. commit2.commit_timezone = commit2.author_timezone = 0
  505. commit2.message = b"Second commit"
  506. object_store.add_object(commit2)
  507. # Write commit graph
  508. object_store.write_commit_graph([commit1.id, commit2.id], reachable=False)
  509. # Test ParentsProvider with commit graph
  510. provider = ParentsProvider(object_store)
  511. # Verify commit graph is loaded
  512. self.assertIsNotNone(provider.commit_graph)
  513. # Test parent lookups
  514. parents1 = provider.get_parents(commit1.id) # type: ignore[no-untyped-call]
  515. self.assertEqual(parents1, [])
  516. parents2 = provider.get_parents(commit2.id) # type: ignore[no-untyped-call]
  517. self.assertEqual(parents2, [commit1.id])
  518. # Test fallback behavior by creating provider without commit graph
  519. object_store_no_graph_path = os.path.join(self.tempdir, "objects2")
  520. os.makedirs(object_store_no_graph_path, exist_ok=True)
  521. object_store_no_graph = DiskObjectStore(object_store_no_graph_path)
  522. self.addCleanup(object_store_no_graph.close)
  523. object_store_no_graph.add_object(tree)
  524. object_store_no_graph.add_object(commit1)
  525. object_store_no_graph.add_object(commit2)
  526. provider_no_graph = ParentsProvider(object_store_no_graph)
  527. self.assertIsNone(provider_no_graph.commit_graph)
  528. # Should still work via commit object fallback
  529. parents1_fallback = provider_no_graph.get_parents(commit1.id) # type: ignore[no-untyped-call]
  530. self.assertEqual(parents1_fallback, [])
  531. parents2_fallback = provider_no_graph.get_parents(commit2.id) # type: ignore[no-untyped-call]
  532. self.assertEqual(parents2_fallback, [commit1.id])
  533. def test_graph_operations_use_commit_graph(self) -> None:
  534. """Test that graph operations use commit graph when available."""
  535. from dulwich.graph import can_fast_forward, find_merge_base
  536. from dulwich.object_store import DiskObjectStore
  537. from dulwich.objects import Commit, Tree
  538. from dulwich.repo import Repo
  539. # Create a disk object store
  540. object_store_path = os.path.join(self.tempdir, "objects")
  541. os.makedirs(object_store_path, exist_ok=True)
  542. object_store = DiskObjectStore(object_store_path)
  543. # Create a tree and a more complex commit graph for testing
  544. tree = Tree()
  545. object_store.add_object(tree)
  546. # Create commit chain: commit1 -> commit2 -> commit3
  547. # \-> commit4 -> commit5 (merge)
  548. commit1 = Commit()
  549. commit1.tree = tree.id
  550. commit1.author = commit1.committer = b"Test <test@example.com>"
  551. commit1.commit_time = commit1.author_time = 1234567890
  552. commit1.commit_timezone = commit1.author_timezone = 0
  553. commit1.message = b"First commit"
  554. object_store.add_object(commit1)
  555. commit2 = Commit()
  556. commit2.tree = tree.id
  557. commit2.parents = [commit1.id]
  558. commit2.author = commit2.committer = b"Test <test@example.com>"
  559. commit2.commit_time = commit2.author_time = 1234567891
  560. commit2.commit_timezone = commit2.author_timezone = 0
  561. commit2.message = b"Second commit"
  562. object_store.add_object(commit2)
  563. commit3 = Commit()
  564. commit3.tree = tree.id
  565. commit3.parents = [commit2.id]
  566. commit3.author = commit3.committer = b"Test <test@example.com>"
  567. commit3.commit_time = commit3.author_time = 1234567892
  568. commit3.commit_timezone = commit3.author_timezone = 0
  569. commit3.message = b"Third commit"
  570. object_store.add_object(commit3)
  571. # Branch from commit2
  572. commit4 = Commit()
  573. commit4.tree = tree.id
  574. commit4.parents = [commit2.id]
  575. commit4.author = commit4.committer = b"Test <test@example.com>"
  576. commit4.commit_time = commit4.author_time = 1234567893
  577. commit4.commit_timezone = commit4.author_timezone = 0
  578. commit4.message = b"Fourth commit (branch)"
  579. object_store.add_object(commit4)
  580. # Merge commit
  581. commit5 = Commit()
  582. commit5.tree = tree.id
  583. commit5.parents = [commit3.id, commit4.id]
  584. commit5.author = commit5.committer = b"Test <test@example.com>"
  585. commit5.commit_time = commit5.author_time = 1234567894
  586. commit5.commit_timezone = commit5.author_timezone = 0
  587. commit5.message = b"Merge commit"
  588. object_store.add_object(commit5)
  589. # Create refs
  590. refs_path = os.path.join(self.tempdir, "refs")
  591. os.makedirs(refs_path, exist_ok=True)
  592. repo_path = self.tempdir
  593. repo = Repo.init(repo_path)
  594. repo.object_store = object_store
  595. # Test graph operations WITHOUT commit graph first
  596. merge_base_no_graph = find_merge_base(repo, [commit3.id, commit4.id])
  597. can_ff_no_graph = can_fast_forward(repo, commit1.id, commit3.id)
  598. # Now write commit graph
  599. object_store.write_commit_graph(
  600. [commit1.id, commit2.id, commit3.id, commit4.id, commit5.id],
  601. reachable=False,
  602. )
  603. # Verify commit graph is loaded by creating new repo instance
  604. repo2 = Repo(repo_path)
  605. self.addCleanup(repo2.close)
  606. repo2.object_store = object_store
  607. # Verify commit graph is available
  608. commit_graph = repo2.object_store.get_commit_graph() # type: ignore[no-untyped-call]
  609. self.assertIsNotNone(commit_graph)
  610. # Test graph operations WITH commit graph
  611. merge_base_with_graph = find_merge_base(repo2, [commit3.id, commit4.id])
  612. can_ff_with_graph = can_fast_forward(repo2, commit1.id, commit3.id)
  613. # Results should be identical
  614. self.assertEqual(
  615. merge_base_no_graph,
  616. merge_base_with_graph,
  617. "Merge base should be same with and without commit graph",
  618. )
  619. self.assertEqual(
  620. can_ff_no_graph,
  621. can_ff_with_graph,
  622. "Fast-forward detection should be same with and without commit graph",
  623. )
  624. # Expected results
  625. self.assertEqual(
  626. merge_base_with_graph,
  627. [commit2.id],
  628. "Merge base of commit3 and commit4 should be commit2",
  629. )
  630. self.assertTrue(
  631. can_ff_with_graph, "Should be able to fast-forward from commit1 to commit3"
  632. )
  633. # Test that ParentsProvider in the repo uses commit graph
  634. parents_provider = repo2.parents_provider()
  635. self.assertIsNotNone(
  636. parents_provider.commit_graph,
  637. "Repository's parents provider should have commit graph",
  638. )
  639. # Verify parent lookups work through the provider
  640. self.assertEqual(parents_provider.get_parents(commit1.id), []) # type: ignore[no-untyped-call]
  641. self.assertEqual(parents_provider.get_parents(commit2.id), [commit1.id]) # type: ignore[no-untyped-call]
  642. self.assertEqual(
  643. parents_provider.get_parents(commit5.id),
  644. [commit3.id, commit4.id], # type: ignore[no-untyped-call]
  645. )
  646. def test_performance_with_commit_graph(self) -> None:
  647. """Test that using commit graph provides performance benefits."""
  648. from dulwich.graph import find_merge_base
  649. from dulwich.object_store import DiskObjectStore
  650. from dulwich.objects import Commit, Tree
  651. from dulwich.repo import Repo
  652. # Create a larger commit history to better measure performance
  653. object_store_path = os.path.join(self.tempdir, "objects")
  654. os.makedirs(object_store_path, exist_ok=True)
  655. object_store = DiskObjectStore(object_store_path)
  656. tree = Tree()
  657. object_store.add_object(tree)
  658. # Create a chain of 20 commits
  659. commits: list[Commit] = []
  660. for i in range(20):
  661. commit = Commit()
  662. commit.tree = tree.id
  663. if i > 0:
  664. commit.parents = [commits[i - 1].id]
  665. commit.author = commit.committer = b"Test <test@example.com>"
  666. commit.commit_time = commit.author_time = 1234567890 + i
  667. commit.commit_timezone = commit.author_timezone = 0
  668. commit.message = f"Commit {i}".encode()
  669. object_store.add_object(commit)
  670. commits.append(commit)
  671. # Create repository
  672. repo_path = self.tempdir
  673. repo = Repo.init(repo_path)
  674. repo.object_store = object_store
  675. # Time operations without commit graph
  676. for _ in range(10): # Run multiple times for better measurement
  677. find_merge_base(repo, [commits[0].id, commits[-1].id])
  678. # Write commit graph
  679. object_store.write_commit_graph([c.id for c in commits], reachable=False)
  680. # Create new repo instance to pick up commit graph
  681. repo2 = Repo(repo_path)
  682. self.addCleanup(repo2.close)
  683. repo2.object_store = object_store
  684. # Verify commit graph is loaded
  685. self.assertIsNotNone(repo2.object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  686. # Time operations with commit graph
  687. for _ in range(10): # Run multiple times for better measurement
  688. find_merge_base(repo2, [commits[0].id, commits[-1].id])
  689. # With commit graph should be at least as fast (usually faster)
  690. # We don't assert a specific speedup since it depends on the machine
  691. # But we verify both approaches give the same result
  692. result_no_graph = find_merge_base(repo, [commits[0].id, commits[-1].id])
  693. result_with_graph = find_merge_base(repo2, [commits[0].id, commits[-1].id])
  694. self.assertEqual(
  695. result_no_graph,
  696. result_with_graph,
  697. "Results should be identical with and without commit graph",
  698. )
  699. self.assertEqual(
  700. result_with_graph, [commits[0].id], "Merge base should be the first commit"
  701. )
  702. if __name__ == "__main__":
  703. unittest.main()