2
0

test_commit_graph.py 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972
  1. # test_commit_graph.py -- Tests for commit graph functionality
  2. # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. """Tests for Git commit graph functionality."""
  10. import io
  11. import os
  12. import struct
  13. import tempfile
  14. import unittest
  15. from dulwich.commit_graph import (
  16. CHUNK_COMMIT_DATA,
  17. CHUNK_EXTRA_EDGE_LIST,
  18. CHUNK_OID_FANOUT,
  19. CHUNK_OID_LOOKUP,
  20. COMMIT_GRAPH_SIGNATURE,
  21. COMMIT_GRAPH_VERSION,
  22. GRAPH_EXTRA_EDGES_NEEDED,
  23. GRAPH_PARENT_MISSING,
  24. HASH_VERSION_SHA1,
  25. CommitGraph,
  26. CommitGraphChunk,
  27. CommitGraphEntry,
  28. find_commit_graph_file,
  29. generate_commit_graph,
  30. get_reachable_commits,
  31. read_commit_graph,
  32. )
  33. from dulwich.object_format import SHA1
  34. class CommitGraphEntryTests(unittest.TestCase):
  35. """Tests for CommitGraphEntry."""
  36. def test_init(self) -> None:
  37. commit_id = b"a" * 40
  38. tree_id = b"b" * 40
  39. parents = [b"c" * 40, b"d" * 40]
  40. generation = 42
  41. commit_time = 1234567890
  42. entry = CommitGraphEntry(commit_id, tree_id, parents, generation, commit_time)
  43. self.assertEqual(entry.commit_id, commit_id)
  44. self.assertEqual(entry.tree_id, tree_id)
  45. self.assertEqual(entry.parents, parents)
  46. self.assertEqual(entry.generation, generation)
  47. self.assertEqual(entry.commit_time, commit_time)
  48. def test_repr(self) -> None:
  49. entry = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
  50. repr_str = repr(entry)
  51. self.assertIn("CommitGraphEntry", repr_str)
  52. self.assertIn("generation=1", repr_str)
  53. class CommitGraphChunkTests(unittest.TestCase):
  54. """Tests for CommitGraphChunk."""
  55. def test_init(self) -> None:
  56. chunk = CommitGraphChunk(b"TEST", b"test data")
  57. self.assertEqual(chunk.chunk_id, b"TEST")
  58. self.assertEqual(chunk.data, b"test data")
  59. def test_repr(self) -> None:
  60. chunk = CommitGraphChunk(b"TEST", b"x" * 100)
  61. repr_str = repr(chunk)
  62. self.assertIn("CommitGraphChunk", repr_str)
  63. self.assertIn("size=100", repr_str)
  64. class CommitGraphTests(unittest.TestCase):
  65. """Tests for CommitGraph."""
  66. def test_init(self) -> None:
  67. graph = CommitGraph(object_format=SHA1)
  68. self.assertEqual(graph.hash_version, HASH_VERSION_SHA1)
  69. self.assertEqual(len(graph.entries), 0)
  70. self.assertEqual(len(graph.chunks), 0)
  71. def test_len(self) -> None:
  72. graph = CommitGraph(object_format=SHA1)
  73. self.assertEqual(len(graph), 0)
  74. # Add a dummy entry
  75. entry = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
  76. graph.entries.append(entry)
  77. self.assertEqual(len(graph), 1)
  78. def test_iter(self) -> None:
  79. graph = CommitGraph(object_format=SHA1)
  80. entry1 = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
  81. entry2 = CommitGraphEntry(b"c" * 40, b"d" * 40, [], 2, 2000)
  82. graph.entries.extend([entry1, entry2])
  83. entries = list(graph)
  84. self.assertEqual(len(entries), 2)
  85. self.assertEqual(entries[0], entry1)
  86. self.assertEqual(entries[1], entry2)
  87. def test_get_entry_by_oid_missing(self) -> None:
  88. graph = CommitGraph(object_format=SHA1)
  89. result = graph.get_entry_by_oid(b"f" * 40)
  90. self.assertIsNone(result)
  91. def test_get_generation_number_missing(self) -> None:
  92. graph = CommitGraph(object_format=SHA1)
  93. result = graph.get_generation_number(b"f" * 40)
  94. self.assertIsNone(result)
  95. def test_get_parents_missing(self) -> None:
  96. graph = CommitGraph(object_format=SHA1)
  97. result = graph.get_parents(b"f" * 40)
  98. self.assertIsNone(result)
  99. def test_from_invalid_signature(self) -> None:
  100. data = b"XXXX" + b"\\x00" * 100
  101. f = io.BytesIO(data)
  102. try:
  103. with self.assertRaises(ValueError) as cm:
  104. CommitGraph.from_file(f)
  105. self.assertIn("Invalid commit graph signature", str(cm.exception))
  106. finally:
  107. f.close()
  108. def test_from_invalid_version(self) -> None:
  109. data = COMMIT_GRAPH_SIGNATURE + struct.pack(">B", 99) + b"\\x00" * 100
  110. f = io.BytesIO(data)
  111. try:
  112. with self.assertRaises(ValueError) as cm:
  113. CommitGraph.from_file(f)
  114. self.assertIn("Unsupported commit graph version", str(cm.exception))
  115. finally:
  116. f.close()
  117. def test_from_invalid_hash_version(self) -> None:
  118. data = (
  119. COMMIT_GRAPH_SIGNATURE
  120. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  121. + struct.pack(">B", 99) # Invalid hash version
  122. + b"\\x00" * 100
  123. )
  124. f = io.BytesIO(data)
  125. try:
  126. with self.assertRaises(ValueError) as cm:
  127. CommitGraph.from_file(f)
  128. self.assertIn("Unsupported hash version", str(cm.exception))
  129. finally:
  130. f.close()
  131. def create_minimal_commit_graph_data(self) -> bytes:
  132. """Create minimal valid commit graph data for testing."""
  133. # Create the data in order and calculate offsets properly
  134. # Header: signature + version + hash_version + num_chunks + base_graph_count
  135. header = (
  136. COMMIT_GRAPH_SIGNATURE
  137. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  138. + struct.pack(">B", HASH_VERSION_SHA1)
  139. + struct.pack(">B", 3) # 3 chunks
  140. + struct.pack(">B", 0)
  141. ) # 0 base graphs
  142. # Table of contents: 4 entries (3 chunks + terminator) = 4 * 12 = 48 bytes
  143. toc_size = 4 * 12
  144. # Calculate chunk offsets from start of file
  145. header_size = 8
  146. chunk1_offset = header_size + toc_size # OID Fanout
  147. chunk2_offset = chunk1_offset + 256 * 4 # OID Lookup (after fanout)
  148. chunk3_offset = chunk2_offset + 20 # Commit Data (after 1 commit * 20 bytes)
  149. terminator_offset = (
  150. chunk3_offset + 36
  151. ) # After commit data (1 commit * 36 bytes)
  152. # Build table of contents
  153. toc = (
  154. CHUNK_OID_FANOUT
  155. + struct.pack(">Q", chunk1_offset)
  156. + CHUNK_OID_LOOKUP
  157. + struct.pack(">Q", chunk2_offset)
  158. + CHUNK_COMMIT_DATA
  159. + struct.pack(">Q", chunk3_offset)
  160. + b"\x00\x00\x00\x00"
  161. + struct.pack(">Q", terminator_offset)
  162. )
  163. # OID Fanout chunk (256 * 4 bytes)
  164. fanout = b""
  165. for i in range(256):
  166. if i < 0xAA: # Our test commit starts with 0xaa
  167. fanout += struct.pack(">L", 0)
  168. else:
  169. fanout += struct.pack(">L", 1) # 1 commit total
  170. # OID Lookup chunk (1 commit = 20 bytes)
  171. commit_oid = b"\xaa" + b"\x00" * 19
  172. oid_lookup = commit_oid
  173. # Commit Data chunk (1 commit = 20 + 16 = 36 bytes)
  174. tree_oid = b"\xbb" + b"\x00" * 19
  175. parent1_pos = 0x70000000 # GRAPH_PARENT_MISSING
  176. parent2_pos = 0x70000000 # GRAPH_PARENT_MISSING
  177. generation = 1
  178. commit_time = 1234567890
  179. gen_and_time = (generation << 2) | (commit_time >> 32)
  180. commit_data = (
  181. tree_oid
  182. + struct.pack(">LL", parent1_pos, parent2_pos)
  183. + struct.pack(">LL", gen_and_time, commit_time & 0xFFFFFFFF)
  184. )
  185. return header + toc + fanout + oid_lookup + commit_data
  186. def test_from_minimal_valid_file(self) -> None:
  187. """Test parsing a minimal but valid commit graph file."""
  188. data = self.create_minimal_commit_graph_data()
  189. f = io.BytesIO(data)
  190. graph = CommitGraph.from_file(f)
  191. self.assertEqual(graph.hash_version, HASH_VERSION_SHA1)
  192. self.assertEqual(len(graph), 1)
  193. # Check the parsed entry
  194. entry = graph.entries[0]
  195. self.assertEqual(entry.commit_id, b"aa" + b"00" * 19)
  196. self.assertEqual(entry.tree_id, b"bb" + b"00" * 19)
  197. self.assertEqual(entry.parents, []) # No parents
  198. self.assertEqual(entry.generation, 1)
  199. self.assertEqual(entry.commit_time, 1234567890)
  200. # Test lookup methods
  201. commit_oid = b"aa" + b"00" * 19
  202. self.assertEqual(graph.get_generation_number(commit_oid), 1)
  203. self.assertEqual(graph.get_parents(commit_oid), [])
  204. self.assertIsNotNone(graph.get_entry_by_oid(commit_oid))
  205. def test_missing_required_chunks(self) -> None:
  206. """Test error handling for missing required chunks."""
  207. # Create data with header but no chunks
  208. header = (
  209. COMMIT_GRAPH_SIGNATURE
  210. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  211. + struct.pack(">B", HASH_VERSION_SHA1)
  212. + struct.pack(">B", 0) # 0 chunks
  213. + struct.pack(">B", 0)
  214. )
  215. # TOC with just terminator
  216. toc = b"\\x00\\x00\\x00\\x00" + struct.pack(">Q", 12)
  217. data = header + toc
  218. f = io.BytesIO(data)
  219. with self.assertRaises(ValueError) as cm:
  220. CommitGraph.from_file(f)
  221. self.assertIn("Missing required OID lookup chunk", str(cm.exception))
  222. def test_write_empty_graph_raises(self) -> None:
  223. """Test that writing empty graph raises ValueError."""
  224. graph = CommitGraph(object_format=SHA1)
  225. f = io.BytesIO()
  226. with self.assertRaises(ValueError):
  227. graph.write_to_file(f)
  228. def test_write_and_read_round_trip(self) -> None:
  229. """Test writing and reading a commit graph."""
  230. # Create a simple commit graph
  231. graph = CommitGraph(object_format=SHA1)
  232. entry = CommitGraphEntry(
  233. commit_id=b"aa" + b"00" * 19,
  234. tree_id=b"bb" + b"00" * 19,
  235. parents=[],
  236. generation=1,
  237. commit_time=1234567890,
  238. )
  239. graph.entries.append(entry)
  240. graph._oid_to_index = {bytes.fromhex(entry.commit_id.decode()): 0}
  241. # Write to bytes
  242. f = io.BytesIO()
  243. graph.write_to_file(f)
  244. # Read back
  245. f.seek(0)
  246. read_graph = CommitGraph.from_file(f)
  247. # Verify
  248. self.assertEqual(len(read_graph), 1)
  249. read_entry = read_graph.entries[0]
  250. self.assertEqual(read_entry.commit_id, entry.commit_id)
  251. self.assertEqual(read_entry.tree_id, entry.tree_id)
  252. self.assertEqual(read_entry.parents, entry.parents)
  253. self.assertEqual(read_entry.generation, entry.generation)
  254. self.assertEqual(read_entry.commit_time, entry.commit_time)
  255. def test_parse_extra_edges_incomplete_data(self) -> None:
  256. """Test parsing extra edges with incomplete data at end of chunk.
  257. This reproduces https://github.com/jelmer/dulwich/issues/2054
  258. where parsing commits with 3+ parents fails when the EXTRA_EDGE_LIST
  259. chunk has incomplete data (fewer than 4 bytes remaining).
  260. """
  261. # Create commit graph with 4 commits:
  262. # commit0, commit1, commit2, commit3 (with 3 parents: 0, 1, 2)
  263. # Header
  264. header = (
  265. COMMIT_GRAPH_SIGNATURE
  266. + struct.pack(">B", COMMIT_GRAPH_VERSION)
  267. + struct.pack(">B", HASH_VERSION_SHA1)
  268. + struct.pack(">B", 4) # 4 chunks
  269. + struct.pack(">B", 0) # 0 base graphs
  270. )
  271. # Create 4 commit OIDs
  272. oids = [b"\x00" + bytes([i]) + b"\x00" * 18 for i in range(4)]
  273. # OID Fanout chunk
  274. # All 4 commits start with 0x00, so fanout is 4 for all indices
  275. fanout = b""
  276. for i in range(256):
  277. fanout += struct.pack(">L", 4)
  278. # OID Lookup chunk
  279. oid_lookup = b"".join(oids)
  280. # Commit Data chunk - commit3 has 3+ parents
  281. commit_data = b""
  282. for i in range(3):
  283. # Regular commits with no parents
  284. tree_oid = b"\xbb" + bytes([i]) + b"\x00" * 18
  285. parent1_pos = GRAPH_PARENT_MISSING
  286. parent2_pos = GRAPH_PARENT_MISSING
  287. generation = 1
  288. commit_time = 1234567890 + i
  289. gen_and_time = (generation << 2) | (commit_time >> 32)
  290. commit_data += (
  291. tree_oid
  292. + struct.pack(">LL", parent1_pos, parent2_pos)
  293. + struct.pack(">LL", gen_and_time, commit_time & 0xFFFFFFFF)
  294. )
  295. # Commit3 with 3 parents (requires EXTRA_EDGE_LIST)
  296. tree_oid = b"\xbb\x03" + b"\x00" * 18
  297. parent1_pos = 0 # First parent is commit0
  298. parent2_pos = (
  299. GRAPH_EXTRA_EDGES_NEEDED | 0
  300. ) # More parents in EXTRA_EDGE_LIST at offset 0
  301. generation = 2
  302. commit_time = 1234567893
  303. gen_and_time = (generation << 2) | (commit_time >> 32)
  304. commit_data += (
  305. tree_oid
  306. + struct.pack(">LL", parent1_pos, parent2_pos)
  307. + struct.pack(">LL", gen_and_time, commit_time & 0xFFFFFFFF)
  308. )
  309. # EXTRA_EDGE_LIST chunk with INCOMPLETE data
  310. # Should have: parent at index 1 (4 bytes), then parent at index 2 with LAST_EDGE flag (4 bytes)
  311. # But we only include the first parent and 2 bytes of the second (triggers the bug)
  312. extra_edges = struct.pack(">L", 1) # Second parent (commit1)
  313. extra_edges += b"\x80" # Only 1 byte of what should be a 4-byte entry - this triggers the bug
  314. # Calculate offsets
  315. header_size = 8
  316. toc_size = 5 * 12 # 5 entries (4 chunks + terminator)
  317. chunk1_offset = header_size + toc_size
  318. chunk2_offset = chunk1_offset + len(fanout)
  319. chunk3_offset = chunk2_offset + len(oid_lookup)
  320. chunk4_offset = chunk3_offset + len(commit_data)
  321. terminator_offset = chunk4_offset + len(extra_edges)
  322. # Table of contents
  323. toc = (
  324. CHUNK_OID_FANOUT
  325. + struct.pack(">Q", chunk1_offset)
  326. + CHUNK_OID_LOOKUP
  327. + struct.pack(">Q", chunk2_offset)
  328. + CHUNK_COMMIT_DATA
  329. + struct.pack(">Q", chunk3_offset)
  330. + CHUNK_EXTRA_EDGE_LIST
  331. + struct.pack(">Q", chunk4_offset)
  332. + b"\x00\x00\x00\x00"
  333. + struct.pack(">Q", terminator_offset)
  334. )
  335. # Build complete file
  336. data = header + toc + fanout + oid_lookup + commit_data + extra_edges
  337. # Before the fix, this should raise struct.error: unpack requires a buffer of 4 bytes
  338. # After the fix, it should handle the incomplete data gracefully
  339. f = io.BytesIO(data)
  340. graph = CommitGraph.from_file(f)
  341. # Verify we parsed what we could
  342. self.assertEqual(len(graph), 4)
  343. # Commit3 should have parsed parents (commit0 and commit1)
  344. # The incomplete third parent entry should be ignored
  345. entry3 = graph.entries[3]
  346. self.assertEqual(len(entry3.parents), 2) # Should have commit0 and commit1
  347. class CommitGraphFileOperationsTests(unittest.TestCase):
  348. """Tests for commit graph file operations."""
  349. def setUp(self) -> None:
  350. self.tempdir = tempfile.mkdtemp()
  351. def tearDown(self) -> None:
  352. import shutil
  353. shutil.rmtree(self.tempdir, ignore_errors=True)
  354. def test_read_commit_graph_missing_file(self) -> None:
  355. """Test reading from non-existent file."""
  356. missing_path = os.path.join(self.tempdir, "missing.graph")
  357. result = read_commit_graph(missing_path)
  358. self.assertIsNone(result)
  359. def test_read_commit_graph_invalid_file(self) -> None:
  360. """Test reading from invalid file."""
  361. invalid_path = os.path.join(self.tempdir, "invalid.graph")
  362. with open(invalid_path, "wb") as f:
  363. f.write(b"invalid data")
  364. with self.assertRaises(ValueError):
  365. read_commit_graph(invalid_path)
  366. def test_find_commit_graph_file_missing(self) -> None:
  367. """Test finding commit graph file when it doesn't exist."""
  368. result = find_commit_graph_file(self.tempdir)
  369. self.assertIsNone(result)
  370. def test_find_commit_graph_file_standard_location(self) -> None:
  371. """Test finding commit graph file in standard location."""
  372. # Create .git/objects/info/commit-graph
  373. objects_dir = os.path.join(self.tempdir, "objects")
  374. info_dir = os.path.join(objects_dir, "info")
  375. os.makedirs(info_dir)
  376. graph_path = os.path.join(info_dir, "commit-graph")
  377. with open(graph_path, "wb") as f:
  378. f.write(b"dummy")
  379. result = find_commit_graph_file(self.tempdir)
  380. self.assertEqual(result, graph_path.encode())
  381. def test_find_commit_graph_file_chain_location(self) -> None:
  382. """Test finding commit graph file in chain location."""
  383. # Create .git/objects/info/commit-graphs/graph-{hash}.graph
  384. objects_dir = os.path.join(self.tempdir, "objects")
  385. info_dir = os.path.join(objects_dir, "info")
  386. graphs_dir = os.path.join(info_dir, "commit-graphs")
  387. os.makedirs(graphs_dir)
  388. graph_path = os.path.join(graphs_dir, "graph-abc123.graph")
  389. with open(graph_path, "wb") as f:
  390. f.write(b"dummy")
  391. result = find_commit_graph_file(self.tempdir)
  392. self.assertEqual(result, graph_path.encode())
  393. def test_find_commit_graph_file_prefers_standard(self) -> None:
  394. """Test that standard location is preferred over chain location."""
  395. # Create both locations
  396. objects_dir = os.path.join(self.tempdir, "objects")
  397. info_dir = os.path.join(objects_dir, "info")
  398. graphs_dir = os.path.join(info_dir, "commit-graphs")
  399. os.makedirs(info_dir)
  400. os.makedirs(graphs_dir)
  401. # Standard location
  402. standard_path = os.path.join(info_dir, "commit-graph")
  403. with open(standard_path, "wb") as f:
  404. f.write(b"standard")
  405. # Chain location
  406. chain_path = os.path.join(graphs_dir, "graph-abc123.graph")
  407. with open(chain_path, "wb") as f:
  408. f.write(b"chain")
  409. result = find_commit_graph_file(self.tempdir)
  410. self.assertEqual(result, standard_path.encode())
  411. class CommitGraphGenerationTests(unittest.TestCase):
  412. """Tests for commit graph generation functionality."""
  413. def setUp(self) -> None:
  414. self.tempdir = tempfile.mkdtemp()
  415. def tearDown(self) -> None:
  416. import shutil
  417. shutil.rmtree(self.tempdir, ignore_errors=True)
  418. def test_generate_commit_graph_empty(self) -> None:
  419. """Test generating commit graph with no commits."""
  420. from dulwich.object_store import MemoryObjectStore
  421. object_store = MemoryObjectStore()
  422. graph = generate_commit_graph(object_store, [])
  423. self.assertEqual(len(graph), 0)
  424. def test_generate_commit_graph_single_commit(self) -> None:
  425. """Test generating commit graph with single commit."""
  426. from dulwich.object_store import MemoryObjectStore
  427. from dulwich.objects import Commit, Tree
  428. object_store = MemoryObjectStore()
  429. # Create a tree and commit
  430. tree = Tree()
  431. object_store.add_object(tree)
  432. commit = Commit()
  433. commit.tree = tree.id
  434. commit.author = b"Test Author <test@example.com>"
  435. commit.committer = b"Test Author <test@example.com>"
  436. commit.commit_time = commit.author_time = 1234567890
  437. commit.commit_timezone = commit.author_timezone = 0
  438. commit.message = b"Test commit"
  439. object_store.add_object(commit)
  440. # Generate graph
  441. graph = generate_commit_graph(object_store, [commit.id])
  442. self.assertEqual(len(graph), 1)
  443. entry = graph.entries[0]
  444. self.assertEqual(entry.commit_id, commit.id)
  445. self.assertEqual(entry.tree_id, commit.tree)
  446. self.assertEqual(entry.parents, [])
  447. self.assertEqual(entry.generation, 1)
  448. self.assertEqual(entry.commit_time, 1234567890)
  449. def test_generate_commit_graph_oid_index_uses_binary_keys(self) -> None:
  450. """Test that generated commit graph _oid_to_index uses binary RawObjectID keys."""
  451. from dulwich.object_store import MemoryObjectStore
  452. from dulwich.objects import Commit, Tree, hex_to_sha
  453. object_store = MemoryObjectStore()
  454. # Create a tree and commit
  455. tree = Tree()
  456. object_store.add_object(tree)
  457. commit = Commit()
  458. commit.tree = tree.id
  459. commit.author = b"Test Author <test@example.com>"
  460. commit.committer = b"Test Author <test@example.com>"
  461. commit.commit_time = commit.author_time = 1234567890
  462. commit.commit_timezone = commit.author_timezone = 0
  463. commit.message = b"Test commit"
  464. object_store.add_object(commit)
  465. # Generate graph
  466. graph = generate_commit_graph(object_store, [commit.id])
  467. # Verify _oid_to_index uses binary keys (RawObjectID)
  468. # commit.id is hex (ObjectID), so we need to convert to binary for lookup
  469. binary_commit_id = hex_to_sha(commit.id)
  470. self.assertIn(binary_commit_id, graph._oid_to_index)
  471. self.assertEqual(graph._oid_to_index[binary_commit_id], 0)
  472. # Verify lookup with hex ObjectID works via get_entry_by_oid
  473. entry = graph.get_entry_by_oid(commit.id)
  474. self.assertIsNotNone(entry)
  475. assert entry is not None
  476. self.assertEqual(entry.commit_id, commit.id)
  477. # Verify lookup with binary RawObjectID also works
  478. entry_binary = graph.get_entry_by_oid(binary_commit_id)
  479. self.assertIsNotNone(entry_binary)
  480. assert entry_binary is not None
  481. self.assertEqual(entry_binary.commit_id, commit.id)
  482. def test_get_reachable_commits(self) -> None:
  483. """Test getting reachable commits."""
  484. from dulwich.object_store import MemoryObjectStore
  485. from dulwich.objects import Commit, Tree
  486. object_store = MemoryObjectStore()
  487. # Create tree
  488. tree = Tree()
  489. object_store.add_object(tree)
  490. # Create commit chain: commit1 -> commit2
  491. commit1 = Commit()
  492. commit1.tree = tree.id
  493. commit1.author = commit1.committer = b"Test <test@example.com>"
  494. commit1.commit_time = commit1.author_time = 1234567890
  495. commit1.commit_timezone = commit1.author_timezone = 0
  496. commit1.message = b"First commit"
  497. object_store.add_object(commit1)
  498. commit2 = Commit()
  499. commit2.tree = tree.id
  500. commit2.parents = [commit1.id]
  501. commit2.author = commit2.committer = b"Test <test@example.com>"
  502. commit2.commit_time = commit2.author_time = 1234567891
  503. commit2.commit_timezone = commit2.author_timezone = 0
  504. commit2.message = b"Second commit"
  505. object_store.add_object(commit2)
  506. # Get reachable commits from commit2
  507. reachable = get_reachable_commits(object_store, [commit2.id])
  508. # Should include both commits
  509. self.assertEqual(len(reachable), 2)
  510. self.assertIn(commit1.id, reachable)
  511. self.assertIn(commit2.id, reachable)
  512. def test_write_commit_graph_to_file(self) -> None:
  513. """Test writing commit graph to file."""
  514. from dulwich.object_store import DiskObjectStore
  515. from dulwich.objects import Commit, Tree
  516. # Create a disk object store
  517. object_store_path = os.path.join(self.tempdir, "objects")
  518. os.makedirs(object_store_path, exist_ok=True)
  519. object_store = DiskObjectStore(object_store_path)
  520. self.addCleanup(object_store.close)
  521. # Create a tree and commit
  522. tree = Tree()
  523. object_store.add_object(tree)
  524. commit = Commit()
  525. commit.tree = tree.id
  526. commit.author = b"Test Author <test@example.com>"
  527. commit.committer = b"Test Author <test@example.com>"
  528. commit.commit_time = commit.author_time = 1234567890
  529. commit.commit_timezone = commit.author_timezone = 0
  530. commit.message = b"Test commit"
  531. object_store.add_object(commit)
  532. # Write commit graph using ObjectStore method
  533. object_store.write_commit_graph([commit.id], reachable=False)
  534. # Verify file was created
  535. graph_path = os.path.join(object_store_path, "info", "commit-graph")
  536. self.assertTrue(os.path.exists(graph_path))
  537. # Read back and verify
  538. graph = read_commit_graph(graph_path)
  539. self.assertIsNotNone(graph)
  540. assert graph is not None # For mypy
  541. self.assertEqual(len(graph), 1)
  542. entry = graph.entries[0]
  543. self.assertEqual(entry.commit_id, commit.id)
  544. self.assertEqual(entry.tree_id, commit.tree)
  545. def test_object_store_commit_graph_methods(self) -> None:
  546. """Test ObjectStore commit graph methods."""
  547. from dulwich.object_store import DiskObjectStore
  548. from dulwich.objects import Commit, Tree
  549. # Create a disk object store
  550. object_store_path = os.path.join(self.tempdir, "objects")
  551. os.makedirs(object_store_path, exist_ok=True)
  552. object_store = DiskObjectStore(object_store_path)
  553. # Initially no commit graph
  554. self.assertIsNone(object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  555. # Create a tree and commit
  556. tree = Tree()
  557. object_store.add_object(tree)
  558. commit = Commit()
  559. commit.tree = tree.id
  560. commit.author = b"Test Author <test@example.com>"
  561. commit.committer = b"Test Author <test@example.com>"
  562. commit.commit_time = commit.author_time = 1234567890
  563. commit.commit_timezone = commit.author_timezone = 0
  564. commit.message = b"Test commit"
  565. object_store.add_object(commit)
  566. # Write commit graph (disable reachable to avoid traversal issue)
  567. object_store.write_commit_graph([commit.id], reachable=False)
  568. # Now should have commit graph
  569. self.assertIsNotNone(object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  570. # Test update (should still have commit graph)
  571. object_store.write_commit_graph()
  572. self.assertIsNot(None, object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  573. def test_parents_provider_commit_graph_integration(self) -> None:
  574. """Test that ParentsProvider uses commit graph when available."""
  575. from dulwich.object_store import DiskObjectStore
  576. from dulwich.objects import Commit, Tree
  577. from dulwich.repo import ParentsProvider
  578. # Create a disk object store
  579. object_store_path = os.path.join(self.tempdir, "objects")
  580. os.makedirs(object_store_path, exist_ok=True)
  581. object_store = DiskObjectStore(object_store_path)
  582. # Create a tree and two commits
  583. tree = Tree()
  584. object_store.add_object(tree)
  585. # First commit (no parents)
  586. commit1 = Commit()
  587. commit1.tree = tree.id
  588. commit1.author = commit1.committer = b"Test <test@example.com>"
  589. commit1.commit_time = commit1.author_time = 1234567890
  590. commit1.commit_timezone = commit1.author_timezone = 0
  591. commit1.message = b"First commit"
  592. object_store.add_object(commit1)
  593. # Second commit (child of first)
  594. commit2 = Commit()
  595. commit2.tree = tree.id
  596. commit2.parents = [commit1.id]
  597. commit2.author = commit2.committer = b"Test <test@example.com>"
  598. commit2.commit_time = commit2.author_time = 1234567891
  599. commit2.commit_timezone = commit2.author_timezone = 0
  600. commit2.message = b"Second commit"
  601. object_store.add_object(commit2)
  602. # Write commit graph
  603. object_store.write_commit_graph([commit1.id, commit2.id], reachable=False)
  604. # Test ParentsProvider with commit graph
  605. provider = ParentsProvider(object_store)
  606. # Verify commit graph is loaded
  607. self.assertIsNotNone(provider.commit_graph)
  608. # Test parent lookups
  609. parents1 = provider.get_parents(commit1.id) # type: ignore[no-untyped-call]
  610. self.assertEqual(parents1, [])
  611. parents2 = provider.get_parents(commit2.id) # type: ignore[no-untyped-call]
  612. self.assertEqual(parents2, [commit1.id])
  613. # Test fallback behavior by creating provider without commit graph
  614. object_store_no_graph_path = os.path.join(self.tempdir, "objects2")
  615. os.makedirs(object_store_no_graph_path, exist_ok=True)
  616. object_store_no_graph = DiskObjectStore(object_store_no_graph_path)
  617. self.addCleanup(object_store_no_graph.close)
  618. object_store_no_graph.add_object(tree)
  619. object_store_no_graph.add_object(commit1)
  620. object_store_no_graph.add_object(commit2)
  621. provider_no_graph = ParentsProvider(object_store_no_graph)
  622. self.assertIsNone(provider_no_graph.commit_graph)
  623. # Should still work via commit object fallback
  624. parents1_fallback = provider_no_graph.get_parents(commit1.id) # type: ignore[no-untyped-call]
  625. self.assertEqual(parents1_fallback, [])
  626. parents2_fallback = provider_no_graph.get_parents(commit2.id) # type: ignore[no-untyped-call]
  627. self.assertEqual(parents2_fallback, [commit1.id])
  628. def test_graph_operations_use_commit_graph(self) -> None:
  629. """Test that graph operations use commit graph when available."""
  630. from dulwich.graph import can_fast_forward, find_merge_base
  631. from dulwich.object_store import DiskObjectStore
  632. from dulwich.objects import Commit, Tree
  633. from dulwich.repo import Repo
  634. # Create a disk object store
  635. object_store_path = os.path.join(self.tempdir, "objects")
  636. os.makedirs(object_store_path, exist_ok=True)
  637. object_store = DiskObjectStore(object_store_path)
  638. # Create a tree and a more complex commit graph for testing
  639. tree = Tree()
  640. object_store.add_object(tree)
  641. # Create commit chain: commit1 -> commit2 -> commit3
  642. # \-> commit4 -> commit5 (merge)
  643. commit1 = Commit()
  644. commit1.tree = tree.id
  645. commit1.author = commit1.committer = b"Test <test@example.com>"
  646. commit1.commit_time = commit1.author_time = 1234567890
  647. commit1.commit_timezone = commit1.author_timezone = 0
  648. commit1.message = b"First commit"
  649. object_store.add_object(commit1)
  650. commit2 = Commit()
  651. commit2.tree = tree.id
  652. commit2.parents = [commit1.id]
  653. commit2.author = commit2.committer = b"Test <test@example.com>"
  654. commit2.commit_time = commit2.author_time = 1234567891
  655. commit2.commit_timezone = commit2.author_timezone = 0
  656. commit2.message = b"Second commit"
  657. object_store.add_object(commit2)
  658. commit3 = Commit()
  659. commit3.tree = tree.id
  660. commit3.parents = [commit2.id]
  661. commit3.author = commit3.committer = b"Test <test@example.com>"
  662. commit3.commit_time = commit3.author_time = 1234567892
  663. commit3.commit_timezone = commit3.author_timezone = 0
  664. commit3.message = b"Third commit"
  665. object_store.add_object(commit3)
  666. # Branch from commit2
  667. commit4 = Commit()
  668. commit4.tree = tree.id
  669. commit4.parents = [commit2.id]
  670. commit4.author = commit4.committer = b"Test <test@example.com>"
  671. commit4.commit_time = commit4.author_time = 1234567893
  672. commit4.commit_timezone = commit4.author_timezone = 0
  673. commit4.message = b"Fourth commit (branch)"
  674. object_store.add_object(commit4)
  675. # Merge commit
  676. commit5 = Commit()
  677. commit5.tree = tree.id
  678. commit5.parents = [commit3.id, commit4.id]
  679. commit5.author = commit5.committer = b"Test <test@example.com>"
  680. commit5.commit_time = commit5.author_time = 1234567894
  681. commit5.commit_timezone = commit5.author_timezone = 0
  682. commit5.message = b"Merge commit"
  683. object_store.add_object(commit5)
  684. # Create refs
  685. refs_path = os.path.join(self.tempdir, "refs")
  686. os.makedirs(refs_path, exist_ok=True)
  687. repo_path = self.tempdir
  688. repo = Repo.init(repo_path)
  689. repo.object_store = object_store
  690. # Test graph operations WITHOUT commit graph first
  691. merge_base_no_graph = find_merge_base(repo, [commit3.id, commit4.id])
  692. can_ff_no_graph = can_fast_forward(repo, commit1.id, commit3.id)
  693. # Now write commit graph
  694. object_store.write_commit_graph(
  695. [commit1.id, commit2.id, commit3.id, commit4.id, commit5.id],
  696. reachable=False,
  697. )
  698. # Verify commit graph is loaded by creating new repo instance
  699. repo2 = Repo(repo_path)
  700. self.addCleanup(repo2.close)
  701. repo2.object_store = object_store
  702. # Verify commit graph is available
  703. commit_graph = repo2.object_store.get_commit_graph() # type: ignore[no-untyped-call]
  704. self.assertIsNotNone(commit_graph)
  705. # Test graph operations WITH commit graph
  706. merge_base_with_graph = find_merge_base(repo2, [commit3.id, commit4.id])
  707. can_ff_with_graph = can_fast_forward(repo2, commit1.id, commit3.id)
  708. # Results should be identical
  709. self.assertEqual(
  710. merge_base_no_graph,
  711. merge_base_with_graph,
  712. "Merge base should be same with and without commit graph",
  713. )
  714. self.assertEqual(
  715. can_ff_no_graph,
  716. can_ff_with_graph,
  717. "Fast-forward detection should be same with and without commit graph",
  718. )
  719. # Expected results
  720. self.assertEqual(
  721. merge_base_with_graph,
  722. [commit2.id],
  723. "Merge base of commit3 and commit4 should be commit2",
  724. )
  725. self.assertTrue(
  726. can_ff_with_graph, "Should be able to fast-forward from commit1 to commit3"
  727. )
  728. # Test that ParentsProvider in the repo uses commit graph
  729. parents_provider = repo2.parents_provider()
  730. self.assertIsNotNone(
  731. parents_provider.commit_graph,
  732. "Repository's parents provider should have commit graph",
  733. )
  734. # Verify parent lookups work through the provider
  735. self.assertEqual(parents_provider.get_parents(commit1.id), []) # type: ignore[no-untyped-call]
  736. self.assertEqual(parents_provider.get_parents(commit2.id), [commit1.id]) # type: ignore[no-untyped-call]
  737. self.assertEqual(
  738. parents_provider.get_parents(commit5.id),
  739. [commit3.id, commit4.id], # type: ignore[no-untyped-call]
  740. )
  741. def test_performance_with_commit_graph(self) -> None:
  742. """Test that using commit graph provides performance benefits."""
  743. from dulwich.graph import find_merge_base
  744. from dulwich.object_store import DiskObjectStore
  745. from dulwich.objects import Commit, Tree
  746. from dulwich.repo import Repo
  747. # Create a larger commit history to better measure performance
  748. object_store_path = os.path.join(self.tempdir, "objects")
  749. os.makedirs(object_store_path, exist_ok=True)
  750. object_store = DiskObjectStore(object_store_path)
  751. tree = Tree()
  752. object_store.add_object(tree)
  753. # Create a chain of 20 commits
  754. commits: list[Commit] = []
  755. for i in range(20):
  756. commit = Commit()
  757. commit.tree = tree.id
  758. if i > 0:
  759. commit.parents = [commits[i - 1].id]
  760. commit.author = commit.committer = b"Test <test@example.com>"
  761. commit.commit_time = commit.author_time = 1234567890 + i
  762. commit.commit_timezone = commit.author_timezone = 0
  763. commit.message = f"Commit {i}".encode()
  764. object_store.add_object(commit)
  765. commits.append(commit)
  766. # Create repository
  767. repo_path = self.tempdir
  768. repo = Repo.init(repo_path)
  769. repo.object_store = object_store
  770. # Time operations without commit graph
  771. for _ in range(10): # Run multiple times for better measurement
  772. find_merge_base(repo, [commits[0].id, commits[-1].id])
  773. # Write commit graph
  774. object_store.write_commit_graph([c.id for c in commits], reachable=False)
  775. # Create new repo instance to pick up commit graph
  776. repo2 = Repo(repo_path)
  777. self.addCleanup(repo2.close)
  778. repo2.object_store = object_store
  779. # Verify commit graph is loaded
  780. self.assertIsNotNone(repo2.object_store.get_commit_graph()) # type: ignore[no-untyped-call]
  781. # Time operations with commit graph
  782. for _ in range(10): # Run multiple times for better measurement
  783. find_merge_base(repo2, [commits[0].id, commits[-1].id])
  784. # With commit graph should be at least as fast (usually faster)
  785. # We don't assert a specific speedup since it depends on the machine
  786. # But we verify both approaches give the same result
  787. result_no_graph = find_merge_base(repo, [commits[0].id, commits[-1].id])
  788. result_with_graph = find_merge_base(repo2, [commits[0].id, commits[-1].id])
  789. self.assertEqual(
  790. result_no_graph,
  791. result_with_graph,
  792. "Results should be identical with and without commit graph",
  793. )
  794. self.assertEqual(
  795. result_with_graph, [commits[0].id], "Merge base should be the first commit"
  796. )
  797. if __name__ == "__main__":
  798. unittest.main()