test_bitmap.py 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. # test_bitmap.py -- Tests for bitmap support
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for bitmap support."""
  22. import os
  23. import tempfile
  24. import unittest
  25. from io import BytesIO
  26. from dulwich.bitmap import (
  27. BITMAP_OPT_FULL_DAG,
  28. BITMAP_OPT_HASH_CACHE,
  29. BITMAP_OPT_LOOKUP_TABLE,
  30. BITMAP_SIGNATURE,
  31. BITMAP_VERSION,
  32. BitmapEntry,
  33. EWAHBitmap,
  34. PackBitmap,
  35. _encode_ewah_words,
  36. read_bitmap_file,
  37. write_bitmap_file,
  38. )
  39. from dulwich.object_store import BitmapReachability, GraphTraversalReachability
  40. class EWAHCompressionTests(unittest.TestCase):
  41. """Tests for EWAH compression helper functions."""
  42. def test_encode_empty_words(self):
  43. """Test encoding empty word list."""
  44. result = _encode_ewah_words([])
  45. self.assertEqual([], result)
  46. def test_encode_single_literal(self):
  47. """Test encoding single literal word."""
  48. result = _encode_ewah_words([0x123])
  49. # Should be: RLW(0 run, 1 literal) + literal
  50. self.assertEqual(2, len(result))
  51. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  52. # running_bit=0, running_len=0, literal_words=1
  53. expected_rlw = (1 << 33) | (0 << 1) | 0
  54. self.assertEqual(expected_rlw, result[0])
  55. self.assertEqual(0x123, result[1])
  56. def test_encode_zero_run(self):
  57. """Test encoding run of zeros."""
  58. result = _encode_ewah_words([0, 0, 0])
  59. # Should be: RLW(3 zeros, 0 literals)
  60. self.assertEqual(1, len(result))
  61. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  62. # running_bit=0, running_len=3, literal_words=0
  63. expected_rlw = (0 << 33) | (3 << 1) | 0
  64. self.assertEqual(expected_rlw, result[0])
  65. def test_encode_ones_run(self):
  66. """Test encoding run of all-ones."""
  67. result = _encode_ewah_words([0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF])
  68. # Should be: RLW(2 ones, 0 literals)
  69. self.assertEqual(1, len(result))
  70. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  71. # running_bit=1, running_len=2, literal_words=0
  72. expected_rlw = (0 << 33) | (2 << 1) | 1
  73. self.assertEqual(expected_rlw, result[0])
  74. def test_encode_run_followed_by_literals(self):
  75. """Test encoding run followed by literal words."""
  76. result = _encode_ewah_words([0, 0, 0x123, 0x456])
  77. # Should be: RLW(2 zeros, 2 literals) + literals
  78. self.assertEqual(3, len(result))
  79. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  80. # running_bit=0, running_len=2, literal_words=2
  81. expected_rlw = (2 << 33) | (2 << 1) | 0
  82. self.assertEqual(expected_rlw, result[0])
  83. self.assertEqual(0x123, result[1])
  84. self.assertEqual(0x456, result[2])
  85. def test_encode_mixed_pattern(self):
  86. """Test encoding mixed runs and literals."""
  87. result = _encode_ewah_words(
  88. [0, 0, 0x123, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF]
  89. )
  90. # Should be: RLW(2 zeros, 1 literal) + literal + RLW(2 ones, 0 literals)
  91. self.assertEqual(3, len(result))
  92. class EWAHCompatibilityTests(unittest.TestCase):
  93. """Tests for EWAH encode/decode compatibility."""
  94. def test_encode_decode_sparse_bitmap(self):
  95. """Test encoding and decoding a sparse bitmap with runs."""
  96. # Create a bitmap with a pattern that benefits from run-length encoding
  97. # Bits: 0, 1, 128 (has runs of zeros between set bits)
  98. bitmap = EWAHBitmap()
  99. bitmap.add(0)
  100. bitmap.add(1)
  101. bitmap.add(128)
  102. # Encode
  103. encoded = bitmap.encode()
  104. # Decode
  105. bitmap2 = EWAHBitmap(encoded)
  106. # Verify all bits are preserved
  107. self.assertEqual(len(bitmap), len(bitmap2))
  108. self.assertIn(0, bitmap2)
  109. self.assertIn(1, bitmap2)
  110. self.assertIn(128, bitmap2)
  111. self.assertNotIn(2, bitmap2)
  112. self.assertNotIn(127, bitmap2)
  113. def test_encode_decode_dense_bitmap(self):
  114. """Test encoding and decoding a dense bitmap."""
  115. # Create a bitmap with many consecutive bits set
  116. bitmap = EWAHBitmap()
  117. for i in range(64):
  118. bitmap.add(i)
  119. # Encode
  120. encoded = bitmap.encode()
  121. # Decode
  122. bitmap2 = EWAHBitmap(encoded)
  123. # Verify all bits are preserved
  124. self.assertEqual(64, len(bitmap2))
  125. for i in range(64):
  126. self.assertIn(i, bitmap2)
  127. self.assertNotIn(64, bitmap2)
  128. def test_encode_decode_runs_of_zeros(self):
  129. """Test encoding and decoding bitmap with long runs of zeros."""
  130. # Create bitmap: bits 0, 200, 400 (lots of zeros in between)
  131. bitmap = EWAHBitmap()
  132. bitmap.add(0)
  133. bitmap.add(200)
  134. bitmap.add(400)
  135. # Encode
  136. encoded = bitmap.encode()
  137. # Decode
  138. bitmap2 = EWAHBitmap(encoded)
  139. # Verify
  140. self.assertEqual(3, len(bitmap2))
  141. self.assertIn(0, bitmap2)
  142. self.assertIn(200, bitmap2)
  143. self.assertIn(400, bitmap2)
  144. self.assertNotIn(1, bitmap2)
  145. self.assertNotIn(199, bitmap2)
  146. self.assertNotIn(201, bitmap2)
  147. def test_encode_decode_mixed_pattern(self):
  148. """Test encoding and decoding bitmap with mixed dense/sparse regions."""
  149. bitmap = EWAHBitmap()
  150. # Dense region: 0-63
  151. for i in range(64):
  152. bitmap.add(i)
  153. # Sparse region: 200, 300, 400
  154. bitmap.add(200)
  155. bitmap.add(300)
  156. bitmap.add(400)
  157. # Another dense region: 500-563
  158. for i in range(500, 564):
  159. bitmap.add(i)
  160. # Encode
  161. encoded = bitmap.encode()
  162. # Decode
  163. bitmap2 = EWAHBitmap(encoded)
  164. # Verify all bits preserved
  165. self.assertEqual(len(bitmap), len(bitmap2))
  166. for i in range(64):
  167. self.assertIn(i, bitmap2)
  168. self.assertIn(200, bitmap2)
  169. self.assertIn(300, bitmap2)
  170. self.assertIn(400, bitmap2)
  171. for i in range(500, 564):
  172. self.assertIn(i, bitmap2)
  173. # Check some bits that shouldn't be set
  174. self.assertNotIn(64, bitmap2)
  175. self.assertNotIn(199, bitmap2)
  176. self.assertNotIn(201, bitmap2)
  177. self.assertNotIn(499, bitmap2)
  178. def test_encode_decode_preserves_bitwise_ops(self):
  179. """Test that encode/decode doesn't break bitwise operations."""
  180. # Create two bitmaps
  181. bitmap1 = EWAHBitmap()
  182. bitmap1.add(0)
  183. bitmap1.add(5)
  184. bitmap1.add(100)
  185. bitmap2 = EWAHBitmap()
  186. bitmap2.add(5)
  187. bitmap2.add(10)
  188. bitmap2.add(100)
  189. # Encode and decode both
  190. bitmap1_decoded = EWAHBitmap(bitmap1.encode())
  191. bitmap2_decoded = EWAHBitmap(bitmap2.encode())
  192. # Perform operations on decoded bitmaps
  193. or_result = bitmap1_decoded | bitmap2_decoded
  194. and_result = bitmap1_decoded & bitmap2_decoded
  195. xor_result = bitmap1_decoded ^ bitmap2_decoded
  196. # Verify results
  197. # OR: {0, 5, 10, 100}
  198. self.assertEqual(4, len(or_result))
  199. self.assertIn(0, or_result)
  200. self.assertIn(5, or_result)
  201. self.assertIn(10, or_result)
  202. self.assertIn(100, or_result)
  203. # AND: {5, 100}
  204. self.assertEqual(2, len(and_result))
  205. self.assertIn(5, and_result)
  206. self.assertIn(100, and_result)
  207. # XOR: {0, 10}
  208. self.assertEqual(2, len(xor_result))
  209. self.assertIn(0, xor_result)
  210. self.assertIn(10, xor_result)
  211. def test_round_trip_large_bitmap(self):
  212. """Test round-trip encoding/decoding of a large bitmap."""
  213. # Create a large bitmap with various patterns
  214. bitmap = EWAHBitmap()
  215. # Add bits at various intervals
  216. for i in range(0, 10000, 7):
  217. bitmap.add(i)
  218. original_bits = set(bitmap.bits)
  219. # Encode and decode
  220. encoded = bitmap.encode()
  221. decoded = EWAHBitmap(encoded)
  222. # Verify all bits preserved
  223. self.assertEqual(original_bits, decoded.bits)
  224. self.assertEqual(len(bitmap), len(decoded))
  225. class EWAHBitmapTests(unittest.TestCase):
  226. """Tests for EWAH bitmap compression."""
  227. def test_empty_bitmap(self):
  228. """Test empty bitmap."""
  229. bitmap = EWAHBitmap()
  230. self.assertEqual(0, len(bitmap))
  231. self.assertEqual(0, bitmap.bit_count)
  232. def test_add_bit(self):
  233. """Test adding bits to bitmap."""
  234. bitmap = EWAHBitmap()
  235. bitmap.add(0)
  236. bitmap.add(5)
  237. bitmap.add(100)
  238. self.assertEqual(3, len(bitmap))
  239. self.assertEqual(101, bitmap.bit_count)
  240. self.assertIn(0, bitmap)
  241. self.assertIn(5, bitmap)
  242. self.assertIn(100, bitmap)
  243. self.assertNotIn(1, bitmap)
  244. self.assertNotIn(99, bitmap)
  245. def test_encode_decode(self):
  246. """Test encoding and decoding bitmaps."""
  247. bitmap = EWAHBitmap()
  248. bitmap.add(0)
  249. bitmap.add(1)
  250. bitmap.add(64)
  251. bitmap.add(128)
  252. # Encode
  253. data = bitmap.encode()
  254. self.assertIsInstance(data, bytes)
  255. # Decode
  256. bitmap2 = EWAHBitmap(data)
  257. self.assertEqual(len(bitmap), len(bitmap2))
  258. self.assertIn(0, bitmap2)
  259. self.assertIn(1, bitmap2)
  260. self.assertIn(64, bitmap2)
  261. self.assertIn(128, bitmap2)
  262. def test_bitwise_or(self):
  263. """Test bitwise OR operation."""
  264. bitmap1 = EWAHBitmap()
  265. bitmap1.add(0)
  266. bitmap1.add(5)
  267. bitmap2 = EWAHBitmap()
  268. bitmap2.add(5)
  269. bitmap2.add(10)
  270. result = bitmap1 | bitmap2
  271. self.assertEqual(3, len(result))
  272. self.assertIn(0, result)
  273. self.assertIn(5, result)
  274. self.assertIn(10, result)
  275. def test_bitwise_and(self):
  276. """Test bitwise AND operation."""
  277. bitmap1 = EWAHBitmap()
  278. bitmap1.add(0)
  279. bitmap1.add(5)
  280. bitmap2 = EWAHBitmap()
  281. bitmap2.add(5)
  282. bitmap2.add(10)
  283. result = bitmap1 & bitmap2
  284. self.assertEqual(1, len(result))
  285. self.assertIn(5, result)
  286. self.assertNotIn(0, result)
  287. self.assertNotIn(10, result)
  288. def test_bitwise_xor(self):
  289. """Test bitwise XOR operation."""
  290. bitmap1 = EWAHBitmap()
  291. bitmap1.add(0)
  292. bitmap1.add(5)
  293. bitmap2 = EWAHBitmap()
  294. bitmap2.add(5)
  295. bitmap2.add(10)
  296. result = bitmap1 ^ bitmap2
  297. self.assertEqual(2, len(result))
  298. self.assertIn(0, result)
  299. self.assertIn(10, result)
  300. self.assertNotIn(5, result)
  301. class BitmapEntryTests(unittest.TestCase):
  302. """Tests for bitmap entries."""
  303. def test_create_entry(self):
  304. """Test creating a bitmap entry."""
  305. bitmap = EWAHBitmap()
  306. bitmap.add(0)
  307. bitmap.add(10)
  308. entry = BitmapEntry(
  309. object_pos=100,
  310. xor_offset=0,
  311. flags=0,
  312. bitmap=bitmap,
  313. )
  314. self.assertEqual(100, entry.object_pos)
  315. self.assertEqual(0, entry.xor_offset)
  316. self.assertEqual(0, entry.flags)
  317. self.assertEqual(bitmap, entry.bitmap)
  318. class PackBitmapTests(unittest.TestCase):
  319. """Tests for pack bitmap."""
  320. def test_create_bitmap(self):
  321. """Test creating a pack bitmap."""
  322. bitmap = PackBitmap()
  323. self.assertEqual(BITMAP_VERSION, bitmap.version)
  324. self.assertEqual(BITMAP_OPT_FULL_DAG, bitmap.flags)
  325. self.assertIsNone(bitmap.pack_checksum)
  326. def test_bitmap_with_entries(self):
  327. """Test bitmap with entries."""
  328. bitmap = PackBitmap()
  329. commit_sha = b"\x00" * 20
  330. ewah_bitmap = EWAHBitmap()
  331. ewah_bitmap.add(0)
  332. ewah_bitmap.add(5)
  333. entry = BitmapEntry(
  334. object_pos=0,
  335. xor_offset=0,
  336. flags=0,
  337. bitmap=ewah_bitmap,
  338. )
  339. bitmap.entries[commit_sha] = entry
  340. self.assertTrue(bitmap.has_commit(commit_sha))
  341. self.assertFalse(bitmap.has_commit(b"\x01" * 20))
  342. def test_iter_commits(self):
  343. """Test iterating over commits with bitmaps."""
  344. bitmap = PackBitmap()
  345. commit1 = b"\x00" * 20
  346. commit2 = b"\x01" * 20
  347. ewah_bitmap = EWAHBitmap()
  348. entry = BitmapEntry(0, 0, 0, ewah_bitmap)
  349. bitmap.entries[commit1] = entry
  350. bitmap.entries[commit2] = entry
  351. commits = list(bitmap.iter_commits())
  352. self.assertEqual(2, len(commits))
  353. self.assertIn(commit1, commits)
  354. self.assertIn(commit2, commits)
  355. class BitmapFileTests(unittest.TestCase):
  356. """Tests for bitmap file I/O."""
  357. def test_write_read_empty_bitmap(self):
  358. """Test writing and reading an empty bitmap."""
  359. bitmap = PackBitmap()
  360. bitmap.pack_checksum = b"\x00" * 20
  361. # Write to bytes
  362. f = BytesIO()
  363. write_bitmap_file(f, bitmap)
  364. # Read back
  365. f.seek(0)
  366. bitmap2 = read_bitmap_file(f)
  367. self.assertEqual(bitmap.version, bitmap2.version)
  368. self.assertEqual(bitmap.flags, bitmap2.flags)
  369. self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
  370. def test_write_read_with_type_bitmaps(self):
  371. """Test writing and reading bitmaps with type information."""
  372. bitmap = PackBitmap()
  373. bitmap.pack_checksum = b"\xaa" * 20
  374. # Add some type bitmap data
  375. bitmap.commit_bitmap.add(0)
  376. bitmap.commit_bitmap.add(5)
  377. bitmap.tree_bitmap.add(1)
  378. bitmap.blob_bitmap.add(2)
  379. bitmap.tag_bitmap.add(3)
  380. # Write to bytes
  381. f = BytesIO()
  382. write_bitmap_file(f, bitmap)
  383. # Read back
  384. f.seek(0)
  385. bitmap2 = read_bitmap_file(f)
  386. self.assertEqual(bitmap.version, bitmap2.version)
  387. self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
  388. # Check type bitmaps
  389. self.assertIn(0, bitmap2.commit_bitmap)
  390. self.assertIn(5, bitmap2.commit_bitmap)
  391. self.assertIn(1, bitmap2.tree_bitmap)
  392. self.assertIn(2, bitmap2.blob_bitmap)
  393. self.assertIn(3, bitmap2.tag_bitmap)
  394. def test_invalid_signature(self):
  395. """Test reading file with invalid signature."""
  396. f = BytesIO(b"XXXX\x00\x01\x00\x00")
  397. with self.assertRaises(ValueError) as cm:
  398. read_bitmap_file(f)
  399. self.assertIn("Invalid bitmap signature", str(cm.exception))
  400. def test_invalid_version(self):
  401. """Test reading file with invalid version."""
  402. f = BytesIO(BITMAP_SIGNATURE + b"\x00\x02\x00\x01")
  403. with self.assertRaises(ValueError) as cm:
  404. read_bitmap_file(f)
  405. self.assertIn("Unsupported bitmap version", str(cm.exception))
  406. def test_incomplete_header(self):
  407. """Test reading file with incomplete header."""
  408. f = BytesIO(BITMAP_SIGNATURE + b"\x00")
  409. with self.assertRaises(ValueError) as cm:
  410. read_bitmap_file(f)
  411. self.assertIn("Incomplete bitmap header", str(cm.exception))
  412. class BitmapIntegrationTests(unittest.TestCase):
  413. """Integration tests for bitmap functionality."""
  414. def test_round_trip_file(self):
  415. """Test writing and reading bitmap to/from actual file."""
  416. with tempfile.TemporaryDirectory() as tmpdir:
  417. bitmap_path = os.path.join(tmpdir, "test.bitmap")
  418. # Create bitmap
  419. bitmap = PackBitmap()
  420. bitmap.pack_checksum = b"\xff" * 20
  421. bitmap.commit_bitmap.add(10)
  422. bitmap.tree_bitmap.add(20)
  423. # Write to file
  424. from dulwich.bitmap import write_bitmap
  425. write_bitmap(bitmap_path, bitmap)
  426. # Read back
  427. from dulwich.bitmap import read_bitmap
  428. bitmap2 = read_bitmap(bitmap_path)
  429. self.assertEqual(bitmap.version, bitmap2.version)
  430. self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
  431. self.assertIn(10, bitmap2.commit_bitmap)
  432. self.assertIn(20, bitmap2.tree_bitmap)
  433. def test_xor_decompression(self):
  434. """Test XOR decompression of bitmap entries."""
  435. bitmap = PackBitmap()
  436. # Create base bitmap
  437. base_bitmap = EWAHBitmap()
  438. base_bitmap.add(0)
  439. base_bitmap.add(1)
  440. base_bitmap.add(2)
  441. base_entry = BitmapEntry(
  442. object_pos=0,
  443. xor_offset=0,
  444. flags=0,
  445. bitmap=base_bitmap,
  446. )
  447. # Create XOR'd bitmap
  448. # If we XOR base (bits 0,1,2) with XOR bitmap (bits 1,3),
  449. # result should be (bits 0,2,3)
  450. xor_bitmap = EWAHBitmap()
  451. xor_bitmap.add(1)
  452. xor_bitmap.add(3)
  453. xor_entry = BitmapEntry(
  454. object_pos=1,
  455. xor_offset=1, # Reference the previous entry
  456. flags=0,
  457. bitmap=xor_bitmap,
  458. )
  459. # Add entries
  460. base_sha = b"\x00" * 20
  461. xor_sha = b"\x01" * 20
  462. bitmap.entries[base_sha] = base_entry
  463. bitmap.entries_list.append((base_sha, base_entry))
  464. bitmap.entries[xor_sha] = xor_entry
  465. bitmap.entries_list.append((xor_sha, xor_entry))
  466. # Get decompressed bitmap
  467. result = bitmap.get_bitmap(xor_sha)
  468. self.assertIsNotNone(result)
  469. self.assertIn(0, result)
  470. self.assertNotIn(1, result) # XOR cancels this
  471. self.assertIn(2, result)
  472. self.assertIn(3, result)
  473. def test_lookup_table_round_trip(self):
  474. """Test reading and writing lookup tables."""
  475. with tempfile.TemporaryDirectory() as tmpdir:
  476. bitmap_path = os.path.join(tmpdir, "test.bitmap")
  477. # Create bitmap with lookup table
  478. bitmap = PackBitmap()
  479. bitmap.flags = BITMAP_OPT_FULL_DAG | BITMAP_OPT_LOOKUP_TABLE
  480. bitmap.pack_checksum = b"\xaa" * 20
  481. # Add some bitmap entries (required for lookup table to be written/read)
  482. for i in range(3):
  483. ewah = EWAHBitmap()
  484. ewah.add(i)
  485. entry = BitmapEntry(
  486. object_pos=i,
  487. xor_offset=0,
  488. flags=0,
  489. bitmap=ewah,
  490. )
  491. sha = i.to_bytes(20, byteorder="big")
  492. bitmap.entries[sha] = entry
  493. bitmap.entries_list.append((sha, entry))
  494. bitmap.lookup_table = [
  495. (0, 100, 0),
  496. (1, 200, 0),
  497. (2, 300, 1),
  498. ]
  499. # Write to file
  500. from dulwich.bitmap import write_bitmap
  501. write_bitmap(bitmap_path, bitmap)
  502. # Read back
  503. from dulwich.bitmap import read_bitmap
  504. bitmap2 = read_bitmap(bitmap_path)
  505. self.assertEqual(bitmap.flags, bitmap2.flags)
  506. self.assertIsNotNone(bitmap2.lookup_table)
  507. self.assertEqual(3, len(bitmap2.lookup_table))
  508. self.assertEqual((0, 100, 0), bitmap2.lookup_table[0])
  509. self.assertEqual((1, 200, 0), bitmap2.lookup_table[1])
  510. self.assertEqual((2, 300, 1), bitmap2.lookup_table[2])
  511. def test_name_hash_cache_round_trip(self):
  512. """Test reading and writing name-hash cache."""
  513. with tempfile.TemporaryDirectory() as tmpdir:
  514. bitmap_path = os.path.join(tmpdir, "test.bitmap")
  515. # Create bitmap with name-hash cache
  516. bitmap = PackBitmap()
  517. bitmap.flags = BITMAP_OPT_FULL_DAG | BITMAP_OPT_HASH_CACHE
  518. bitmap.pack_checksum = b"\xbb" * 20
  519. bitmap.name_hash_cache = [0x12345678, 0x9ABCDEF0, 0xFEDCBA98]
  520. # Write to file
  521. from dulwich.bitmap import write_bitmap
  522. write_bitmap(bitmap_path, bitmap)
  523. # Read back
  524. from dulwich.bitmap import read_bitmap
  525. bitmap2 = read_bitmap(bitmap_path)
  526. self.assertEqual(bitmap.flags, bitmap2.flags)
  527. self.assertIsNotNone(bitmap2.name_hash_cache)
  528. self.assertEqual(3, len(bitmap2.name_hash_cache))
  529. self.assertEqual(0x12345678, bitmap2.name_hash_cache[0])
  530. self.assertEqual(0x9ABCDEF0, bitmap2.name_hash_cache[1])
  531. self.assertEqual(0xFEDCBA98, bitmap2.name_hash_cache[2])
  532. class BitmapErrorHandlingTests(unittest.TestCase):
  533. """Tests for error handling in bitmap reading."""
  534. def test_truncated_header(self):
  535. """Test reading bitmap with truncated header."""
  536. # Only 10 bytes instead of full header
  537. data = b"BITM\x00\x01\x00\x00\x00\x00"
  538. with self.assertRaises(ValueError) as ctx:
  539. read_bitmap_file(BytesIO(data))
  540. # Should raise ValueError about missing/incomplete header data
  541. self.assertIsInstance(ctx.exception, ValueError)
  542. def test_invalid_signature(self):
  543. """Test reading bitmap with invalid signature."""
  544. data = b"JUNK\x00\x01\x00\x00" + b"\x00" * 20
  545. with self.assertRaises(ValueError) as ctx:
  546. read_bitmap_file(BytesIO(data))
  547. self.assertIn("signature", str(ctx.exception).lower())
  548. def test_unsupported_version(self):
  549. """Test reading bitmap with unsupported version."""
  550. data = BITMAP_SIGNATURE
  551. data += b"\x00\x99" # Version 153 (unsupported)
  552. data += b"\x00\x00" # Flags
  553. data += b"\x00\x00\x00\x00" # Entry count
  554. data += b"\x00" * 20 # Pack checksum
  555. with self.assertRaises(ValueError) as ctx:
  556. read_bitmap_file(BytesIO(data))
  557. self.assertIn("version", str(ctx.exception).lower())
  558. def test_truncated_type_bitmap(self):
  559. """Test reading bitmap with truncated type bitmap data."""
  560. # Valid header
  561. data = BITMAP_SIGNATURE
  562. data += BITMAP_VERSION.to_bytes(2, "big")
  563. data += b"\x00\x00" # Flags
  564. data += b"\x00\x00\x00\x00" # Entry count
  565. data += b"\x00" * 20 # Pack checksum
  566. # Truncated type bitmap (incomplete EWAH header)
  567. data += b"\x00\x00\x00\x05" # bit_count
  568. data += b"\x00\x00" # Incomplete word_count
  569. with self.assertRaises(ValueError) as ctx:
  570. read_bitmap_file(BytesIO(data))
  571. self.assertIn("type bitmap", str(ctx.exception).lower())
  572. def test_truncated_bitmap_entry(self):
  573. """Test reading bitmap with truncated entry."""
  574. # Valid header with 1 entry
  575. data = BITMAP_SIGNATURE
  576. data += BITMAP_VERSION.to_bytes(2, "big")
  577. data += b"\x00\x00" # Flags
  578. data += b"\x00\x00\x00\x01" # 1 entry
  579. data += b"\x00" * 20 # Pack checksum
  580. # Write empty type bitmaps
  581. for _ in range(4):
  582. empty_ewah = EWAHBitmap().encode()
  583. data += empty_ewah
  584. # Truncated entry (only object position, missing rest)
  585. data += b"\x00\x00\x00\x00" # Object position
  586. # Missing: XOR offset, flags, bitmap data
  587. with self.assertRaises(ValueError) as ctx:
  588. read_bitmap_file(BytesIO(data))
  589. # Should raise ValueError about missing data
  590. self.assertIsInstance(ctx.exception, ValueError)
  591. def test_empty_bitmap_file(self):
  592. """Test reading completely empty file."""
  593. with self.assertRaises(ValueError):
  594. read_bitmap_file(BytesIO(b""))
  595. def test_bitmap_with_zero_entries(self):
  596. """Test valid bitmap with zero entries."""
  597. bitmap = PackBitmap()
  598. bitmap.pack_checksum = b"\x00" * 20
  599. f = BytesIO()
  600. write_bitmap_file(f, bitmap)
  601. f.seek(0)
  602. # Should read successfully
  603. bitmap2 = read_bitmap_file(f)
  604. self.assertEqual(0, len(bitmap2.entries))
  605. self.assertIsNotNone(bitmap2.pack_checksum)
  606. class BitmapEdgeCaseTests(unittest.TestCase):
  607. """Tests for edge cases in bitmap handling."""
  608. def test_very_large_bitmap(self):
  609. """Test bitmap with many bits set."""
  610. bitmap = EWAHBitmap()
  611. # Add 100,000 bits
  612. for i in range(100000):
  613. if i % 3 == 0: # Every 3rd bit
  614. bitmap.add(i)
  615. # Should encode and decode without issues
  616. encoded = bitmap.encode()
  617. decoded = EWAHBitmap(encoded)
  618. self.assertEqual(len(bitmap), len(decoded))
  619. # Verify a sample of bits
  620. self.assertIn(0, decoded)
  621. self.assertIn(99999, decoded)
  622. self.assertNotIn(1, decoded)
  623. self.assertNotIn(99998, decoded)
  624. def test_bitmap_with_large_gaps(self):
  625. """Test bitmap with large gaps between set bits."""
  626. bitmap = EWAHBitmap()
  627. bitmap.add(0)
  628. bitmap.add(100000)
  629. bitmap.add(200000)
  630. encoded = bitmap.encode()
  631. decoded = EWAHBitmap(encoded)
  632. self.assertEqual(3, len(decoded))
  633. self.assertIn(0, decoded)
  634. self.assertIn(100000, decoded)
  635. self.assertIn(200000, decoded)
  636. def test_bitmap_all_bits_in_word(self):
  637. """Test bitmap with all 64 bits in a word set."""
  638. bitmap = EWAHBitmap()
  639. for i in range(64):
  640. bitmap.add(i)
  641. encoded = bitmap.encode()
  642. decoded = EWAHBitmap(encoded)
  643. self.assertEqual(64, len(decoded))
  644. for i in range(64):
  645. self.assertIn(i, decoded)
  646. def test_multiple_flags_combined(self):
  647. """Test bitmap with multiple flags set."""
  648. bitmap = PackBitmap(
  649. flags=BITMAP_OPT_FULL_DAG | BITMAP_OPT_HASH_CACHE | BITMAP_OPT_LOOKUP_TABLE
  650. )
  651. bitmap.pack_checksum = b"\x00" * 20
  652. bitmap.lookup_table = [(0, 0, 0), (1, 100, 0)]
  653. bitmap.name_hash_cache = [0x12345678, 0xABCDEF00]
  654. # Add an entry
  655. test_bitmap = EWAHBitmap()
  656. test_bitmap.add(0)
  657. test_bitmap.add(5)
  658. entry = BitmapEntry(object_pos=0, xor_offset=0, flags=0, bitmap=test_bitmap)
  659. bitmap.entries[b"\x00" * 20] = entry
  660. # Write and read back
  661. f = BytesIO()
  662. write_bitmap_file(f, bitmap)
  663. f.seek(0)
  664. bitmap2 = read_bitmap_file(f)
  665. self.assertEqual(bitmap.flags, bitmap2.flags)
  666. self.assertTrue(bitmap2.flags & BITMAP_OPT_FULL_DAG)
  667. self.assertTrue(bitmap2.flags & BITMAP_OPT_HASH_CACHE)
  668. self.assertTrue(bitmap2.flags & BITMAP_OPT_LOOKUP_TABLE)
  669. self.assertIsNotNone(bitmap2.lookup_table)
  670. self.assertIsNotNone(bitmap2.name_hash_cache)
  671. class BitmapConfigTests(unittest.TestCase):
  672. """Tests for bitmap-related configuration settings."""
  673. def test_pack_write_bitmaps_default(self):
  674. """Test pack.writeBitmaps defaults to false."""
  675. from dulwich.config import ConfigFile
  676. config = ConfigFile()
  677. self.assertFalse(config.get_boolean((b"pack",), b"writeBitmaps", False))
  678. def test_pack_write_bitmaps_true(self):
  679. """Test pack.writeBitmaps = true."""
  680. from dulwich.config import ConfigFile
  681. config = ConfigFile()
  682. config.set((b"pack",), b"writeBitmaps", b"true")
  683. self.assertTrue(config.get_boolean((b"pack",), b"writeBitmaps", False))
  684. def test_pack_write_bitmaps_false(self):
  685. """Test pack.writeBitmaps = false."""
  686. from dulwich.config import ConfigFile
  687. config = ConfigFile()
  688. config.set((b"pack",), b"writeBitmaps", b"false")
  689. self.assertFalse(config.get_boolean((b"pack",), b"writeBitmaps", False))
  690. def test_pack_write_bitmap_hash_cache_default(self):
  691. """Test pack.writeBitmapHashCache defaults to true."""
  692. from dulwich.config import ConfigFile
  693. config = ConfigFile()
  694. self.assertTrue(config.get_boolean((b"pack",), b"writeBitmapHashCache", True))
  695. def test_pack_write_bitmap_hash_cache_false(self):
  696. """Test pack.writeBitmapHashCache = false."""
  697. from dulwich.config import ConfigFile
  698. config = ConfigFile()
  699. config.set((b"pack",), b"writeBitmapHashCache", b"false")
  700. self.assertFalse(config.get_boolean((b"pack",), b"writeBitmapHashCache", True))
  701. def test_pack_write_bitmap_lookup_table_default(self):
  702. """Test pack.writeBitmapLookupTable defaults to true."""
  703. from dulwich.config import ConfigFile
  704. config = ConfigFile()
  705. self.assertTrue(config.get_boolean((b"pack",), b"writeBitmapLookupTable", True))
  706. def test_repack_write_bitmaps(self):
  707. """Test repack.writeBitmaps configuration."""
  708. from dulwich.config import ConfigFile
  709. config = ConfigFile()
  710. config.set((b"repack",), b"writeBitmaps", b"true")
  711. self.assertTrue(config.get_boolean((b"repack",), b"writeBitmaps", False))
  712. def test_pack_use_bitmap_index_default(self):
  713. """Test pack.useBitmapIndex defaults to true."""
  714. from dulwich.config import ConfigFile
  715. config = ConfigFile()
  716. self.assertTrue(config.get_boolean((b"pack",), b"useBitmapIndex", True))
  717. def test_pack_use_bitmap_index_false(self):
  718. """Test pack.useBitmapIndex = false."""
  719. from dulwich.config import ConfigFile
  720. config = ConfigFile()
  721. config.set((b"pack",), b"useBitmapIndex", b"false")
  722. self.assertFalse(config.get_boolean((b"pack",), b"useBitmapIndex", True))
  723. class ReachabilityProviderTests(unittest.TestCase):
  724. """Tests for ObjectReachabilityProvider implementations."""
  725. def setUp(self):
  726. """Set up test repository with commits."""
  727. from dulwich.object_store import DiskObjectStore
  728. from dulwich.objects import Blob, Commit, Tree
  729. self.test_dir = tempfile.mkdtemp()
  730. self.store = DiskObjectStore(self.test_dir)
  731. # Create a simple commit history:
  732. # commit1 -> commit2 -> commit3
  733. # \-> commit4
  734. # Create blob and tree
  735. self.blob1 = Blob.from_string(b"test content 1")
  736. self.store.add_object(self.blob1)
  737. self.blob2 = Blob.from_string(b"test content 2")
  738. self.store.add_object(self.blob2)
  739. self.tree1 = Tree()
  740. self.tree1[b"file1.txt"] = (0o100644, self.blob1.id)
  741. self.store.add_object(self.tree1)
  742. self.tree2 = Tree()
  743. self.tree2[b"file1.txt"] = (0o100644, self.blob1.id)
  744. self.tree2[b"file2.txt"] = (0o100644, self.blob2.id)
  745. self.store.add_object(self.tree2)
  746. # Create commit1 (root)
  747. self.commit1 = Commit()
  748. self.commit1.tree = self.tree1.id
  749. self.commit1.message = b"First commit"
  750. self.commit1.author = self.commit1.committer = b"Test <test@example.com>"
  751. self.commit1.author_time = self.commit1.commit_time = 1234567890
  752. self.commit1.author_timezone = self.commit1.commit_timezone = 0
  753. self.store.add_object(self.commit1)
  754. # Create commit2 (child of commit1)
  755. self.commit2 = Commit()
  756. self.commit2.tree = self.tree1.id
  757. self.commit2.parents = [self.commit1.id]
  758. self.commit2.message = b"Second commit"
  759. self.commit2.author = self.commit2.committer = b"Test <test@example.com>"
  760. self.commit2.author_time = self.commit2.commit_time = 1234567891
  761. self.commit2.author_timezone = self.commit2.commit_timezone = 0
  762. self.store.add_object(self.commit2)
  763. # Create commit3 (child of commit2)
  764. self.commit3 = Commit()
  765. self.commit3.tree = self.tree2.id
  766. self.commit3.parents = [self.commit2.id]
  767. self.commit3.message = b"Third commit"
  768. self.commit3.author = self.commit3.committer = b"Test <test@example.com>"
  769. self.commit3.author_time = self.commit3.commit_time = 1234567892
  770. self.commit3.author_timezone = self.commit3.commit_timezone = 0
  771. self.store.add_object(self.commit3)
  772. # Create commit4 (child of commit1, creates a branch)
  773. self.commit4 = Commit()
  774. self.commit4.tree = self.tree2.id
  775. self.commit4.parents = [self.commit1.id]
  776. self.commit4.message = b"Fourth commit"
  777. self.commit4.author = self.commit4.committer = b"Test <test@example.com>"
  778. self.commit4.author_time = self.commit4.commit_time = 1234567893
  779. self.commit4.author_timezone = self.commit4.commit_timezone = 0
  780. self.store.add_object(self.commit4)
  781. def tearDown(self):
  782. """Clean up test directory."""
  783. import shutil
  784. shutil.rmtree(self.test_dir)
  785. def test_graph_traversal_reachability_single_commit(self):
  786. """Test GraphTraversalReachability with single commit."""
  787. from dulwich.object_store import GraphTraversalReachability
  788. provider = GraphTraversalReachability(self.store)
  789. # Get reachable commits from commit1
  790. reachable = provider.get_reachable_commits(
  791. [self.commit1.id], exclude=None, shallow=None
  792. )
  793. # Should only include commit1
  794. self.assertEqual({self.commit1.id}, reachable)
  795. def test_graph_traversal_reachability_linear_history(self):
  796. """Test GraphTraversalReachability with linear history."""
  797. from dulwich.object_store import GraphTraversalReachability
  798. provider = GraphTraversalReachability(self.store)
  799. # Get reachable commits from commit3
  800. reachable = provider.get_reachable_commits(
  801. [self.commit3.id], exclude=None, shallow=None
  802. )
  803. # Should include commit3, commit2, and commit1
  804. expected = {self.commit1.id, self.commit2.id, self.commit3.id}
  805. self.assertEqual(expected, reachable)
  806. def test_graph_traversal_reachability_with_exclusion(self):
  807. """Test GraphTraversalReachability with exclusion."""
  808. from dulwich.object_store import GraphTraversalReachability
  809. provider = GraphTraversalReachability(self.store)
  810. # Get commits reachable from commit3 but not from commit1
  811. reachable = provider.get_reachable_commits(
  812. [self.commit3.id], exclude=[self.commit1.id], shallow=None
  813. )
  814. # Should include commit3 and commit2, but not commit1
  815. expected = {self.commit2.id, self.commit3.id}
  816. self.assertEqual(expected, reachable)
  817. def test_graph_traversal_reachability_branching(self):
  818. """Test GraphTraversalReachability with branching history."""
  819. from dulwich.object_store import GraphTraversalReachability
  820. provider = GraphTraversalReachability(self.store)
  821. # Get reachable commits from both commit3 and commit4
  822. reachable = provider.get_reachable_commits(
  823. [self.commit3.id, self.commit4.id], exclude=None, shallow=None
  824. )
  825. # Should include all commits
  826. expected = {self.commit1.id, self.commit2.id, self.commit3.id, self.commit4.id}
  827. self.assertEqual(expected, reachable)
  828. def test_graph_traversal_reachable_objects(self):
  829. """Test GraphTraversalReachability.get_reachable_objects()."""
  830. from dulwich.object_store import GraphTraversalReachability
  831. provider = GraphTraversalReachability(self.store)
  832. # Get all objects reachable from commit3
  833. reachable = provider.get_reachable_objects(
  834. [self.commit3.id], exclude_commits=None
  835. )
  836. # Should include commit3, blob1, and blob2 (but not tree objects themselves)
  837. self.assertIn(self.commit3.id, reachable)
  838. self.assertIn(self.blob1.id, reachable)
  839. self.assertIn(self.blob2.id, reachable)
  840. # Verify at least 3 objects
  841. self.assertGreaterEqual(len(reachable), 3)
  842. def test_graph_traversal_reachable_objects_with_exclusion(self):
  843. """Test GraphTraversalReachability.get_reachable_objects() with exclusion."""
  844. from dulwich.object_store import GraphTraversalReachability
  845. provider = GraphTraversalReachability(self.store)
  846. # Get objects reachable from commit3 but not from commit2
  847. reachable = provider.get_reachable_objects(
  848. [self.commit3.id], exclude_commits=[self.commit2.id]
  849. )
  850. # commit2 uses tree1 (which has blob1), commit3 uses tree2 (which has blob1 + blob2)
  851. # So should include commit3 and blob2 (new in commit3)
  852. # blob1 should be excluded because it's in tree1 (reachable from commit2)
  853. self.assertIn(self.commit3.id, reachable)
  854. self.assertIn(self.blob2.id, reachable)
  855. def test_get_reachability_provider_without_bitmaps(self):
  856. """Test get_reachability_provider returns GraphTraversalReachability when no bitmaps."""
  857. from dulwich.object_store import (
  858. GraphTraversalReachability,
  859. get_reachability_provider,
  860. )
  861. provider = get_reachability_provider(self.store)
  862. # Should return GraphTraversalReachability when no bitmaps available
  863. self.assertIsInstance(provider, GraphTraversalReachability)
  864. def test_get_reachability_provider_prefer_bitmaps_false(self):
  865. """Test get_reachability_provider with prefer_bitmaps=False."""
  866. from dulwich.object_store import (
  867. GraphTraversalReachability,
  868. get_reachability_provider,
  869. )
  870. provider = get_reachability_provider(self.store, prefer_bitmaps=False)
  871. # Should return GraphTraversalReachability when prefer_bitmaps=False
  872. self.assertIsInstance(provider, GraphTraversalReachability)
  873. def test_bitmap_reachability_fallback_without_bitmaps(self):
  874. """Test BitmapReachability falls back to graph traversal without bitmaps."""
  875. provider = BitmapReachability(self.store)
  876. # Without bitmaps, should fall back to graph traversal
  877. reachable = provider.get_reachable_commits(
  878. [self.commit3.id], exclude=None, shallow=None
  879. )
  880. # Should still work via fallback
  881. expected = {self.commit1.id, self.commit2.id, self.commit3.id}
  882. self.assertEqual(expected, reachable)
  883. def test_bitmap_reachability_fallback_with_shallow(self):
  884. """Test BitmapReachability falls back for shallow clones."""
  885. provider = BitmapReachability(self.store)
  886. # With shallow boundary, should fall back to graph traversal
  887. reachable = provider.get_reachable_commits(
  888. [self.commit3.id], exclude=None, shallow={self.commit2.id}
  889. )
  890. # Should include commit3 and commit2 (shallow boundary includes boundary commit)
  891. # but not commit1 (beyond shallow boundary)
  892. self.assertEqual({self.commit2.id, self.commit3.id}, reachable)
  893. def test_reachability_provider_protocol(self):
  894. """Test that both providers implement the same interface."""
  895. graph_provider = GraphTraversalReachability(self.store)
  896. bitmap_provider = BitmapReachability(self.store)
  897. # Both should have the same methods
  898. for method in [
  899. "get_reachable_commits",
  900. "get_reachable_objects",
  901. "get_tree_objects",
  902. ]:
  903. self.assertTrue(hasattr(graph_provider, method))
  904. self.assertTrue(hasattr(bitmap_provider, method))
  905. def test_graph_traversal_vs_bitmap_consistency(self):
  906. """Test that GraphTraversalReachability and BitmapReachability produce same results."""
  907. graph_provider = GraphTraversalReachability(self.store)
  908. bitmap_provider = BitmapReachability(self.store) # Will use fallback
  909. # Test get_reachable_commits
  910. graph_commits = graph_provider.get_reachable_commits(
  911. [self.commit3.id], exclude=[self.commit1.id], shallow=None
  912. )
  913. bitmap_commits = bitmap_provider.get_reachable_commits(
  914. [self.commit3.id], exclude=[self.commit1.id], shallow=None
  915. )
  916. self.assertEqual(graph_commits, bitmap_commits)
  917. # Test get_reachable_objects
  918. graph_objects = graph_provider.get_reachable_objects(
  919. [self.commit3.id], exclude_commits=None
  920. )
  921. bitmap_objects = bitmap_provider.get_reachable_objects(
  922. [self.commit3.id], exclude_commits=None
  923. )
  924. self.assertEqual(graph_objects, bitmap_objects)