test_bitmap.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833
  1. # test_bitmap.py -- Tests for bitmap support
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for bitmap support."""
  22. import os
  23. import tempfile
  24. import unittest
  25. from io import BytesIO
  26. from dulwich.bitmap import (
  27. BITMAP_OPT_FULL_DAG,
  28. BITMAP_OPT_HASH_CACHE,
  29. BITMAP_OPT_LOOKUP_TABLE,
  30. BITMAP_SIGNATURE,
  31. BITMAP_VERSION,
  32. BitmapEntry,
  33. EWAHBitmap,
  34. PackBitmap,
  35. _encode_ewah_words,
  36. read_bitmap_file,
  37. write_bitmap_file,
  38. )
  39. class EWAHCompressionTests(unittest.TestCase):
  40. """Tests for EWAH compression helper functions."""
  41. def test_encode_empty_words(self):
  42. """Test encoding empty word list."""
  43. result = _encode_ewah_words([])
  44. self.assertEqual([], result)
  45. def test_encode_single_literal(self):
  46. """Test encoding single literal word."""
  47. result = _encode_ewah_words([0x123])
  48. # Should be: RLW(0 run, 1 literal) + literal
  49. self.assertEqual(2, len(result))
  50. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  51. # running_bit=0, running_len=0, literal_words=1
  52. expected_rlw = (1 << 33) | (0 << 1) | 0
  53. self.assertEqual(expected_rlw, result[0])
  54. self.assertEqual(0x123, result[1])
  55. def test_encode_zero_run(self):
  56. """Test encoding run of zeros."""
  57. result = _encode_ewah_words([0, 0, 0])
  58. # Should be: RLW(3 zeros, 0 literals)
  59. self.assertEqual(1, len(result))
  60. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  61. # running_bit=0, running_len=3, literal_words=0
  62. expected_rlw = (0 << 33) | (3 << 1) | 0
  63. self.assertEqual(expected_rlw, result[0])
  64. def test_encode_ones_run(self):
  65. """Test encoding run of all-ones."""
  66. result = _encode_ewah_words([0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF])
  67. # Should be: RLW(2 ones, 0 literals)
  68. self.assertEqual(1, len(result))
  69. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  70. # running_bit=1, running_len=2, literal_words=0
  71. expected_rlw = (0 << 33) | (2 << 1) | 1
  72. self.assertEqual(expected_rlw, result[0])
  73. def test_encode_run_followed_by_literals(self):
  74. """Test encoding run followed by literal words."""
  75. result = _encode_ewah_words([0, 0, 0x123, 0x456])
  76. # Should be: RLW(2 zeros, 2 literals) + literals
  77. self.assertEqual(3, len(result))
  78. # RLW bit layout: [literal_words(31)][running_len(32)][running_bit(1)]
  79. # running_bit=0, running_len=2, literal_words=2
  80. expected_rlw = (2 << 33) | (2 << 1) | 0
  81. self.assertEqual(expected_rlw, result[0])
  82. self.assertEqual(0x123, result[1])
  83. self.assertEqual(0x456, result[2])
  84. def test_encode_mixed_pattern(self):
  85. """Test encoding mixed runs and literals."""
  86. result = _encode_ewah_words(
  87. [0, 0, 0x123, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF]
  88. )
  89. # Should be: RLW(2 zeros, 1 literal) + literal + RLW(2 ones, 0 literals)
  90. self.assertEqual(3, len(result))
  91. class EWAHCompatibilityTests(unittest.TestCase):
  92. """Tests for EWAH encode/decode compatibility."""
  93. def test_encode_decode_sparse_bitmap(self):
  94. """Test encoding and decoding a sparse bitmap with runs."""
  95. # Create a bitmap with a pattern that benefits from run-length encoding
  96. # Bits: 0, 1, 128 (has runs of zeros between set bits)
  97. bitmap = EWAHBitmap()
  98. bitmap.add(0)
  99. bitmap.add(1)
  100. bitmap.add(128)
  101. # Encode
  102. encoded = bitmap.encode()
  103. # Decode
  104. bitmap2 = EWAHBitmap(encoded)
  105. # Verify all bits are preserved
  106. self.assertEqual(len(bitmap), len(bitmap2))
  107. self.assertIn(0, bitmap2)
  108. self.assertIn(1, bitmap2)
  109. self.assertIn(128, bitmap2)
  110. self.assertNotIn(2, bitmap2)
  111. self.assertNotIn(127, bitmap2)
  112. def test_encode_decode_dense_bitmap(self):
  113. """Test encoding and decoding a dense bitmap."""
  114. # Create a bitmap with many consecutive bits set
  115. bitmap = EWAHBitmap()
  116. for i in range(64):
  117. bitmap.add(i)
  118. # Encode
  119. encoded = bitmap.encode()
  120. # Decode
  121. bitmap2 = EWAHBitmap(encoded)
  122. # Verify all bits are preserved
  123. self.assertEqual(64, len(bitmap2))
  124. for i in range(64):
  125. self.assertIn(i, bitmap2)
  126. self.assertNotIn(64, bitmap2)
  127. def test_encode_decode_runs_of_zeros(self):
  128. """Test encoding and decoding bitmap with long runs of zeros."""
  129. # Create bitmap: bits 0, 200, 400 (lots of zeros in between)
  130. bitmap = EWAHBitmap()
  131. bitmap.add(0)
  132. bitmap.add(200)
  133. bitmap.add(400)
  134. # Encode
  135. encoded = bitmap.encode()
  136. # Decode
  137. bitmap2 = EWAHBitmap(encoded)
  138. # Verify
  139. self.assertEqual(3, len(bitmap2))
  140. self.assertIn(0, bitmap2)
  141. self.assertIn(200, bitmap2)
  142. self.assertIn(400, bitmap2)
  143. self.assertNotIn(1, bitmap2)
  144. self.assertNotIn(199, bitmap2)
  145. self.assertNotIn(201, bitmap2)
  146. def test_encode_decode_mixed_pattern(self):
  147. """Test encoding and decoding bitmap with mixed dense/sparse regions."""
  148. bitmap = EWAHBitmap()
  149. # Dense region: 0-63
  150. for i in range(64):
  151. bitmap.add(i)
  152. # Sparse region: 200, 300, 400
  153. bitmap.add(200)
  154. bitmap.add(300)
  155. bitmap.add(400)
  156. # Another dense region: 500-563
  157. for i in range(500, 564):
  158. bitmap.add(i)
  159. # Encode
  160. encoded = bitmap.encode()
  161. # Decode
  162. bitmap2 = EWAHBitmap(encoded)
  163. # Verify all bits preserved
  164. self.assertEqual(len(bitmap), len(bitmap2))
  165. for i in range(64):
  166. self.assertIn(i, bitmap2)
  167. self.assertIn(200, bitmap2)
  168. self.assertIn(300, bitmap2)
  169. self.assertIn(400, bitmap2)
  170. for i in range(500, 564):
  171. self.assertIn(i, bitmap2)
  172. # Check some bits that shouldn't be set
  173. self.assertNotIn(64, bitmap2)
  174. self.assertNotIn(199, bitmap2)
  175. self.assertNotIn(201, bitmap2)
  176. self.assertNotIn(499, bitmap2)
  177. def test_encode_decode_preserves_bitwise_ops(self):
  178. """Test that encode/decode doesn't break bitwise operations."""
  179. # Create two bitmaps
  180. bitmap1 = EWAHBitmap()
  181. bitmap1.add(0)
  182. bitmap1.add(5)
  183. bitmap1.add(100)
  184. bitmap2 = EWAHBitmap()
  185. bitmap2.add(5)
  186. bitmap2.add(10)
  187. bitmap2.add(100)
  188. # Encode and decode both
  189. bitmap1_decoded = EWAHBitmap(bitmap1.encode())
  190. bitmap2_decoded = EWAHBitmap(bitmap2.encode())
  191. # Perform operations on decoded bitmaps
  192. or_result = bitmap1_decoded | bitmap2_decoded
  193. and_result = bitmap1_decoded & bitmap2_decoded
  194. xor_result = bitmap1_decoded ^ bitmap2_decoded
  195. # Verify results
  196. # OR: {0, 5, 10, 100}
  197. self.assertEqual(4, len(or_result))
  198. self.assertIn(0, or_result)
  199. self.assertIn(5, or_result)
  200. self.assertIn(10, or_result)
  201. self.assertIn(100, or_result)
  202. # AND: {5, 100}
  203. self.assertEqual(2, len(and_result))
  204. self.assertIn(5, and_result)
  205. self.assertIn(100, and_result)
  206. # XOR: {0, 10}
  207. self.assertEqual(2, len(xor_result))
  208. self.assertIn(0, xor_result)
  209. self.assertIn(10, xor_result)
  210. def test_round_trip_large_bitmap(self):
  211. """Test round-trip encoding/decoding of a large bitmap."""
  212. # Create a large bitmap with various patterns
  213. bitmap = EWAHBitmap()
  214. # Add bits at various intervals
  215. for i in range(0, 10000, 7):
  216. bitmap.add(i)
  217. original_bits = set(bitmap.bits)
  218. # Encode and decode
  219. encoded = bitmap.encode()
  220. decoded = EWAHBitmap(encoded)
  221. # Verify all bits preserved
  222. self.assertEqual(original_bits, decoded.bits)
  223. self.assertEqual(len(bitmap), len(decoded))
  224. class EWAHBitmapTests(unittest.TestCase):
  225. """Tests for EWAH bitmap compression."""
  226. def test_empty_bitmap(self):
  227. """Test empty bitmap."""
  228. bitmap = EWAHBitmap()
  229. self.assertEqual(0, len(bitmap))
  230. self.assertEqual(0, bitmap.bit_count)
  231. def test_add_bit(self):
  232. """Test adding bits to bitmap."""
  233. bitmap = EWAHBitmap()
  234. bitmap.add(0)
  235. bitmap.add(5)
  236. bitmap.add(100)
  237. self.assertEqual(3, len(bitmap))
  238. self.assertEqual(101, bitmap.bit_count)
  239. self.assertIn(0, bitmap)
  240. self.assertIn(5, bitmap)
  241. self.assertIn(100, bitmap)
  242. self.assertNotIn(1, bitmap)
  243. self.assertNotIn(99, bitmap)
  244. def test_encode_decode(self):
  245. """Test encoding and decoding bitmaps."""
  246. bitmap = EWAHBitmap()
  247. bitmap.add(0)
  248. bitmap.add(1)
  249. bitmap.add(64)
  250. bitmap.add(128)
  251. # Encode
  252. data = bitmap.encode()
  253. self.assertIsInstance(data, bytes)
  254. # Decode
  255. bitmap2 = EWAHBitmap(data)
  256. self.assertEqual(len(bitmap), len(bitmap2))
  257. self.assertIn(0, bitmap2)
  258. self.assertIn(1, bitmap2)
  259. self.assertIn(64, bitmap2)
  260. self.assertIn(128, bitmap2)
  261. def test_bitwise_or(self):
  262. """Test bitwise OR operation."""
  263. bitmap1 = EWAHBitmap()
  264. bitmap1.add(0)
  265. bitmap1.add(5)
  266. bitmap2 = EWAHBitmap()
  267. bitmap2.add(5)
  268. bitmap2.add(10)
  269. result = bitmap1 | bitmap2
  270. self.assertEqual(3, len(result))
  271. self.assertIn(0, result)
  272. self.assertIn(5, result)
  273. self.assertIn(10, result)
  274. def test_bitwise_and(self):
  275. """Test bitwise AND operation."""
  276. bitmap1 = EWAHBitmap()
  277. bitmap1.add(0)
  278. bitmap1.add(5)
  279. bitmap2 = EWAHBitmap()
  280. bitmap2.add(5)
  281. bitmap2.add(10)
  282. result = bitmap1 & bitmap2
  283. self.assertEqual(1, len(result))
  284. self.assertIn(5, result)
  285. self.assertNotIn(0, result)
  286. self.assertNotIn(10, result)
  287. def test_bitwise_xor(self):
  288. """Test bitwise XOR operation."""
  289. bitmap1 = EWAHBitmap()
  290. bitmap1.add(0)
  291. bitmap1.add(5)
  292. bitmap2 = EWAHBitmap()
  293. bitmap2.add(5)
  294. bitmap2.add(10)
  295. result = bitmap1 ^ bitmap2
  296. self.assertEqual(2, len(result))
  297. self.assertIn(0, result)
  298. self.assertIn(10, result)
  299. self.assertNotIn(5, result)
  300. class BitmapEntryTests(unittest.TestCase):
  301. """Tests for bitmap entries."""
  302. def test_create_entry(self):
  303. """Test creating a bitmap entry."""
  304. bitmap = EWAHBitmap()
  305. bitmap.add(0)
  306. bitmap.add(10)
  307. entry = BitmapEntry(
  308. object_pos=100,
  309. xor_offset=0,
  310. flags=0,
  311. bitmap=bitmap,
  312. )
  313. self.assertEqual(100, entry.object_pos)
  314. self.assertEqual(0, entry.xor_offset)
  315. self.assertEqual(0, entry.flags)
  316. self.assertEqual(bitmap, entry.bitmap)
  317. class PackBitmapTests(unittest.TestCase):
  318. """Tests for pack bitmap."""
  319. def test_create_bitmap(self):
  320. """Test creating a pack bitmap."""
  321. bitmap = PackBitmap()
  322. self.assertEqual(BITMAP_VERSION, bitmap.version)
  323. self.assertEqual(BITMAP_OPT_FULL_DAG, bitmap.flags)
  324. self.assertIsNone(bitmap.pack_checksum)
  325. def test_bitmap_with_entries(self):
  326. """Test bitmap with entries."""
  327. bitmap = PackBitmap()
  328. commit_sha = b"\x00" * 20
  329. ewah_bitmap = EWAHBitmap()
  330. ewah_bitmap.add(0)
  331. ewah_bitmap.add(5)
  332. entry = BitmapEntry(
  333. object_pos=0,
  334. xor_offset=0,
  335. flags=0,
  336. bitmap=ewah_bitmap,
  337. )
  338. bitmap.entries[commit_sha] = entry
  339. self.assertTrue(bitmap.has_commit(commit_sha))
  340. self.assertFalse(bitmap.has_commit(b"\x01" * 20))
  341. def test_iter_commits(self):
  342. """Test iterating over commits with bitmaps."""
  343. bitmap = PackBitmap()
  344. commit1 = b"\x00" * 20
  345. commit2 = b"\x01" * 20
  346. ewah_bitmap = EWAHBitmap()
  347. entry = BitmapEntry(0, 0, 0, ewah_bitmap)
  348. bitmap.entries[commit1] = entry
  349. bitmap.entries[commit2] = entry
  350. commits = list(bitmap.iter_commits())
  351. self.assertEqual(2, len(commits))
  352. self.assertIn(commit1, commits)
  353. self.assertIn(commit2, commits)
  354. class BitmapFileTests(unittest.TestCase):
  355. """Tests for bitmap file I/O."""
  356. def test_write_read_empty_bitmap(self):
  357. """Test writing and reading an empty bitmap."""
  358. bitmap = PackBitmap()
  359. bitmap.pack_checksum = b"\x00" * 20
  360. # Write to bytes
  361. f = BytesIO()
  362. write_bitmap_file(f, bitmap)
  363. # Read back
  364. f.seek(0)
  365. bitmap2 = read_bitmap_file(f)
  366. self.assertEqual(bitmap.version, bitmap2.version)
  367. self.assertEqual(bitmap.flags, bitmap2.flags)
  368. self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
  369. def test_write_read_with_type_bitmaps(self):
  370. """Test writing and reading bitmaps with type information."""
  371. bitmap = PackBitmap()
  372. bitmap.pack_checksum = b"\xaa" * 20
  373. # Add some type bitmap data
  374. bitmap.commit_bitmap.add(0)
  375. bitmap.commit_bitmap.add(5)
  376. bitmap.tree_bitmap.add(1)
  377. bitmap.blob_bitmap.add(2)
  378. bitmap.tag_bitmap.add(3)
  379. # Write to bytes
  380. f = BytesIO()
  381. write_bitmap_file(f, bitmap)
  382. # Read back
  383. f.seek(0)
  384. bitmap2 = read_bitmap_file(f)
  385. self.assertEqual(bitmap.version, bitmap2.version)
  386. self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
  387. # Check type bitmaps
  388. self.assertIn(0, bitmap2.commit_bitmap)
  389. self.assertIn(5, bitmap2.commit_bitmap)
  390. self.assertIn(1, bitmap2.tree_bitmap)
  391. self.assertIn(2, bitmap2.blob_bitmap)
  392. self.assertIn(3, bitmap2.tag_bitmap)
  393. def test_invalid_signature(self):
  394. """Test reading file with invalid signature."""
  395. f = BytesIO(b"XXXX\x00\x01\x00\x00")
  396. with self.assertRaises(ValueError) as cm:
  397. read_bitmap_file(f)
  398. self.assertIn("Invalid bitmap signature", str(cm.exception))
  399. def test_invalid_version(self):
  400. """Test reading file with invalid version."""
  401. f = BytesIO(BITMAP_SIGNATURE + b"\x00\x02\x00\x01")
  402. with self.assertRaises(ValueError) as cm:
  403. read_bitmap_file(f)
  404. self.assertIn("Unsupported bitmap version", str(cm.exception))
  405. def test_incomplete_header(self):
  406. """Test reading file with incomplete header."""
  407. f = BytesIO(BITMAP_SIGNATURE + b"\x00")
  408. with self.assertRaises(ValueError) as cm:
  409. read_bitmap_file(f)
  410. self.assertIn("Incomplete bitmap header", str(cm.exception))
  411. class BitmapIntegrationTests(unittest.TestCase):
  412. """Integration tests for bitmap functionality."""
  413. def test_round_trip_file(self):
  414. """Test writing and reading bitmap to/from actual file."""
  415. with tempfile.TemporaryDirectory() as tmpdir:
  416. bitmap_path = os.path.join(tmpdir, "test.bitmap")
  417. # Create bitmap
  418. bitmap = PackBitmap()
  419. bitmap.pack_checksum = b"\xff" * 20
  420. bitmap.commit_bitmap.add(10)
  421. bitmap.tree_bitmap.add(20)
  422. # Write to file
  423. from dulwich.bitmap import write_bitmap
  424. write_bitmap(bitmap_path, bitmap)
  425. # Read back
  426. from dulwich.bitmap import read_bitmap
  427. bitmap2 = read_bitmap(bitmap_path)
  428. self.assertEqual(bitmap.version, bitmap2.version)
  429. self.assertEqual(bitmap.pack_checksum, bitmap2.pack_checksum)
  430. self.assertIn(10, bitmap2.commit_bitmap)
  431. self.assertIn(20, bitmap2.tree_bitmap)
  432. def test_xor_decompression(self):
  433. """Test XOR decompression of bitmap entries."""
  434. bitmap = PackBitmap()
  435. # Create base bitmap
  436. base_bitmap = EWAHBitmap()
  437. base_bitmap.add(0)
  438. base_bitmap.add(1)
  439. base_bitmap.add(2)
  440. base_entry = BitmapEntry(
  441. object_pos=0,
  442. xor_offset=0,
  443. flags=0,
  444. bitmap=base_bitmap,
  445. )
  446. # Create XOR'd bitmap
  447. # If we XOR base (bits 0,1,2) with XOR bitmap (bits 1,3),
  448. # result should be (bits 0,2,3)
  449. xor_bitmap = EWAHBitmap()
  450. xor_bitmap.add(1)
  451. xor_bitmap.add(3)
  452. xor_entry = BitmapEntry(
  453. object_pos=1,
  454. xor_offset=1, # Reference the previous entry
  455. flags=0,
  456. bitmap=xor_bitmap,
  457. )
  458. # Add entries
  459. base_sha = b"\x00" * 20
  460. xor_sha = b"\x01" * 20
  461. bitmap.entries[base_sha] = base_entry
  462. bitmap.entries_list.append((base_sha, base_entry))
  463. bitmap.entries[xor_sha] = xor_entry
  464. bitmap.entries_list.append((xor_sha, xor_entry))
  465. # Get decompressed bitmap
  466. result = bitmap.get_bitmap(xor_sha)
  467. self.assertIsNotNone(result)
  468. self.assertIn(0, result)
  469. self.assertNotIn(1, result) # XOR cancels this
  470. self.assertIn(2, result)
  471. self.assertIn(3, result)
  472. def test_lookup_table_round_trip(self):
  473. """Test reading and writing lookup tables."""
  474. with tempfile.TemporaryDirectory() as tmpdir:
  475. bitmap_path = os.path.join(tmpdir, "test.bitmap")
  476. # Create bitmap with lookup table
  477. bitmap = PackBitmap()
  478. bitmap.flags = BITMAP_OPT_FULL_DAG | BITMAP_OPT_LOOKUP_TABLE
  479. bitmap.pack_checksum = b"\xaa" * 20
  480. # Add some bitmap entries (required for lookup table to be written/read)
  481. for i in range(3):
  482. ewah = EWAHBitmap()
  483. ewah.add(i)
  484. entry = BitmapEntry(
  485. object_pos=i,
  486. xor_offset=0,
  487. flags=0,
  488. bitmap=ewah,
  489. )
  490. sha = i.to_bytes(20, byteorder="big")
  491. bitmap.entries[sha] = entry
  492. bitmap.entries_list.append((sha, entry))
  493. bitmap.lookup_table = [
  494. (0, 100, 0),
  495. (1, 200, 0),
  496. (2, 300, 1),
  497. ]
  498. # Write to file
  499. from dulwich.bitmap import write_bitmap
  500. write_bitmap(bitmap_path, bitmap)
  501. # Read back
  502. from dulwich.bitmap import read_bitmap
  503. bitmap2 = read_bitmap(bitmap_path)
  504. self.assertEqual(bitmap.flags, bitmap2.flags)
  505. self.assertIsNotNone(bitmap2.lookup_table)
  506. self.assertEqual(3, len(bitmap2.lookup_table))
  507. self.assertEqual((0, 100, 0), bitmap2.lookup_table[0])
  508. self.assertEqual((1, 200, 0), bitmap2.lookup_table[1])
  509. self.assertEqual((2, 300, 1), bitmap2.lookup_table[2])
  510. def test_name_hash_cache_round_trip(self):
  511. """Test reading and writing name-hash cache."""
  512. with tempfile.TemporaryDirectory() as tmpdir:
  513. bitmap_path = os.path.join(tmpdir, "test.bitmap")
  514. # Create bitmap with name-hash cache
  515. bitmap = PackBitmap()
  516. bitmap.flags = BITMAP_OPT_FULL_DAG | BITMAP_OPT_HASH_CACHE
  517. bitmap.pack_checksum = b"\xbb" * 20
  518. bitmap.name_hash_cache = [0x12345678, 0x9ABCDEF0, 0xFEDCBA98]
  519. # Write to file
  520. from dulwich.bitmap import write_bitmap
  521. write_bitmap(bitmap_path, bitmap)
  522. # Read back
  523. from dulwich.bitmap import read_bitmap
  524. bitmap2 = read_bitmap(bitmap_path)
  525. self.assertEqual(bitmap.flags, bitmap2.flags)
  526. self.assertIsNotNone(bitmap2.name_hash_cache)
  527. self.assertEqual(3, len(bitmap2.name_hash_cache))
  528. self.assertEqual(0x12345678, bitmap2.name_hash_cache[0])
  529. self.assertEqual(0x9ABCDEF0, bitmap2.name_hash_cache[1])
  530. self.assertEqual(0xFEDCBA98, bitmap2.name_hash_cache[2])
  531. class BitmapErrorHandlingTests(unittest.TestCase):
  532. """Tests for error handling in bitmap reading."""
  533. def test_truncated_header(self):
  534. """Test reading bitmap with truncated header."""
  535. # Only 10 bytes instead of full header
  536. data = b"BITM\x00\x01\x00\x00\x00\x00"
  537. with self.assertRaises(ValueError) as ctx:
  538. read_bitmap_file(BytesIO(data))
  539. # Should raise ValueError about missing/incomplete header data
  540. self.assertIsInstance(ctx.exception, ValueError)
  541. def test_invalid_signature(self):
  542. """Test reading bitmap with invalid signature."""
  543. data = b"JUNK\x00\x01\x00\x00" + b"\x00" * 20
  544. with self.assertRaises(ValueError) as ctx:
  545. read_bitmap_file(BytesIO(data))
  546. self.assertIn("signature", str(ctx.exception).lower())
  547. def test_unsupported_version(self):
  548. """Test reading bitmap with unsupported version."""
  549. data = BITMAP_SIGNATURE
  550. data += b"\x00\x99" # Version 153 (unsupported)
  551. data += b"\x00\x00" # Flags
  552. data += b"\x00\x00\x00\x00" # Entry count
  553. data += b"\x00" * 20 # Pack checksum
  554. with self.assertRaises(ValueError) as ctx:
  555. read_bitmap_file(BytesIO(data))
  556. self.assertIn("version", str(ctx.exception).lower())
  557. def test_truncated_type_bitmap(self):
  558. """Test reading bitmap with truncated type bitmap data."""
  559. # Valid header
  560. data = BITMAP_SIGNATURE
  561. data += BITMAP_VERSION.to_bytes(2, "big")
  562. data += b"\x00\x00" # Flags
  563. data += b"\x00\x00\x00\x00" # Entry count
  564. data += b"\x00" * 20 # Pack checksum
  565. # Truncated type bitmap (incomplete EWAH header)
  566. data += b"\x00\x00\x00\x05" # bit_count
  567. data += b"\x00\x00" # Incomplete word_count
  568. with self.assertRaises(ValueError) as ctx:
  569. read_bitmap_file(BytesIO(data))
  570. self.assertIn("type bitmap", str(ctx.exception).lower())
  571. def test_truncated_bitmap_entry(self):
  572. """Test reading bitmap with truncated entry."""
  573. # Valid header with 1 entry
  574. data = BITMAP_SIGNATURE
  575. data += BITMAP_VERSION.to_bytes(2, "big")
  576. data += b"\x00\x00" # Flags
  577. data += b"\x00\x00\x00\x01" # 1 entry
  578. data += b"\x00" * 20 # Pack checksum
  579. # Write empty type bitmaps
  580. for _ in range(4):
  581. empty_ewah = EWAHBitmap().encode()
  582. data += empty_ewah
  583. # Truncated entry (only object position, missing rest)
  584. data += b"\x00\x00\x00\x00" # Object position
  585. # Missing: XOR offset, flags, bitmap data
  586. with self.assertRaises(ValueError) as ctx:
  587. read_bitmap_file(BytesIO(data))
  588. # Should raise ValueError about missing data
  589. self.assertIsInstance(ctx.exception, ValueError)
  590. def test_empty_bitmap_file(self):
  591. """Test reading completely empty file."""
  592. with self.assertRaises(ValueError):
  593. read_bitmap_file(BytesIO(b""))
  594. def test_bitmap_with_zero_entries(self):
  595. """Test valid bitmap with zero entries."""
  596. bitmap = PackBitmap()
  597. bitmap.pack_checksum = b"\x00" * 20
  598. f = BytesIO()
  599. write_bitmap_file(f, bitmap)
  600. f.seek(0)
  601. # Should read successfully
  602. bitmap2 = read_bitmap_file(f)
  603. self.assertEqual(0, len(bitmap2.entries))
  604. self.assertIsNotNone(bitmap2.pack_checksum)
  605. class BitmapEdgeCaseTests(unittest.TestCase):
  606. """Tests for edge cases in bitmap handling."""
  607. def test_very_large_bitmap(self):
  608. """Test bitmap with many bits set."""
  609. bitmap = EWAHBitmap()
  610. # Add 100,000 bits
  611. for i in range(100000):
  612. if i % 3 == 0: # Every 3rd bit
  613. bitmap.add(i)
  614. # Should encode and decode without issues
  615. encoded = bitmap.encode()
  616. decoded = EWAHBitmap(encoded)
  617. self.assertEqual(len(bitmap), len(decoded))
  618. # Verify a sample of bits
  619. self.assertIn(0, decoded)
  620. self.assertIn(99999, decoded)
  621. self.assertNotIn(1, decoded)
  622. self.assertNotIn(99998, decoded)
  623. def test_bitmap_with_large_gaps(self):
  624. """Test bitmap with large gaps between set bits."""
  625. bitmap = EWAHBitmap()
  626. bitmap.add(0)
  627. bitmap.add(100000)
  628. bitmap.add(200000)
  629. encoded = bitmap.encode()
  630. decoded = EWAHBitmap(encoded)
  631. self.assertEqual(3, len(decoded))
  632. self.assertIn(0, decoded)
  633. self.assertIn(100000, decoded)
  634. self.assertIn(200000, decoded)
  635. def test_bitmap_all_bits_in_word(self):
  636. """Test bitmap with all 64 bits in a word set."""
  637. bitmap = EWAHBitmap()
  638. for i in range(64):
  639. bitmap.add(i)
  640. encoded = bitmap.encode()
  641. decoded = EWAHBitmap(encoded)
  642. self.assertEqual(64, len(decoded))
  643. for i in range(64):
  644. self.assertIn(i, decoded)
  645. def test_multiple_flags_combined(self):
  646. """Test bitmap with multiple flags set."""
  647. bitmap = PackBitmap(
  648. flags=BITMAP_OPT_FULL_DAG | BITMAP_OPT_HASH_CACHE | BITMAP_OPT_LOOKUP_TABLE
  649. )
  650. bitmap.pack_checksum = b"\x00" * 20
  651. bitmap.lookup_table = [(0, 0, 0), (1, 100, 0)]
  652. bitmap.name_hash_cache = [0x12345678, 0xABCDEF00]
  653. # Add an entry
  654. test_bitmap = EWAHBitmap()
  655. test_bitmap.add(0)
  656. test_bitmap.add(5)
  657. entry = BitmapEntry(object_pos=0, xor_offset=0, flags=0, bitmap=test_bitmap)
  658. bitmap.entries[b"\x00" * 20] = entry
  659. # Write and read back
  660. f = BytesIO()
  661. write_bitmap_file(f, bitmap)
  662. f.seek(0)
  663. bitmap2 = read_bitmap_file(f)
  664. self.assertEqual(bitmap.flags, bitmap2.flags)
  665. self.assertTrue(bitmap2.flags & BITMAP_OPT_FULL_DAG)
  666. self.assertTrue(bitmap2.flags & BITMAP_OPT_HASH_CACHE)
  667. self.assertTrue(bitmap2.flags & BITMAP_OPT_LOOKUP_TABLE)
  668. self.assertIsNotNone(bitmap2.lookup_table)
  669. self.assertIsNotNone(bitmap2.name_hash_cache)