test_midx.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. # test_midx.py -- Tests for multi-pack-index
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for multi-pack-index (MIDX) functionality."""
  22. import os
  23. import tempfile
  24. from io import BytesIO
  25. from unittest import TestCase
  26. from dulwich.midx import (
  27. HASH_ALGORITHM_SHA1,
  28. MultiPackIndex,
  29. write_midx,
  30. write_midx_file,
  31. )
  32. class MIDXWriteTests(TestCase):
  33. """Tests for writing MIDX files."""
  34. def test_write_empty_midx(self):
  35. """Test writing an empty MIDX file."""
  36. f = BytesIO()
  37. pack_entries = []
  38. checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
  39. # Checksum should be 20 bytes
  40. self.assertEqual(20, len(checksum))
  41. # Should be able to read it back
  42. f.seek(0)
  43. midx = MultiPackIndex("test.midx", file=f, contents=f.read())
  44. self.assertEqual(0, len(midx))
  45. self.assertEqual(0, midx.pack_count)
  46. self.assertEqual([], midx.pack_names)
  47. def test_write_single_pack_midx(self):
  48. """Test writing a MIDX file with a single pack."""
  49. f = BytesIO()
  50. # Create some fake pack entries
  51. pack_entries = [
  52. (
  53. "pack-abc123.idx",
  54. [
  55. (b"\x01" * 20, 100, 0x12345678), # sha, offset, crc32
  56. (b"\x02" * 20, 200, 0x87654321),
  57. (b"\x03" * 20, 300, 0xABCDEF00),
  58. ],
  59. )
  60. ]
  61. checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
  62. self.assertEqual(20, len(checksum))
  63. # Read it back
  64. f.seek(0)
  65. midx = MultiPackIndex("test.midx", file=f, contents=f.read())
  66. self.assertEqual(3, len(midx))
  67. self.assertEqual(1, midx.pack_count)
  68. self.assertEqual(["pack-abc123.idx"], midx.pack_names)
  69. # Check object lookups
  70. result = midx.object_offset(b"\x01" * 20)
  71. self.assertIsNotNone(result)
  72. pack_name, offset = result
  73. self.assertEqual("pack-abc123.idx", pack_name)
  74. self.assertEqual(100, offset)
  75. result = midx.object_offset(b"\x02" * 20)
  76. self.assertIsNotNone(result)
  77. pack_name, offset = result
  78. self.assertEqual("pack-abc123.idx", pack_name)
  79. self.assertEqual(200, offset)
  80. result = midx.object_offset(b"\x03" * 20)
  81. self.assertIsNotNone(result)
  82. pack_name, offset = result
  83. self.assertEqual("pack-abc123.idx", pack_name)
  84. self.assertEqual(300, offset)
  85. # Check non-existent object
  86. result = midx.object_offset(b"\xff" * 20)
  87. self.assertIsNone(result)
  88. def test_write_multiple_packs_midx(self):
  89. """Test writing a MIDX file with multiple packs."""
  90. f = BytesIO()
  91. pack_entries = [
  92. (
  93. "pack-111.idx",
  94. [
  95. (b"\x01" * 20, 100, 0),
  96. (b"\x03" * 20, 300, 0),
  97. ],
  98. ),
  99. (
  100. "pack-222.idx",
  101. [
  102. (b"\x02" * 20, 50, 0),
  103. (b"\x04" * 20, 150, 0),
  104. ],
  105. ),
  106. ]
  107. checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
  108. self.assertEqual(20, len(checksum))
  109. # Read it back
  110. f.seek(0)
  111. midx = MultiPackIndex("test.midx", file=f, contents=f.read())
  112. self.assertEqual(4, len(midx))
  113. self.assertEqual(2, midx.pack_count)
  114. self.assertEqual(["pack-111.idx", "pack-222.idx"], midx.pack_names)
  115. # Objects should be findable across packs
  116. result = midx.object_offset(b"\x01" * 20)
  117. self.assertIsNotNone(result)
  118. self.assertEqual("pack-111.idx", result[0])
  119. result = midx.object_offset(b"\x02" * 20)
  120. self.assertIsNotNone(result)
  121. self.assertEqual("pack-222.idx", result[0])
  122. def test_write_large_offsets(self):
  123. """Test writing a MIDX file with large offsets (>= 2^31)."""
  124. f = BytesIO()
  125. large_offset = 2**32 # Offset that requires LOFF chunk
  126. pack_entries = [
  127. (
  128. "pack-large.idx",
  129. [
  130. (b"\x01" * 20, 100, 0),
  131. (b"\x02" * 20, large_offset, 0), # Large offset
  132. ],
  133. )
  134. ]
  135. checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
  136. self.assertEqual(20, len(checksum))
  137. # Read it back
  138. f.seek(0)
  139. midx = MultiPackIndex("test.midx", file=f, contents=f.read())
  140. self.assertEqual(2, len(midx))
  141. # Small offset should work
  142. result = midx.object_offset(b"\x01" * 20)
  143. self.assertIsNotNone(result)
  144. self.assertEqual(100, result[1])
  145. # Large offset should work
  146. result = midx.object_offset(b"\x02" * 20)
  147. self.assertIsNotNone(result)
  148. self.assertEqual(large_offset, result[1])
  149. def test_write_midx_file(self):
  150. """Test writing a MIDX file to disk."""
  151. with tempfile.TemporaryDirectory() as tmpdir:
  152. midx_path = os.path.join(tmpdir, "multi-pack-index")
  153. pack_entries = [
  154. (
  155. "pack-test.idx",
  156. [
  157. (b"\xaa" * 20, 1000, 0),
  158. ],
  159. )
  160. ]
  161. checksum = write_midx_file(midx_path, pack_entries, HASH_ALGORITHM_SHA1)
  162. self.assertEqual(20, len(checksum))
  163. # Verify file was created
  164. self.assertTrue(os.path.exists(midx_path))
  165. # Read it back from disk
  166. with open(midx_path, "rb") as f:
  167. midx = MultiPackIndex(midx_path, file=f, contents=f.read())
  168. self.assertEqual(1, len(midx))
  169. result = midx.object_offset(b"\xaa" * 20)
  170. self.assertIsNotNone(result)
  171. self.assertEqual("pack-test.idx", result[0])
  172. self.assertEqual(1000, result[1])
  173. class MIDXContainsTests(TestCase):
  174. """Tests for MIDX __contains__ method."""
  175. def test_contains_object(self):
  176. """Test checking if an object is in the MIDX."""
  177. f = BytesIO()
  178. pack_entries = [
  179. (
  180. "pack-test.idx",
  181. [
  182. (b"\x01" * 20, 100, 0),
  183. (b"\x02" * 20, 200, 0),
  184. ],
  185. )
  186. ]
  187. write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
  188. f.seek(0)
  189. midx = MultiPackIndex("test.midx", file=f, contents=f.read())
  190. self.assertTrue(b"\x01" * 20 in midx)
  191. self.assertTrue(b"\x02" * 20 in midx)
  192. self.assertFalse(b"\xff" * 20 in midx)
  193. class MIDXIterEntriesTests(TestCase):
  194. """Tests for MIDX iterentries method."""
  195. def test_iterentries(self):
  196. """Test iterating over MIDX entries."""
  197. f = BytesIO()
  198. pack_entries = [
  199. (
  200. "pack-111.idx",
  201. [
  202. (b"\x01" * 20, 100, 0),
  203. (b"\x03" * 20, 300, 0),
  204. ],
  205. ),
  206. (
  207. "pack-222.idx",
  208. [
  209. (b"\x02" * 20, 50, 0),
  210. ],
  211. ),
  212. ]
  213. write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
  214. f.seek(0)
  215. midx = MultiPackIndex("test.midx", file=f, contents=f.read())
  216. entries = list(midx.iterentries())
  217. self.assertEqual(3, len(entries))
  218. # Entries should be sorted by SHA
  219. self.assertEqual(b"\x01" * 20, entries[0][0])
  220. self.assertEqual("pack-111.idx", entries[0][1])
  221. self.assertEqual(100, entries[0][2])
  222. self.assertEqual(b"\x02" * 20, entries[1][0])
  223. self.assertEqual("pack-222.idx", entries[1][1])
  224. self.assertEqual(50, entries[1][2])
  225. self.assertEqual(b"\x03" * 20, entries[2][0])
  226. self.assertEqual("pack-111.idx", entries[2][1])
  227. self.assertEqual(300, entries[2][2])