test_midx.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. # test_midx.py -- Compatibility tests for multi-pack-index functionality
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. """Compatibility tests for Git multi-pack-index functionality.
  10. These tests verify that dulwich's MIDX implementation can read and interact
  11. with MIDX files created by C Git, and that Git can read MIDX files created
  12. by Dulwich.
  13. """
  14. import os
  15. import tempfile
  16. from dulwich.midx import load_midx
  17. from dulwich.object_store import DiskObjectStore
  18. from dulwich.repo import Repo
  19. from .utils import CompatTestCase, run_git_or_fail
  class MIDXCompatTests(CompatTestCase):
      """Compatibility tests for multi-pack-index functionality.

      Each test builds a scratch repository with C Git, lets either Git or
      Dulwich write the multi-pack-index (MIDX) file, and then checks that
      the other implementation can consume it.
      """

      # Multi-pack-index was introduced in Git 2.21.0
      min_git_version = (2, 21, 0)

      def setUp(self):
          """Create a scratch directory and pin the Git author/committer identity."""
          super().setUp()
          self.test_dir = tempfile.mkdtemp()
          self.repo_path = os.path.join(self.test_dir, "test-repo")

          # Set up git identity to avoid committer identity errors
          self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
          self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
          self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
          self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")

      def tearDown(self):
          """Remove the scratch directory, then run the base-class teardown."""
          from .utils import rmtree_ro

          try:
              rmtree_ro(self.test_dir)
          finally:
              # Always chain up, even if removing the directory failed.
              super().tearDown()

      def _midx_path(self):
          """Return the path of the multi-pack-index file inside ``self.repo_path``."""
          return os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")

      def create_test_repo_with_packs(self):
          """Create a test repository with multiple pack files.

          The Git directory is moved to ``self.repo_path`` and a separate work
          tree is linked to it through a ``.git`` file.

          Returns:
            Path of the work tree, to be used as ``cwd`` for git commands.
          """
          # Initialize repository and relocate its .git directory
          run_git_or_fail(["init"], cwd=self.test_dir)
          os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)

          work_dir = os.path.join(self.test_dir, "work")
          os.makedirs(work_dir)

          # Create .git file pointing to our repo
          with open(os.path.join(work_dir, ".git"), "w", encoding="utf-8") as f:
              f.write(f"gitdir: {self.repo_path}\n")

          # Create some commits and pack them
          for i in range(5):
              filename = f"file{i}.txt"
              with open(os.path.join(work_dir, filename), "w", encoding="utf-8") as f:
                  f.write(f"Content {i}\n" * 100)  # Make files bigger to ensure packing

              run_git_or_fail(["add", filename], cwd=work_dir)
              run_git_or_fail(
                  [
                      "commit",
                      "-m",
                      f"Commit {i}",
                      "--author",
                      "Test Author <test@example.com>",
                  ],
                  cwd=work_dir,
              )

              # Create a pack file after each commit to get multiple packs
              if i > 0:  # Skip first commit to avoid empty pack
                  run_git_or_fail(["repack", "-d"], cwd=work_dir)

          return work_dir

      def test_read_git_midx(self):
          """Test that Dulwich can read a MIDX file created by Git."""
          work_dir = self.create_test_repo_with_packs()

          # Have Git create a MIDX file
          run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

          # Verify Git created the MIDX file
          midx_path = self._midx_path()
          self.assertTrue(
              os.path.exists(midx_path), "Git did not create multi-pack-index file"
          )

          # Load the MIDX file with Dulwich
          midx = load_midx(midx_path)
          try:
              # Verify we can read it
              self.assertGreater(len(midx), 0, "MIDX should contain objects")
              self.assertGreater(midx.pack_count, 0, "MIDX should reference packs")

              # Verify the pack names look reasonable
              # Git stores .idx extensions in MIDX files
              for pack_name in midx.pack_names:
                  self.assertTrue(pack_name.startswith("pack-"))
                  self.assertTrue(pack_name.endswith(".idx"))
          finally:
              midx.close()

      def test_git_uses_dulwich_midx(self):
          """Test that Git can use a MIDX file created by Dulwich."""
          work_dir = self.create_test_repo_with_packs()

          # Use Dulwich to create a MIDX file
          repo = Repo(self.repo_path)
          try:
              store = repo.object_store
              self.assertIsInstance(store, DiskObjectStore)

              # Write MIDX with Dulwich
              checksum = store.write_midx()
              self.assertEqual(20, len(checksum))
          finally:
              repo.close()

          # Verify the file was created
          self.assertTrue(os.path.exists(self._midx_path()))

          # Have Git verify the MIDX file (should succeed with return code 0)
          run_git_or_fail(["multi-pack-index", "verify"], cwd=work_dir)

          # Try to use the MIDX with Git commands
          # This should work if the MIDX is valid
          run_git_or_fail(["fsck"], cwd=work_dir)

      def test_midx_object_lookup_matches_git(self):
          """Test that object lookups through MIDX match Git's results."""
          work_dir = self.create_test_repo_with_packs()

          # Have Git create a MIDX file
          run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

          # Load with Dulwich
          repo = Repo(self.repo_path)
          try:
              store = repo.object_store

              # Get MIDX
              midx = store.get_midx()
              self.assertIsNotNone(midx, "MIDX should be loaded")

              # Get all objects from Git. The first field of every output line
              # is the hex object id; convert it straight to its binary form.
              rev_list = run_git_or_fail(
                  ["rev-list", "--all", "--objects"], cwd=work_dir
              )
              object_shas = [
                  bytes.fromhex(line.split()[0])
                  for line in rev_list.decode("utf-8").splitlines()
                  if line.strip()
              ]

              # Verify we can find these objects through the MIDX
              found_count = 0
              for sha_bin in object_shas:
                  if sha_bin not in midx:
                      continue
                  found_count += 1

                  # Verify we can get the object location
                  location = midx.object_offset(sha_bin)
                  self.assertIsNotNone(location)
                  pack_name, offset = location
                  self.assertIsInstance(pack_name, str)
                  self.assertIsInstance(offset, int)
                  self.assertGreater(offset, 0)

              # We should find at least some objects in the MIDX
              self.assertGreater(
                  found_count, 0, "Should find at least some objects in MIDX"
              )
          finally:
              repo.close()

      def test_midx_with_multiple_packs(self):
          """Test MIDX functionality with multiple pack files."""
          work_dir = self.create_test_repo_with_packs()

          # Create multiple pack files explicitly
          run_git_or_fail(["repack"], cwd=work_dir)
          run_git_or_fail(["repack"], cwd=work_dir)

          # Create MIDX with Git
          run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

          # Load with Dulwich
          midx = load_midx(self._midx_path())
          try:
              # Should have multiple packs
              # (Exact count may vary depending on Git version and repacking)
              self.assertGreaterEqual(midx.pack_count, 1)

              # Verify we can iterate over all entries
              entries = list(midx.iterentries())
              self.assertGreater(len(entries), 0)

              # All entries should have valid structure
              for sha, pack_name, offset in entries:
                  self.assertEqual(20, len(sha))  # SHA-1 is 20 bytes
                  self.assertIsInstance(pack_name, str)
                  # Git stores .idx extensions in MIDX files
                  self.assertTrue(pack_name.endswith(".idx"))
                  self.assertIsInstance(offset, int)
                  self.assertGreaterEqual(offset, 0)
          finally:
              midx.close()

      def test_dulwich_object_store_with_git_midx(self):
          """Test that DiskObjectStore can use Git-created MIDX for lookups."""
          work_dir = self.create_test_repo_with_packs()

          # Have Git create a MIDX file
          run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

          # Load repo with Dulwich
          repo = Repo(self.repo_path)
          try:
              # Get a commit from the repo
              rev_parse = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
              head_sha = rev_parse.decode("utf-8").strip().encode("ascii")

              # Verify we can access it through Dulwich
              # This should use the MIDX for lookup
              obj = repo.object_store[head_sha]
              self.assertIsNotNone(obj)
              self.assertEqual(b"commit", obj.type_name)
          finally:
              repo.close()

      def test_repack_with_midx(self):
          """Test that repacking works correctly with MIDX present."""
          work_dir = self.create_test_repo_with_packs()

          # Create MIDX with Dulwich
          repo = Repo(self.repo_path)
          try:
              repo.object_store.write_midx()
          finally:
              repo.close()

          # Verify Git can still repack
          run_git_or_fail(["repack", "-d"], cwd=work_dir)

          # The MIDX should still be readable
          midx_path = self._midx_path()
          if os.path.exists(midx_path):  # Git may remove it during repack
              midx = load_midx(midx_path)
              try:
                  # The length itself is not constrained here; the point is
                  # that loading and querying the file does not raise.
                  self.assertGreaterEqual(len(midx), 0)
              finally:
                  midx.close()