test_midx.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. # test_midx.py -- Compatibility tests for multi-pack-index functionality
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. """Compatibility tests for Git multi-pack-index functionality.
  10. These tests verify that dulwich's MIDX implementation can read and interact
  11. with MIDX files created by C Git, and that Git can read MIDX files created
  12. by Dulwich.
  13. """
  14. import os
  15. import tempfile
  16. from dulwich.midx import load_midx
  17. from dulwich.object_store import DiskObjectStore
  18. from dulwich.repo import Repo
  19. from .utils import CompatTestCase, run_git_or_fail
  20. class MIDXCompatTests(CompatTestCase):
  21. """Compatibility tests for multi-pack-index functionality."""
  22. # Multi-pack-index was introduced in Git 2.21.0
  23. min_git_version = (2, 21, 0)
  24. def setUp(self):
  25. super().setUp()
  26. self.test_dir = tempfile.mkdtemp()
  27. self.repo_path = os.path.join(self.test_dir, "test-repo")
  28. # Set up git identity to avoid committer identity errors
  29. self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
  30. self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
  31. self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
  32. self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")
  33. def tearDown(self):
  34. from .utils import rmtree_ro
  35. rmtree_ro(self.test_dir)
  36. def create_test_repo_with_packs(self):
  37. """Create a test repository with multiple pack files."""
  38. # Initialize repository
  39. run_git_or_fail(["init"], cwd=self.test_dir)
  40. os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)
  41. work_dir = os.path.join(self.test_dir, "work")
  42. os.makedirs(work_dir)
  43. # Create .git file pointing to our repo
  44. with open(os.path.join(work_dir, ".git"), "w") as f:
  45. f.write(f"gitdir: {self.repo_path}\n")
  46. # Create some commits and pack them
  47. for i in range(5):
  48. filename = f"file{i}.txt"
  49. with open(os.path.join(work_dir, filename), "w") as f:
  50. f.write(f"Content {i}\n" * 100) # Make files bigger to ensure packing
  51. run_git_or_fail(["add", filename], cwd=work_dir)
  52. run_git_or_fail(
  53. [
  54. "commit",
  55. "-m",
  56. f"Commit {i}",
  57. "--author",
  58. "Test Author <test@example.com>",
  59. ],
  60. cwd=work_dir,
  61. )
  62. # Create a pack file after each commit to get multiple packs
  63. if i > 0: # Skip first commit to avoid empty pack
  64. run_git_or_fail(["repack", "-d"], cwd=work_dir)
  65. return work_dir
  66. def test_read_git_midx(self):
  67. """Test that Dulwich can read a MIDX file created by Git."""
  68. work_dir = self.create_test_repo_with_packs()
  69. # Have Git create a MIDX file
  70. run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
  71. # Verify Git created the MIDX file
  72. midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
  73. self.assertTrue(
  74. os.path.exists(midx_path), "Git did not create multi-pack-index file"
  75. )
  76. # Load the MIDX file with Dulwich
  77. midx = load_midx(midx_path)
  78. # Verify we can read it
  79. self.assertGreater(len(midx), 0, "MIDX should contain objects")
  80. self.assertGreater(midx.pack_count, 0, "MIDX should reference packs")
  81. # Verify the pack names look reasonable
  82. # Git stores .idx extensions in MIDX files
  83. for pack_name in midx.pack_names:
  84. self.assertTrue(pack_name.startswith("pack-"))
  85. self.assertTrue(pack_name.endswith(".idx"))
  86. def test_git_uses_dulwich_midx(self):
  87. """Test that Git can use a MIDX file created by Dulwich."""
  88. work_dir = self.create_test_repo_with_packs()
  89. # Use Dulwich to create a MIDX file
  90. repo = Repo(self.repo_path)
  91. store = repo.object_store
  92. self.assertIsInstance(store, DiskObjectStore)
  93. # Write MIDX with Dulwich
  94. checksum = store.write_midx()
  95. self.assertEqual(20, len(checksum))
  96. # Verify the file was created
  97. midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
  98. self.assertTrue(os.path.exists(midx_path))
  99. # Have Git verify the MIDX file (should succeed with return code 0)
  100. run_git_or_fail(["multi-pack-index", "verify"], cwd=work_dir)
  101. # Try to use the MIDX with Git commands
  102. # This should work if the MIDX is valid
  103. run_git_or_fail(["fsck"], cwd=work_dir)
  104. def test_midx_object_lookup_matches_git(self):
  105. """Test that object lookups through MIDX match Git's results."""
  106. work_dir = self.create_test_repo_with_packs()
  107. # Have Git create a MIDX file
  108. run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
  109. # Load with Dulwich
  110. repo = Repo(self.repo_path)
  111. store = repo.object_store
  112. # Get MIDX
  113. midx = store.get_midx()
  114. self.assertIsNotNone(midx, "MIDX should be loaded")
  115. # Get all objects from Git
  116. result = run_git_or_fail(["rev-list", "--all", "--objects"], cwd=work_dir)
  117. object_shas = [
  118. line.split()[0].encode("ascii")
  119. for line in result.decode("utf-8").strip().split("\n")
  120. if line
  121. ]
  122. # Verify we can find these objects through the MIDX
  123. found_count = 0
  124. for sha_hex in object_shas:
  125. # Convert hex to binary
  126. sha_bin = bytes.fromhex(sha_hex.decode("ascii"))
  127. # Check if it's in the MIDX
  128. if sha_bin in midx:
  129. found_count += 1
  130. # Verify we can get the object location
  131. result = midx.object_offset(sha_bin)
  132. self.assertIsNotNone(result)
  133. pack_name, offset = result
  134. self.assertIsInstance(pack_name, str)
  135. self.assertIsInstance(offset, int)
  136. self.assertGreater(offset, 0)
  137. # We should find at least some objects in the MIDX
  138. self.assertGreater(found_count, 0, "Should find at least some objects in MIDX")
  139. def test_midx_with_multiple_packs(self):
  140. """Test MIDX functionality with multiple pack files."""
  141. work_dir = self.create_test_repo_with_packs()
  142. # Create multiple pack files explicitly
  143. run_git_or_fail(["repack"], cwd=work_dir)
  144. run_git_or_fail(["repack"], cwd=work_dir)
  145. # Create MIDX with Git
  146. run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
  147. # Load with Dulwich
  148. midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
  149. midx = load_midx(midx_path)
  150. # Should have multiple packs
  151. # (Exact count may vary depending on Git version and repacking)
  152. self.assertGreaterEqual(midx.pack_count, 1)
  153. # Verify we can iterate over all entries
  154. entries = list(midx.iterentries())
  155. self.assertGreater(len(entries), 0)
  156. # All entries should have valid structure
  157. for sha, pack_name, offset in entries:
  158. self.assertEqual(20, len(sha)) # SHA-1 is 20 bytes
  159. self.assertIsInstance(pack_name, str)
  160. # Git stores .idx extensions in MIDX files
  161. self.assertTrue(pack_name.endswith(".idx"))
  162. self.assertIsInstance(offset, int)
  163. self.assertGreaterEqual(offset, 0)
  164. def test_dulwich_object_store_with_git_midx(self):
  165. """Test that DiskObjectStore can use Git-created MIDX for lookups."""
  166. work_dir = self.create_test_repo_with_packs()
  167. # Have Git create a MIDX file
  168. run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)
  169. # Load repo with Dulwich
  170. repo = Repo(self.repo_path)
  171. # Get a commit from the repo
  172. result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
  173. head_sha = result.decode("utf-8").strip().encode("ascii")
  174. # Verify we can access it through Dulwich
  175. # This should use the MIDX for lookup
  176. obj = repo.object_store[head_sha]
  177. self.assertIsNotNone(obj)
  178. self.assertEqual(b"commit", obj.type_name)
  179. def test_repack_with_midx(self):
  180. """Test that repacking works correctly with MIDX present."""
  181. work_dir = self.create_test_repo_with_packs()
  182. # Create MIDX with Dulwich
  183. repo = Repo(self.repo_path)
  184. repo.object_store.write_midx()
  185. # Verify Git can still repack
  186. run_git_or_fail(["repack", "-d"], cwd=work_dir)
  187. # The MIDX should still be readable
  188. midx_path = os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")
  189. if os.path.exists(midx_path): # Git may remove it during repack
  190. midx = load_midx(midx_path)
  191. self.assertGreaterEqual(len(midx), 0)