test_bitmap.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. # test_bitmap.py -- Compatibility tests for git pack bitmaps.
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for git pack bitmaps."""
  22. import os
  23. import tempfile
  24. from dulwich.bitmap import (
  25. BITMAP_OPT_FULL_DAG,
  26. BITMAP_OPT_HASH_CACHE,
  27. BITMAP_OPT_LOOKUP_TABLE,
  28. BitmapEntry,
  29. EWAHBitmap,
  30. PackBitmap,
  31. write_bitmap,
  32. )
  33. from dulwich.object_format import DEFAULT_OBJECT_FORMAT
  34. from dulwich.pack import Pack
  35. from dulwich.repo import Repo
  36. from .. import TestCase
  37. from .utils import remove_ro, require_git_version, rmtree_ro, run_git_or_fail
  38. class BitmapCompatTests(TestCase):
  39. """Compatibility tests for reading git-generated bitmaps."""
  40. def setUp(self):
  41. super().setUp()
  42. # Git bitmap support was added in 2.0.0
  43. require_git_version((2, 0, 0))
  44. self._tempdir = tempfile.mkdtemp()
  45. self.addCleanup(rmtree_ro, self._tempdir)
  46. def _init_repo_with_bitmap(self):
  47. """Create a repo and generate a bitmap using git."""
  48. repo_path = os.path.join(self._tempdir, "test-repo")
  49. os.mkdir(repo_path)
  50. # Initialize repo
  51. run_git_or_fail(["init"], cwd=repo_path)
  52. # Create some commits
  53. test_file = os.path.join(repo_path, "test.txt")
  54. for i in range(5):
  55. with open(test_file, "w") as f:
  56. f.write(f"Content {i}\n")
  57. run_git_or_fail(["add", "test.txt"], cwd=repo_path)
  58. run_git_or_fail(
  59. ["commit", "-m", f"Commit {i}"],
  60. cwd=repo_path,
  61. env={"GIT_AUTHOR_NAME": "Test", "GIT_AUTHOR_EMAIL": "test@example.com"},
  62. )
  63. # Enable bitmap writing and repack
  64. run_git_or_fail(
  65. ["config", "pack.writeBitmaps", "true"],
  66. cwd=repo_path,
  67. )
  68. run_git_or_fail(["repack", "-a", "-d", "-b"], cwd=repo_path)
  69. return repo_path
  70. def test_read_git_generated_bitmap(self):
  71. """Test that Dulwich can read a bitmap generated by git."""
  72. repo_path = self._init_repo_with_bitmap()
  73. # Find the pack file with bitmap
  74. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  75. bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
  76. # Get the pack file (basename without extension)
  77. bitmap_name = bitmap_files[0]
  78. pack_basename = bitmap_name.replace(".bitmap", "")
  79. pack_path = os.path.join(pack_dir, pack_basename)
  80. # Verify bitmap file exists at expected location
  81. bitmap_path = pack_path + ".bitmap"
  82. self.assertTrue(
  83. os.path.exists(bitmap_path), f"Bitmap file not found at {bitmap_path}"
  84. )
  85. # Try to load the bitmap using Dulwich
  86. with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
  87. bitmap = pack.bitmap
  88. # Basic checks
  89. self.assertIsNotNone(bitmap, f"Failed to load bitmap from {pack_path}")
  90. self.assertIsNotNone(bitmap.pack_checksum, "Bitmap missing pack checksum")
  91. # Check that we have some type bitmaps
  92. # At minimum, we should have some commits
  93. self.assertGreater(
  94. len(bitmap.commit_bitmap.bits),
  95. 0,
  96. "Commit bitmap should not be empty",
  97. )
  98. def test_git_can_use_dulwich_repo_with_bitmap(self):
  99. """Test that git can work with a repo that has Dulwich-created objects."""
  100. repo_path = os.path.join(self._tempdir, "dulwich-repo")
  101. # Create a repo with Dulwich and add commits to ensure git creates bitmaps
  102. repo = Repo.init(repo_path, mkdir=True)
  103. self.addCleanup(repo.close)
  104. # Create actual commits, not just loose objects - git needs commits for bitmaps
  105. test_file = os.path.join(repo_path, "test.txt")
  106. for i in range(5):
  107. with open(test_file, "w") as f:
  108. f.write(f"Content {i}\n")
  109. run_git_or_fail(["add", "test.txt"], cwd=repo_path)
  110. run_git_or_fail(
  111. ["commit", "-m", f"Commit {i}"],
  112. cwd=repo_path,
  113. env={"GIT_AUTHOR_NAME": "Test", "GIT_AUTHOR_EMAIL": "test@example.com"},
  114. )
  115. # Configure git to write bitmaps
  116. run_git_or_fail(
  117. ["config", "pack.writeBitmaps", "true"],
  118. cwd=repo_path,
  119. )
  120. # Git should be able to repack with bitmaps
  121. run_git_or_fail(["repack", "-a", "-d", "-b"], cwd=repo_path)
  122. # Verify git created a bitmap
  123. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  124. self.assertTrue(os.path.exists(pack_dir), "Pack directory should exist")
  125. bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
  126. self.assertGreater(
  127. len(bitmap_files), 0, "Git should have created a bitmap file after repack"
  128. )
  129. def test_git_can_read_dulwich_bitmap(self):
  130. """Test that git can read a bitmap file written by Dulwich."""
  131. repo_path = os.path.join(self._tempdir, "dulwich-bitmap-repo")
  132. # Create a repo with git and add commits
  133. run_git_or_fail(["init"], cwd=None, env={"GIT_DIR": repo_path})
  134. test_file = os.path.join(repo_path, "..", "test.txt")
  135. os.makedirs(os.path.dirname(test_file), exist_ok=True)
  136. for i in range(5):
  137. with open(test_file, "w") as f:
  138. f.write(f"Content {i}\n")
  139. run_git_or_fail(
  140. ["add", test_file],
  141. cwd=os.path.dirname(repo_path),
  142. env={
  143. "GIT_DIR": repo_path,
  144. "GIT_WORK_TREE": os.path.dirname(repo_path),
  145. },
  146. )
  147. run_git_or_fail(
  148. ["commit", "-m", f"Commit {i}"],
  149. cwd=os.path.dirname(repo_path),
  150. env={
  151. "GIT_DIR": repo_path,
  152. "GIT_WORK_TREE": os.path.dirname(repo_path),
  153. "GIT_AUTHOR_NAME": "Test",
  154. "GIT_AUTHOR_EMAIL": "test@example.com",
  155. },
  156. )
  157. # Create a pack with git first
  158. run_git_or_fail(["repack", "-a", "-d"], cwd=None, env={"GIT_DIR": repo_path})
  159. # Now use Dulwich to write a bitmap for the pack
  160. pack_dir = os.path.join(repo_path, "objects", "pack")
  161. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  162. self.assertGreater(len(pack_files), 0, "Should have at least one pack file")
  163. pack_basename = pack_files[0].replace(".pack", "")
  164. pack_path = os.path.join(pack_dir, pack_basename)
  165. # Load the pack and create bitmap data, then close before writing
  166. with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
  167. # Create a simple bitmap for testing
  168. # Git requires BITMAP_OPT_FULL_DAG flag
  169. bitmap = PackBitmap(
  170. flags=BITMAP_OPT_FULL_DAG
  171. | BITMAP_OPT_HASH_CACHE
  172. | BITMAP_OPT_LOOKUP_TABLE
  173. )
  174. bitmap.pack_checksum = pack.get_stored_checksum()
  175. # Add bitmap entries for the first few commits in the pack
  176. for i, (sha, offset, crc) in enumerate(pack.index.iterentries()):
  177. if i >= 3: # Just add 3 entries
  178. break
  179. ewah = EWAHBitmap()
  180. # Mark this object and a couple others as reachable
  181. for j in range(i + 1):
  182. ewah.add(j)
  183. entry = BitmapEntry(object_pos=i, xor_offset=0, flags=0, bitmap=ewah)
  184. bitmap.entries[sha] = entry
  185. bitmap.entries_list.append((sha, entry))
  186. # Add name hash cache
  187. bitmap.name_hash_cache = [0x12345678, 0xABCDEF00, 0xFEDCBA98]
  188. # Write the bitmap after pack is closed to avoid file locking on Windows
  189. bitmap_path = pack_path + ".bitmap"
  190. remove_ro(bitmap_path)
  191. write_bitmap(bitmap_path, bitmap)
  192. # Verify git can use the repository with our bitmap
  193. # This should succeed if git can read our bitmap
  194. run_git_or_fail(
  195. ["rev-list", "--count", "--use-bitmap-index", "HEAD"],
  196. cwd=None,
  197. env={"GIT_DIR": repo_path},
  198. )
  199. # Verify git count-objects works with our bitmap
  200. run_git_or_fail(["count-objects", "-v"], cwd=None, env={"GIT_DIR": repo_path})
  201. def test_bitmap_file_format_structure(self):
  202. """Test that git-generated bitmap has expected structure."""
  203. repo_path = self._init_repo_with_bitmap()
  204. # Find bitmap
  205. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  206. bitmap_files = [f for f in os.listdir(pack_dir) if f.endswith(".bitmap")]
  207. bitmap_path = os.path.join(pack_dir, bitmap_files[0])
  208. # Read the raw file to verify header
  209. with open(bitmap_path, "rb") as f:
  210. signature = f.read(4)
  211. self.assertEqual(b"BITM", signature, "Invalid bitmap signature")
  212. version = int.from_bytes(f.read(2), byteorder="big")
  213. self.assertGreaterEqual(version, 1, "Bitmap version should be >= 1")
  214. # Load with Dulwich and verify structure
  215. bitmap_name = bitmap_files[0]
  216. pack_basename = bitmap_name.replace(".bitmap", "")
  217. pack_path = os.path.join(pack_dir, pack_basename)
  218. with Pack(pack_path, object_format=DEFAULT_OBJECT_FORMAT) as pack:
  219. bitmap = pack.bitmap
  220. self.assertIsNotNone(bitmap)
  221. self.assertEqual(bitmap.version, version)