test_pack.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for git packs."""
  22. import binascii
  23. import os
  24. import re
  25. import shutil
  26. import tempfile
  27. from typing import NoReturn
  28. from dulwich.file import GitFile
  29. from dulwich.objects import Blob
  30. from dulwich.pack import (
  31. PackData,
  32. PackIndex3,
  33. load_pack_index,
  34. write_pack,
  35. write_pack_index_v3,
  36. )
  37. from .. import SkipTest
  38. from ..test_pack import PackTests, a_sha, pack1_sha
  39. from .utils import require_git_version, rmtree_ro, run_git_or_fail
  # Bytes pattern for the "non delta: N objects" summary line; the count N is
  # captured in the named group ``non_delta``. The tests below search the
  # output of ``git verify-pack -v`` with it.
  40. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  41. def _git_verify_pack_object_list(output):
  42. pack_shas = set()
  43. for line in output.splitlines():
  44. sha = line[:40]
  45. try:
  46. binascii.unhexlify(sha)
  47. except (TypeError, binascii.Error):
  48. continue # non-sha line
  49. pack_shas.add(sha)
  50. return pack_shas
  51. class TestPack(PackTests):
  52. """Compatibility tests for reading and writing pack files."""
  53. def setUp(self) -> None:
  54. require_git_version((1, 5, 0))
  55. super().setUp()
  56. self._tempdir = tempfile.mkdtemp()
  57. self.addCleanup(shutil.rmtree, self._tempdir)
  58. def test_copy(self) -> None:
  59. with self.get_pack(pack1_sha) as origpack:
  60. self.assertSucceeds(origpack.index.check)
  61. pack_path = os.path.join(self._tempdir, "Elch")
  62. write_pack(pack_path, origpack.pack_tuples())
  63. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  64. orig_shas = {o.id for o in origpack.iterobjects()}
  65. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  66. def test_deltas_work(self) -> None:
  67. with self.get_pack(pack1_sha) as orig_pack:
  68. orig_blob = orig_pack[a_sha]
  69. new_blob = Blob()
  70. new_blob.data = orig_blob.data + b"x"
  71. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [
  72. (new_blob, None)
  73. ]
  74. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  75. write_pack(pack_path, all_to_pack, deltify=True)
  76. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  77. self.assertEqual(
  78. {x[0].id for x in all_to_pack},
  79. _git_verify_pack_object_list(output),
  80. )
  81. # We specifically made a new blob that should be a delta
  82. # against the blob a_sha, so make sure we really got only 3
  83. # non-delta objects:
  84. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  85. self.assertEqual(
  86. 3,
  87. got_non_delta,
  88. f"Expected 3 non-delta objects, got {got_non_delta}",
  89. )
  90. def test_delta_medium_object(self) -> None:
  91. # This tests an object set that will have a copy operation
  92. # 2**20 in size.
  93. with self.get_pack(pack1_sha) as orig_pack:
  94. orig_blob = orig_pack[a_sha]
  95. new_blob = Blob()
  96. new_blob.data = orig_blob.data + (b"x" * 2**20)
  97. new_blob_2 = Blob()
  98. new_blob_2.data = new_blob.data + b"y"
  99. all_to_pack = [
  100. *list(orig_pack.pack_tuples()),
  101. (new_blob, None),
  102. (new_blob_2, None),
  103. ]
  104. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  105. write_pack(pack_path, all_to_pack, deltify=True)
  106. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  107. self.assertEqual(
  108. {x[0].id for x in all_to_pack},
  109. _git_verify_pack_object_list(output),
  110. )
  111. # We specifically made a new blob that should be a delta
  112. # against the blob a_sha, so make sure we really got only 3
  113. # non-delta objects:
  114. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  115. self.assertEqual(
  116. 3,
  117. got_non_delta,
  118. f"Expected 3 non-delta objects, got {got_non_delta}",
  119. )
  120. # We expect one object to have a delta chain length of two
  121. # (new_blob_2), so let's verify that actually happens:
  122. self.assertIn(b"chain length = 2", output)
  123. # This test is SUPER slow: over 80 seconds on a 2012-era
  124. # laptop. This is because SequenceMatcher is worst-case quadratic
  125. # on the input size. It's impractical to produce deltas for
  126. # objects this large, but it's still worth doing the right thing
  127. # when it happens.
  128. def test_delta_large_object(self) -> NoReturn:
  129. # This tests an object set that will have a copy operation
  130. # 2**25 in size. This is a copy large enough that it requires
  131. # two copy operations in git's binary delta format.
  132. raise SkipTest("skipping slow, large test")
  133. with self.get_pack(pack1_sha) as orig_pack:
  134. new_blob = Blob()
  135. new_blob.data = "big blob" + ("x" * 2**25)
  136. new_blob_2 = Blob()
  137. new_blob_2.data = new_blob.data + "y"
  138. all_to_pack = [
  139. *list(orig_pack.pack_tuples()),
  140. (new_blob, None),
  141. (new_blob_2, None),
  142. ]
  143. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  144. write_pack(pack_path, all_to_pack, deltify=True)
  145. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  146. self.assertEqual(
  147. {x[0].id for x in all_to_pack},
  148. _git_verify_pack_object_list(output),
  149. )
  150. # We specifically made a new blob that should be a delta
  151. # against the blob a_sha, so make sure we really got only 4
  152. # non-delta objects:
  153. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  154. self.assertEqual(
  155. 4,
  156. got_non_delta,
  157. f"Expected 4 non-delta objects, got {got_non_delta}",
  158. )
  159. class TestPackIndexCompat(PackTests):
  160. """Compatibility tests for pack index formats."""
  161. def setUp(self) -> None:
  162. require_git_version((1, 5, 0))
  163. super().setUp()
  164. self._tempdir = tempfile.mkdtemp()
  165. self.addCleanup(rmtree_ro, self._tempdir)
  166. def test_dulwich_create_index_git_readable(self) -> None:
  167. """Test that git can read pack indexes created by dulwich."""
  168. # Create a simple pack with objects
  169. blob = Blob()
  170. blob.data = b"Test blob"
  171. pack_path = os.path.join(self._tempdir, "test_pack")
  172. entries = [(blob, None)]
  173. write_pack(pack_path, entries)
  174. # Load the pack and create v2 index (most compatible)
  175. pack_data = PackData(pack_path + ".pack")
  176. try:
  177. pack_data.create_index(pack_path + ".idx", version=2)
  178. finally:
  179. pack_data.close()
  180. # Verify git can read it
  181. output = run_git_or_fail(["verify-pack", "-v", pack_path + ".pack"])
  182. self.assertIn(blob.id.decode("ascii"), output.decode("ascii"))
  183. def test_dulwich_read_git_index(self) -> None:
  184. """Test that dulwich can read pack indexes created by git."""
  185. # Create a simple pack with objects
  186. blob = Blob()
  187. blob.data = b"Test blob for git"
  188. pack_path = os.path.join(self._tempdir, "git_pack")
  189. entries = [(blob, None)]
  190. write_pack(pack_path, entries)
  191. # Create index with git
  192. run_git_or_fail(["index-pack", pack_path + ".pack"])
  193. # Load with dulwich
  194. idx = load_pack_index(pack_path + ".idx")
  195. # Verify it works
  196. self.assertIn(blob.id, idx)
  197. self.assertEqual(len(idx), 1)
  198. def test_index_format_v3_sha256_future(self) -> None:
  199. """Test that v3 index format is ready for SHA-256 support."""
  200. # This test verifies the v3 implementation structure is ready
  201. # for SHA-256, even though SHA-256 itself is not yet implemented
  202. # Create a dummy v3 index to test the format
  203. entries = [(b"a" * 20, 100, 1234)] # SHA-1 for now
  204. v3_path = os.path.join(self._tempdir, "v3_test.idx")
  205. with GitFile(v3_path, "wb") as f:
  206. write_pack_index_v3(f, entries, b"x" * 20, hash_algorithm=1)
  207. # Load and verify structure
  208. idx = load_pack_index(v3_path)
  209. self.assertIsInstance(idx, PackIndex3)
  210. self.assertEqual(idx.version, 3)
  211. self.assertEqual(idx.hash_algorithm, 1) # SHA-1
  212. self.assertEqual(idx.hash_size, 20)
  213. # Verify SHA-256 would raise NotImplementedError
  214. with self.assertRaises(NotImplementedError):
  215. with GitFile(v3_path + ".sha256", "wb") as f:
  216. write_pack_index_v3(f, entries, b"x" * 32, hash_algorithm=2)