test_pack.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for git packs."""
  22. import binascii
  23. import os
  24. import re
  25. import shutil
  26. import tempfile
  27. from typing import NoReturn
  28. from dulwich.file import GitFile
  29. from dulwich.object_format import DEFAULT_OBJECT_FORMAT
  30. from dulwich.objects import Blob
  31. from dulwich.pack import (
  32. PackData,
  33. PackIndex3,
  34. load_pack_index,
  35. write_pack,
  36. write_pack_index_v3,
  37. )
  38. from .. import SkipTest
  39. from ..test_pack import PackTests, a_sha, pack1_sha
  40. from .utils import require_git_version, rmtree_ro, run_git_or_fail
  41. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  42. def _git_verify_pack_object_list(output):
  43. pack_shas = set()
  44. for line in output.splitlines():
  45. sha = line[:40]
  46. try:
  47. binascii.unhexlify(sha)
  48. except (TypeError, binascii.Error):
  49. continue # non-sha line
  50. pack_shas.add(sha)
  51. return pack_shas
  52. class TestPack(PackTests):
  53. """Compatibility tests for reading and writing pack files."""
  54. def setUp(self) -> None:
  55. require_git_version((1, 5, 0))
  56. super().setUp()
  57. self._tempdir = tempfile.mkdtemp()
  58. self.addCleanup(shutil.rmtree, self._tempdir)
  59. def test_copy(self) -> None:
  60. with self.get_pack(pack1_sha) as origpack:
  61. self.assertSucceeds(origpack.index.check)
  62. pack_path = os.path.join(self._tempdir, "Elch")
  63. write_pack(
  64. pack_path, origpack.pack_tuples(), object_format=DEFAULT_OBJECT_FORMAT
  65. )
  66. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  67. orig_shas = {o.id for o in origpack.iterobjects()}
  68. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  69. def test_deltas_work(self) -> None:
  70. with self.get_pack(pack1_sha) as orig_pack:
  71. orig_blob = orig_pack[a_sha]
  72. new_blob = Blob()
  73. new_blob.data = orig_blob.data + b"x"
  74. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [
  75. (new_blob, None)
  76. ]
  77. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  78. write_pack(
  79. pack_path, all_to_pack, deltify=True, object_format=DEFAULT_OBJECT_FORMAT
  80. )
  81. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  82. self.assertEqual(
  83. {x[0].id for x in all_to_pack},
  84. _git_verify_pack_object_list(output),
  85. )
  86. # We specifically made a new blob that should be a delta
  87. # against the blob a_sha, so make sure we really got only 3
  88. # non-delta objects:
  89. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  90. self.assertEqual(
  91. 3,
  92. got_non_delta,
  93. f"Expected 3 non-delta objects, got {got_non_delta}",
  94. )
  95. def test_delta_medium_object(self) -> None:
  96. # This tests an object set that will have a copy operation
  97. # 2**20 in size.
  98. with self.get_pack(pack1_sha) as orig_pack:
  99. orig_blob = orig_pack[a_sha]
  100. new_blob = Blob()
  101. new_blob.data = orig_blob.data + (b"x" * 2**20)
  102. new_blob_2 = Blob()
  103. new_blob_2.data = new_blob.data + b"y"
  104. all_to_pack = [
  105. *list(orig_pack.pack_tuples()),
  106. (new_blob, None),
  107. (new_blob_2, None),
  108. ]
  109. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  110. write_pack(
  111. pack_path,
  112. all_to_pack,
  113. deltify=True,
  114. object_format=DEFAULT_OBJECT_FORMAT,
  115. )
  116. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  117. self.assertEqual(
  118. {x[0].id for x in all_to_pack},
  119. _git_verify_pack_object_list(output),
  120. )
  121. # We specifically made a new blob that should be a delta
  122. # against the blob a_sha, so make sure we really got only 3
  123. # non-delta objects:
  124. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  125. self.assertEqual(
  126. 3,
  127. got_non_delta,
  128. f"Expected 3 non-delta objects, got {got_non_delta}",
  129. )
  130. # We expect one object to have a delta chain length of two
  131. # (new_blob_2), so let's verify that actually happens:
  132. self.assertIn(b"chain length = 2", output)
  133. # This test is SUPER slow: over 80 seconds on a 2012-era
  134. # laptop. This is because SequenceMatcher is worst-case quadratic
  135. # on the input size. It's impractical to produce deltas for
  136. # objects this large, but it's still worth doing the right thing
  137. # when it happens.
  138. def test_delta_large_object(self) -> NoReturn:
  139. # This tests an object set that will have a copy operation
  140. # 2**25 in size. This is a copy large enough that it requires
  141. # two copy operations in git's binary delta format.
  142. raise SkipTest("skipping slow, large test")
  143. with self.get_pack(pack1_sha) as orig_pack:
  144. new_blob = Blob()
  145. new_blob.data = "big blob" + ("x" * 2**25)
  146. new_blob_2 = Blob()
  147. new_blob_2.data = new_blob.data + "y"
  148. all_to_pack = [
  149. *list(orig_pack.pack_tuples()),
  150. (new_blob, None),
  151. (new_blob_2, None),
  152. ]
  153. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  154. write_pack(
  155. pack_path,
  156. all_to_pack,
  157. deltify=True,
  158. object_format=DEFAULT_OBJECT_FORMAT,
  159. )
  160. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  161. self.assertEqual(
  162. {x[0].id for x in all_to_pack},
  163. _git_verify_pack_object_list(output),
  164. )
  165. # We specifically made a new blob that should be a delta
  166. # against the blob a_sha, so make sure we really got only 4
  167. # non-delta objects:
  168. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  169. self.assertEqual(
  170. 4,
  171. got_non_delta,
  172. f"Expected 4 non-delta objects, got {got_non_delta}",
  173. )
  174. class TestPackIndexCompat(PackTests):
  175. """Compatibility tests for pack index formats."""
  176. def setUp(self) -> None:
  177. require_git_version((1, 5, 0))
  178. super().setUp()
  179. self._tempdir = tempfile.mkdtemp()
  180. self.addCleanup(rmtree_ro, self._tempdir)
  181. def test_dulwich_create_index_git_readable(self) -> None:
  182. """Test that git can read pack indexes created by dulwich."""
  183. # Create a simple pack with objects
  184. blob = Blob()
  185. blob.data = b"Test blob"
  186. pack_path = os.path.join(self._tempdir, "test_pack")
  187. entries = [(blob, None)]
  188. write_pack(pack_path, entries, object_format=DEFAULT_OBJECT_FORMAT)
  189. # Load the pack and create v2 index (most compatible)
  190. pack_data = PackData(pack_path + ".pack", object_format=DEFAULT_OBJECT_FORMAT)
  191. try:
  192. pack_data.create_index(pack_path + ".idx", version=2)
  193. finally:
  194. pack_data.close()
  195. # Verify git can read it
  196. output = run_git_or_fail(["verify-pack", "-v", pack_path + ".pack"])
  197. self.assertIn(blob.id.decode("ascii"), output.decode("ascii"))
  198. def test_dulwich_read_git_index(self) -> None:
  199. """Test that dulwich can read pack indexes created by git."""
  200. # Create a simple pack with objects
  201. blob = Blob()
  202. blob.data = b"Test blob for git"
  203. pack_path = os.path.join(self._tempdir, "git_pack")
  204. entries = [(blob, None)]
  205. write_pack(pack_path, entries, object_format=DEFAULT_OBJECT_FORMAT)
  206. # Create index with git
  207. run_git_or_fail(["index-pack", pack_path + ".pack"])
  208. # Load with dulwich
  209. idx = load_pack_index(pack_path + ".idx", DEFAULT_OBJECT_FORMAT)
  210. # Verify it works
  211. self.assertIn(blob.id, idx)
  212. self.assertEqual(len(idx), 1)
  213. def test_index_format_v3_sha256_future(self) -> None:
  214. """Test that v3 index format is ready for SHA-256 support."""
  215. # This test verifies the v3 implementation structure is ready
  216. # for SHA-256, even though SHA-256 itself is not yet implemented
  217. # Create a dummy v3 index to test the format
  218. entries = [(b"a" * 20, 100, 1234)] # SHA-1 for now
  219. v3_path = os.path.join(self._tempdir, "v3_test.idx")
  220. with GitFile(v3_path, "wb") as f:
  221. write_pack_index_v3(f, entries, b"x" * 20, hash_format=1)
  222. # Load and verify structure
  223. idx = load_pack_index(v3_path, DEFAULT_OBJECT_FORMAT)
  224. self.assertIsInstance(idx, PackIndex3)
  225. self.assertEqual(idx.version, 3)
  226. self.assertEqual(idx.hash_format, 1) # SHA-1
  227. self.assertEqual(idx.hash_size, 20)
  228. # Verify SHA-256 would raise NotImplementedError
  229. with self.assertRaises(NotImplementedError):
  230. with GitFile(v3_path + ".sha256", "wb") as f:
  231. write_pack_index_v3(f, entries, b"x" * 32, hash_format=2)