test_pack.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for git packs."""
  22. import binascii
  23. import os
  24. import re
  25. import shutil
  26. import tempfile
  27. from typing import NoReturn
  28. from dulwich.objects import Blob
  29. from dulwich.pack import write_pack
  30. from .. import SkipTest
  31. from ..test_pack import PackTests, a_sha, pack1_sha
  32. from .utils import require_git_version, run_git_or_fail
# Matches the "non delta: N objects" summary line that the tests below look
# for in the output of ``git verify-pack -v``; N is captured as ``non_delta``.
_NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  34. def _git_verify_pack_object_list(output):
  35. pack_shas = set()
  36. for line in output.splitlines():
  37. sha = line[:40]
  38. try:
  39. binascii.unhexlify(sha)
  40. except (TypeError, binascii.Error):
  41. continue # non-sha line
  42. pack_shas.add(sha)
  43. return pack_shas
  44. class TestPack(PackTests):
  45. """Compatibility tests for reading and writing pack files."""
  46. def setUp(self) -> None:
  47. require_git_version((1, 5, 0))
  48. super().setUp()
  49. self._tempdir = tempfile.mkdtemp()
  50. self.addCleanup(shutil.rmtree, self._tempdir)
  51. def test_copy(self) -> None:
  52. with self.get_pack(pack1_sha) as origpack:
  53. self.assertSucceeds(origpack.index.check)
  54. pack_path = os.path.join(self._tempdir, "Elch")
  55. write_pack(pack_path, origpack.pack_tuples())
  56. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  57. orig_shas = {o.id for o in origpack.iterobjects()}
  58. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  59. def test_deltas_work(self) -> None:
  60. with self.get_pack(pack1_sha) as orig_pack:
  61. orig_blob = orig_pack[a_sha]
  62. new_blob = Blob()
  63. new_blob.data = orig_blob.data + b"x"
  64. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [
  65. (new_blob, None)
  66. ]
  67. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  68. write_pack(pack_path, all_to_pack, deltify=True)
  69. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  70. self.assertEqual(
  71. {x[0].id for x in all_to_pack},
  72. _git_verify_pack_object_list(output),
  73. )
  74. # We specifically made a new blob that should be a delta
  75. # against the blob a_sha, so make sure we really got only 3
  76. # non-delta objects:
  77. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  78. self.assertEqual(
  79. 3,
  80. got_non_delta,
  81. "Expected 3 non-delta objects, got %d" % got_non_delta,
  82. )
  83. def test_delta_medium_object(self) -> None:
  84. # This tests an object set that will have a copy operation
  85. # 2**20 in size.
  86. with self.get_pack(pack1_sha) as orig_pack:
  87. orig_blob = orig_pack[a_sha]
  88. new_blob = Blob()
  89. new_blob.data = orig_blob.data + (b"x" * 2**20)
  90. new_blob_2 = Blob()
  91. new_blob_2.data = new_blob.data + b"y"
  92. all_to_pack = [
  93. *list(orig_pack.pack_tuples()),
  94. (new_blob, None),
  95. (new_blob_2, None),
  96. ]
  97. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  98. write_pack(pack_path, all_to_pack, deltify=True)
  99. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  100. self.assertEqual(
  101. {x[0].id for x in all_to_pack},
  102. _git_verify_pack_object_list(output),
  103. )
  104. # We specifically made a new blob that should be a delta
  105. # against the blob a_sha, so make sure we really got only 3
  106. # non-delta objects:
  107. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  108. self.assertEqual(
  109. 3,
  110. got_non_delta,
  111. "Expected 3 non-delta objects, got %d" % got_non_delta,
  112. )
  113. # We expect one object to have a delta chain length of two
  114. # (new_blob_2), so let's verify that actually happens:
  115. self.assertIn(b"chain length = 2", output)
  116. # This test is SUPER slow: over 80 seconds on a 2012-era
  117. # laptop. This is because SequenceMatcher is worst-case quadratic
  118. # on the input size. It's impractical to produce deltas for
  119. # objects this large, but it's still worth doing the right thing
  120. # when it happens.
  121. def test_delta_large_object(self) -> NoReturn:
  122. # This tests an object set that will have a copy operation
  123. # 2**25 in size. This is a copy large enough that it requires
  124. # two copy operations in git's binary delta format.
  125. raise SkipTest("skipping slow, large test")
  126. with self.get_pack(pack1_sha) as orig_pack:
  127. new_blob = Blob()
  128. new_blob.data = "big blob" + ("x" * 2**25)
  129. new_blob_2 = Blob()
  130. new_blob_2.data = new_blob.data + "y"
  131. all_to_pack = [
  132. *list(orig_pack.pack_tuples()),
  133. (new_blob, None),
  134. (new_blob_2, None),
  135. ]
  136. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  137. write_pack(pack_path, all_to_pack, deltify=True)
  138. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  139. self.assertEqual(
  140. {x[0].id for x in all_to_pack},
  141. _git_verify_pack_object_list(output),
  142. )
  143. # We specifically made a new blob that should be a delta
  144. # against the blob a_sha, so make sure we really got only 4
  145. # non-delta objects:
  146. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  147. self.assertEqual(
  148. 4,
  149. got_non_delta,
  150. "Expected 4 non-delta objects, got %d" % got_non_delta,
  151. )