test_pack.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Compatibility tests for git packs."""
  21. import binascii
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from typing import NoReturn
  27. from dulwich.objects import Blob
  28. from dulwich.pack import write_pack
  29. from .. import SkipTest
  30. from ..test_pack import PackTests, a_sha, pack1_sha
  31. from .utils import require_git_version, run_git_or_fail
  32. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  33. def _git_verify_pack_object_list(output):
  34. pack_shas = set()
  35. for line in output.splitlines():
  36. sha = line[:40]
  37. try:
  38. binascii.unhexlify(sha)
  39. except (TypeError, binascii.Error):
  40. continue # non-sha line
  41. pack_shas.add(sha)
  42. return pack_shas
  43. class TestPack(PackTests):
  44. """Compatibility tests for reading and writing pack files."""
  45. def setUp(self) -> None:
  46. require_git_version((1, 5, 0))
  47. super().setUp()
  48. self._tempdir = tempfile.mkdtemp()
  49. self.addCleanup(shutil.rmtree, self._tempdir)
  50. def test_copy(self) -> None:
  51. with self.get_pack(pack1_sha) as origpack:
  52. self.assertSucceeds(origpack.index.check)
  53. pack_path = os.path.join(self._tempdir, "Elch")
  54. write_pack(pack_path, origpack.pack_tuples())
  55. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  56. orig_shas = {o.id for o in origpack.iterobjects()}
  57. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  58. def test_deltas_work(self) -> None:
  59. with self.get_pack(pack1_sha) as orig_pack:
  60. orig_blob = orig_pack[a_sha]
  61. new_blob = Blob()
  62. new_blob.data = orig_blob.data + b"x"
  63. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [
  64. (new_blob, None)
  65. ]
  66. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  67. write_pack(pack_path, all_to_pack, deltify=True)
  68. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  69. self.assertEqual(
  70. {x[0].id for x in all_to_pack},
  71. _git_verify_pack_object_list(output),
  72. )
  73. # We specifically made a new blob that should be a delta
  74. # against the blob a_sha, so make sure we really got only 3
  75. # non-delta objects:
  76. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  77. self.assertEqual(
  78. 3,
  79. got_non_delta,
  80. "Expected 3 non-delta objects, got %d" % got_non_delta,
  81. )
  82. def test_delta_medium_object(self) -> None:
  83. # This tests an object set that will have a copy operation
  84. # 2**20 in size.
  85. with self.get_pack(pack1_sha) as orig_pack:
  86. orig_blob = orig_pack[a_sha]
  87. new_blob = Blob()
  88. new_blob.data = orig_blob.data + (b"x" * 2**20)
  89. new_blob_2 = Blob()
  90. new_blob_2.data = new_blob.data + b"y"
  91. all_to_pack = [
  92. *list(orig_pack.pack_tuples()),
  93. (new_blob, None),
  94. (new_blob_2, None),
  95. ]
  96. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  97. write_pack(pack_path, all_to_pack, deltify=True)
  98. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  99. self.assertEqual(
  100. {x[0].id for x in all_to_pack},
  101. _git_verify_pack_object_list(output),
  102. )
  103. # We specifically made a new blob that should be a delta
  104. # against the blob a_sha, so make sure we really got only 3
  105. # non-delta objects:
  106. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  107. self.assertEqual(
  108. 3,
  109. got_non_delta,
  110. "Expected 3 non-delta objects, got %d" % got_non_delta,
  111. )
  112. # We expect one object to have a delta chain length of two
  113. # (new_blob_2), so let's verify that actually happens:
  114. self.assertIn(b"chain length = 2", output)
  115. # This test is SUPER slow: over 80 seconds on a 2012-era
  116. # laptop. This is because SequenceMatcher is worst-case quadratic
  117. # on the input size. It's impractical to produce deltas for
  118. # objects this large, but it's still worth doing the right thing
  119. # when it happens.
  120. def test_delta_large_object(self) -> NoReturn:
  121. # This tests an object set that will have a copy operation
  122. # 2**25 in size. This is a copy large enough that it requires
  123. # two copy operations in git's binary delta format.
  124. raise SkipTest("skipping slow, large test")
  125. with self.get_pack(pack1_sha) as orig_pack:
  126. new_blob = Blob()
  127. new_blob.data = "big blob" + ("x" * 2**25)
  128. new_blob_2 = Blob()
  129. new_blob_2.data = new_blob.data + "y"
  130. all_to_pack = [
  131. *list(orig_pack.pack_tuples()),
  132. (new_blob, None),
  133. (new_blob_2, None),
  134. ]
  135. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  136. write_pack(pack_path, all_to_pack, deltify=True)
  137. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  138. self.assertEqual(
  139. {x[0].id for x in all_to_pack},
  140. _git_verify_pack_object_list(output),
  141. )
  142. # We specifically made a new blob that should be a delta
  143. # against the blob a_sha, so make sure we really got only 4
  144. # non-delta objects:
  145. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  146. self.assertEqual(
  147. 4,
  148. got_non_delta,
  149. "Expected 4 non-delta objects, got %d" % got_non_delta,
  150. )