test_pack.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Compatibility tests for git packs."""
  21. import binascii
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from dulwich.tests import SkipTest
  27. from ...objects import Blob
  28. from ...pack import write_pack
  29. from ..test_pack import PackTests, a_sha, pack1_sha
  30. from .utils import require_git_version, run_git_or_fail
  31. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  32. def _git_verify_pack_object_list(output):
  33. pack_shas = set()
  34. for line in output.splitlines():
  35. sha = line[:40]
  36. try:
  37. binascii.unhexlify(sha)
  38. except (TypeError, binascii.Error):
  39. continue # non-sha line
  40. pack_shas.add(sha)
  41. return pack_shas
  42. class TestPack(PackTests):
  43. """Compatibility tests for reading and writing pack files."""
  44. def setUp(self):
  45. require_git_version((1, 5, 0))
  46. super().setUp()
  47. self._tempdir = tempfile.mkdtemp()
  48. self.addCleanup(shutil.rmtree, self._tempdir)
  49. def test_copy(self):
  50. with self.get_pack(pack1_sha) as origpack:
  51. self.assertSucceeds(origpack.index.check)
  52. pack_path = os.path.join(self._tempdir, "Elch")
  53. write_pack(pack_path, origpack.pack_tuples())
  54. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  55. orig_shas = {o.id for o in origpack.iterobjects()}
  56. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  57. def test_deltas_work(self):
  58. with self.get_pack(pack1_sha) as orig_pack:
  59. orig_blob = orig_pack[a_sha]
  60. new_blob = Blob()
  61. new_blob.data = orig_blob.data + b"x"
  62. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [(new_blob, None)]
  63. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  64. write_pack(pack_path, all_to_pack, deltify=True)
  65. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  66. self.assertEqual(
  67. {x[0].id for x in all_to_pack},
  68. _git_verify_pack_object_list(output),
  69. )
  70. # We specifically made a new blob that should be a delta
  71. # against the blob a_sha, so make sure we really got only 3
  72. # non-delta objects:
  73. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  74. self.assertEqual(
  75. 3,
  76. got_non_delta,
  77. "Expected 3 non-delta objects, got %d" % got_non_delta,
  78. )
  79. def test_delta_medium_object(self):
  80. # This tests an object set that will have a copy operation
  81. # 2**20 in size.
  82. with self.get_pack(pack1_sha) as orig_pack:
  83. orig_blob = orig_pack[a_sha]
  84. new_blob = Blob()
  85. new_blob.data = orig_blob.data + (b"x" * 2 ** 20)
  86. new_blob_2 = Blob()
  87. new_blob_2.data = new_blob.data + b"y"
  88. all_to_pack = list(orig_pack.pack_tuples()) + [
  89. (new_blob, None),
  90. (new_blob_2, None),
  91. ]
  92. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  93. write_pack(pack_path, all_to_pack, deltify=True)
  94. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  95. self.assertEqual(
  96. {x[0].id for x in all_to_pack},
  97. _git_verify_pack_object_list(output),
  98. )
  99. # We specifically made a new blob that should be a delta
  100. # against the blob a_sha, so make sure we really got only 3
  101. # non-delta objects:
  102. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  103. self.assertEqual(
  104. 3,
  105. got_non_delta,
  106. "Expected 3 non-delta objects, got %d" % got_non_delta,
  107. )
  108. # We expect one object to have a delta chain length of two
  109. # (new_blob_2), so let's verify that actually happens:
  110. self.assertIn(b"chain length = 2", output)
  111. # This test is SUPER slow: over 80 seconds on a 2012-era
  112. # laptop. This is because SequenceMatcher is worst-case quadratic
  113. # on the input size. It's impractical to produce deltas for
  114. # objects this large, but it's still worth doing the right thing
  115. # when it happens.
  116. def test_delta_large_object(self):
  117. # This tests an object set that will have a copy operation
  118. # 2**25 in size. This is a copy large enough that it requires
  119. # two copy operations in git's binary delta format.
  120. raise SkipTest("skipping slow, large test")
  121. with self.get_pack(pack1_sha) as orig_pack:
  122. new_blob = Blob()
  123. new_blob.data = "big blob" + ("x" * 2 ** 25)
  124. new_blob_2 = Blob()
  125. new_blob_2.data = new_blob.data + "y"
  126. all_to_pack = list(orig_pack.pack_tuples()) + [
  127. (new_blob, None),
  128. (new_blob_2, None),
  129. ]
  130. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  131. write_pack(pack_path, all_to_pack, deltify=True)
  132. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  133. self.assertEqual(
  134. {x[0].id for x in all_to_pack},
  135. _git_verify_pack_object_list(output),
  136. )
  137. # We specifically made a new blob that should be a delta
  138. # against the blob a_sha, so make sure we really got only 4
  139. # non-delta objects:
  140. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  141. self.assertEqual(
  142. 4,
  143. got_non_delta,
  144. "Expected 4 non-delta objects, got %d" % got_non_delta,
  145. )