test_pack.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Compatibility tests for git packs."""
  21. import binascii
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from dulwich.objects import Blob
  27. from dulwich.pack import write_pack
  28. from dulwich.tests import SkipTest
  29. from dulwich.tests.compat.utils import require_git_version, run_git_or_fail
  30. from dulwich.tests.test_pack import PackTests, a_sha, pack1_sha
  31. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  32. def _git_verify_pack_object_list(output):
  33. pack_shas = set()
  34. for line in output.splitlines():
  35. sha = line[:40]
  36. try:
  37. binascii.unhexlify(sha)
  38. except (TypeError, binascii.Error):
  39. continue # non-sha line
  40. pack_shas.add(sha)
  41. return pack_shas
  42. class TestPack(PackTests):
  43. """Compatibility tests for reading and writing pack files."""
  44. def setUp(self):
  45. require_git_version((1, 5, 0))
  46. super().setUp()
  47. self._tempdir = tempfile.mkdtemp()
  48. self.addCleanup(shutil.rmtree, self._tempdir)
  49. def test_copy(self):
  50. with self.get_pack(pack1_sha) as origpack:
  51. self.assertSucceeds(origpack.index.check)
  52. pack_path = os.path.join(self._tempdir, "Elch")
  53. write_pack(pack_path, origpack.pack_tuples())
  54. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  55. orig_shas = {o.id for o in origpack.iterobjects()}
  56. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  57. def test_deltas_work(self):
  58. with self.get_pack(pack1_sha) as orig_pack:
  59. orig_blob = orig_pack[a_sha]
  60. new_blob = Blob()
  61. new_blob.data = orig_blob.data + b"x"
  62. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [(new_blob, None)]
  63. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  64. write_pack(pack_path, all_to_pack, deltify=True)
  65. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  66. self.assertEqual(
  67. {x[0].id for x in all_to_pack},
  68. _git_verify_pack_object_list(output),
  69. )
  70. # We specifically made a new blob that should be a delta
  71. # against the blob a_sha, so make sure we really got only 3
  72. # non-delta objects:
  73. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  74. self.assertEqual(
  75. 3,
  76. got_non_delta,
  77. "Expected 3 non-delta objects, got %d" % got_non_delta,
  78. )
  79. def test_delta_medium_object(self):
  80. # This tests an object set that will have a copy operation
  81. # 2**20 in size.
  82. with self.get_pack(pack1_sha) as orig_pack:
  83. orig_blob = orig_pack[a_sha]
  84. new_blob = Blob()
  85. new_blob.data = orig_blob.data + (b"x" * 2 ** 20)
  86. new_blob_2 = Blob()
  87. new_blob_2.data = new_blob.data + b"y"
  88. all_to_pack = list(orig_pack.pack_tuples()) + [
  89. (new_blob, None),
  90. (new_blob_2, None),
  91. ]
  92. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  93. write_pack(pack_path, all_to_pack, deltify=True)
  94. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  95. self.assertEqual(
  96. {x[0].id for x in all_to_pack},
  97. _git_verify_pack_object_list(output),
  98. )
  99. # We specifically made a new blob that should be a delta
  100. # against the blob a_sha, so make sure we really got only 3
  101. # non-delta objects:
  102. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  103. self.assertEqual(
  104. 3,
  105. got_non_delta,
  106. "Expected 3 non-delta objects, got %d" % got_non_delta,
  107. )
  108. # We expect one object to have a delta chain length of two
  109. # (new_blob_2), so let's verify that actually happens:
  110. self.assertIn(b"chain length = 2", output)
  111. # This test is SUPER slow: over 80 seconds on a 2012-era
  112. # laptop. This is because SequenceMatcher is worst-case quadratic
  113. # on the input size. It's impractical to produce deltas for
  114. # objects this large, but it's still worth doing the right thing
  115. # when it happens.
  116. def test_delta_large_object(self):
  117. # This tests an object set that will have a copy operation
  118. # 2**25 in size. This is a copy large enough that it requires
  119. # two copy operations in git's binary delta format.
  120. raise SkipTest("skipping slow, large test")
  121. with self.get_pack(pack1_sha) as orig_pack:
  122. new_blob = Blob()
  123. new_blob.data = "big blob" + ("x" * 2 ** 25)
  124. new_blob_2 = Blob()
  125. new_blob_2.data = new_blob.data + "y"
  126. all_to_pack = list(orig_pack.pack_tuples()) + [
  127. (new_blob, None),
  128. (new_blob_2, None),
  129. ]
  130. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  131. write_pack(pack_path, all_to_pack, deltify=True)
  132. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  133. self.assertEqual(
  134. {x[0].id for x in all_to_pack},
  135. _git_verify_pack_object_list(output),
  136. )
  137. # We specifically made a new blob that should be a delta
  138. # against the blob a_sha, so make sure we really got only 4
  139. # non-delta objects:
  140. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  141. self.assertEqual(
  142. 4,
  143. got_non_delta,
  144. "Expected 4 non-delta objects, got %d" % got_non_delta,
  145. )