test_pack.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Compatibility tests for git packs."""
  21. import binascii
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from dulwich.objects import Blob
  27. from dulwich.pack import write_pack
  28. from .. import SkipTest
  29. from ..test_pack import PackTests, a_sha, pack1_sha
  30. from .utils import require_git_version, run_git_or_fail
  31. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  32. def _git_verify_pack_object_list(output):
  33. pack_shas = set()
  34. for line in output.splitlines():
  35. sha = line[:40]
  36. try:
  37. binascii.unhexlify(sha)
  38. except (TypeError, binascii.Error):
  39. continue # non-sha line
  40. pack_shas.add(sha)
  41. return pack_shas
  42. class TestPack(PackTests):
  43. """Compatibility tests for reading and writing pack files."""
  44. def setUp(self):
  45. require_git_version((1, 5, 0))
  46. super().setUp()
  47. self._tempdir = tempfile.mkdtemp()
  48. self.addCleanup(shutil.rmtree, self._tempdir)
  49. def test_copy(self):
  50. with self.get_pack(pack1_sha) as origpack:
  51. self.assertSucceeds(origpack.index.check)
  52. pack_path = os.path.join(self._tempdir, "Elch")
  53. write_pack(pack_path, origpack.pack_tuples())
  54. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  55. orig_shas = {o.id for o in origpack.iterobjects()}
  56. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  57. def test_deltas_work(self):
  58. with self.get_pack(pack1_sha) as orig_pack:
  59. orig_blob = orig_pack[a_sha]
  60. new_blob = Blob()
  61. new_blob.data = orig_blob.data + b"x"
  62. all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [
  63. (new_blob, None)
  64. ]
  65. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  66. write_pack(pack_path, all_to_pack, deltify=True)
  67. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  68. self.assertEqual(
  69. {x[0].id for x in all_to_pack},
  70. _git_verify_pack_object_list(output),
  71. )
  72. # We specifically made a new blob that should be a delta
  73. # against the blob a_sha, so make sure we really got only 3
  74. # non-delta objects:
  75. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  76. self.assertEqual(
  77. 3,
  78. got_non_delta,
  79. "Expected 3 non-delta objects, got %d" % got_non_delta,
  80. )
  81. def test_delta_medium_object(self):
  82. # This tests an object set that will have a copy operation
  83. # 2**20 in size.
  84. with self.get_pack(pack1_sha) as orig_pack:
  85. orig_blob = orig_pack[a_sha]
  86. new_blob = Blob()
  87. new_blob.data = orig_blob.data + (b"x" * 2**20)
  88. new_blob_2 = Blob()
  89. new_blob_2.data = new_blob.data + b"y"
  90. all_to_pack = [
  91. *list(orig_pack.pack_tuples()),
  92. (new_blob, None),
  93. (new_blob_2, None),
  94. ]
  95. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  96. write_pack(pack_path, all_to_pack, deltify=True)
  97. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  98. self.assertEqual(
  99. {x[0].id for x in all_to_pack},
  100. _git_verify_pack_object_list(output),
  101. )
  102. # We specifically made a new blob that should be a delta
  103. # against the blob a_sha, so make sure we really got only 3
  104. # non-delta objects:
  105. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  106. self.assertEqual(
  107. 3,
  108. got_non_delta,
  109. "Expected 3 non-delta objects, got %d" % got_non_delta,
  110. )
  111. # We expect one object to have a delta chain length of two
  112. # (new_blob_2), so let's verify that actually happens:
  113. self.assertIn(b"chain length = 2", output)
  114. # This test is SUPER slow: over 80 seconds on a 2012-era
  115. # laptop. This is because SequenceMatcher is worst-case quadratic
  116. # on the input size. It's impractical to produce deltas for
  117. # objects this large, but it's still worth doing the right thing
  118. # when it happens.
  119. def test_delta_large_object(self):
  120. # This tests an object set that will have a copy operation
  121. # 2**25 in size. This is a copy large enough that it requires
  122. # two copy operations in git's binary delta format.
  123. raise SkipTest("skipping slow, large test")
  124. with self.get_pack(pack1_sha) as orig_pack:
  125. new_blob = Blob()
  126. new_blob.data = "big blob" + ("x" * 2**25)
  127. new_blob_2 = Blob()
  128. new_blob_2.data = new_blob.data + "y"
  129. all_to_pack = [
  130. *list(orig_pack.pack_tuples()),
  131. (new_blob, None),
  132. (new_blob_2, None),
  133. ]
  134. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  135. write_pack(pack_path, all_to_pack, deltify=True)
  136. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  137. self.assertEqual(
  138. {x[0].id for x in all_to_pack},
  139. _git_verify_pack_object_list(output),
  140. )
  141. # We specifically made a new blob that should be a delta
  142. # against the blob a_sha, so make sure we really got only 4
  143. # non-delta objects:
  144. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  145. self.assertEqual(
  146. 4,
  147. got_non_delta,
  148. "Expected 4 non-delta objects, got %d" % got_non_delta,
  149. )