test_pack.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Compatibility tests for git packs."""
  21. import binascii
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from dulwich.pack import (
  27. write_pack,
  28. )
  29. from dulwich.objects import (
  30. Blob,
  31. )
  32. from dulwich.tests import (
  33. SkipTest,
  34. )
  35. from dulwich.tests.test_pack import (
  36. a_sha,
  37. pack1_sha,
  38. PackTests,
  39. )
  40. from dulwich.tests.compat.utils import (
  41. require_git_version,
  42. run_git_or_fail,
  43. )
  44. _NON_DELTA_RE = re.compile(b'non delta: (?P<non_delta>\\d+) objects')
  45. def _git_verify_pack_object_list(output):
  46. pack_shas = set()
  47. for line in output.splitlines():
  48. sha = line[:40]
  49. try:
  50. binascii.unhexlify(sha)
  51. except (TypeError, binascii.Error):
  52. continue # non-sha line
  53. pack_shas.add(sha)
  54. return pack_shas
  55. class TestPack(PackTests):
  56. """Compatibility tests for reading and writing pack files."""
  57. def setUp(self):
  58. require_git_version((1, 5, 0))
  59. super(TestPack, self).setUp()
  60. self._tempdir = tempfile.mkdtemp()
  61. self.addCleanup(shutil.rmtree, self._tempdir)
  62. def test_copy(self):
  63. with self.get_pack(pack1_sha) as origpack:
  64. self.assertSucceeds(origpack.index.check)
  65. pack_path = os.path.join(self._tempdir, "Elch")
  66. write_pack(pack_path, origpack.pack_tuples())
  67. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  68. orig_shas = set(o.id for o in origpack.iterobjects())
  69. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  70. def test_deltas_work(self):
  71. with self.get_pack(pack1_sha) as orig_pack:
  72. orig_blob = orig_pack[a_sha]
  73. new_blob = Blob()
  74. new_blob.data = orig_blob.data + b'x'
  75. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
  76. pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
  77. write_pack(pack_path, all_to_pack, deltify=True)
  78. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  79. self.assertEqual(set(x[0].id for x in all_to_pack),
  80. _git_verify_pack_object_list(output))
  81. # We specifically made a new blob that should be a delta
  82. # against the blob a_sha, so make sure we really got only 3
  83. # non-delta objects:
  84. got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
  85. self.assertEqual(
  86. 3, got_non_delta,
  87. 'Expected 3 non-delta objects, got %d' % got_non_delta)
  88. def test_delta_medium_object(self):
  89. # This tests an object set that will have a copy operation
  90. # 2**20 in size.
  91. with self.get_pack(pack1_sha) as orig_pack:
  92. orig_blob = orig_pack[a_sha]
  93. new_blob = Blob()
  94. new_blob.data = orig_blob.data + (b'x' * 2 ** 20)
  95. new_blob_2 = Blob()
  96. new_blob_2.data = new_blob.data + b'y'
  97. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
  98. (new_blob_2, None)]
  99. pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
  100. write_pack(pack_path, all_to_pack, deltify=True)
  101. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  102. self.assertEqual(set(x[0].id for x in all_to_pack),
  103. _git_verify_pack_object_list(output))
  104. # We specifically made a new blob that should be a delta
  105. # against the blob a_sha, so make sure we really got only 3
  106. # non-delta objects:
  107. got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
  108. self.assertEqual(
  109. 3, got_non_delta,
  110. 'Expected 3 non-delta objects, got %d' % got_non_delta)
  111. # We expect one object to have a delta chain length of two
  112. # (new_blob_2), so let's verify that actually happens:
  113. self.assertIn(b'chain length = 2', output)
  114. # This test is SUPER slow: over 80 seconds on a 2012-era
  115. # laptop. This is because SequenceMatcher is worst-case quadratic
  116. # on the input size. It's impractical to produce deltas for
  117. # objects this large, but it's still worth doing the right thing
  118. # when it happens.
  119. def test_delta_large_object(self):
  120. # This tests an object set that will have a copy operation
  121. # 2**25 in size. This is a copy large enough that it requires
  122. # two copy operations in git's binary delta format.
  123. raise SkipTest('skipping slow, large test')
  124. with self.get_pack(pack1_sha) as orig_pack:
  125. new_blob = Blob()
  126. new_blob.data = 'big blob' + ('x' * 2 ** 25)
  127. new_blob_2 = Blob()
  128. new_blob_2.data = new_blob.data + 'y'
  129. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
  130. (new_blob_2, None)]
  131. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  132. write_pack(pack_path, all_to_pack, deltify=True)
  133. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  134. self.assertEqual(set(x[0].id for x in all_to_pack),
  135. _git_verify_pack_object_list(output))
  136. # We specifically made a new blob that should be a delta
  137. # against the blob a_sha, so make sure we really got only 4
  138. # non-delta objects:
  139. got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
  140. self.assertEqual(
  141. 4, got_non_delta,
  142. 'Expected 4 non-delta objects, got %d' % got_non_delta)