test_pack.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) any later version of
  8. # the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Compatibility tests for git packs."""
  20. import binascii
  21. import os
  22. import re
  23. import shutil
  24. import tempfile
  25. from unittest import SkipTest
  26. from dulwich.pack import (
  27. write_pack,
  28. )
  29. from dulwich.objects import (
  30. Blob,
  31. )
  32. from dulwich.tests.test_pack import (
  33. a_sha,
  34. pack1_sha,
  35. PackTests,
  36. )
  37. from dulwich.tests.compat.utils import (
  38. require_git_version,
  39. run_git_or_fail,
  40. )
  41. _NON_DELTA_RE = re.compile('non delta: (?P<non_delta>\d+) objects')
  42. def _git_verify_pack_object_list(output):
  43. pack_shas = set()
  44. for line in output.splitlines():
  45. sha = line[:40]
  46. try:
  47. binascii.unhexlify(sha)
  48. except (TypeError, binascii.Error):
  49. continue # non-sha line
  50. pack_shas.add(sha)
  51. return pack_shas
  52. class TestPack(PackTests):
  53. """Compatibility tests for reading and writing pack files."""
  54. def setUp(self):
  55. require_git_version((1, 5, 0))
  56. super(TestPack, self).setUp()
  57. self._tempdir = tempfile.mkdtemp()
  58. self.addCleanup(shutil.rmtree, self._tempdir)
  59. def test_copy(self):
  60. with self.get_pack(pack1_sha) as origpack:
  61. self.assertSucceeds(origpack.index.check)
  62. pack_path = os.path.join(self._tempdir, "Elch")
  63. write_pack(pack_path, origpack.pack_tuples())
  64. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  65. orig_shas = set(o.id for o in origpack.iterobjects())
  66. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  67. def test_deltas_work(self):
  68. orig_pack = self.get_pack(pack1_sha)
  69. orig_blob = orig_pack[a_sha]
  70. new_blob = Blob()
  71. new_blob.data = orig_blob.data + 'x'
  72. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
  73. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  74. write_pack(pack_path, all_to_pack, deltify=True)
  75. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  76. self.assertEqual(set(x[0].id for x in all_to_pack),
  77. _git_verify_pack_object_list(output))
  78. # We specifically made a new blob that should be a delta
  79. # against the blob a_sha, so make sure we really got only 3
  80. # non-delta objects:
  81. got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
  82. self.assertEqual(
  83. 3, got_non_delta,
  84. 'Expected 3 non-delta objects, got %d' % got_non_delta)
  85. def test_delta_medium_object(self):
  86. # This tests an object set that will have a copy operation
  87. # 2**20 in size.
  88. orig_pack = self.get_pack(pack1_sha)
  89. orig_blob = orig_pack[a_sha]
  90. new_blob = Blob()
  91. new_blob.data = orig_blob.data + ('x' * 2 ** 20)
  92. new_blob_2 = Blob()
  93. new_blob_2.data = new_blob.data + 'y'
  94. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
  95. (new_blob_2, None)]
  96. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  97. write_pack(pack_path, all_to_pack, deltify=True)
  98. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  99. self.assertEqual(set(x[0].id for x in all_to_pack),
  100. _git_verify_pack_object_list(output))
  101. # We specifically made a new blob that should be a delta
  102. # against the blob a_sha, so make sure we really got only 3
  103. # non-delta objects:
  104. got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
  105. self.assertEqual(
  106. 3, got_non_delta,
  107. 'Expected 3 non-delta objects, got %d' % got_non_delta)
  108. # We expect one object to have a delta chain length of two
  109. # (new_blob_2), so let's verify that actually happens:
  110. self.assertIn('chain length = 2', output)
  111. # This test is SUPER slow: over 80 seconds on a 2012-era
  112. # laptop. This is because SequenceMatcher is worst-case quadratic
  113. # on the input size. It's impractical to produce deltas for
  114. # objects this large, but it's still worth doing the right thing
  115. # when it happens.
  116. def test_delta_large_object(self):
  117. # This tests an object set that will have a copy operation
  118. # 2**25 in size. This is a copy large enough that it requires
  119. # two copy operations in git's binary delta format.
  120. raise SkipTest('skipping slow, large test')
  121. orig_pack = self.get_pack(pack1_sha)
  122. orig_blob = orig_pack[a_sha]
  123. new_blob = Blob()
  124. new_blob.data = 'big blob' + ('x' * 2 ** 25)
  125. new_blob_2 = Blob()
  126. new_blob_2.data = new_blob.data + 'y'
  127. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
  128. (new_blob_2, None)]
  129. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  130. write_pack(pack_path, all_to_pack, deltify=True)
  131. output = run_git_or_fail(['verify-pack', '-v', pack_path])
  132. self.assertEqual(set(x[0].id for x in all_to_pack),
  133. _git_verify_pack_object_list(output))
  134. # We specifically made a new blob that should be a delta
  135. # against the blob a_sha, so make sure we really got only 4
  136. # non-delta objects:
  137. got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
  138. self.assertEqual(
  139. 4, got_non_delta,
  140. 'Expected 4 non-delta objects, got %d' % got_non_delta)