test_pack.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # test_pack.py -- Compatibility tests for git packs.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Compatibility tests for git packs."""
  21. import binascii
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from dulwich.pack import (
  27. write_pack,
  28. )
  29. from dulwich.objects import (
  30. Blob,
  31. )
  32. from dulwich.tests import (
  33. SkipTest,
  34. )
  35. from dulwich.tests.test_pack import (
  36. a_sha,
  37. pack1_sha,
  38. PackTests,
  39. )
  40. from dulwich.tests.compat.utils import (
  41. require_git_version,
  42. run_git_or_fail,
  43. )
  44. _NON_DELTA_RE = re.compile(b"non delta: (?P<non_delta>\\d+) objects")
  45. def _git_verify_pack_object_list(output):
  46. pack_shas = set()
  47. for line in output.splitlines():
  48. sha = line[:40]
  49. try:
  50. binascii.unhexlify(sha)
  51. except (TypeError, binascii.Error):
  52. continue # non-sha line
  53. pack_shas.add(sha)
  54. return pack_shas
  55. class TestPack(PackTests):
  56. """Compatibility tests for reading and writing pack files."""
  57. def setUp(self):
  58. require_git_version((1, 5, 0))
  59. super(TestPack, self).setUp()
  60. self._tempdir = tempfile.mkdtemp()
  61. self.addCleanup(shutil.rmtree, self._tempdir)
  62. def test_copy(self):
  63. with self.get_pack(pack1_sha) as origpack:
  64. self.assertSucceeds(origpack.index.check)
  65. pack_path = os.path.join(self._tempdir, "Elch")
  66. write_pack(pack_path, origpack.pack_tuples())
  67. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  68. orig_shas = {o.id for o in origpack.iterobjects()}
  69. self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
  70. def test_deltas_work(self):
  71. with self.get_pack(pack1_sha) as orig_pack:
  72. orig_blob = orig_pack[a_sha]
  73. new_blob = Blob()
  74. new_blob.data = orig_blob.data + b"x"
  75. all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
  76. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  77. write_pack(pack_path, all_to_pack, deltify=True)
  78. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  79. self.assertEqual(
  80. {x[0].id for x in all_to_pack},
  81. _git_verify_pack_object_list(output),
  82. )
  83. # We specifically made a new blob that should be a delta
  84. # against the blob a_sha, so make sure we really got only 3
  85. # non-delta objects:
  86. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  87. self.assertEqual(
  88. 3,
  89. got_non_delta,
  90. "Expected 3 non-delta objects, got %d" % got_non_delta,
  91. )
  92. def test_delta_medium_object(self):
  93. # This tests an object set that will have a copy operation
  94. # 2**20 in size.
  95. with self.get_pack(pack1_sha) as orig_pack:
  96. orig_blob = orig_pack[a_sha]
  97. new_blob = Blob()
  98. new_blob.data = orig_blob.data + (b"x" * 2 ** 20)
  99. new_blob_2 = Blob()
  100. new_blob_2.data = new_blob.data + b"y"
  101. all_to_pack = list(orig_pack.pack_tuples()) + [
  102. (new_blob, None),
  103. (new_blob_2, None),
  104. ]
  105. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  106. write_pack(pack_path, all_to_pack, deltify=True)
  107. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  108. self.assertEqual(
  109. {x[0].id for x in all_to_pack},
  110. _git_verify_pack_object_list(output),
  111. )
  112. # We specifically made a new blob that should be a delta
  113. # against the blob a_sha, so make sure we really got only 3
  114. # non-delta objects:
  115. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  116. self.assertEqual(
  117. 3,
  118. got_non_delta,
  119. "Expected 3 non-delta objects, got %d" % got_non_delta,
  120. )
  121. # We expect one object to have a delta chain length of two
  122. # (new_blob_2), so let's verify that actually happens:
  123. self.assertIn(b"chain length = 2", output)
  124. # This test is SUPER slow: over 80 seconds on a 2012-era
  125. # laptop. This is because SequenceMatcher is worst-case quadratic
  126. # on the input size. It's impractical to produce deltas for
  127. # objects this large, but it's still worth doing the right thing
  128. # when it happens.
  129. def test_delta_large_object(self):
  130. # This tests an object set that will have a copy operation
  131. # 2**25 in size. This is a copy large enough that it requires
  132. # two copy operations in git's binary delta format.
  133. raise SkipTest("skipping slow, large test")
  134. with self.get_pack(pack1_sha) as orig_pack:
  135. new_blob = Blob()
  136. new_blob.data = "big blob" + ("x" * 2 ** 25)
  137. new_blob_2 = Blob()
  138. new_blob_2.data = new_blob.data + "y"
  139. all_to_pack = list(orig_pack.pack_tuples()) + [
  140. (new_blob, None),
  141. (new_blob_2, None),
  142. ]
  143. pack_path = os.path.join(self._tempdir, "pack_with_deltas")
  144. write_pack(pack_path, all_to_pack, deltify=True)
  145. output = run_git_or_fail(["verify-pack", "-v", pack_path])
  146. self.assertEqual(
  147. {x[0].id for x in all_to_pack},
  148. _git_verify_pack_object_list(output),
  149. )
  150. # We specifically made a new blob that should be a delta
  151. # against the blob a_sha, so make sure we really got only 4
  152. # non-delta objects:
  153. got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
  154. self.assertEqual(
  155. 4,
  156. got_non_delta,
  157. "Expected 4 non-delta objects, got %d" % got_non_delta,
  158. )