# test_pack.py -- Compatibility tests for git packs.
# Copyright (C) 2010 Google, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your option) any later version of
# the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
- """Compatibility tests for git packs."""
- import binascii
- import os
- import re
- import shutil
- import tempfile
- from unittest import SkipTest
- from dulwich.pack import (
- write_pack,
- )
- from dulwich.objects import (
- Blob,
- )
- from dulwich.tests.test_pack import (
- a_sha,
- pack1_sha,
- PackTests,
- )
- from dulwich.tests.compat.utils import (
- require_git_version,
- run_git_or_fail,
- )
# Matches the "non delta: N objects" summary line that the tests look for
# in the output of ``git verify-pack -v`` and captures N as ``non_delta``.
# Written as a raw string so that ``\d`` reaches the regex engine as-is
# instead of being treated as an (invalid) string escape sequence.
_NON_DELTA_RE = re.compile(r'non delta: (?P<non_delta>\d+) objects')
def _git_verify_pack_object_list(output):
    """Extract the object ids listed in ``git verify-pack -v`` output.

    A line is treated as an object line when its first 40 characters are
    exactly 40 hexadecimal digits; every other line (summary lines, blank
    lines, the trailing status line) is ignored.

    :param output: Text produced by ``git verify-pack -v``.
    :return: Set of the 40-character hex ids found at the start of lines.
    """
    pack_shas = set()
    for line in output.splitlines():
        sha = line[:40]
        # Blank or short lines must not count: unhexlify() happily accepts
        # an empty or short even-length hex string, which would otherwise
        # be recorded as a bogus "sha".
        if len(sha) != 40:
            continue
        try:
            binascii.unhexlify(sha)
        except (TypeError, ValueError, binascii.Error):
            # Not hexadecimal. ValueError additionally covers non-ASCII
            # text, which unhexlify() rejects before even parsing digits.
            continue  # non-sha line
        pack_shas.add(sha)
    return pack_shas
class TestPack(PackTests):
    """Compatibility tests for reading and writing pack files."""

    def setUp(self):
        """Require git >= 1.5.0 and create a scratch directory for packs."""
        require_git_version((1, 5, 0))
        super(TestPack, self).setUp()
        self._tempdir = tempfile.mkdtemp()
        # Remove the scratch directory again once the test has finished.
        self.addCleanup(shutil.rmtree, self._tempdir)

    def _write_and_verify_deltified_pack(self, all_to_pack):
        """Write ``all_to_pack`` as a deltified pack and check it with git.

        Asserts that ``git verify-pack -v`` lists exactly the ids of the
        objects in ``all_to_pack``.

        :param all_to_pack: List of ``(object, path)`` tuples to write.
        :return: The output of ``git verify-pack -v`` for the new pack.
        """
        pack_path = os.path.join(self._tempdir, "pack_with_deltas")
        write_pack(pack_path, all_to_pack, deltify=True)
        output = run_git_or_fail(['verify-pack', '-v', pack_path])
        self.assertEqual(set(x[0].id for x in all_to_pack),
                         _git_verify_pack_object_list(output))
        return output

    def _assert_non_delta_count(self, expected, output):
        """Assert that git reported ``expected`` non-delta objects.

        :param expected: Number of non-delta objects the pack should hold.
        :param output: Output of ``git verify-pack -v``.
        """
        match = _NON_DELTA_RE.search(output)
        # Fail with a readable message rather than an AttributeError when
        # the summary line is missing from git's output.
        self.assertIsNotNone(
            match, 'No "non delta" summary found in verify-pack output')
        got_non_delta = int(match.group('non_delta'))
        self.assertEqual(
            expected, got_non_delta,
            'Expected %d non-delta objects, got %d' % (
                expected, got_non_delta))

    def test_copy(self):
        """A pack rewritten by write_pack lists the same objects in git."""
        with self.get_pack(pack1_sha) as origpack:
            self.assertSucceeds(origpack.index.check)
            pack_path = os.path.join(self._tempdir, "Elch")
            write_pack(pack_path, origpack.pack_tuples())
            output = run_git_or_fail(['verify-pack', '-v', pack_path])
            orig_shas = set(o.id for o in origpack.iterobjects())
            self.assertEqual(orig_shas, _git_verify_pack_object_list(output))

    def test_deltas_work(self):
        """A blob extending an existing blob is stored as a delta."""
        # Use the pack as a context manager, as test_copy does, so it is
        # closed again when the test is done.
        with self.get_pack(pack1_sha) as orig_pack:
            orig_blob = orig_pack[a_sha]
            new_blob = Blob()
            new_blob.data = orig_blob.data + 'x'
            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
            output = self._write_and_verify_deltified_pack(all_to_pack)
            # We specifically made a new blob that should be a delta
            # against the blob a_sha, so make sure we really got only 3
            # non-delta objects:
            self._assert_non_delta_count(3, output)

    def test_delta_medium_object(self):
        """Deltas work for an object needing a 2**20 byte copy operation."""
        # This tests an object set that will have a copy operation
        # 2**20 in size.
        with self.get_pack(pack1_sha) as orig_pack:
            orig_blob = orig_pack[a_sha]
            new_blob = Blob()
            new_blob.data = orig_blob.data + ('x' * 2 ** 20)
            new_blob_2 = Blob()
            new_blob_2.data = new_blob.data + 'y'
            all_to_pack = list(orig_pack.pack_tuples()) + [
                (new_blob, None),
                (new_blob_2, None),
            ]
            output = self._write_and_verify_deltified_pack(all_to_pack)
            # We specifically made a new blob that should be a delta
            # against the blob a_sha, so make sure we really got only 3
            # non-delta objects:
            self._assert_non_delta_count(3, output)
            # We expect one object to have a delta chain length of two
            # (new_blob_2), so let's verify that actually happens:
            self.assertIn('chain length = 2', output)

    # This test is SUPER slow: over 80 seconds on a 2012-era
    # laptop. This is because SequenceMatcher is worst-case quadratic
    # on the input size. It's impractical to produce deltas for
    # objects this large, but it's still worth doing the right thing
    # when it happens.
    def test_delta_large_object(self):
        """Deltas work for an object needing a 2**25 byte copy (skipped)."""
        # This tests an object set that will have a copy operation
        # 2**25 in size. This is a copy large enough that it requires
        # two copy operations in git's binary delta format.
        raise SkipTest('skipping slow, large test')
        # Everything below is intentionally unreachable while the test is
        # disabled; it is kept so the test can be re-enabled by removing
        # the raise above.
        with self.get_pack(pack1_sha) as orig_pack:
            new_blob = Blob()
            new_blob.data = 'big blob' + ('x' * 2 ** 25)
            new_blob_2 = Blob()
            new_blob_2.data = new_blob.data + 'y'
            all_to_pack = list(orig_pack.pack_tuples()) + [
                (new_blob, None),
                (new_blob_2, None),
            ]
            output = self._write_and_verify_deltified_pack(all_to_pack)
            # Unlike the other delta tests, new_blob is not built from the
            # blob a_sha, so it is expected to be stored whole; only
            # new_blob_2 should be a delta (against new_blob). That gives
            # one more non-delta object than in those tests, i.e. 4:
            self._assert_non_delta_count(4, output)
|