# test_pack.py -- Compatibility tests for git packs.
# Copyright (C) 2010 Google, Inc.
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
"""Compatibility tests for git packs."""
import binascii
import os
import re
import shutil
import tempfile
from typing import NoReturn
from dulwich.file import GitFile
from dulwich.objects import Blob
from dulwich.pack import (
PackData,
PackIndex3,
load_pack_index,
write_pack,
write_pack_index_v3,
)
from .. import SkipTest
from ..test_pack import PackTests, a_sha, pack1_sha
from .utils import require_git_version, rmtree_ro, run_git_or_fail
_NON_DELTA_RE = re.compile(b"non delta: (?P\\d+) objects")
def _git_verify_pack_object_list(output):
pack_shas = set()
for line in output.splitlines():
sha = line[:40]
try:
binascii.unhexlify(sha)
except (TypeError, binascii.Error):
continue # non-sha line
pack_shas.add(sha)
return pack_shas
class TestPack(PackTests):
    """Compatibility tests for reading and writing pack files."""

    def setUp(self) -> None:
        """Require a usable git and create a scratch directory for packs."""
        require_git_version((1, 5, 0))
        super().setUp()
        self._tempdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._tempdir)

    def _assert_non_delta_count(self, output: bytes, expected: int) -> None:
        """Assert the number of non-delta objects git reported for a pack.

        Args:
          output: Output of ``git verify-pack -v``.
          expected: Number of non-delta objects the pack should contain.
        """
        match = _NON_DELTA_RE.search(output)
        # Fail with a readable message instead of an AttributeError on None
        # when the summary line is absent.
        self.assertIsNotNone(
            match, "no 'non delta' summary line in verify-pack output"
        )
        got_non_delta = int(match.group("non_delta"))
        self.assertEqual(
            expected,
            got_non_delta,
            f"Expected {expected} non-delta objects, got {got_non_delta}",
        )

    def test_copy(self) -> None:
        """A pack rewritten by dulwich lists the same objects in git."""
        with self.get_pack(pack1_sha) as origpack:
            self.assertSucceeds(origpack.index.check)
            pack_path = os.path.join(self._tempdir, "Elch")
            write_pack(pack_path, origpack.pack_tuples())
            output = run_git_or_fail(["verify-pack", "-v", pack_path])
            orig_shas = {o.id for o in origpack.iterobjects()}
            self.assertEqual(orig_shas, _git_verify_pack_object_list(output))

    def test_deltas_work(self) -> None:
        """A deltified pack written by dulwich verifies cleanly in git."""
        with self.get_pack(pack1_sha) as orig_pack:
            orig_blob = orig_pack[a_sha]
            new_blob = Blob()
            new_blob.data = orig_blob.data + b"x"
            all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [
                (new_blob, None)
            ]
            pack_path = os.path.join(self._tempdir, "pack_with_deltas")
            write_pack(pack_path, all_to_pack, deltify=True)
            output = run_git_or_fail(["verify-pack", "-v", pack_path])
            self.assertEqual(
                {x[0].id for x in all_to_pack},
                _git_verify_pack_object_list(output),
            )
            # We specifically made a new blob that should be a delta
            # against the blob a_sha, so make sure we really got only 3
            # non-delta objects:
            self._assert_non_delta_count(output, 3)

    def test_delta_medium_object(self) -> None:
        """Deltas with a 2**20-byte copy operation are readable by git."""
        # This tests an object set that will have a copy operation
        # 2**20 in size.
        with self.get_pack(pack1_sha) as orig_pack:
            orig_blob = orig_pack[a_sha]
            new_blob = Blob()
            new_blob.data = orig_blob.data + (b"x" * 2**20)
            new_blob_2 = Blob()
            new_blob_2.data = new_blob.data + b"y"
            all_to_pack = [
                *list(orig_pack.pack_tuples()),
                (new_blob, None),
                (new_blob_2, None),
            ]
            pack_path = os.path.join(self._tempdir, "pack_with_deltas")
            write_pack(pack_path, all_to_pack, deltify=True)
            output = run_git_or_fail(["verify-pack", "-v", pack_path])
            self.assertEqual(
                {x[0].id for x in all_to_pack},
                _git_verify_pack_object_list(output),
            )
            # We specifically made a new blob that should be a delta
            # against the blob a_sha, so make sure we really got only 3
            # non-delta objects:
            self._assert_non_delta_count(output, 3)
            # We expect one object to have a delta chain length of two
            # (new_blob_2), so let's verify that actually happens:
            self.assertIn(b"chain length = 2", output)

    # This test is SUPER slow: over 80 seconds on a 2012-era
    # laptop. This is because SequenceMatcher is worst-case quadratic
    # on the input size. It's impractical to produce deltas for
    # objects this large, but it's still worth doing the right thing
    # when it happens.
    def test_delta_large_object(self) -> NoReturn:
        """Deltas needing two copy operations are readable by git (skipped)."""
        # This tests an object set that will have a copy operation
        # 2**25 in size. This is a copy large enough that it requires
        # two copy operations in git's binary delta format.
        raise SkipTest("skipping slow, large test")
        # Everything below is intentionally unreachable until the skip is
        # lifted; blob data is bytes so the test is ready to run as-is.
        with self.get_pack(pack1_sha) as orig_pack:
            new_blob = Blob()
            new_blob.data = b"big blob" + (b"x" * 2**25)
            new_blob_2 = Blob()
            new_blob_2.data = new_blob.data + b"y"
            all_to_pack = [
                *list(orig_pack.pack_tuples()),
                (new_blob, None),
                (new_blob_2, None),
            ]
            pack_path = os.path.join(self._tempdir, "pack_with_deltas")
            write_pack(pack_path, all_to_pack, deltify=True)
            output = run_git_or_fail(["verify-pack", "-v", pack_path])
            self.assertEqual(
                {x[0].id for x in all_to_pack},
                _git_verify_pack_object_list(output),
            )
            # new_blob is not derived from any existing object, while
            # new_blob_2 should be stored as a delta against new_blob, so
            # make sure we really got only 4 non-delta objects:
            self._assert_non_delta_count(output, 4)
class TestPackIndexCompat(PackTests):
    """Compatibility tests for pack index formats."""

    def setUp(self) -> None:
        """Require a usable git and create a scratch directory for packs."""
        require_git_version((1, 5, 0))
        super().setUp()
        self._tempdir = tempfile.mkdtemp()
        self.addCleanup(rmtree_ro, self._tempdir)

    def test_dulwich_create_index_git_readable(self) -> None:
        """Test that git can read pack indexes created by dulwich."""
        # Create a simple pack with objects
        blob = Blob()
        blob.data = b"Test blob"
        pack_path = os.path.join(self._tempdir, "test_pack")
        entries = [(blob, None)]
        write_pack(pack_path, entries)
        # Load the pack and create v2 index (most compatible)
        pack_data = PackData(pack_path + ".pack")
        try:
            pack_data.create_index(pack_path + ".idx", version=2)
        finally:
            pack_data.close()
        # Verify git can read it
        output = run_git_or_fail(["verify-pack", "-v", pack_path + ".pack"])
        self.assertIn(blob.id.decode("ascii"), output.decode("ascii"))

    def test_dulwich_read_git_index(self) -> None:
        """Test that dulwich can read pack indexes created by git."""
        # Create a simple pack with objects
        blob = Blob()
        blob.data = b"Test blob for git"
        pack_path = os.path.join(self._tempdir, "git_pack")
        entries = [(blob, None)]
        write_pack(pack_path, entries)
        # Create index with git
        run_git_or_fail(["index-pack", pack_path + ".pack"])
        # Load with dulwich
        idx = load_pack_index(pack_path + ".idx")
        # Cleanups run last-in-first-out, so the index is closed before the
        # temporary directory registered in setUp() is removed.
        self.addCleanup(idx.close)
        # Verify it works
        self.assertIn(blob.id, idx)
        self.assertEqual(len(idx), 1)

    def test_index_format_v3_sha256_future(self) -> None:
        """Test that v3 index format is ready for SHA-256 support."""
        # This test verifies the v3 implementation structure is ready
        # for SHA-256, even though SHA-256 itself is not yet implemented
        # Create a dummy v3 index to test the format
        entries = [(b"a" * 20, 100, 1234)]  # SHA-1 for now
        v3_path = os.path.join(self._tempdir, "v3_test.idx")
        with GitFile(v3_path, "wb") as f:
            write_pack_index_v3(f, entries, b"x" * 20, hash_algorithm=1)
        # Load and verify structure
        idx = load_pack_index(v3_path)
        # Close the index before the temporary directory is removed.
        self.addCleanup(idx.close)
        self.assertIsInstance(idx, PackIndex3)
        self.assertEqual(idx.version, 3)
        self.assertEqual(idx.hash_algorithm, 1)  # SHA-1
        self.assertEqual(idx.hash_size, 20)
        # Verify SHA-256 would raise NotImplementedError
        with self.assertRaises(NotImplementedError):
            with GitFile(v3_path + ".sha256", "wb") as f:
                write_pack_index_v3(f, entries, b"x" * 32, hash_algorithm=2)