| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- # test_midx.py -- Tests for multi-pack-index
- # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
- #
- # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
- # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
- # General Public License as published by the Free Software Foundation; version 2.0
- # or (at your option) any later version. You can redistribute it and/or
- # modify it under the terms of either of these two licenses.
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # You should have received a copy of the licenses; if not, see
- # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
- # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
- # License, Version 2.0.
- #
- """Tests for multi-pack-index (MIDX) functionality."""
- import os
- import tempfile
- from io import BytesIO
- from unittest import TestCase
- from dulwich.midx import (
- HASH_ALGORITHM_SHA1,
- MultiPackIndex,
- write_midx,
- write_midx_file,
- )
- class MIDXWriteTests(TestCase):
- """Tests for writing MIDX files."""
- def test_write_empty_midx(self):
- """Test writing an empty MIDX file."""
- f = BytesIO()
- pack_entries = []
- checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
- # Checksum should be 20 bytes
- self.assertEqual(20, len(checksum))
- # Should be able to read it back
- f.seek(0)
- midx = MultiPackIndex("test.midx", file=f, contents=f.read())
- self.assertEqual(0, len(midx))
- self.assertEqual(0, midx.pack_count)
- self.assertEqual([], midx.pack_names)
- def test_write_single_pack_midx(self):
- """Test writing a MIDX file with a single pack."""
- f = BytesIO()
- # Create some fake pack entries
- pack_entries = [
- (
- "pack-abc123.idx",
- [
- (b"\x01" * 20, 100, 0x12345678), # sha, offset, crc32
- (b"\x02" * 20, 200, 0x87654321),
- (b"\x03" * 20, 300, 0xABCDEF00),
- ],
- )
- ]
- checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
- self.assertEqual(20, len(checksum))
- # Read it back
- f.seek(0)
- midx = MultiPackIndex("test.midx", file=f, contents=f.read())
- self.assertEqual(3, len(midx))
- self.assertEqual(1, midx.pack_count)
- self.assertEqual(["pack-abc123.idx"], midx.pack_names)
- # Check object lookups
- result = midx.object_offset(b"\x01" * 20)
- self.assertIsNotNone(result)
- pack_name, offset = result
- self.assertEqual("pack-abc123.idx", pack_name)
- self.assertEqual(100, offset)
- result = midx.object_offset(b"\x02" * 20)
- self.assertIsNotNone(result)
- pack_name, offset = result
- self.assertEqual("pack-abc123.idx", pack_name)
- self.assertEqual(200, offset)
- result = midx.object_offset(b"\x03" * 20)
- self.assertIsNotNone(result)
- pack_name, offset = result
- self.assertEqual("pack-abc123.idx", pack_name)
- self.assertEqual(300, offset)
- # Check non-existent object
- result = midx.object_offset(b"\xff" * 20)
- self.assertIsNone(result)
- def test_write_multiple_packs_midx(self):
- """Test writing a MIDX file with multiple packs."""
- f = BytesIO()
- pack_entries = [
- (
- "pack-111.idx",
- [
- (b"\x01" * 20, 100, 0),
- (b"\x03" * 20, 300, 0),
- ],
- ),
- (
- "pack-222.idx",
- [
- (b"\x02" * 20, 50, 0),
- (b"\x04" * 20, 150, 0),
- ],
- ),
- ]
- checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
- self.assertEqual(20, len(checksum))
- # Read it back
- f.seek(0)
- midx = MultiPackIndex("test.midx", file=f, contents=f.read())
- self.assertEqual(4, len(midx))
- self.assertEqual(2, midx.pack_count)
- self.assertEqual(["pack-111.idx", "pack-222.idx"], midx.pack_names)
- # Objects should be findable across packs
- result = midx.object_offset(b"\x01" * 20)
- self.assertIsNotNone(result)
- self.assertEqual("pack-111.idx", result[0])
- result = midx.object_offset(b"\x02" * 20)
- self.assertIsNotNone(result)
- self.assertEqual("pack-222.idx", result[0])
- def test_write_large_offsets(self):
- """Test writing a MIDX file with large offsets (>= 2^31)."""
- f = BytesIO()
- large_offset = 2**32 # Offset that requires LOFF chunk
- pack_entries = [
- (
- "pack-large.idx",
- [
- (b"\x01" * 20, 100, 0),
- (b"\x02" * 20, large_offset, 0), # Large offset
- ],
- )
- ]
- checksum = write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
- self.assertEqual(20, len(checksum))
- # Read it back
- f.seek(0)
- midx = MultiPackIndex("test.midx", file=f, contents=f.read())
- self.assertEqual(2, len(midx))
- # Small offset should work
- result = midx.object_offset(b"\x01" * 20)
- self.assertIsNotNone(result)
- self.assertEqual(100, result[1])
- # Large offset should work
- result = midx.object_offset(b"\x02" * 20)
- self.assertIsNotNone(result)
- self.assertEqual(large_offset, result[1])
- def test_write_midx_file(self):
- """Test writing a MIDX file to disk."""
- with tempfile.TemporaryDirectory() as tmpdir:
- midx_path = os.path.join(tmpdir, "multi-pack-index")
- pack_entries = [
- (
- "pack-test.idx",
- [
- (b"\xaa" * 20, 1000, 0),
- ],
- )
- ]
- checksum = write_midx_file(midx_path, pack_entries, HASH_ALGORITHM_SHA1)
- self.assertEqual(20, len(checksum))
- # Verify file was created
- self.assertTrue(os.path.exists(midx_path))
- # Read it back from disk
- with open(midx_path, "rb") as f:
- midx = MultiPackIndex(midx_path, file=f, contents=f.read())
- self.assertEqual(1, len(midx))
- result = midx.object_offset(b"\xaa" * 20)
- self.assertIsNotNone(result)
- self.assertEqual("pack-test.idx", result[0])
- self.assertEqual(1000, result[1])
- class MIDXContainsTests(TestCase):
- """Tests for MIDX __contains__ method."""
- def test_contains_object(self):
- """Test checking if an object is in the MIDX."""
- f = BytesIO()
- pack_entries = [
- (
- "pack-test.idx",
- [
- (b"\x01" * 20, 100, 0),
- (b"\x02" * 20, 200, 0),
- ],
- )
- ]
- write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
- f.seek(0)
- midx = MultiPackIndex("test.midx", file=f, contents=f.read())
- self.assertTrue(b"\x01" * 20 in midx)
- self.assertTrue(b"\x02" * 20 in midx)
- self.assertFalse(b"\xff" * 20 in midx)
- class MIDXIterEntriesTests(TestCase):
- """Tests for MIDX iterentries method."""
- def test_iterentries(self):
- """Test iterating over MIDX entries."""
- f = BytesIO()
- pack_entries = [
- (
- "pack-111.idx",
- [
- (b"\x01" * 20, 100, 0),
- (b"\x03" * 20, 300, 0),
- ],
- ),
- (
- "pack-222.idx",
- [
- (b"\x02" * 20, 50, 0),
- ],
- ),
- ]
- write_midx(f, pack_entries, HASH_ALGORITHM_SHA1)
- f.seek(0)
- midx = MultiPackIndex("test.midx", file=f, contents=f.read())
- entries = list(midx.iterentries())
- self.assertEqual(3, len(entries))
- # Entries should be sorted by SHA
- self.assertEqual(b"\x01" * 20, entries[0][0])
- self.assertEqual("pack-111.idx", entries[0][1])
- self.assertEqual(100, entries[0][2])
- self.assertEqual(b"\x02" * 20, entries[1][0])
- self.assertEqual("pack-222.idx", entries[1][1])
- self.assertEqual(50, entries[1][2])
- self.assertEqual(b"\x03" * 20, entries[2][0])
- self.assertEqual("pack-111.idx", entries[2][1])
- self.assertEqual(300, entries[2][2])
|