| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267 |
- # test_midx.py -- Compatibility tests for multi-pack-index functionality
- # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
- #
- # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
- # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
- # General Public License as published by the Free Software Foundation; version 2.0
- # or (at your option) any later version. You can redistribute it and/or
- # modify it under the terms of either of these two licenses.
- """Compatibility tests for Git multi-pack-index functionality.
- These tests verify that dulwich's MIDX implementation can read and interact
- with MIDX files created by C Git, and that Git can read MIDX files created
- by Dulwich.
- """
- import os
- import tempfile
- from dulwich.midx import load_midx
- from dulwich.object_store import DiskObjectStore
- from dulwich.repo import Repo
- from .utils import CompatTestCase, run_git_or_fail
class MIDXCompatTests(CompatTestCase):
    """Compatibility tests for multi-pack-index functionality.

    Every test builds a throw-away repository with the git command line
    (see ``create_test_repo_with_packs``) and then checks one direction of
    interoperability: Dulwich reading a MIDX written by Git, or Git
    accepting a MIDX written by Dulwich.
    """

    # Multi-pack-index was introduced in Git 2.21.0
    min_git_version = (2, 21, 0)

    def setUp(self) -> None:
        """Create a scratch directory and pin the git identity."""
        super().setUp()
        self.test_dir = tempfile.mkdtemp()
        self.repo_path = os.path.join(self.test_dir, "test-repo")

        # Set up git identity to avoid committer identity errors
        self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
        self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
        self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
        self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")

    def tearDown(self) -> None:
        """Remove the scratch directory, then run the base-class teardown."""
        from .utils import rmtree_ro

        try:
            rmtree_ro(self.test_dir)
        finally:
            # Always give the base classes a chance to clean up, even if
            # removing the directory raised.
            super().tearDown()

    def _midx_path(self) -> str:
        """Return the path of the multi-pack-index file in the test repo."""
        return os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")

    def create_test_repo_with_packs(self) -> str:
        """Create a test repository with multiple pack files.

        The git directory ends up at ``self.repo_path``; a separate work
        tree is linked to it through a ``.git`` file.

        Returns:
          Path of the work tree in which git commands should be run.
        """
        # Initialize repository, then move its .git directory to repo_path
        run_git_or_fail(["init"], cwd=self.test_dir)
        os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)

        work_dir = os.path.join(self.test_dir, "work")
        os.makedirs(work_dir)

        # Create .git file pointing to our repo
        with open(os.path.join(work_dir, ".git"), "w") as f:
            f.write(f"gitdir: {self.repo_path}\n")

        # Create some commits and pack them
        for i in range(5):
            filename = f"file{i}.txt"
            with open(os.path.join(work_dir, filename), "w") as f:
                f.write(f"Content {i}\n" * 100)  # Make files bigger to ensure packing

            run_git_or_fail(["add", filename], cwd=work_dir)
            run_git_or_fail(
                [
                    "commit",
                    "-m",
                    f"Commit {i}",
                    "--author",
                    "Test Author <test@example.com>",
                ],
                cwd=work_dir,
            )

            # Create a pack file after each commit to get multiple packs
            if i > 0:  # Skip first commit to avoid empty pack
                run_git_or_fail(["repack", "-d"], cwd=work_dir)

        return work_dir

    def test_read_git_midx(self) -> None:
        """Test that Dulwich can read a MIDX file created by Git."""
        work_dir = self.create_test_repo_with_packs()

        # Have Git create a MIDX file
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Verify Git created the MIDX file
        midx_path = self._midx_path()
        self.assertTrue(
            os.path.exists(midx_path), "Git did not create multi-pack-index file"
        )

        # Load the MIDX file with Dulwich
        midx = load_midx(midx_path)
        try:
            # Verify we can read it
            self.assertGreater(len(midx), 0, "MIDX should contain objects")
            self.assertGreater(midx.pack_count, 0, "MIDX should reference packs")

            # Verify the pack names look reasonable
            # Git stores .idx extensions in MIDX files
            for pack_name in midx.pack_names:
                self.assertTrue(pack_name.startswith("pack-"))
                self.assertTrue(pack_name.endswith(".idx"))
        finally:
            midx.close()

    def test_git_uses_dulwich_midx(self) -> None:
        """Test that Git can use a MIDX file created by Dulwich."""
        work_dir = self.create_test_repo_with_packs()

        # Use Dulwich to create a MIDX file
        repo = Repo(self.repo_path)
        try:
            store = repo.object_store
            self.assertIsInstance(store, DiskObjectStore)

            # Write MIDX with Dulwich
            checksum = store.write_midx()
            self.assertEqual(20, len(checksum))
        finally:
            repo.close()

        # Verify the file was created
        self.assertTrue(os.path.exists(self._midx_path()))

        # Have Git verify the MIDX file (should succeed with return code 0)
        run_git_or_fail(["multi-pack-index", "verify"], cwd=work_dir)

        # Try to use the MIDX with Git commands
        # This should work if the MIDX is valid
        run_git_or_fail(["fsck"], cwd=work_dir)

    def test_midx_object_lookup_matches_git(self) -> None:
        """Test that object lookups through MIDX match Git's results."""
        work_dir = self.create_test_repo_with_packs()

        # Have Git create a MIDX file
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Load with Dulwich
        repo = Repo(self.repo_path)
        try:
            store = repo.object_store

            # Get MIDX
            midx = store.get_midx()
            self.assertIsNotNone(midx, "MIDX should be loaded")

            # Get all objects from Git; each output line starts with the
            # hex object id, optionally followed by a path.
            rev_list = run_git_or_fail(
                ["rev-list", "--all", "--objects"], cwd=work_dir
            )
            object_shas = [
                line.split()[0]
                for line in rev_list.decode("utf-8").splitlines()
                if line.strip()
            ]

            # Verify we can find these objects through the MIDX
            found_count = 0
            for sha_hex in object_shas:
                # Convert hex to binary
                sha_bin = bytes.fromhex(sha_hex)

                # Check if it's in the MIDX
                if sha_bin in midx:
                    found_count += 1

                    # Verify we can get the object location
                    location = midx.object_offset(sha_bin)
                    self.assertIsNotNone(location)
                    pack_name, offset = location
                    self.assertIsInstance(pack_name, str)
                    self.assertIsInstance(offset, int)
                    self.assertGreater(offset, 0)

            # We should find at least some objects in the MIDX
            self.assertGreater(
                found_count, 0, "Should find at least some objects in MIDX"
            )
        finally:
            repo.close()

    def test_midx_with_multiple_packs(self) -> None:
        """Test MIDX functionality with multiple pack files."""
        work_dir = self.create_test_repo_with_packs()

        # Create multiple pack files explicitly
        run_git_or_fail(["repack"], cwd=work_dir)
        run_git_or_fail(["repack"], cwd=work_dir)

        # Create MIDX with Git
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Load with Dulwich
        midx = load_midx(self._midx_path())
        try:
            # Should have multiple packs
            # (Exact count may vary depending on Git version and repacking)
            self.assertGreaterEqual(midx.pack_count, 1)

            # Verify we can iterate over all entries
            entries = list(midx.iterentries())
            self.assertGreater(len(entries), 0)

            # All entries should have valid structure
            for sha, pack_name, offset in entries:
                self.assertEqual(20, len(sha))  # SHA-1 is 20 bytes
                self.assertIsInstance(pack_name, str)
                # Git stores .idx extensions in MIDX files
                self.assertTrue(pack_name.endswith(".idx"))
                self.assertIsInstance(offset, int)
                self.assertGreaterEqual(offset, 0)
        finally:
            midx.close()

    def test_dulwich_object_store_with_git_midx(self) -> None:
        """Test that DiskObjectStore can use Git-created MIDX for lookups."""
        work_dir = self.create_test_repo_with_packs()

        # Have Git create a MIDX file
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Load repo with Dulwich
        repo = Repo(self.repo_path)
        try:
            # Get a commit from the repo
            rev_parse = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
            head_sha = rev_parse.decode("utf-8").strip().encode("ascii")

            # Verify we can access it through Dulwich
            # This should use the MIDX for lookup
            obj = repo.object_store[head_sha]
            self.assertIsNotNone(obj)
            self.assertEqual(b"commit", obj.type_name)
        finally:
            repo.close()

    def test_repack_with_midx(self) -> None:
        """Test that repacking works correctly with MIDX present."""
        work_dir = self.create_test_repo_with_packs()

        # Create MIDX with Dulwich
        repo = Repo(self.repo_path)
        try:
            repo.object_store.write_midx()
        finally:
            repo.close()

        # Verify Git can still repack
        run_git_or_fail(["repack", "-d"], cwd=work_dir)

        # The MIDX should still be readable
        midx_path = self._midx_path()
        if os.path.exists(midx_path):  # Git may remove it during repack
            midx = load_midx(midx_path)
            try:
                # The real check is that load_midx() and len() succeed;
                # the comparison itself can never fail.
                self.assertGreaterEqual(len(midx), 0)
            finally:
                midx.close()
|