| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- # test_sha256_packs.py -- Compatibility tests for SHA256 pack files
- # Copyright (C) 2024 The Dulwich contributors
- #
- # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
- # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
- # General Public License as published by the Free Software Foundation; version 2.0
- # or (at your option) any later version. You can redistribute it and/or
- # modify it under the terms of either of these two licenses.
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # You should have received a copy of the licenses; if not, see
- # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
- # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
- # License, Version 2.0.
- #
- """Compatibility tests for SHA256 pack files with git command line tools."""
- import os
- import tempfile
- from dulwich.object_format import SHA256
- from dulwich.objects import Blob, Commit, Tree
- from dulwich.pack import load_pack_index_file
- from dulwich.repo import Repo
- from .utils import CompatTestCase, run_git_or_fail
class GitSHA256PackCompatibilityTests(CompatTestCase):
    """Test SHA256 pack file compatibility with git command line tools.

    Each test builds a throw-away SHA256 repository with one implementation
    (git or dulwich) and checks that the other one can read the result.
    """

    min_git_version = (2, 29, 0)

    def _run_git(self, args, cwd=None):
        """Run git command in the specified directory."""
        return run_git_or_fail(args, cwd=cwd)

    def _make_repo_dir(self):
        """Create a temporary directory that is removed when the test ends.

        Returns:
            Path of the newly created, empty directory.
        """
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        return tmp.name

    def _init_git_sha256_repo(self):
        """Create a temporary SHA256 repository using ``git init``.

        Returns:
            Path of the working tree of the new repository.
        """
        repo_path = self._make_repo_dir()
        self._run_git(["init", "--object-format=sha256", repo_path])
        return repo_path

    def _git_commit_files(self, repo_path, files, message):
        """Write files into the working tree and commit them with git.

        Args:
            repo_path: Path of the working tree.
            files: Mapping of file name to text content.
            message: Commit message.
        """
        for name, content in files.items():
            with open(os.path.join(repo_path, name), "w", encoding="utf-8") as f:
                f.write(content)
        self._run_git(["add", "."], cwd=repo_path)
        self._run_git(["commit", "-m", message], cwd=repo_path)

    def _open_repo(self, repo_path):
        """Open a repository with dulwich and close it when the test ends.

        Registering the close as a cleanup means the repository is released
        even when an assertion fails part-way through the test. Cleanups run
        in reverse registration order, so the repository is closed before
        the temporary directory holding it is removed.
        """
        repo = Repo(repo_path)
        self.addCleanup(repo.close)
        return repo

    def _pack_dir_entries(self, repo_path, suffix):
        """Return full paths of the pack directory entries ending in suffix."""
        pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
        return [
            os.path.join(pack_dir, name)
            for name in os.listdir(pack_dir)
            if name.endswith(suffix)
        ]

    def _commit_blobs(self, repo, files, author, message):
        """Store blobs, a tree and a commit in repo and point master at it.

        Args:
            repo: Open dulwich repository to write into.
            files: Iterable of ``(name, data)`` pairs, both bytes.
            author: Identity used as both author and committer.
            message: Commit message, as bytes.
        """
        tree = Tree()
        for name, data in files:
            blob = Blob.from_string(data)
            repo.object_store.add_object(blob)
            tree.add(name, 0o100644, blob.get_id(SHA256))
        repo.object_store.add_object(tree)

        commit = Commit()
        commit.tree = tree.get_id(SHA256)
        commit.author = commit.committer = author
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = message
        repo.object_store.add_object(commit)

        repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)

    def test_git_pack_readable_by_dulwich(self):
        """Test that git-created SHA256 pack files are readable by dulwich."""
        # Create SHA256 repo with git; many files so that gc produces a pack
        repo_path = self._init_git_sha256_repo()
        self._git_commit_files(
            repo_path,
            {f"file{i}.txt": f"Content for file {i}\n" for i in range(20)},
            "Add 20 files",
        )

        # Force pack creation
        self._run_git(["gc"], cwd=repo_path)

        # Open with dulwich
        repo = self._open_repo(repo_path)
        self.assertEqual(repo.object_format, SHA256)

        # Find pack files
        pack_paths = self._pack_dir_entries(repo_path, ".pack")
        self.assertGreater(len(pack_paths), 0, "No pack files created")

        # Read every pack index with dulwich
        obj_count = 0
        for pack_path in pack_paths:
            idx_path = pack_path[: -len(".pack")] + ".idx"

            # Load pack index with SHA256 algorithm
            with open(idx_path, "rb") as f:
                pack_idx = load_pack_index_file(
                    idx_path, f, object_format=repo.object_format
                )

                # Verify it's detected as SHA256
                self.assertEqual(pack_idx.hash_size, 32)

                # Verify we can iterate objects
                for sha, _offset, _crc32 in pack_idx.iterentries():
                    self.assertEqual(len(sha), 32)  # SHA256
                    obj_count += 1

        # At least our 20 files + tree + commit
        self.assertGreater(obj_count, 20)

        # Resolve the commit through HEAD rather than a hard-coded branch:
        # the branch created by "git init" depends on init.defaultBranch.
        commit = repo[repo.refs[b"HEAD"]]
        self.assertIsInstance(commit, Commit)

        # Read the tree
        tree = repo[commit.tree]
        self.assertIsInstance(tree, Tree)

        # Verify all files are there and every blob can be read
        file_count = 0
        for name, _mode, sha in tree.items():
            if name.startswith(b"file") and name.endswith(b".txt"):
                file_count += 1
                self.assertIsInstance(repo[sha], Blob)

        self.assertEqual(file_count, 20)

    def test_dulwich_objects_readable_by_git(self):
        """Test that dulwich-created SHA256 objects are readable by git."""
        contents = [f"Dulwich blob content {i}".encode() for i in range(10)]

        # Create SHA256 repo with dulwich
        repo_path = self._make_repo_dir()
        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
        try:
            self._commit_blobs(
                repo,
                [
                    (f"blob{i}.txt".encode(), data)
                    for i, data in enumerate(contents)
                ],
                b"Dulwich Test <test@dulwich.org>",
                b"Test commit with blobs",
            )
        finally:
            # Close before handing the repository over to git, even if
            # writing the objects failed.
            repo.close()

        # Verify git can read all objects
        output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
        self.assertEqual(len(output.strip()), 64)  # SHA256

        # List tree contents. Count lines rather than occurrences of "blob",
        # since "blob" appears twice per line (object type and file name).
        tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
        self.assertEqual(len(tree_output.strip().splitlines()), 10)

        # Verify git can check out the content
        self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)

        # Verify files exist with correct content
        for i, expected in enumerate(contents):
            file_path = os.path.join(repo_path, f"blob{i}.txt")
            self.assertTrue(os.path.exists(file_path))
            with open(file_path, "rb") as f:
                self.assertEqual(f.read(), expected)

    def test_pack_index_v1_interop(self):
        """Test pack index v1 interoperability with SHA256."""
        # Create repo with git using pack index v1
        repo_path = self._init_git_sha256_repo()
        self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)

        self._git_commit_files(
            repo_path,
            {f"v1test{i}.txt": f"Pack v1 test {i}\n" for i in range(10)},
            "Test pack v1",
        )
        self._run_git(["gc"], cwd=repo_path)

        # Read with dulwich
        repo = self._open_repo(repo_path)

        # Find pack indexes; without this check the loop below would pass
        # vacuously if gc had produced no index at all.
        idx_paths = self._pack_dir_entries(repo_path, ".idx")
        self.assertGreater(len(idx_paths), 0, "No pack index files created")

        for idx_path in idx_paths:
            with open(idx_path, "rb") as f:
                pack_idx = load_pack_index_file(
                    idx_path, f, object_format=repo.object_format
                )

                # Verify it's v1 with SHA256
                self.assertEqual(pack_idx.version, 1)
                self.assertEqual(pack_idx.hash_size, 32)

                # Verify we can iterate
                for sha, _offset, crc32 in pack_idx.iterentries():
                    self.assertEqual(len(sha), 32)
                    self.assertIsNone(crc32)  # v1 doesn't store CRC32

    def test_large_pack_interop(self):
        """Test large pack file interoperability."""
        # Similar large blobs, so that git can delta-compress them
        large_content = b"A" * 10000
        files = [
            (f"large{i}.txt".encode(), large_content + f" variation {i}".encode())
            for i in range(10)
        ]

        # Create repo with dulwich
        repo_path = self._make_repo_dir()
        repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
        try:
            self._commit_blobs(
                repo,
                files,
                b"Test <test@example.com>",
                b"Large files for delta compression test",
            )
        finally:
            # Close before git repacks the object store.
            repo.close()

        # Run git gc to create packs with delta compression
        self._run_git(["gc", "--aggressive"], cwd=repo_path)

        # Verify git created a pack
        self.assertGreater(len(self._pack_dir_entries(repo_path, ".pack")), 0)

        # Re-open with dulwich and verify we can read everything
        repo = self._open_repo(repo_path)
        commit = repo[repo.refs[b"refs/heads/master"]]
        tree = repo[commit.tree]

        # Read all blobs
        for name, expected in files:
            _mode, sha = tree[name]
            self.assertEqual(repo[sha].data, expected)

    def test_mixed_loose_packed_objects(self):
        """Test repositories with both loose and packed objects."""
        # Create repo with git
        repo_path = self._init_git_sha256_repo()

        # Create initial objects that will be packed
        self._git_commit_files(
            repo_path,
            {f"packed{i}.txt": f"Will be packed {i}\n" for i in range(5)},
            "Initial packed objects",
        )
        self._run_git(["gc"], cwd=repo_path)

        # Create more objects that will remain loose
        self._git_commit_files(
            repo_path,
            {f"loose{i}.txt": f"Will stay loose {i}\n" for i in range(5)},
            "Loose objects",
        )

        # Open with dulwich
        repo = self._open_repo(repo_path)

        # The first commit must have ended up in a pack
        self.assertGreater(len(self._pack_dir_entries(repo_path, ".pack")), 0)

        # Resolve through HEAD: the branch created by "git init" depends on
        # init.defaultBranch.
        commit = repo[repo.refs[b"HEAD"]]

        # Walk the whole first-parent history, including the root commit,
        # so that the trees of both commits are read.
        commit_count = 0
        while True:
            commit_count += 1
            tree = repo[commit.tree]
            # Verify we can read the tree
            self.assertGreater(len(tree), 0)
            if not commit.parents:
                break
            commit = repo[commit.parents[0]]

        self.assertEqual(commit_count, 2)  # We made 2 commits total
# Allow running this test module directly as a script.
if __name__ == "__main__":
    from unittest import main

    main()
|