| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184 |
- # test_index.py -- Git index compatibility tests
- # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
- #
- # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
- # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
- # General Public License as published by the Free Software Foundation; version 2.0
- # or (at your option) any later version. You can redistribute it and/or
- # modify it under the terms of either of these two licenses.
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # You should have received a copy of the licenses; if not, see
- # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
- # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
- # License, Version 2.0.
- #
- """Compatibility tests for Git index format v4."""
- import os
- import tempfile
- from dulwich.index import Index, read_index_dict_with_version, write_index_dict
- from dulwich.repo import Repo
- from .utils import CompatTestCase, require_git_version, run_git, run_git_or_fail
- class IndexV4CompatTestCase(CompatTestCase):
- """Tests for Git index format v4 compatibility with C Git."""
- def setUp(self) -> None:
- super().setUp()
- self.tempdir = tempfile.mkdtemp()
- self.addCleanup(self._cleanup)
- def _cleanup(self) -> None:
- import shutil
- shutil.rmtree(self.tempdir, ignore_errors=True)
- def _init_repo_with_manyfiles(self) -> Repo:
- """Initialize a repo with manyFiles feature enabled."""
- # Create repo
- repo_path = os.path.join(self.tempdir, "test_repo")
- os.mkdir(repo_path)
- # Initialize with C git and enable manyFiles
- run_git_or_fail(["init"], cwd=repo_path)
- run_git_or_fail(["config", "feature.manyFiles", "true"], cwd=repo_path)
- # Open with dulwich
- return Repo(repo_path)
- def test_index_v4_path_compression(self) -> None:
- """Test that dulwich can read and write index v4 with path compression."""
- require_git_version((2, 20, 0)) # manyFiles feature requires newer Git
- repo = self._init_repo_with_manyfiles()
- # Create test files with paths that will benefit from compression
- test_files = [
- "dir1/subdir/file1.txt",
- "dir1/subdir/file2.txt",
- "dir1/subdir/file3.txt",
- "dir2/another/path.txt",
- "dir2/another/path2.txt",
- "file_at_root.txt",
- ]
- for path in test_files:
- full_path = os.path.join(repo.path, path)
- os.makedirs(os.path.dirname(full_path), exist_ok=True)
- with open(full_path, "w") as f:
- f.write(f"content of {path}\n")
- # Add files with C git - this should create index v4
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Read the index with dulwich
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, extensions = read_index_dict_with_version(f)
- # Verify it's version 4
- self.assertEqual(version, 4)
- # Verify all files are in the index
- self.assertEqual(len(entries), len(test_files))
- for path in test_files:
- self.assertIn(path.encode(), entries)
- # Write the index back with dulwich
- with open(index_path + ".dulwich", "wb") as f:
- from dulwich.pack import SHA1Writer
- sha1_writer = SHA1Writer(f)
- write_index_dict(sha1_writer, entries, version=4, extensions=extensions)
- sha1_writer.close()
- # Compare with C git - use git ls-files to read both indexes
- output1 = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
- # Replace index with dulwich version
- if os.path.exists(index_path):
- os.remove(index_path)
- os.rename(index_path + ".dulwich", index_path)
- output2 = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
- # Both outputs should be identical
- self.assertEqual(output1, output2)
- def test_index_v4_round_trip(self) -> None:
- """Test round-trip: C Git write -> dulwich read -> dulwich write -> C Git read."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create files that test various edge cases
- test_files = [
- "a", # Very short name
- "abc/def/ghi/jkl/mno/pqr/stu/vwx/yz.txt", # Deep path
- "same_prefix_1.txt",
- "same_prefix_2.txt",
- "same_prefix_3.txt",
- "different/path/here.txt",
- ]
- for path in test_files:
- full_path = os.path.join(repo.path, path)
- os.makedirs(os.path.dirname(full_path), exist_ok=True)
- with open(full_path, "w") as f:
- f.write("test content\n")
- # Stage with C Git
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Get original state
- original_output = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
- # Read with dulwich, write back
- index = Index(os.path.join(repo.path, ".git", "index"))
- index.write()
- # Verify C Git can still read it
- final_output = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
- self.assertEqual(original_output, final_output)
- def test_index_v4_skip_hash(self) -> None:
- """Test index v4 with skipHash extension."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Enable skipHash
- run_git_or_fail(["config", "index.skipHash", "true"], cwd=repo.path)
- # Create a file
- test_file = os.path.join(repo.path, "test.txt")
- with open(test_file, "w") as f:
- f.write("test content\n")
- # Add with C Git
- run_git_or_fail(["add", "test.txt"], cwd=repo.path)
- # Read the index
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertIn(b"test.txt", entries)
- # Verify skipHash is active by checking last 20 bytes
- with open(index_path, "rb") as f:
- f.seek(-20, 2)
- last_bytes = f.read(20)
- self.assertEqual(last_bytes, b"\x00" * 20)
- # Write with dulwich (with skipHash)
- index = Index(index_path, skip_hash=True, version=4)
- index.write()
- # Verify C Git can read it
- output = run_git_or_fail(["ls-files"], cwd=repo.path)
- self.assertEqual(output.strip(), b"test.txt")
- def test_index_v4_with_various_filenames(self) -> None:
- """Test v4 with various filename patterns."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Test various filename patterns that might trigger different behaviors
- test_files = [
- "a", # Single character
- "ab", # Two characters
- "abc", # Three characters
- "file.txt", # Normal filename
- "very_long_filename_to_test_edge_cases.extension", # Long filename
- "dir/file.txt", # With directory
- "dir1/dir2/dir3/file.txt", # Deep directory
- "unicode_café.txt", # Unicode filename
- "with-dashes-and_underscores.txt", # Special chars
- ".hidden", # Hidden file
- "UPPERCASE.TXT", # Uppercase
- ]
- for filename in test_files:
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w", encoding="utf-8") as f:
- f.write(f"Content of {filename}\n")
- # Add all files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Read with dulwich
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertEqual(len(entries), len(test_files))
- # Verify all filenames are correctly stored
- for filename in test_files:
- filename_bytes = filename.encode("utf-8")
- self.assertIn(filename_bytes, entries)
- # Test round-trip: dulwich write -> C Git read
- with open(index_path + ".dulwich", "wb") as f:
- from dulwich.pack import SHA1Writer
- sha1_writer = SHA1Writer(f)
- write_index_dict(sha1_writer, entries, version=4, extensions=extensions)
- sha1_writer.close()
- # Replace index
- if os.path.exists(index_path):
- os.remove(index_path)
- os.rename(index_path + ".dulwich", index_path)
- # Verify C Git can read all files
- # Use -z flag to avoid quoting of non-ASCII filenames
- output = run_git_or_fail(["ls-files", "-z"], cwd=repo.path)
- git_files = set(output.strip(b"\x00").split(b"\x00"))
- expected_files = {f.encode("utf-8") for f in test_files}
- self.assertEqual(git_files, expected_files)
- def test_index_v4_path_compression_scenarios(self) -> None:
- """Test various scenarios where path compression should/shouldn't be used."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create files that should trigger compression
- compression_files = [
- "src/main/java/com/example/Service.java",
- "src/main/java/com/example/Controller.java",
- "src/main/java/com/example/Repository.java",
- "src/test/java/com/example/ServiceTest.java",
- ]
- # Create files that shouldn't benefit much from compression
- no_compression_files = [
- "README.md",
- "LICENSE",
- "docs/guide.txt",
- "config/settings.json",
- ]
- all_files = compression_files + no_compression_files
- for filename in all_files:
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w") as f:
- f.write(f"Content of {filename}\n")
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Read the index
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertEqual(len(entries), len(all_files))
- # Verify all files are present
- for filename in all_files:
- self.assertIn(filename.encode(), entries)
- # Test that dulwich can write a compatible index
- with open(index_path + ".dulwich", "wb") as f:
- from dulwich.pack import SHA1Writer
- sha1_writer = SHA1Writer(f)
- write_index_dict(sha1_writer, entries, version=4, extensions=extensions)
- sha1_writer.close()
- # Verify the written index is the same size (for byte-for-byte compatibility)
- original_size = os.path.getsize(index_path)
- dulwich_size = os.path.getsize(index_path + ".dulwich")
- # For v4 format with proper compression, checksum, and extensions, sizes should match
- self.assertEqual(
- original_size,
- dulwich_size,
- f"Index sizes don't match: Git={original_size}, Dulwich={dulwich_size}",
- )
- def test_index_v4_with_extensions(self) -> None:
- """Test v4 index with various extensions."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create some files
- files = ["file1.txt", "file2.txt", "dir/file3.txt"]
- for filename in files:
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w") as f:
- f.write("content\n")
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Enable untracked cache (creates UNTR extension)
- run_git_or_fail(["config", "core.untrackedCache", "true"], cwd=repo.path)
- run_git_or_fail(["status"], cwd=repo.path) # Trigger cache update
- # Read index with extensions
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertEqual(len(entries), len(files))
- # Test round-trip with extensions present
- index = Index(index_path)
- index.write()
- # Verify C Git can still read it
- output = run_git_or_fail(["ls-files"], cwd=repo.path)
- git_files = set(output.strip().split(b"\n"))
- expected_files = {f.encode() for f in files}
- self.assertEqual(git_files, expected_files)
- def test_index_v4_empty_repository(self) -> None:
- """Test v4 index behavior with empty repository."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create empty commit to get an index file
- run_git_or_fail(["commit", "--allow-empty", "-m", "empty"], cwd=repo.path)
- # Read the empty index
- index_path = os.path.join(repo.path, ".git", "index")
- if os.path.exists(index_path):
- with open(index_path, "rb") as f:
- entries, version, extensions = read_index_dict_with_version(f)
- # Even empty indexes should be readable
- self.assertEqual(len(entries), 0)
- # Test writing empty index
- with open(index_path + ".dulwich", "wb") as f:
- from dulwich.pack import SHA1Writer
- sha1_writer = SHA1Writer(f)
- write_index_dict(
- sha1_writer, entries, version=version, extensions=extensions
- )
- sha1_writer.close()
- def test_index_v4_large_file_count(self) -> None:
- """Test v4 index with many files (stress test)."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create many files with similar paths to test compression
- files = []
- for i in range(50): # Reasonable number for CI
- filename = f"src/component_{i:03d}/index.js"
- files.append(filename)
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w") as f:
- f.write(f"// Component {i}\nexport default {{}};")
- # Add all files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Read index
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertEqual(len(entries), len(files))
- # Test dulwich can handle large indexes
- index = Index(index_path)
- index.write()
- # Verify all files are still present
- output = run_git_or_fail(["ls-files"], cwd=repo.path)
- git_files = output.strip().split(b"\n")
- self.assertEqual(len(git_files), len(files))
- def test_index_v4_concurrent_modifications(self) -> None:
- """Test v4 index behavior with file modifications."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create initial files
- files = ["file1.txt", "file2.txt", "subdir/file3.txt"]
- for filename in files:
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w") as f:
- f.write("initial content\n")
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Modify some files
- with open(os.path.join(repo.path, "file1.txt"), "w") as f:
- f.write("modified content\n")
- # Add new file
- with open(os.path.join(repo.path, "file4.txt"), "w") as f:
- f.write("new file\n")
- run_git_or_fail(["add", "file4.txt"], cwd=repo.path)
- # Test dulwich can read the updated index
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertEqual(len(entries), 4) # 3 original + 1 new
- # Verify specific files
- self.assertIn(b"file1.txt", entries)
- self.assertIn(b"file4.txt", entries)
- # Test round-trip
- index = Index(index_path)
- index.write()
- # Verify state is preserved
- output = run_git_or_fail(["ls-files"], cwd=repo.path)
- self.assertIn(b"file4.txt", output)
- def test_index_v4_with_merge_conflicts(self) -> None:
- """Test v4 index behavior with merge conflicts and staging."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create initial commit
- with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
- f.write("original content\n")
- with open(os.path.join(repo.path, "normal.txt"), "w") as f:
- f.write("normal file\n")
- run_git_or_fail(["add", "."], cwd=repo.path)
- run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
- # Create branch and modify file
- run_git_or_fail(["checkout", "-b", "feature"], cwd=repo.path)
- with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
- f.write("feature content\n")
- run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
- run_git_or_fail(["commit", "-m", "feature change"], cwd=repo.path)
- # Go back to main and make conflicting change
- run_git_or_fail(["checkout", "master"], cwd=repo.path)
- with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
- f.write("master content\n")
- run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
- run_git_or_fail(["commit", "-m", "master change"], cwd=repo.path)
- # Try to merge (should create conflicts)
- run_git(["merge", "feature"], cwd=repo.path)
- # Read the index with conflicts
- index_path = os.path.join(repo.path, ".git", "index")
- if os.path.exists(index_path):
- with open(index_path, "rb") as f:
- _entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- # Test dulwich can handle conflicted index
- index = Index(index_path)
- index.write()
- # Verify Git can still read it
- output = run_git_or_fail(["status", "--porcelain"], cwd=repo.path)
- self.assertIn(b"conflict.txt", output)
- def test_index_v4_boundary_filename_lengths(self) -> None:
- """Test v4 with boundary conditions for filename lengths."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- import sys
- # Test various boundary conditions
- if sys.platform == "win32":
- # Windows has path length limitations
- boundary_files = [
- "", # Empty name (invalid, but test robustness)
- "x", # Single char
- "xx", # Two chars
- "x" * 100, # Long but within Windows limit
- "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p", # Deep nesting but shorter
- "file_with_" + "very_" * 10 + "long_name.txt", # Long name within limit
- ]
- else:
- boundary_files = [
- "", # Empty name (invalid, but test robustness)
- "x", # Single char
- "xx", # Two chars
- "x" * 255, # Max typical filename length
- "x" * 4095, # Max path length in many filesystems
- "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z", # Deep nesting
- "file_with_" + "very_" * 50 + "long_name.txt", # Very long name
- ]
- valid_files = []
- for filename in boundary_files:
- if not filename: # Skip empty filename
- continue
- try:
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w") as f:
- f.write("Content\n")
- valid_files.append(filename)
- except (OSError, ValueError):
- # Skip files that can't be created on this system
- continue
- if valid_files:
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Test reading
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- _entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- # Test round-trip
- index = Index(index_path)
- index.write()
- def test_index_v4_special_characters_and_encoding(self) -> None:
- """Test v4 with special characters and various encodings."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Test files with special characters
- special_files = [
- "file with spaces.txt",
- "file\twith\ttabs.txt",
- "file-with-dashes.txt",
- "file_with_underscores.txt",
- "file.with.dots.txt",
- "UPPERCASE.TXT",
- "MixedCase.TxT",
- "file123numbers.txt",
- "file@#$%special.txt",
- "café.txt", # Unicode
- "файл.txt", # Cyrillic
- "文件.txt", # Chinese
- "🚀rocket.txt", # Emoji
- "file'with'quotes.txt",
- 'file"with"doublequotes.txt',
- "file[with]brackets.txt",
- "file(with)parens.txt",
- "file{with}braces.txt",
- ]
- valid_files = []
- for filename in special_files:
- try:
- filepath = os.path.join(repo.path, filename)
- with open(filepath, "w", encoding="utf-8") as f:
- f.write(f"Content of {filename}\n")
- valid_files.append(filename)
- except (OSError, UnicodeError):
- # Skip files that can't be created on this system
- continue
- if valid_files:
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Test reading
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertGreater(len(entries), 0)
- # Test all valid files are present
- for filename in valid_files:
- filename_bytes = filename.encode("utf-8")
- self.assertIn(filename_bytes, entries)
- def test_index_v4_symlinks_and_special_modes(self) -> None:
- """Test v4 with symlinks and special file modes."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create regular file
- with open(os.path.join(repo.path, "regular.txt"), "w") as f:
- f.write("regular file\n")
- # Create executable file
- exec_path = os.path.join(repo.path, "executable.sh")
- with open(exec_path, "w") as f:
- f.write("#!/bin/bash\necho hello\n")
- os.chmod(exec_path, 0o755)
- # Create symlink (if supported)
- try:
- os.symlink("regular.txt", os.path.join(repo.path, "symlink.txt"))
- has_symlink = True
- except (OSError, NotImplementedError):
- has_symlink = False
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Test reading
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- # Verify files with different modes
- self.assertIn(b"regular.txt", entries)
- self.assertIn(b"executable.sh", entries)
- if has_symlink:
- self.assertIn(b"symlink.txt", entries)
- # Test round-trip preserves modes
- index = Index(index_path)
- index.write()
- # Verify Git can read it
- output = run_git_or_fail(["ls-files", "-s"], cwd=repo.path)
- self.assertIn(b"regular.txt", output)
- self.assertIn(b"executable.sh", output)
- def test_index_v4_alternating_compression_patterns(self) -> None:
- """Test v4 with files that alternate between compressed/uncompressed."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create files that should create alternating compression patterns
- files = [
- # These should be uncompressed (no common prefix)
- "a.txt",
- "b.txt",
- "c.txt",
- # These should be compressed (common prefix)
- "common/path/file1.txt",
- "common/path/file2.txt",
- "common/path/file3.txt",
- # Back to uncompressed (different pattern)
- "different/structure/x.txt",
- "another/structure/y.txt",
- # More compression opportunities
- "src/main/Component1.java",
- "src/main/Component2.java",
- "src/test/Test1.java",
- "src/test/Test2.java",
- ]
- for filename in files:
- filepath = os.path.join(repo.path, filename)
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
- with open(filepath, "w") as f:
- f.write(f"Content of {filename}\n")
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Test reading
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertEqual(len(entries), len(files))
- # Verify all files are present
- for filename in files:
- self.assertIn(filename.encode(), entries)
- # Test round-trip
- index = Index(index_path)
- index.write()
- def test_index_v4_git_submodules(self) -> None:
- """Test v4 index with Git submodules."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create a submodule directory structure
- submodule_dir = os.path.join(repo.path, "submodule")
- os.makedirs(submodule_dir)
- # Initialize a separate repo for the submodule
- run_git_or_fail(["init"], cwd=submodule_dir)
- with open(os.path.join(submodule_dir, "sub.txt"), "w") as f:
- f.write("submodule content\n")
- run_git_or_fail(["add", "sub.txt"], cwd=submodule_dir)
- run_git_or_fail(["commit", "-m", "submodule commit"], cwd=submodule_dir)
- # Add some regular files to main repo
- with open(os.path.join(repo.path, "main.txt"), "w") as f:
- f.write("main repo content\n")
- run_git_or_fail(["add", "main.txt"], cwd=repo.path)
- # Add submodule (this creates a gitlink entry)
- run_git_or_fail(["submodule", "add", "./submodule", "submodule"], cwd=repo.path)
- # Test reading index with submodule
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- # Should have main.txt, .gitmodules, and submodule gitlink
- self.assertIn(b"main.txt", entries)
- self.assertIn(b".gitmodules", entries)
- self.assertIn(b"submodule", entries)
- # Test round-trip
- index = Index(index_path)
- index.write()
- def test_index_v4_partial_staging(self) -> None:
- """Test v4 with partial file staging (git add -p simulation)."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create initial file
- filepath = os.path.join(repo.path, "partial.txt")
- with open(filepath, "w") as f:
- f.write("line1\nline2\nline3\n")
- run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
- run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
- # Modify the file
- with open(filepath, "w") as f:
- f.write("line1 modified\nline2\nline3 modified\n")
- # Stage only part of the changes (simulate git add -p)
- # This creates an interesting index state
- run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
- # Make more changes
- with open(filepath, "w") as f:
- f.write("line1 modified\nline2 modified\nline3 modified\n")
- # Now we have staged and unstaged changes
- # Test reading this complex index state
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- self.assertIn(b"partial.txt", entries)
- # Test round-trip
- index = Index(index_path)
- index.write()
- def test_index_v4_with_gitattributes_and_ignore(self) -> None:
- """Test v4 with .gitattributes and .gitignore files."""
- require_git_version((2, 20, 0))
- repo = self._init_repo_with_manyfiles()
- # Create .gitignore
- with open(os.path.join(repo.path, ".gitignore"), "w") as f:
- f.write("*.tmp\n*.log\nbuild/\n")
- # Create .gitattributes
- with open(os.path.join(repo.path, ".gitattributes"), "w") as f:
- f.write("*.txt text\n*.bin binary\n")
- # Create various files
- files = [
- "regular.txt",
- "binary.bin",
- "script.sh",
- "config.json",
- "README.md",
- ]
- for filename in files:
- filepath = os.path.join(repo.path, filename)
- with open(filepath, "w") as f:
- f.write(f"Content of {filename}\n")
- # Create some files that should be ignored
- with open(os.path.join(repo.path, "temp.tmp"), "w") as f:
- f.write("temporary file\n")
- # Add files
- run_git_or_fail(["add", "."], cwd=repo.path)
- # Test reading
- index_path = os.path.join(repo.path, ".git", "index")
- with open(index_path, "rb") as f:
- entries, version, _extensions = read_index_dict_with_version(f)
- self.assertEqual(version, 4)
- # Should have .gitignore, .gitattributes, and regular files
- self.assertIn(b".gitignore", entries)
- self.assertIn(b".gitattributes", entries)
- for filename in files:
- self.assertIn(filename.encode(), entries)
- # Should NOT have ignored files
- self.assertNotIn(b"temp.tmp", entries)
def test_index_v4_stress_test_many_entries(self) -> None:
    """Check that a v4 index with 200 files in nested directories reads back intact.

    Files are generated across seven locations (six nested directories
    plus the repository root), staged with C Git in batches, and the
    resulting index is then parsed with Dulwich.
    """
    require_git_version((2, 20, 0))
    repo = self._init_repo_with_manyfiles()
    root = repo.path

    # Lay out the directory tree up front.
    for relative_dir in (
        "src/main/java/com/example",
        "src/main/resources",
        "src/test/java/com/example",
        "docs/api",
        "docs/user",
        "scripts/build",
        "config/env",
    ):
        os.makedirs(os.path.join(root, relative_dir), exist_ok=True)

    # One name template per residue of the file number modulo 7.
    name_templates = (
        "src/main/java/com/example/Service{i}.java",
        "src/test/java/com/example/Test{i}.java",
        "docs/api/page{i}.md",
        "config/env/config{i}.properties",
        "scripts/build/script{i}.sh",
        "src/main/resources/resource{i}.txt",
        "file{i}.txt",
    )
    total_files = 200  # Reasonable for CI
    files = [name_templates[i % 7].format(i=i) for i in range(total_files)]

    # Materialise every file on disk.
    for i, relative_name in enumerate(files):
        target = os.path.join(root, relative_name)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "w") as out:
            out.write(f"// File {i}\ncontent here\n")

    # Stage in chunks so the git command line never grows too long.
    batch_size = 50
    for start in range(0, len(files), batch_size):
        run_git_or_fail(["add", *files[start : start + batch_size]], cwd=root)

    # Parse the (large) index that C Git produced.
    with open(os.path.join(root, ".git", "index"), "rb") as index_file:
        entries, version, _extensions = read_index_dict_with_version(index_file)

    self.assertEqual(version, 4)
    self.assertEqual(len(entries), len(files))

    # Spot-check every 20th generated path.
    for sampled_name in files[::20]:
        self.assertIn(sampled_name.encode(), entries)
def test_index_v4_rename_detection_scenario(self) -> None:
    """Check that a v4 index reflects staged renames of committed files.

    Three files are committed, two of them are renamed on disk, and the
    renames are staged with ``git add -A``. The index read by Dulwich
    must then list the new names and no longer list the old ones.
    """
    require_git_version((2, 20, 0))
    repo = self._init_repo_with_manyfiles()
    workdir = repo.path

    # Commit the starting set of files.
    for filename in ("old1.txt", "old2.txt", "unchanged.txt"):
        with open(os.path.join(workdir, filename), "w") as out:
            out.write(f"Content of {filename}\n")
    run_git_or_fail(["add", "."], cwd=workdir)
    run_git_or_fail(["commit", "-m", "initial"], cwd=workdir)

    # Rename two of them in the working tree, then stage everything.
    renames = (("old1.txt", "new1.txt"), ("old2.txt", "new2.txt"))
    for before, after in renames:
        os.rename(os.path.join(workdir, before), os.path.join(workdir, after))
    run_git_or_fail(["add", "-A"], cwd=workdir)

    # Parse the index that now contains the staged renames.
    with open(os.path.join(workdir, ".git", "index"), "rb") as index_file:
        entries, version, _extensions = read_index_dict_with_version(index_file)

    self.assertEqual(version, 4)

    # New names (and the untouched file) are present; old names are gone.
    for present in (b"new1.txt", b"new2.txt", b"unchanged.txt"):
        self.assertIn(present, entries)
    for absent in (b"old1.txt", b"old2.txt"):
        self.assertNotIn(absent, entries)
class SparseIndexCompatTestCase(CompatTestCase):
    """Tests for Git sparse index compatibility with C Git.

    Each test builds a throwaway repository under ``self.tempdir`` and
    exchanges index files between C Git and Dulwich in one direction or
    the other.
    """

    def setUp(self) -> None:
        """Create a per-test temporary directory and schedule its removal."""
        super().setUp()
        self.tempdir = tempfile.mkdtemp()
        self.addCleanup(self._cleanup)

    def _cleanup(self) -> None:
        """Remove the temporary directory, ignoring any removal errors."""
        import shutil

        shutil.rmtree(self.tempdir, ignore_errors=True)

    def test_read_sparse_index_created_by_git(self) -> None:
        """Test that Dulwich can read a sparse index created by Git."""
        # Sparse index requires Git 2.37+
        require_git_version((2, 37, 0))

        repo_path = os.path.join(self.tempdir, "test_repo")
        os.mkdir(repo_path)

        # Initialize repo
        run_git_or_fail(["init"], cwd=repo_path)
        run_git_or_fail(["config", "core.sparseCheckout", "true"], cwd=repo_path)
        run_git_or_fail(
            ["config", "index.sparse", "true"], cwd=repo_path
        )  # Enable sparse index

        # Create directory structure
        os.makedirs(os.path.join(repo_path, "included"), exist_ok=True)
        os.makedirs(os.path.join(repo_path, "excluded", "subdir"), exist_ok=True)

        # Create files
        with open(os.path.join(repo_path, "included", "file1.txt"), "w") as f:
            f.write("included file 1\n")
        with open(os.path.join(repo_path, "included", "file2.txt"), "w") as f:
            f.write("included file 2\n")
        with open(os.path.join(repo_path, "excluded", "file3.txt"), "w") as f:
            f.write("excluded file 3\n")
        with open(os.path.join(repo_path, "excluded", "subdir", "file4.txt"), "w") as f:
            f.write("excluded file 4\n")
        with open(os.path.join(repo_path, "root.txt"), "w") as f:
            f.write("root file\n")

        # Add and commit all files
        run_git_or_fail(["add", "."], cwd=repo_path)
        run_git_or_fail(["commit", "-m", "initial"], cwd=repo_path)

        # Set up sparse-checkout to include only "included/" and root files
        sparse_checkout_path = os.path.join(
            repo_path, ".git", "info", "sparse-checkout"
        )
        os.makedirs(os.path.dirname(sparse_checkout_path), exist_ok=True)
        with open(sparse_checkout_path, "w") as f:
            f.write("/*\n")  # Include top-level files
            f.write("!/*/\n")  # Exclude all directories
            f.write("/included/\n")  # Re-include "included" directory

        # Run sparse-checkout reapply to create sparse index
        run_git_or_fail(["sparse-checkout", "reapply"], cwd=repo_path)

        # Read the index with Dulwich
        from dulwich.index import Index

        index_path = os.path.join(repo_path, ".git", "index")
        idx = Index(index_path)

        # Git may or may not create a sparse index depending on the repo state
        # The key test is that Dulwich can read it without errors
        self.assertIsNotNone(idx)

        # If it is sparse, verify we can handle sparse directory entries
        if idx.is_sparse():
            for path, entry in idx.items():
                if entry.is_sparse_dir(path):
                    # Verify sparse dirs are for excluded paths
                    self.assertTrue(path.startswith(b"excluded"))
        else:
            # If not sparse, we should still be able to read all entries
            self.assertGreater(len(idx), 0)

    def test_write_sparse_index_readable_by_git(self) -> None:
        """Test that Git can read a sparse index created by Dulwich."""
        # Sparse index requires Git 2.37+
        require_git_version((2, 37, 0))

        from dulwich.index import Index, IndexEntry, SparseDirExtension
        from dulwich.objects import Blob, Commit, Tree
        from dulwich.repo import Repo

        repo_path = os.path.join(self.tempdir, "test_repo")
        os.mkdir(repo_path)

        # Initialize repo with Git
        run_git_or_fail(["init"], cwd=repo_path)
        run_git_or_fail(["config", "index.sparse", "true"], cwd=repo_path)

        # Create a tree structure using Dulwich
        repo = Repo(repo_path)
        # Release the repository's open handles at teardown. Cleanups run
        # last-in-first-out, so this happens before the temp dir is removed.
        self.addCleanup(repo.close)

        # Create blobs
        blob1 = Blob()
        blob1.data = b"file1 content"
        repo.object_store.add_object(blob1)

        blob2 = Blob()
        blob2.data = b"file2 content"
        repo.object_store.add_object(blob2)

        # Create subtree for sparse directory
        subtree = Tree()
        subtree[b"file1.txt"] = (0o100644, blob1.id)
        subtree[b"file2.txt"] = (0o100644, blob2.id)
        repo.object_store.add_object(subtree)

        # Create root tree
        tree = Tree()
        tree[b"sparse_dir"] = (0o040000, subtree.id)
        repo.object_store.add_object(tree)

        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test <test@example.com>"
        commit.author_time = commit.commit_time = 1234567890
        commit.author_timezone = commit.commit_timezone = 0
        commit.encoding = b"UTF-8"
        commit.message = b"Test commit"
        repo.object_store.add_object(commit)
        repo.refs[b"refs/heads/master"] = commit.id

        # Create sparse index with Dulwich
        index = Index(os.path.join(repo_path, ".git", "index"), read=False)

        # Add sparse directory entry
        sparse_entry = IndexEntry(
            ctime=0,
            mtime=0,
            dev=0,
            ino=0,
            mode=0o040000,
            uid=0,
            gid=0,
            size=0,
            sha=subtree.id,
            extended_flags=0x4000,  # SKIP_WORKTREE
        )
        index[b"sparse_dir/"] = sparse_entry
        index._extensions.append(SparseDirExtension())
        index.write()

        # Verify Git can read the index
        output = run_git_or_fail(["ls-files", "--debug"], cwd=repo_path)
        self.assertIn(b"sparse_dir/", output)

        # Verify Git recognizes it as sparse
        output = run_git_or_fail(["status"], cwd=repo_path)
        # Should not crash
        self.assertIsNotNone(output)

    def test_expand_sparse_index_matches_git(self) -> None:
        """Test that expanding a sparse index matches Git's behavior."""
        # Sparse index requires Git 2.37+
        require_git_version((2, 37, 0))

        repo_path = os.path.join(self.tempdir, "test_repo")
        os.mkdir(repo_path)

        # Initialize repo
        run_git_or_fail(["init"], cwd=repo_path)
        run_git_or_fail(["config", "core.sparseCheckout", "true"], cwd=repo_path)
        run_git_or_fail(["config", "index.sparse", "true"], cwd=repo_path)

        # Create directory structure
        os.makedirs(os.path.join(repo_path, "dir1", "subdir"), exist_ok=True)
        with open(os.path.join(repo_path, "dir1", "file1.txt"), "w") as f:
            f.write("file1\n")
        with open(os.path.join(repo_path, "dir1", "subdir", "file2.txt"), "w") as f:
            f.write("file2\n")

        # Commit files
        run_git_or_fail(["add", "."], cwd=repo_path)
        run_git_or_fail(["commit", "-m", "initial"], cwd=repo_path)

        # Set up sparse-checkout to exclude dir1
        sparse_checkout_path = os.path.join(
            repo_path, ".git", "info", "sparse-checkout"
        )
        os.makedirs(os.path.dirname(sparse_checkout_path), exist_ok=True)
        with open(sparse_checkout_path, "w") as f:
            f.write("/*\n")
            f.write("!/dir1/\n")

        run_git_or_fail(["sparse-checkout", "reapply"], cwd=repo_path)

        # Read sparse index with Dulwich
        from dulwich.index import Index
        from dulwich.repo import Repo

        index_path = os.path.join(repo_path, ".git", "index")
        idx = Index(index_path)

        if idx.is_sparse():
            # Expand the index
            repo = Repo(repo_path)
            # Release the repository's open handles at teardown, before the
            # temp dir is removed.
            self.addCleanup(repo.close)
            idx.ensure_full_index(repo.object_store)

            # Should no longer be sparse
            self.assertFalse(idx.is_sparse())

            # Write it back
            idx.write()

            # Git should still be able to read it
            output = run_git_or_fail(["status"], cwd=repo_path)
            self.assertIsNotNone(output)
|