test_index.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186
  1. # test_index.py -- Git index compatibility tests
  2. # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for Git index format v4."""
  22. import os
  23. import tempfile
  24. from dulwich.index import Index, read_index_dict_with_version, write_index_dict
  25. from dulwich.repo import Repo
  26. from .utils import CompatTestCase, require_git_version, run_git, run_git_or_fail
  27. class IndexV4CompatTestCase(CompatTestCase):
  28. """Tests for Git index format v4 compatibility with C Git."""
  def setUp(self) -> None:
      """Run the base fixture setup and create a scratch directory."""
      super().setUp()
      scratch_dir = tempfile.mkdtemp()
      self.tempdir = scratch_dir
      # Removal of the scratch directory is handled by the cleanup hook.
      self.addCleanup(self._cleanup)
  def _cleanup(self) -> None:
      """Delete the scratch directory tree, ignoring removal errors."""
      from shutil import rmtree

      rmtree(self.tempdir, ignore_errors=True)
  def _init_repo_with_manyfiles(self) -> Repo:
      """Initialize a repo with the manyFiles feature enabled.

      The repository is created by C Git in ``test_repo`` under
      ``self.tempdir`` and then opened with dulwich.  The returned ``Repo``
      is registered for closing at test teardown so its resources are not
      leaked.

      Returns:
        The dulwich ``Repo`` opened on the new repository.
      """
      # Create repo
      repo_path = os.path.join(self.tempdir, "test_repo")
      os.mkdir(repo_path)
      # Initialize with C git and enable manyFiles
      run_git_or_fail(["init"], cwd=repo_path)
      run_git_or_fail(["config", "feature.manyFiles", "true"], cwd=repo_path)
      # Open with dulwich.  Cleanups run last-in-first-out, so the repo is
      # closed before the cleanup registered in setUp removes the directory.
      repo = Repo(repo_path)
      self.addCleanup(repo.close)
      return repo
  def test_index_v4_path_compression(self) -> None:
      """Check that dulwich reads and rewrites a path-compressed v4 index."""
      require_git_version((2, 20, 0))  # manyFiles feature requires newer Git
      repo = self._init_repo_with_manyfiles()

      # Paths with shared prefixes, so compression has something to work on.
      relative_paths = [
          "dir1/subdir/file1.txt",
          "dir1/subdir/file2.txt",
          "dir1/subdir/file3.txt",
          "dir2/another/path.txt",
          "dir2/another/path2.txt",
          "file_at_root.txt",
      ]
      for rel in relative_paths:
          target = os.path.join(repo.path, rel)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write(f"content of {rel}\n")

      # Staging through C git is expected to produce an index v4.
      run_git_or_fail(["add", "."], cwd=repo.path)

      index_path = os.path.join(repo.path, ".git", "index")
      dulwich_copy = index_path + ".dulwich"

      # Parse git's index with dulwich.
      with open(index_path, "rb") as index_file:
          entries, version, extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(relative_paths))
      for rel in relative_paths:
          self.assertIn(rel.encode(), entries)

      # Serialise the same entries with dulwich next to the original.
      from dulwich.pack import SHA1Writer

      with open(dulwich_copy, "wb") as raw:
          writer = SHA1Writer(raw)
          write_index_dict(writer, entries, version=4, extensions=extensions)
          writer.close()

      # Listing as C git sees its own index.
      git_view = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)

      # Swap the dulwich-written file in place of git's index.
      if os.path.exists(index_path):
          os.remove(index_path)
      os.rename(dulwich_copy, index_path)

      # Listing as C git sees the dulwich-written index; must be identical.
      dulwich_view = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
      self.assertEqual(git_view, dulwich_view)
  def test_index_v4_round_trip(self) -> None:
      """Round trip: C Git writes, dulwich reads and rewrites, C Git reads."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Paths chosen to cover several edge cases.
      edge_case_paths = (
          "a",  # Very short name
          "abc/def/ghi/jkl/mno/pqr/stu/vwx/yz.txt",  # Deep path
          "same_prefix_1.txt",
          "same_prefix_2.txt",
          "same_prefix_3.txt",
          "different/path/here.txt",
      )
      for rel in edge_case_paths:
          target = os.path.join(repo.path, rel)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write("test content\n")

      def staged_listing():
          # What C Git reports for the current index.
          return run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)

      # Stage with C Git and record the starting state.
      run_git_or_fail(["add", "."], cwd=repo.path)
      before = staged_listing()

      # Let dulwich load the index and write it straight back.
      dulwich_index = Index(os.path.join(repo.path, ".git", "index"))
      dulwich_index.write()

      # C Git must report exactly the same entries afterwards.
      after = staged_listing()
      self.assertEqual(before, after)
  def test_index_v4_skip_hash(self) -> None:
      """Test index v4 with the index.skipHash setting enabled.

      Checks that the index written by C Git ends in 20 zero bytes instead
      of a checksum, that dulwich can read it, and that C Git can read the
      index after dulwich rewrites it with ``skip_hash=True``.
      """
      # index.skipHash was introduced in Git 2.40.  Older versions ignore the
      # setting and write a real trailing checksum, which would make the
      # zero-trailer assertion below fail, so require a Git that honours it.
      require_git_version((2, 40, 0))
      repo = self._init_repo_with_manyfiles()

      # Enable skipHash
      run_git_or_fail(["config", "index.skipHash", "true"], cwd=repo.path)

      # Create a file
      test_file = os.path.join(repo.path, "test.txt")
      with open(test_file, "w") as f:
          f.write("test content\n")

      # Add with C Git
      run_git_or_fail(["add", "test.txt"], cwd=repo.path)

      # Read the index
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as f:
          entries, version, _extensions = read_index_dict_with_version(f)

      self.assertEqual(version, 4)
      self.assertIn(b"test.txt", entries)

      # Verify skipHash is active by checking the last 20 bytes of the file
      with open(index_path, "rb") as f:
          f.seek(-20, os.SEEK_END)
          last_bytes = f.read(20)
      self.assertEqual(last_bytes, b"\x00" * 20)

      # Write with dulwich (with skipHash)
      index = Index(index_path, skip_hash=True, version=4)
      index.write()

      # Verify C Git can read it
      output = run_git_or_fail(["ls-files"], cwd=repo.path)
      self.assertEqual(output.strip(), b"test.txt")
  def test_index_v4_with_various_filenames(self) -> None:
      """Exercise a v4 index holding a mix of filename patterns."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Filename patterns that might trigger different behaviors.
      names = [
          "a",  # Single character
          "ab",  # Two characters
          "abc",  # Three characters
          "file.txt",  # Normal filename
          "very_long_filename_to_test_edge_cases.extension",  # Long filename
          "dir/file.txt",  # With directory
          "dir1/dir2/dir3/file.txt",  # Deep directory
          "unicode_café.txt",  # Unicode filename
          "with-dashes-and_underscores.txt",  # Special chars
          ".hidden",  # Hidden file
          "UPPERCASE.TXT",  # Uppercase
      ]
      for name in names:
          target = os.path.join(repo.path, name)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w", encoding="utf-8") as out:
              out.write(f"Content of {name}\n")

      # Stage everything with C Git.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the resulting index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      dulwich_copy = index_path + ".dulwich"
      with open(index_path, "rb") as index_file:
          entries, version, extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(names))

      # Every filename must be stored under its UTF-8 encoding.
      encoded_names = [name.encode("utf-8") for name in names]
      for encoded in encoded_names:
          self.assertIn(encoded, entries)

      # Round trip: dulwich writes, C Git reads.
      from dulwich.pack import SHA1Writer

      with open(dulwich_copy, "wb") as raw:
          writer = SHA1Writer(raw)
          write_index_dict(writer, entries, version=4, extensions=extensions)
          writer.close()

      # Put the dulwich-written file in place of git's index.
      if os.path.exists(index_path):
          os.remove(index_path)
      os.rename(dulwich_copy, index_path)

      # -z avoids quoting of non-ASCII filenames in the listing.
      listing = run_git_or_fail(["ls-files", "-z"], cwd=repo.path)
      listed = set(listing.strip(b"\x00").split(b"\x00"))
      self.assertEqual(listed, set(encoded_names))
  def test_index_v4_path_compression_scenarios(self) -> None:
      """Cover paths where compression should and should not pay off."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Long shared prefixes: these should trigger compression.
      shared_prefix_paths = [
          "src/main/java/com/example/Service.java",
          "src/main/java/com/example/Controller.java",
          "src/main/java/com/example/Repository.java",
          "src/test/java/com/example/ServiceTest.java",
      ]
      # Little in common: these shouldn't benefit much from compression.
      unrelated_paths = [
          "README.md",
          "LICENSE",
          "docs/guide.txt",
          "config/settings.json",
      ]
      every_path = [*shared_prefix_paths, *unrelated_paths]

      for rel in every_path:
          target = os.path.join(repo.path, rel)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write(f"Content of {rel}\n")

      # Stage with C Git.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      dulwich_copy = index_path + ".dulwich"
      with open(index_path, "rb") as index_file:
          entries, version, extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(every_path))
      for rel in every_path:
          self.assertIn(rel.encode(), entries)

      # Have dulwich write its own copy of the index.
      from dulwich.pack import SHA1Writer

      with open(dulwich_copy, "wb") as raw:
          writer = SHA1Writer(raw)
          write_index_dict(writer, entries, version=4, extensions=extensions)
          writer.close()

      # Equal file sizes are used as the byte-for-byte compatibility check.
      original_size = os.path.getsize(index_path)
      dulwich_size = os.path.getsize(dulwich_copy)
      mismatch = (
          f"Index sizes don't match: Git={original_size}, Dulwich={dulwich_size}"
      )
      self.assertEqual(original_size, dulwich_size, mismatch)
  def test_index_v4_with_extensions(self) -> None:
      """Round-trip a v4 index after enabling the untracked cache."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      tracked = ("file1.txt", "file2.txt", "dir/file3.txt")
      for name in tracked:
          target = os.path.join(repo.path, name)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write("content\n")

      # Stage the files, then enable the untracked cache (UNTR extension)
      # and run status to trigger a cache update.
      for git_args in (
          ["add", "."],
          ["config", "core.untrackedCache", "true"],
          ["status"],
      ):
          run_git_or_fail(git_args, cwd=repo.path)

      # Read the index, extensions included.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(tracked))

      # Round trip through dulwich with the extensions present.
      Index(index_path).write()

      # C Git must still list exactly the tracked files.
      listing = run_git_or_fail(["ls-files"], cwd=repo.path)
      listed = set(listing.strip().split(b"\n"))
      self.assertEqual(listed, {name.encode() for name in tracked})
  def test_index_v4_empty_repository(self) -> None:
      """Read and rewrite the index of a repository with no tracked files."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # An empty commit is made so that an index file may get created.
      run_git_or_fail(["commit", "--allow-empty", "-m", "empty"], cwd=repo.path)

      index_path = os.path.join(repo.path, ".git", "index")
      if not os.path.exists(index_path):
          # Nothing to read or rewrite when git produced no index file.
          return

      with open(index_path, "rb") as index_file:
          entries, version, extensions = read_index_dict_with_version(index_file)

      # Even empty indexes should be readable.
      self.assertEqual(len(entries), 0)

      # Write the empty index back out with dulwich.
      from dulwich.pack import SHA1Writer

      with open(index_path + ".dulwich", "wb") as raw:
          writer = SHA1Writer(raw)
          write_index_dict(writer, entries, version=version, extensions=extensions)
          writer.close()
  def test_index_v4_large_file_count(self) -> None:
      """Stress a v4 index with many similarly named files."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Similar paths to exercise compression; 50 keeps CI runs reasonable.
      names = [f"src/component_{i:03d}/index.js" for i in range(50)]
      for number, name in enumerate(names):
          target = os.path.join(repo.path, name)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write(f"// Component {number}\nexport default {{}};")

      # Stage everything with C Git.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(names))

      # Load and rewrite the index through dulwich.
      Index(index_path).write()

      # C Git must still list the same number of files.
      listing = run_git_or_fail(["ls-files"], cwd=repo.path)
      listed = listing.strip().split(b"\n")
      self.assertEqual(len(listed), len(names))
  def test_index_v4_concurrent_modifications(self) -> None:
      """Read and round-trip a v4 index after files are modified and added."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      def in_repo(name):
          # Absolute path of a file inside the working tree.
          return os.path.join(repo.path, name)

      # Initial set of files, staged with C Git.
      for name in ("file1.txt", "file2.txt", "subdir/file3.txt"):
          target = in_repo(name)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write("initial content\n")
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Change one tracked file in the working tree (not re-staged).
      with open(in_repo("file1.txt"), "w") as out:
          out.write("modified content\n")

      # Create and stage one additional file.
      with open(in_repo("file4.txt"), "w") as out:
          out.write("new file\n")
      run_git_or_fail(["add", "file4.txt"], cwd=repo.path)

      # dulwich must be able to read the updated index.
      index_path = in_repo(os.path.join(".git", "index"))
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), 4)  # 3 original + 1 new
      for expected in (b"file1.txt", b"file4.txt"):
          self.assertIn(expected, entries)

      # Round trip through dulwich, then confirm git still sees the new file.
      Index(index_path).write()
      listing = run_git_or_fail(["ls-files"], cwd=repo.path)
      self.assertIn(b"file4.txt", listing)
  def test_index_v4_with_merge_conflicts(self) -> None:
      """Test v4 index behavior with merge conflicts and staging.

      Two branches change ``conflict.txt`` differently and are then merged.
      The resulting index is read by dulwich, rewritten, and checked with
      ``git status``.
      """
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Create initial commit
      with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
          f.write("original content\n")
      with open(os.path.join(repo.path, "normal.txt"), "w") as f:
          f.write("normal file\n")
      run_git_or_fail(["add", "."], cwd=repo.path)
      run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)

      # Remember the initial branch name instead of assuming "master": the
      # name depends on init.defaultBranch in the Git configuration in use.
      base_branch = (
          run_git_or_fail(["symbolic-ref", "--short", "HEAD"], cwd=repo.path)
          .strip()
          .decode()
      )

      # Create branch and modify file
      run_git_or_fail(["checkout", "-b", "feature"], cwd=repo.path)
      with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
          f.write("feature content\n")
      run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
      run_git_or_fail(["commit", "-m", "feature change"], cwd=repo.path)

      # Go back to the initial branch and make a conflicting change
      run_git_or_fail(["checkout", base_branch], cwd=repo.path)
      with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
          f.write("master content\n")
      run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
      run_git_or_fail(["commit", "-m", "master change"], cwd=repo.path)

      # Try to merge (should create conflicts); run_git is used rather than
      # run_git_or_fail so the result of the merge command is not checked.
      run_git(["merge", "feature"], cwd=repo.path)

      # Read the index with conflicts
      index_path = os.path.join(repo.path, ".git", "index")
      if os.path.exists(index_path):
          with open(index_path, "rb") as f:
              _entries, version, _extensions = read_index_dict_with_version(f)

          self.assertEqual(version, 4)

          # Test dulwich can handle conflicted index
          index = Index(index_path)
          index.write()

          # Verify Git can still read it
          output = run_git_or_fail(["status", "--porcelain"], cwd=repo.path)
          self.assertIn(b"conflict.txt", output)
  def test_index_v4_boundary_filename_lengths(self) -> None:
      """Exercise a v4 index with filenames at boundary lengths."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      import sys

      # Candidates shared by every platform.
      candidates = [
          "",  # Empty name (invalid, but test robustness)
          "x",  # Single char
          "xx",  # Two chars
      ]
      if sys.platform == "win32":
          # Windows has path length limitations
          candidates += [
              "x" * 100,  # Long but within Windows limit
              "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p",  # Deep nesting but shorter
              "file_with_" + "very_" * 10 + "long_name.txt",  # Long name within limit
          ]
      else:
          candidates += [
              "x" * 255,  # Max typical filename length
              "x" * 4095,  # Max path length in many filesystems
              "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z",  # Deep nesting
              "file_with_" + "very_" * 50 + "long_name.txt",  # Very long name
          ]

      created = []
      for name in candidates:
          if name == "":
              # The empty filename is never attempted.
              continue
          try:
              target = os.path.join(repo.path, name)
              os.makedirs(os.path.dirname(target), exist_ok=True)
              with open(target, "w") as out:
                  out.write("Content\n")
          except (OSError, ValueError):
              # Skip files that can't be created on this system
              continue
          else:
              created.append(name)

      if not created:
          return

      # Stage whatever could be created.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # dulwich must be able to read the index.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          _entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)

      # Round trip through dulwich.
      Index(index_path).write()
  def test_index_v4_special_characters_and_encoding(self) -> None:
      """Read a v4 index containing special-character and non-ASCII names."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Filenames with special characters.
      candidates = (
          "file with spaces.txt",
          "file\twith\ttabs.txt",
          "file-with-dashes.txt",
          "file_with_underscores.txt",
          "file.with.dots.txt",
          "UPPERCASE.TXT",
          "MixedCase.TxT",
          "file123numbers.txt",
          "file@#$%special.txt",
          "café.txt",  # Unicode
          "файл.txt",  # Cyrillic
          "文件.txt",  # Chinese
          "🚀rocket.txt",  # Emoji
          "file'with'quotes.txt",
          'file"with"doublequotes.txt',
          "file[with]brackets.txt",
          "file(with)parens.txt",
          "file{with}braces.txt",
      )

      created = []
      for name in candidates:
          try:
              target = os.path.join(repo.path, name)
              with open(target, "w", encoding="utf-8") as out:
                  out.write(f"Content of {name}\n")
          except (OSError, UnicodeError):
              # Skip files that can't be created on this system
              continue
          else:
              created.append(name)

      if not created:
          return

      # Stage whatever could be created.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertGreater(len(entries), 0)

      # Each created file must appear under its UTF-8 encoded name.
      for name in created:
          self.assertIn(name.encode("utf-8"), entries)
  def test_index_v4_symlinks_and_special_modes(self) -> None:
      """Round-trip a v4 index with regular, executable and symlink entries."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Plain file.
      with open(os.path.join(repo.path, "regular.txt"), "w") as out:
          out.write("regular file\n")

      # File with the executable bits set.
      script_path = os.path.join(repo.path, "executable.sh")
      with open(script_path, "w") as out:
          out.write("#!/bin/bash\necho hello\n")
      os.chmod(script_path, 0o755)

      # Symlink, if it can be created here.
      try:
          os.symlink("regular.txt", os.path.join(repo.path, "symlink.txt"))
      except (OSError, NotImplementedError):
          has_symlink = False
      else:
          has_symlink = True

      # Stage everything with C Git.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)

      # Entries for each kind of file must be present.
      self.assertIn(b"regular.txt", entries)
      self.assertIn(b"executable.sh", entries)
      if has_symlink:
          self.assertIn(b"symlink.txt", entries)

      # Round trip through dulwich.
      Index(index_path).write()

      # C Git must still list the files.
      listing = run_git_or_fail(["ls-files", "-s"], cwd=repo.path)
      for expected in (b"regular.txt", b"executable.sh"):
          self.assertIn(expected, listing)
  def test_index_v4_alternating_compression_patterns(self) -> None:
      """Read and round-trip paths that mix shared and unshared prefixes."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Groups intended to alternate between compressible and not.
      no_common_prefix = ["a.txt", "b.txt", "c.txt"]
      common_prefix = [
          "common/path/file1.txt",
          "common/path/file2.txt",
          "common/path/file3.txt",
      ]
      different_pattern = [
          "different/structure/x.txt",
          "another/structure/y.txt",
      ]
      more_compressible = [
          "src/main/Component1.java",
          "src/main/Component2.java",
          "src/test/Test1.java",
          "src/test/Test2.java",
      ]
      paths = no_common_prefix + common_prefix + different_pattern + more_compressible

      for rel in paths:
          target = os.path.join(repo.path, rel)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as out:
              out.write(f"Content of {rel}\n")

      # Stage with C Git.
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(paths))
      for rel in paths:
          self.assertIn(rel.encode(), entries)

      # Round trip through dulwich.
      Index(index_path).write()
  def test_index_v4_git_submodules(self) -> None:
      """Read and round-trip a v4 index that contains a submodule entry."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      # Build a separate repository inside the main one to act as submodule.
      sub_path = os.path.join(repo.path, "submodule")
      os.makedirs(sub_path)
      run_git_or_fail(["init"], cwd=sub_path)
      with open(os.path.join(sub_path, "sub.txt"), "w") as out:
          out.write("submodule content\n")
      for git_args in (["add", "sub.txt"], ["commit", "-m", "submodule commit"]):
          run_git_or_fail(git_args, cwd=sub_path)

      # A regular file in the main repository.
      with open(os.path.join(repo.path, "main.txt"), "w") as out:
          out.write("main repo content\n")
      run_git_or_fail(["add", "main.txt"], cwd=repo.path)

      # Register the submodule (this creates a gitlink entry).
      run_git_or_fail(["submodule", "add", "./submodule", "submodule"], cwd=repo.path)

      # Parse the index with dulwich.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)

      # Expect main.txt, .gitmodules and the submodule gitlink.
      for expected in (b"main.txt", b".gitmodules", b"submodule"):
          self.assertIn(expected, entries)

      # Round trip through dulwich.
      Index(index_path).write()
  def test_index_v4_partial_staging(self) -> None:
      """Read a v4 index while a file has both staged and unstaged edits."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      target = os.path.join(repo.path, "partial.txt")

      def overwrite(text):
          # Replace the whole content of partial.txt.
          with open(target, "w") as out:
              out.write(text)

      # Initial version, committed.
      overwrite("line1\nline2\nline3\n")
      run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
      run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)

      # First round of edits, staged (stands in for `git add -p`).
      overwrite("line1 modified\nline2\nline3 modified\n")
      run_git_or_fail(["add", "partial.txt"], cwd=repo.path)

      # Second round of edits, left unstaged.
      overwrite("line1 modified\nline2 modified\nline3 modified\n")

      # The file now has staged and unstaged changes; read that index state.
      index_path = os.path.join(repo.path, ".git", "index")
      with open(index_path, "rb") as index_file:
          entries, version, _extensions = read_index_dict_with_version(index_file)

      self.assertEqual(version, 4)
      self.assertIn(b"partial.txt", entries)

      # Round trip through dulwich.
      Index(index_path).write()
  def test_index_v4_with_gitattributes_and_ignore(self) -> None:
      """Read a v4 index from a tree containing .gitignore and .gitattributes.

      Both dotfiles and the regular files must appear in the index, while a
      file matched by the ignore patterns must not.
      """
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()

      tracked = [
          "regular.txt",
          "binary.bin",
          "script.sh",
          "config.json",
          "README.md",
      ]

      # Everything written into the working tree, in creation order:
      # ignore rules, attributes, the regular files, then an ignored file.
      contents = {
          ".gitignore": "*.tmp\n*.log\nbuild/\n",
          ".gitattributes": "*.txt text\n*.bin binary\n",
      }
      for name in tracked:
          contents[name] = f"Content of {name}\n"
      contents["temp.tmp"] = "temporary file\n"

      for name, text in contents.items():
          with open(os.path.join(repo.path, name), "w") as handle:
              handle.write(text)

      # Stage the whole working tree
      run_git_or_fail(["add", "."], cwd=repo.path)

      # Parse the index Git produced
      with open(os.path.join(repo.path, ".git", "index"), "rb") as handle:
          entries, version, _extensions = read_index_dict_with_version(handle)
      self.assertEqual(version, 4)

      # The dotfiles and every regular file must be staged
      expected = [b".gitignore", b".gitattributes"]
      expected.extend(name.encode() for name in tracked)
      for path in expected:
          self.assertIn(path, entries)

      # The ignored file must not be staged
      self.assertNotIn(b"temp.tmp", entries)
  def test_index_v4_stress_test_many_entries(self) -> None:
      """Read a v4 index holding 200 files spread over nested directories."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()
      root = repo.path

      # Directory skeleton created up front
      for rel_dir in (
          "src/main/java/com/example",
          "src/main/resources",
          "src/test/java/com/example",
          "docs/api",
          "docs/user",
          "scripts/build",
          "config/env",
      ):
          os.makedirs(os.path.join(root, rel_dir), exist_ok=True)

      # Path template selected by ``i % 7``; slot 6 is a top-level file
      templates = (
          "src/main/java/com/example/Service{}.java",
          "src/test/java/com/example/Test{}.java",
          "docs/api/page{}.md",
          "config/env/config{}.properties",
          "scripts/build/script{}.sh",
          "src/main/resources/resource{}.txt",
          "file{}.txt",
      )

      files = []
      for i in range(200):  # Reasonable for CI
          filename = templates[i % 7].format(i)
          files.append(filename)
          target = os.path.join(root, filename)
          os.makedirs(os.path.dirname(target), exist_ok=True)
          with open(target, "w") as handle:
              handle.write(f"// File {i}\ncontent here\n")

      # Add files in batches to avoid command line length limits
      batch_size = 50
      for start in range(0, len(files), batch_size):
          chunk = files[start : start + batch_size]
          run_git_or_fail(["add", *chunk], cwd=root)

      # Parse the resulting large index
      with open(os.path.join(root, ".git", "index"), "rb") as handle:
          entries, version, _extensions = read_index_dict_with_version(handle)
      self.assertEqual(version, 4)
      self.assertEqual(len(entries), len(files))

      # Spot-check every 20th file
      for filename in files[::20]:
          self.assertIn(filename.encode(), entries)
  def test_index_v4_rename_detection_scenario(self) -> None:
      """Read a v4 index after committed files are renamed and re-staged."""
      require_git_version((2, 20, 0))
      repo = self._init_repo_with_manyfiles()
      root = repo.path

      # Commit three files
      for name in ("old1.txt", "old2.txt", "unchanged.txt"):
          with open(os.path.join(root, name), "w") as handle:
              handle.write(f"Content of {name}\n")
      run_git_or_fail(["add", "."], cwd=root)
      run_git_or_fail(["commit", "-m", "initial"], cwd=root)

      # Rename two of them on disk, then stage additions and removals
      for old, new in (("old1.txt", "new1.txt"), ("old2.txt", "new2.txt")):
          os.rename(os.path.join(root, old), os.path.join(root, new))
      run_git_or_fail(["add", "-A"], cwd=root)

      # Parse the index Git produced
      with open(os.path.join(root, ".git", "index"), "rb") as handle:
          entries, version, _extensions = read_index_dict_with_version(handle)
      self.assertEqual(version, 4)

      # Only the new names and the untouched file may be listed
      for present in (b"new1.txt", b"new2.txt", b"unchanged.txt"):
          self.assertIn(present, entries)
      for absent in (b"old1.txt", b"old2.txt"):
          self.assertNotIn(absent, entries)
  class SparseIndexCompatTestCase(CompatTestCase):
      """Compatibility tests between Dulwich and C Git for sparse indexes.

      Every test builds a throwaway repository beneath a fresh temporary
      directory, which is removed again during cleanup.
      """

      def setUp(self) -> None:
          """Create the scratch directory and schedule its removal."""
          super().setUp()
          self.tempdir = tempfile.mkdtemp()
          self.addCleanup(self._cleanup)

      def _cleanup(self) -> None:
          """Remove the scratch directory, ignoring any errors."""
          import shutil

          shutil.rmtree(self.tempdir, ignore_errors=True)

      def test_read_sparse_index_created_by_git(self) -> None:
          """Check that Dulwich loads an index Git wrote with sparse checkout on."""
          # Sparse index requires Git 2.37+
          require_git_version((2, 37, 0))

          from dulwich.index import Index

          repo_path = os.path.join(self.tempdir, "test_repo")
          os.mkdir(repo_path)

          # Initialise the repository with sparse checkout and sparse index on
          run_git_or_fail(["init"], cwd=repo_path)
          for key in ("core.sparseCheckout", "index.sparse"):
              run_git_or_fail(["config", key, "true"], cwd=repo_path)

          # Directory structure
          for parts in (("included",), ("excluded", "subdir")):
              os.makedirs(os.path.join(repo_path, *parts), exist_ok=True)

          # Files, as (path components, content)
          layout = (
              (("included", "file1.txt"), "included file 1\n"),
              (("included", "file2.txt"), "included file 2\n"),
              (("excluded", "file3.txt"), "excluded file 3\n"),
              (("excluded", "subdir", "file4.txt"), "excluded file 4\n"),
              (("root.txt",), "root file\n"),
          )
          for parts, text in layout:
              with open(os.path.join(repo_path, *parts), "w") as out:
                  out.write(text)

          # Add and commit everything
          run_git_or_fail(["add", "."], cwd=repo_path)
          run_git_or_fail(["commit", "-m", "initial"], cwd=repo_path)

          # Restrict the checkout to top-level files and "included/"
          sparse_checkout_path = os.path.join(
              repo_path, ".git", "info", "sparse-checkout"
          )
          os.makedirs(os.path.dirname(sparse_checkout_path), exist_ok=True)
          patterns = (
              "/*\n",  # Include top-level files
              "!/*/\n",  # Exclude all directories
              "/included/\n",  # Re-include "included" directory
          )
          with open(sparse_checkout_path, "w") as out:
              out.writelines(patterns)

          # Have Git re-apply the patterns, which may produce a sparse index
          run_git_or_fail(["sparse-checkout", "reapply"], cwd=repo_path)

          # Load the index with Dulwich. Git may or may not have written a
          # sparse index; the key check is that loading raises no error.
          idx = Index(os.path.join(repo_path, ".git", "index"))
          self.assertIsNotNone(idx)

          if not idx.is_sparse():
              # A full index must still expose its entries
              self.assertGreater(len(idx), 0)
              return

          # Sparse directory entries may only cover the excluded paths
          for path, entry in idx.items():
              if entry.is_sparse_dir(path):
                  self.assertTrue(path.startswith(b"excluded"))

      def test_write_sparse_index_readable_by_git(self) -> None:
          """Check that Git can read a sparse index written by Dulwich."""
          # Sparse index requires Git 2.37+
          require_git_version((2, 37, 0))

          from dulwich.index import Index, IndexEntry, SparseDirExtension
          from dulwich.objects import Blob, Commit, Tree
          from dulwich.repo import Repo

          repo_path = os.path.join(self.tempdir, "test_repo")
          os.mkdir(repo_path)

          # Let Git create the repository and enable the sparse index
          run_git_or_fail(["init"], cwd=repo_path)
          run_git_or_fail(["config", "index.sparse", "true"], cwd=repo_path)

          # Build the object graph with Dulwich
          repo = Repo(repo_path)
          self.addCleanup(repo.close)
          store = repo.object_store

          # Two blobs for the files inside the sparse directory
          blobs = []
          for payload in (b"file1 content", b"file2 content"):
              blob = Blob()
              blob.data = payload
              store.add_object(blob)
              blobs.append(blob)
          first_blob, second_blob = blobs

          # Subtree that the sparse directory entry will point at
          subtree = Tree()
          subtree[b"file1.txt"] = (0o100644, first_blob.id)
          subtree[b"file2.txt"] = (0o100644, second_blob.id)
          store.add_object(subtree)

          # Root tree containing only the sparse directory
          root_tree = Tree()
          root_tree[b"sparse_dir"] = (0o040000, subtree.id)
          store.add_object(root_tree)

          # Commit the root tree and point master at it
          commit = Commit()
          commit.tree = root_tree.id
          identity = b"Test <test@example.com>"
          commit.author = identity
          commit.committer = identity
          commit.author_time = 1234567890
          commit.commit_time = 1234567890
          commit.author_timezone = 0
          commit.commit_timezone = 0
          commit.encoding = b"UTF-8"
          commit.message = b"Test commit"
          store.add_object(commit)
          repo.refs[b"refs/heads/master"] = commit.id

          # Write a fresh index whose only entry is the sparse directory
          index = Index(os.path.join(repo_path, ".git", "index"), read=False)
          index[b"sparse_dir/"] = IndexEntry(
              ctime=0,
              mtime=0,
              dev=0,
              ino=0,
              mode=0o040000,
              uid=0,
              gid=0,
              size=0,
              sha=subtree.id,
              extended_flags=0x4000,  # SKIP_WORKTREE
          )
          index._extensions.append(SparseDirExtension())
          index.write()

          # Git must be able to list the entry
          listing = run_git_or_fail(["ls-files", "--debug"], cwd=repo_path)
          self.assertIn(b"sparse_dir/", listing)

          # "git status" must run without crashing
          status = run_git_or_fail(["status"], cwd=repo_path)
          self.assertIsNotNone(status)

      def test_expand_sparse_index_matches_git(self) -> None:
          """Check that an index expanded by Dulwich is still readable by Git."""
          # Sparse index requires Git 2.37+
          require_git_version((2, 37, 0))

          from dulwich.index import Index
          from dulwich.repo import Repo

          repo_path = os.path.join(self.tempdir, "test_repo")
          os.mkdir(repo_path)

          # Initialise the repository with sparse checkout and sparse index on
          run_git_or_fail(["init"], cwd=repo_path)
          for key in ("core.sparseCheckout", "index.sparse"):
              run_git_or_fail(["config", key, "true"], cwd=repo_path)

          # One directory with a file and a nested file
          os.makedirs(os.path.join(repo_path, "dir1", "subdir"), exist_ok=True)
          layout = (
              (("dir1", "file1.txt"), "file1\n"),
              (("dir1", "subdir", "file2.txt"), "file2\n"),
          )
          for parts, text in layout:
              with open(os.path.join(repo_path, *parts), "w") as out:
                  out.write(text)

          # Commit files
          run_git_or_fail(["add", "."], cwd=repo_path)
          run_git_or_fail(["commit", "-m", "initial"], cwd=repo_path)

          # Set up sparse-checkout to exclude dir1
          sparse_checkout_path = os.path.join(
              repo_path, ".git", "info", "sparse-checkout"
          )
          os.makedirs(os.path.dirname(sparse_checkout_path), exist_ok=True)
          with open(sparse_checkout_path, "w") as out:
              out.writelines(("/*\n", "!/dir1/\n"))
          run_git_or_fail(["sparse-checkout", "reapply"], cwd=repo_path)

          # Load whatever index Git produced
          idx = Index(os.path.join(repo_path, ".git", "index"))
          if not idx.is_sparse():
              # Nothing to expand when Git wrote a full index
              return

          # Expand the sparse entries using the repository's object store
          repo = Repo(repo_path)
          self.addCleanup(repo.close)
          idx.ensure_full_index(repo.object_store)

          # Should no longer be sparse
          self.assertFalse(idx.is_sparse())

          # Write it back; Git should still be able to read it
          idx.write()
          status = run_git_or_fail(["status"], cwd=repo_path)
          self.assertIsNotNone(status)
  934. idx.write()
  935. # Git should still be able to read it
  936. output = run_git_or_fail(["status"], cwd=repo_path)
  937. self.assertIsNotNone(output)