test_index.py 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924
  1. # test_index.py -- Git index compatibility tests
  2. # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for Git index format v4."""
  22. import os
  23. import tempfile
  24. from dulwich.index import Index, read_index_dict_with_version, write_index_dict
  25. from dulwich.repo import Repo
  26. from .utils import CompatTestCase, require_git_version, run_git_or_fail
  27. class IndexV4CompatTestCase(CompatTestCase):
  28. """Tests for Git index format v4 compatibility with C Git."""
  29. def setUp(self) -> None:
  30. super().setUp()
  31. self.tempdir = tempfile.mkdtemp()
  32. self.addCleanup(self._cleanup)
  33. def _cleanup(self) -> None:
  34. import shutil
  35. shutil.rmtree(self.tempdir, ignore_errors=True)
  36. def _init_repo_with_manyfiles(self) -> Repo:
  37. """Initialize a repo with manyFiles feature enabled."""
  38. # Create repo
  39. repo_path = os.path.join(self.tempdir, "test_repo")
  40. os.mkdir(repo_path)
  41. # Initialize with C git and enable manyFiles
  42. run_git_or_fail(["init"], cwd=repo_path)
  43. run_git_or_fail(["config", "feature.manyFiles", "true"], cwd=repo_path)
  44. # Open with dulwich
  45. return Repo(repo_path)
  46. def test_index_v4_path_compression(self) -> None:
  47. """Test that dulwich can read and write index v4 with path compression."""
  48. require_git_version((2, 20, 0)) # manyFiles feature requires newer Git
  49. repo = self._init_repo_with_manyfiles()
  50. # Create test files with paths that will benefit from compression
  51. test_files = [
  52. "dir1/subdir/file1.txt",
  53. "dir1/subdir/file2.txt",
  54. "dir1/subdir/file3.txt",
  55. "dir2/another/path.txt",
  56. "dir2/another/path2.txt",
  57. "file_at_root.txt",
  58. ]
  59. for path in test_files:
  60. full_path = os.path.join(repo.path, path)
  61. os.makedirs(os.path.dirname(full_path), exist_ok=True)
  62. with open(full_path, "w") as f:
  63. f.write(f"content of {path}\n")
  64. # Add files with C git - this should create index v4
  65. run_git_or_fail(["add", "."], cwd=repo.path)
  66. # Read the index with dulwich
  67. index_path = os.path.join(repo.path, ".git", "index")
  68. with open(index_path, "rb") as f:
  69. entries, version = read_index_dict_with_version(f)
  70. # Verify it's version 4
  71. self.assertEqual(version, 4)
  72. # Verify all files are in the index
  73. self.assertEqual(len(entries), len(test_files))
  74. for path in test_files:
  75. self.assertIn(path.encode(), entries)
  76. # Write the index back with dulwich
  77. with open(index_path + ".dulwich", "wb") as f:
  78. write_index_dict(f, entries, version=4)
  79. # Compare with C git - use git ls-files to read both indexes
  80. output1 = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  81. # Replace index with dulwich version
  82. os.rename(index_path + ".dulwich", index_path)
  83. output2 = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  84. # Both outputs should be identical
  85. self.assertEqual(output1, output2)
  86. def test_index_v4_round_trip(self) -> None:
  87. """Test round-trip: C Git write -> dulwich read -> dulwich write -> C Git read."""
  88. require_git_version((2, 20, 0))
  89. repo = self._init_repo_with_manyfiles()
  90. # Create files that test various edge cases
  91. test_files = [
  92. "a", # Very short name
  93. "abc/def/ghi/jkl/mno/pqr/stu/vwx/yz.txt", # Deep path
  94. "same_prefix_1.txt",
  95. "same_prefix_2.txt",
  96. "same_prefix_3.txt",
  97. "different/path/here.txt",
  98. ]
  99. for path in test_files:
  100. full_path = os.path.join(repo.path, path)
  101. os.makedirs(os.path.dirname(full_path), exist_ok=True)
  102. with open(full_path, "w") as f:
  103. f.write("test content\n")
  104. # Stage with C Git
  105. run_git_or_fail(["add", "."], cwd=repo.path)
  106. # Get original state
  107. original_output = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  108. # Read with dulwich, write back
  109. index = Index(os.path.join(repo.path, ".git", "index"))
  110. index.write()
  111. # Verify C Git can still read it
  112. final_output = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  113. self.assertEqual(original_output, final_output)
  114. def test_index_v4_skip_hash(self) -> None:
  115. """Test index v4 with skipHash extension."""
  116. require_git_version((2, 20, 0))
  117. repo = self._init_repo_with_manyfiles()
  118. # Enable skipHash
  119. run_git_or_fail(["config", "index.skipHash", "true"], cwd=repo.path)
  120. # Create a file
  121. test_file = os.path.join(repo.path, "test.txt")
  122. with open(test_file, "w") as f:
  123. f.write("test content\n")
  124. # Add with C Git
  125. run_git_or_fail(["add", "test.txt"], cwd=repo.path)
  126. # Read the index
  127. index_path = os.path.join(repo.path, ".git", "index")
  128. with open(index_path, "rb") as f:
  129. entries, version = read_index_dict_with_version(f)
  130. self.assertEqual(version, 4)
  131. self.assertIn(b"test.txt", entries)
  132. # Verify skipHash is active by checking last 20 bytes
  133. with open(index_path, "rb") as f:
  134. f.seek(-20, 2)
  135. last_bytes = f.read(20)
  136. self.assertEqual(last_bytes, b"\x00" * 20)
  137. # Write with dulwich (with skipHash)
  138. index = Index(index_path, skip_hash=True, version=4)
  139. index.write()
  140. # Verify C Git can read it
  141. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  142. self.assertEqual(output.strip(), b"test.txt")
  143. def test_index_v4_with_various_filenames(self) -> None:
  144. """Test v4 with various filename patterns."""
  145. require_git_version((2, 20, 0))
  146. repo = self._init_repo_with_manyfiles()
  147. # Test various filename patterns that might trigger different behaviors
  148. test_files = [
  149. "a", # Single character
  150. "ab", # Two characters
  151. "abc", # Three characters
  152. "file.txt", # Normal filename
  153. "very_long_filename_to_test_edge_cases.extension", # Long filename
  154. "dir/file.txt", # With directory
  155. "dir1/dir2/dir3/file.txt", # Deep directory
  156. "unicode_café.txt", # Unicode filename
  157. "with-dashes-and_underscores.txt", # Special chars
  158. ".hidden", # Hidden file
  159. "UPPERCASE.TXT", # Uppercase
  160. ]
  161. for filename in test_files:
  162. filepath = os.path.join(repo.path, filename)
  163. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  164. with open(filepath, "w", encoding="utf-8") as f:
  165. f.write(f"Content of {filename}\n")
  166. # Add all files
  167. run_git_or_fail(["add", "."], cwd=repo.path)
  168. # Read with dulwich
  169. index_path = os.path.join(repo.path, ".git", "index")
  170. with open(index_path, "rb") as f:
  171. entries, version = read_index_dict_with_version(f)
  172. self.assertEqual(version, 4)
  173. self.assertEqual(len(entries), len(test_files))
  174. # Verify all filenames are correctly stored
  175. for filename in test_files:
  176. filename_bytes = filename.encode("utf-8")
  177. self.assertIn(filename_bytes, entries)
  178. # Test round-trip: dulwich write -> C Git read
  179. with open(index_path + ".dulwich", "wb") as f:
  180. write_index_dict(f, entries, version=4)
  181. # Replace index
  182. os.rename(index_path + ".dulwich", index_path)
  183. # Verify C Git can read all files
  184. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  185. git_files = set(output.strip().split(b"\n"))
  186. expected_files = {f.encode("utf-8") for f in test_files}
  187. self.assertEqual(git_files, expected_files)
  188. def test_index_v4_path_compression_scenarios(self) -> None:
  189. """Test various scenarios where path compression should/shouldn't be used."""
  190. require_git_version((2, 20, 0))
  191. repo = self._init_repo_with_manyfiles()
  192. # Create files that should trigger compression
  193. compression_files = [
  194. "src/main/java/com/example/Service.java",
  195. "src/main/java/com/example/Controller.java",
  196. "src/main/java/com/example/Repository.java",
  197. "src/test/java/com/example/ServiceTest.java",
  198. ]
  199. # Create files that shouldn't benefit much from compression
  200. no_compression_files = [
  201. "README.md",
  202. "LICENSE",
  203. "docs/guide.txt",
  204. "config/settings.json",
  205. ]
  206. all_files = compression_files + no_compression_files
  207. for filename in all_files:
  208. filepath = os.path.join(repo.path, filename)
  209. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  210. with open(filepath, "w") as f:
  211. f.write(f"Content of {filename}\n")
  212. # Add files
  213. run_git_or_fail(["add", "."], cwd=repo.path)
  214. # Read the index
  215. index_path = os.path.join(repo.path, ".git", "index")
  216. with open(index_path, "rb") as f:
  217. entries, version = read_index_dict_with_version(f)
  218. self.assertEqual(version, 4)
  219. self.assertEqual(len(entries), len(all_files))
  220. # Verify all files are present
  221. for filename in all_files:
  222. self.assertIn(filename.encode(), entries)
  223. # Test that dulwich can write a compatible index
  224. with open(index_path + ".dulwich", "wb") as f:
  225. write_index_dict(f, entries, version=4)
  226. # Verify the written index is smaller (compression should help)
  227. original_size = os.path.getsize(index_path)
  228. dulwich_size = os.path.getsize(index_path + ".dulwich")
  229. # Allow some variance due to different compression decisions
  230. self.assertLess(abs(original_size - dulwich_size), original_size * 0.2)
  231. def test_index_v4_with_extensions(self) -> None:
  232. """Test v4 index with various extensions."""
  233. require_git_version((2, 20, 0))
  234. repo = self._init_repo_with_manyfiles()
  235. # Create some files
  236. files = ["file1.txt", "file2.txt", "dir/file3.txt"]
  237. for filename in files:
  238. filepath = os.path.join(repo.path, filename)
  239. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  240. with open(filepath, "w") as f:
  241. f.write("content\n")
  242. # Add files
  243. run_git_or_fail(["add", "."], cwd=repo.path)
  244. # Enable untracked cache (creates UNTR extension)
  245. run_git_or_fail(["config", "core.untrackedCache", "true"], cwd=repo.path)
  246. run_git_or_fail(["status"], cwd=repo.path) # Trigger cache update
  247. # Read index with extensions
  248. index_path = os.path.join(repo.path, ".git", "index")
  249. with open(index_path, "rb") as f:
  250. entries, version = read_index_dict_with_version(f)
  251. self.assertEqual(version, 4)
  252. self.assertEqual(len(entries), len(files))
  253. # Test round-trip with extensions present
  254. index = Index(index_path)
  255. index.write()
  256. # Verify C Git can still read it
  257. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  258. git_files = set(output.strip().split(b"\n"))
  259. expected_files = {f.encode() for f in files}
  260. self.assertEqual(git_files, expected_files)
  261. def test_index_v4_empty_repository(self) -> None:
  262. """Test v4 index behavior with empty repository."""
  263. require_git_version((2, 20, 0))
  264. repo = self._init_repo_with_manyfiles()
  265. # Create empty commit to get an index file
  266. run_git_or_fail(["commit", "--allow-empty", "-m", "empty"], cwd=repo.path)
  267. # Read the empty index
  268. index_path = os.path.join(repo.path, ".git", "index")
  269. if os.path.exists(index_path):
  270. with open(index_path, "rb") as f:
  271. entries, version = read_index_dict_with_version(f)
  272. # Even empty indexes should be readable
  273. self.assertEqual(len(entries), 0)
  274. # Test writing empty index
  275. with open(index_path + ".dulwich", "wb") as f:
  276. write_index_dict(f, entries, version=version)
  277. def test_index_v4_large_file_count(self) -> None:
  278. """Test v4 index with many files (stress test)."""
  279. require_git_version((2, 20, 0))
  280. repo = self._init_repo_with_manyfiles()
  281. # Create many files with similar paths to test compression
  282. files = []
  283. for i in range(50): # Reasonable number for CI
  284. filename = f"src/component_{i:03d}/index.js"
  285. files.append(filename)
  286. filepath = os.path.join(repo.path, filename)
  287. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  288. with open(filepath, "w") as f:
  289. f.write(f"// Component {i}\nexport default {{}};")
  290. # Add all files
  291. run_git_or_fail(["add", "."], cwd=repo.path)
  292. # Read index
  293. index_path = os.path.join(repo.path, ".git", "index")
  294. with open(index_path, "rb") as f:
  295. entries, version = read_index_dict_with_version(f)
  296. self.assertEqual(version, 4)
  297. self.assertEqual(len(entries), len(files))
  298. # Test dulwich can handle large indexes
  299. index = Index(index_path)
  300. index.write()
  301. # Verify all files are still present
  302. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  303. git_files = output.strip().split(b"\n")
  304. self.assertEqual(len(git_files), len(files))
  305. def test_index_v4_concurrent_modifications(self) -> None:
  306. """Test v4 index behavior with file modifications."""
  307. require_git_version((2, 20, 0))
  308. repo = self._init_repo_with_manyfiles()
  309. # Create initial files
  310. files = ["file1.txt", "file2.txt", "subdir/file3.txt"]
  311. for filename in files:
  312. filepath = os.path.join(repo.path, filename)
  313. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  314. with open(filepath, "w") as f:
  315. f.write("initial content\n")
  316. # Add files
  317. run_git_or_fail(["add", "."], cwd=repo.path)
  318. # Modify some files
  319. with open(os.path.join(repo.path, "file1.txt"), "w") as f:
  320. f.write("modified content\n")
  321. # Add new file
  322. with open(os.path.join(repo.path, "file4.txt"), "w") as f:
  323. f.write("new file\n")
  324. run_git_or_fail(["add", "file4.txt"], cwd=repo.path)
  325. # Test dulwich can read the updated index
  326. index_path = os.path.join(repo.path, ".git", "index")
  327. with open(index_path, "rb") as f:
  328. entries, version = read_index_dict_with_version(f)
  329. self.assertEqual(version, 4)
  330. self.assertEqual(len(entries), 4) # 3 original + 1 new
  331. # Verify specific files
  332. self.assertIn(b"file1.txt", entries)
  333. self.assertIn(b"file4.txt", entries)
  334. # Test round-trip
  335. index = Index(index_path)
  336. index.write()
  337. # Verify state is preserved
  338. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  339. self.assertIn(b"file4.txt", output)
  340. def test_index_v4_with_merge_conflicts(self) -> None:
  341. """Test v4 index behavior with merge conflicts and staging."""
  342. require_git_version((2, 20, 0))
  343. repo = self._init_repo_with_manyfiles()
  344. # Create initial commit
  345. with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
  346. f.write("original content\n")
  347. with open(os.path.join(repo.path, "normal.txt"), "w") as f:
  348. f.write("normal file\n")
  349. run_git_or_fail(["add", "."], cwd=repo.path)
  350. run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
  351. # Create branch and modify file
  352. run_git_or_fail(["checkout", "-b", "feature"], cwd=repo.path)
  353. with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
  354. f.write("feature content\n")
  355. run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
  356. run_git_or_fail(["commit", "-m", "feature change"], cwd=repo.path)
  357. # Go back to main and make conflicting change
  358. run_git_or_fail(["checkout", "master"], cwd=repo.path)
  359. with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
  360. f.write("master content\n")
  361. run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
  362. run_git_or_fail(["commit", "-m", "master change"], cwd=repo.path)
  363. # Try to merge (should create conflicts)
  364. result = run_git_or_fail(["merge", "feature"], cwd=repo.path, check=False)
  365. # Read the index with conflicts
  366. index_path = os.path.join(repo.path, ".git", "index")
  367. if os.path.exists(index_path):
  368. with open(index_path, "rb") as f:
  369. entries, version = read_index_dict_with_version(f)
  370. self.assertEqual(version, 4)
  371. # Test dulwich can handle conflicted index
  372. index = Index(index_path)
  373. index.write()
  374. # Verify Git can still read it
  375. output = run_git_or_fail(["status", "--porcelain"], cwd=repo.path)
  376. self.assertIn(b"conflict.txt", output)
  377. def test_index_v4_boundary_filename_lengths(self) -> None:
  378. """Test v4 with boundary conditions for filename lengths."""
  379. require_git_version((2, 20, 0))
  380. repo = self._init_repo_with_manyfiles()
  381. # Test various boundary conditions
  382. boundary_files = [
  383. "", # Empty name (invalid, but test robustness)
  384. "x", # Single char
  385. "xx", # Two chars
  386. "x" * 255, # Max typical filename length
  387. "x" * 4095, # Max path length in many filesystems
  388. "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z", # Deep nesting
  389. "file_with_" + "very_" * 50 + "long_name.txt", # Very long name
  390. ]
  391. valid_files = []
  392. for filename in boundary_files:
  393. if not filename: # Skip empty filename
  394. continue
  395. try:
  396. filepath = os.path.join(repo.path, filename)
  397. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  398. with open(filepath, "w") as f:
  399. f.write(f"Content\n")
  400. valid_files.append(filename)
  401. except (OSError, ValueError):
  402. # Skip files that can't be created on this system
  403. continue
  404. if valid_files:
  405. # Add files
  406. run_git_or_fail(["add", "."], cwd=repo.path)
  407. # Test reading
  408. index_path = os.path.join(repo.path, ".git", "index")
  409. with open(index_path, "rb") as f:
  410. entries, version = read_index_dict_with_version(f)
  411. self.assertEqual(version, 4)
  412. # Test round-trip
  413. index = Index(index_path)
  414. index.write()
  415. def test_index_v4_special_characters_and_encoding(self) -> None:
  416. """Test v4 with special characters and various encodings."""
  417. require_git_version((2, 20, 0))
  418. repo = self._init_repo_with_manyfiles()
  419. # Test files with special characters
  420. special_files = [
  421. "file with spaces.txt",
  422. "file\twith\ttabs.txt",
  423. "file-with-dashes.txt",
  424. "file_with_underscores.txt",
  425. "file.with.dots.txt",
  426. "UPPERCASE.TXT",
  427. "MixedCase.TxT",
  428. "file123numbers.txt",
  429. "file@#$%special.txt",
  430. "café.txt", # Unicode
  431. "файл.txt", # Cyrillic
  432. "文件.txt", # Chinese
  433. "🚀rocket.txt", # Emoji
  434. "file'with'quotes.txt",
  435. 'file"with"doublequotes.txt',
  436. "file[with]brackets.txt",
  437. "file(with)parens.txt",
  438. "file{with}braces.txt",
  439. ]
  440. valid_files = []
  441. for filename in special_files:
  442. try:
  443. filepath = os.path.join(repo.path, filename)
  444. with open(filepath, "w", encoding="utf-8") as f:
  445. f.write(f"Content of {filename}\n")
  446. valid_files.append(filename)
  447. except (OSError, UnicodeError):
  448. # Skip files that can't be created on this system
  449. continue
  450. if valid_files:
  451. # Add files
  452. run_git_or_fail(["add", "."], cwd=repo.path)
  453. # Test reading
  454. index_path = os.path.join(repo.path, ".git", "index")
  455. with open(index_path, "rb") as f:
  456. entries, version = read_index_dict_with_version(f)
  457. self.assertEqual(version, 4)
  458. self.assertGreater(len(entries), 0)
  459. # Test all valid files are present
  460. for filename in valid_files:
  461. filename_bytes = filename.encode("utf-8")
  462. self.assertIn(filename_bytes, entries)
  463. def test_index_v4_symlinks_and_special_modes(self) -> None:
  464. """Test v4 with symlinks and special file modes."""
  465. require_git_version((2, 20, 0))
  466. repo = self._init_repo_with_manyfiles()
  467. # Create regular file
  468. with open(os.path.join(repo.path, "regular.txt"), "w") as f:
  469. f.write("regular file\n")
  470. # Create executable file
  471. exec_path = os.path.join(repo.path, "executable.sh")
  472. with open(exec_path, "w") as f:
  473. f.write("#!/bin/bash\necho hello\n")
  474. os.chmod(exec_path, 0o755)
  475. # Create symlink (if supported)
  476. try:
  477. os.symlink("regular.txt", os.path.join(repo.path, "symlink.txt"))
  478. has_symlink = True
  479. except (OSError, NotImplementedError):
  480. has_symlink = False
  481. # Add files
  482. run_git_or_fail(["add", "."], cwd=repo.path)
  483. # Test reading
  484. index_path = os.path.join(repo.path, ".git", "index")
  485. with open(index_path, "rb") as f:
  486. entries, version = read_index_dict_with_version(f)
  487. self.assertEqual(version, 4)
  488. # Verify files with different modes
  489. self.assertIn(b"regular.txt", entries)
  490. self.assertIn(b"executable.sh", entries)
  491. if has_symlink:
  492. self.assertIn(b"symlink.txt", entries)
  493. # Test round-trip preserves modes
  494. index = Index(index_path)
  495. index.write()
  496. # Verify Git can read it
  497. output = run_git_or_fail(["ls-files", "-s"], cwd=repo.path)
  498. self.assertIn(b"regular.txt", output)
  499. self.assertIn(b"executable.sh", output)
  500. def test_index_v4_alternating_compression_patterns(self) -> None:
  501. """Test v4 with files that alternate between compressed/uncompressed."""
  502. require_git_version((2, 20, 0))
  503. repo = self._init_repo_with_manyfiles()
  504. # Create files that should create alternating compression patterns
  505. files = [
  506. # These should be uncompressed (no common prefix)
  507. "a.txt",
  508. "b.txt",
  509. "c.txt",
  510. # These should be compressed (common prefix)
  511. "common/path/file1.txt",
  512. "common/path/file2.txt",
  513. "common/path/file3.txt",
  514. # Back to uncompressed (different pattern)
  515. "different/structure/x.txt",
  516. "another/structure/y.txt",
  517. # More compression opportunities
  518. "src/main/Component1.java",
  519. "src/main/Component2.java",
  520. "src/test/Test1.java",
  521. "src/test/Test2.java",
  522. ]
  523. for filename in files:
  524. filepath = os.path.join(repo.path, filename)
  525. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  526. with open(filepath, "w") as f:
  527. f.write(f"Content of {filename}\n")
  528. # Add files
  529. run_git_or_fail(["add", "."], cwd=repo.path)
  530. # Test reading
  531. index_path = os.path.join(repo.path, ".git", "index")
  532. with open(index_path, "rb") as f:
  533. entries, version = read_index_dict_with_version(f)
  534. self.assertEqual(version, 4)
  535. self.assertEqual(len(entries), len(files))
  536. # Verify all files are present
  537. for filename in files:
  538. self.assertIn(filename.encode(), entries)
  539. # Test round-trip
  540. index = Index(index_path)
  541. index.write()
  542. def test_index_v4_git_submodules(self) -> None:
  543. """Test v4 index with Git submodules."""
  544. require_git_version((2, 20, 0))
  545. repo = self._init_repo_with_manyfiles()
  546. # Create a submodule directory structure
  547. submodule_dir = os.path.join(repo.path, "submodule")
  548. os.makedirs(submodule_dir)
  549. # Initialize a separate repo for the submodule
  550. run_git_or_fail(["init"], cwd=submodule_dir)
  551. with open(os.path.join(submodule_dir, "sub.txt"), "w") as f:
  552. f.write("submodule content\n")
  553. run_git_or_fail(["add", "sub.txt"], cwd=submodule_dir)
  554. run_git_or_fail(["commit", "-m", "submodule commit"], cwd=submodule_dir)
  555. # Add some regular files to main repo
  556. with open(os.path.join(repo.path, "main.txt"), "w") as f:
  557. f.write("main repo content\n")
  558. run_git_or_fail(["add", "main.txt"], cwd=repo.path)
  559. # Add submodule (this creates a gitlink entry)
  560. run_git_or_fail(["submodule", "add", "./submodule", "submodule"], cwd=repo.path)
  561. # Test reading index with submodule
  562. index_path = os.path.join(repo.path, ".git", "index")
  563. with open(index_path, "rb") as f:
  564. entries, version = read_index_dict_with_version(f)
  565. self.assertEqual(version, 4)
  566. # Should have main.txt, .gitmodules, and submodule gitlink
  567. self.assertIn(b"main.txt", entries)
  568. self.assertIn(b".gitmodules", entries)
  569. self.assertIn(b"submodule", entries)
  570. # Test round-trip
  571. index = Index(index_path)
  572. index.write()
  573. def test_index_v4_partial_staging(self) -> None:
  574. """Test v4 with partial file staging (git add -p simulation)."""
  575. require_git_version((2, 20, 0))
  576. repo = self._init_repo_with_manyfiles()
  577. # Create initial file
  578. filepath = os.path.join(repo.path, "partial.txt")
  579. with open(filepath, "w") as f:
  580. f.write("line1\nline2\nline3\n")
  581. run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
  582. run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
  583. # Modify the file
  584. with open(filepath, "w") as f:
  585. f.write("line1 modified\nline2\nline3 modified\n")
  586. # Stage only part of the changes (simulate git add -p)
  587. # This creates an interesting index state
  588. run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
  589. # Make more changes
  590. with open(filepath, "w") as f:
  591. f.write("line1 modified\nline2 modified\nline3 modified\n")
  592. # Now we have staged and unstaged changes
  593. # Test reading this complex index state
  594. index_path = os.path.join(repo.path, ".git", "index")
  595. with open(index_path, "rb") as f:
  596. entries, version = read_index_dict_with_version(f)
  597. self.assertEqual(version, 4)
  598. self.assertIn(b"partial.txt", entries)
  599. # Test round-trip
  600. index = Index(index_path)
  601. index.write()
  602. def test_index_v4_with_gitattributes_and_ignore(self) -> None:
  603. """Test v4 with .gitattributes and .gitignore files."""
  604. require_git_version((2, 20, 0))
  605. repo = self._init_repo_with_manyfiles()
  606. # Create .gitignore
  607. with open(os.path.join(repo.path, ".gitignore"), "w") as f:
  608. f.write("*.tmp\n*.log\nbuild/\n")
  609. # Create .gitattributes
  610. with open(os.path.join(repo.path, ".gitattributes"), "w") as f:
  611. f.write("*.txt text\n*.bin binary\n")
  612. # Create various files
  613. files = [
  614. "regular.txt",
  615. "binary.bin",
  616. "script.sh",
  617. "config.json",
  618. "README.md",
  619. ]
  620. for filename in files:
  621. filepath = os.path.join(repo.path, filename)
  622. with open(filepath, "w") as f:
  623. f.write(f"Content of {filename}\n")
  624. # Create some files that should be ignored
  625. with open(os.path.join(repo.path, "temp.tmp"), "w") as f:
  626. f.write("temporary file\n")
  627. # Add files
  628. run_git_or_fail(["add", "."], cwd=repo.path)
  629. # Test reading
  630. index_path = os.path.join(repo.path, ".git", "index")
  631. with open(index_path, "rb") as f:
  632. entries, version = read_index_dict_with_version(f)
  633. self.assertEqual(version, 4)
  634. # Should have .gitignore, .gitattributes, and regular files
  635. self.assertIn(b".gitignore", entries)
  636. self.assertIn(b".gitattributes", entries)
  637. for filename in files:
  638. self.assertIn(filename.encode(), entries)
  639. # Should NOT have ignored files
  640. self.assertNotIn(b"temp.tmp", entries)
  641. def test_index_v4_stress_test_many_entries(self) -> None:
  642. """Stress test v4 with many entries in complex directory structure."""
  643. require_git_version((2, 20, 0))
  644. repo = self._init_repo_with_manyfiles()
  645. # Create a complex directory structure with many files
  646. dirs = [
  647. "src/main/java/com/example",
  648. "src/main/resources",
  649. "src/test/java/com/example",
  650. "docs/api",
  651. "docs/user",
  652. "scripts/build",
  653. "config/env",
  654. ]
  655. for dir_path in dirs:
  656. os.makedirs(os.path.join(repo.path, dir_path), exist_ok=True)
  657. # Create many files
  658. files = []
  659. for i in range(200): # Reasonable for CI
  660. if i % 7 == 0:
  661. filename = f"src/main/java/com/example/Service{i}.java"
  662. elif i % 7 == 1:
  663. filename = f"src/test/java/com/example/Test{i}.java"
  664. elif i % 7 == 2:
  665. filename = f"docs/api/page{i}.md"
  666. elif i % 7 == 3:
  667. filename = f"config/env/config{i}.properties"
  668. elif i % 7 == 4:
  669. filename = f"scripts/build/script{i}.sh"
  670. elif i % 7 == 5:
  671. filename = f"src/main/resources/resource{i}.txt"
  672. else:
  673. filename = f"file{i}.txt"
  674. files.append(filename)
  675. filepath = os.path.join(repo.path, filename)
  676. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  677. with open(filepath, "w") as f:
  678. f.write(f"// File {i}\ncontent here\n")
  679. # Add files in batches to avoid command line length limits
  680. batch_size = 50
  681. for i in range(0, len(files), batch_size):
  682. batch = files[i : i + batch_size]
  683. run_git_or_fail(["add"] + batch, cwd=repo.path)
  684. # Test reading large index
  685. index_path = os.path.join(repo.path, ".git", "index")
  686. with open(index_path, "rb") as f:
  687. entries, version = read_index_dict_with_version(f)
  688. self.assertEqual(version, 4)
  689. self.assertEqual(len(entries), len(files))
  690. # Verify some files are present
  691. for i in range(0, len(files), 20): # Check every 20th file
  692. filename = files[i]
  693. self.assertIn(filename.encode(), entries)
  694. def test_index_v4_rename_detection_scenario(self) -> None:
  695. """Test v4 with file renames (complex staging scenario)."""
  696. require_git_version((2, 20, 0))
  697. repo = self._init_repo_with_manyfiles()
  698. # Create initial files
  699. files = ["old1.txt", "old2.txt", "unchanged.txt"]
  700. for filename in files:
  701. filepath = os.path.join(repo.path, filename)
  702. with open(filepath, "w") as f:
  703. f.write(f"Content of {filename}\n")
  704. run_git_or_fail(["add", "."], cwd=repo.path)
  705. run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
  706. # Rename files
  707. os.rename(
  708. os.path.join(repo.path, "old1.txt"), os.path.join(repo.path, "new1.txt")
  709. )
  710. os.rename(
  711. os.path.join(repo.path, "old2.txt"), os.path.join(repo.path, "new2.txt")
  712. )
  713. # Stage renames
  714. run_git_or_fail(["add", "-A"], cwd=repo.path)
  715. # Test reading index with renames
  716. index_path = os.path.join(repo.path, ".git", "index")
  717. with open(index_path, "rb") as f:
  718. entries, version = read_index_dict_with_version(f)
  719. self.assertEqual(version, 4)
  720. # Should have new names, not old names
  721. self.assertIn(b"new1.txt", entries)
  722. self.assertIn(b"new2.txt", entries)
  723. self.assertIn(b"unchanged.txt", entries)
  724. self.assertNotIn(b"old1.txt", entries)
  725. self.assertNotIn(b"old2.txt", entries)