test_index.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947
  1. # test_index.py -- Git index compatibility tests
  2. # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for Git index format v4."""
  22. import os
  23. import tempfile
  24. from dulwich.index import Index, read_index_dict_with_version, write_index_dict
  25. from dulwich.repo import Repo
  26. from .utils import CompatTestCase, require_git_version, run_git, run_git_or_fail
  27. class IndexV4CompatTestCase(CompatTestCase):
  28. """Tests for Git index format v4 compatibility with C Git."""
  29. def setUp(self) -> None:
  30. super().setUp()
  31. self.tempdir = tempfile.mkdtemp()
  32. self.addCleanup(self._cleanup)
  33. def _cleanup(self) -> None:
  34. import shutil
  35. shutil.rmtree(self.tempdir, ignore_errors=True)
  36. def _init_repo_with_manyfiles(self) -> Repo:
  37. """Initialize a repo with manyFiles feature enabled."""
  38. # Create repo
  39. repo_path = os.path.join(self.tempdir, "test_repo")
  40. os.mkdir(repo_path)
  41. # Initialize with C git and enable manyFiles
  42. run_git_or_fail(["init"], cwd=repo_path)
  43. run_git_or_fail(["config", "feature.manyFiles", "true"], cwd=repo_path)
  44. # Open with dulwich
  45. return Repo(repo_path)
  46. def test_index_v4_path_compression(self) -> None:
  47. """Test that dulwich can read and write index v4 with path compression."""
  48. require_git_version((2, 20, 0)) # manyFiles feature requires newer Git
  49. repo = self._init_repo_with_manyfiles()
  50. # Create test files with paths that will benefit from compression
  51. test_files = [
  52. "dir1/subdir/file1.txt",
  53. "dir1/subdir/file2.txt",
  54. "dir1/subdir/file3.txt",
  55. "dir2/another/path.txt",
  56. "dir2/another/path2.txt",
  57. "file_at_root.txt",
  58. ]
  59. for path in test_files:
  60. full_path = os.path.join(repo.path, path)
  61. os.makedirs(os.path.dirname(full_path), exist_ok=True)
  62. with open(full_path, "w") as f:
  63. f.write(f"content of {path}\n")
  64. # Add files with C git - this should create index v4
  65. run_git_or_fail(["add", "."], cwd=repo.path)
  66. # Read the index with dulwich
  67. index_path = os.path.join(repo.path, ".git", "index")
  68. with open(index_path, "rb") as f:
  69. entries, version, extensions = read_index_dict_with_version(f)
  70. # Verify it's version 4
  71. self.assertEqual(version, 4)
  72. # Verify all files are in the index
  73. self.assertEqual(len(entries), len(test_files))
  74. for path in test_files:
  75. self.assertIn(path.encode(), entries)
  76. # Write the index back with dulwich
  77. with open(index_path + ".dulwich", "wb") as f:
  78. from dulwich.pack import SHA1Writer
  79. sha1_writer = SHA1Writer(f)
  80. write_index_dict(sha1_writer, entries, version=4, extensions=extensions)
  81. sha1_writer.close()
  82. # Compare with C git - use git ls-files to read both indexes
  83. output1 = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  84. # Replace index with dulwich version
  85. os.rename(index_path + ".dulwich", index_path)
  86. output2 = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  87. # Both outputs should be identical
  88. self.assertEqual(output1, output2)
  89. def test_index_v4_round_trip(self) -> None:
  90. """Test round-trip: C Git write -> dulwich read -> dulwich write -> C Git read."""
  91. require_git_version((2, 20, 0))
  92. repo = self._init_repo_with_manyfiles()
  93. # Create files that test various edge cases
  94. test_files = [
  95. "a", # Very short name
  96. "abc/def/ghi/jkl/mno/pqr/stu/vwx/yz.txt", # Deep path
  97. "same_prefix_1.txt",
  98. "same_prefix_2.txt",
  99. "same_prefix_3.txt",
  100. "different/path/here.txt",
  101. ]
  102. for path in test_files:
  103. full_path = os.path.join(repo.path, path)
  104. os.makedirs(os.path.dirname(full_path), exist_ok=True)
  105. with open(full_path, "w") as f:
  106. f.write("test content\n")
  107. # Stage with C Git
  108. run_git_or_fail(["add", "."], cwd=repo.path)
  109. # Get original state
  110. original_output = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  111. # Read with dulwich, write back
  112. index = Index(os.path.join(repo.path, ".git", "index"))
  113. index.write()
  114. # Verify C Git can still read it
  115. final_output = run_git_or_fail(["ls-files", "--stage"], cwd=repo.path)
  116. self.assertEqual(original_output, final_output)
  117. def test_index_v4_skip_hash(self) -> None:
  118. """Test index v4 with skipHash extension."""
  119. require_git_version((2, 20, 0))
  120. repo = self._init_repo_with_manyfiles()
  121. # Enable skipHash
  122. run_git_or_fail(["config", "index.skipHash", "true"], cwd=repo.path)
  123. # Create a file
  124. test_file = os.path.join(repo.path, "test.txt")
  125. with open(test_file, "w") as f:
  126. f.write("test content\n")
  127. # Add with C Git
  128. run_git_or_fail(["add", "test.txt"], cwd=repo.path)
  129. # Read the index
  130. index_path = os.path.join(repo.path, ".git", "index")
  131. with open(index_path, "rb") as f:
  132. entries, version, extensions = read_index_dict_with_version(f)
  133. self.assertEqual(version, 4)
  134. self.assertIn(b"test.txt", entries)
  135. # Verify skipHash is active by checking last 20 bytes
  136. with open(index_path, "rb") as f:
  137. f.seek(-20, 2)
  138. last_bytes = f.read(20)
  139. self.assertEqual(last_bytes, b"\x00" * 20)
  140. # Write with dulwich (with skipHash)
  141. index = Index(index_path, skip_hash=True, version=4)
  142. index.write()
  143. # Verify C Git can read it
  144. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  145. self.assertEqual(output.strip(), b"test.txt")
  146. def test_index_v4_with_various_filenames(self) -> None:
  147. """Test v4 with various filename patterns."""
  148. require_git_version((2, 20, 0))
  149. repo = self._init_repo_with_manyfiles()
  150. # Test various filename patterns that might trigger different behaviors
  151. test_files = [
  152. "a", # Single character
  153. "ab", # Two characters
  154. "abc", # Three characters
  155. "file.txt", # Normal filename
  156. "very_long_filename_to_test_edge_cases.extension", # Long filename
  157. "dir/file.txt", # With directory
  158. "dir1/dir2/dir3/file.txt", # Deep directory
  159. "unicode_café.txt", # Unicode filename
  160. "with-dashes-and_underscores.txt", # Special chars
  161. ".hidden", # Hidden file
  162. "UPPERCASE.TXT", # Uppercase
  163. ]
  164. for filename in test_files:
  165. filepath = os.path.join(repo.path, filename)
  166. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  167. with open(filepath, "w", encoding="utf-8") as f:
  168. f.write(f"Content of {filename}\n")
  169. # Add all files
  170. run_git_or_fail(["add", "."], cwd=repo.path)
  171. # Read with dulwich
  172. index_path = os.path.join(repo.path, ".git", "index")
  173. with open(index_path, "rb") as f:
  174. entries, version, extensions = read_index_dict_with_version(f)
  175. self.assertEqual(version, 4)
  176. self.assertEqual(len(entries), len(test_files))
  177. # Verify all filenames are correctly stored
  178. for filename in test_files:
  179. filename_bytes = filename.encode("utf-8")
  180. self.assertIn(filename_bytes, entries)
  181. # Test round-trip: dulwich write -> C Git read
  182. with open(index_path + ".dulwich", "wb") as f:
  183. from dulwich.pack import SHA1Writer
  184. sha1_writer = SHA1Writer(f)
  185. write_index_dict(sha1_writer, entries, version=4, extensions=extensions)
  186. sha1_writer.close()
  187. # Replace index
  188. os.rename(index_path + ".dulwich", index_path)
  189. # Verify C Git can read all files
  190. # Use -z flag to avoid quoting of non-ASCII filenames
  191. output = run_git_or_fail(["ls-files", "-z"], cwd=repo.path)
  192. git_files = set(output.strip(b"\x00").split(b"\x00"))
  193. expected_files = {f.encode("utf-8") for f in test_files}
  194. self.assertEqual(git_files, expected_files)
  195. def test_index_v4_path_compression_scenarios(self) -> None:
  196. """Test various scenarios where path compression should/shouldn't be used."""
  197. require_git_version((2, 20, 0))
  198. repo = self._init_repo_with_manyfiles()
  199. # Create files that should trigger compression
  200. compression_files = [
  201. "src/main/java/com/example/Service.java",
  202. "src/main/java/com/example/Controller.java",
  203. "src/main/java/com/example/Repository.java",
  204. "src/test/java/com/example/ServiceTest.java",
  205. ]
  206. # Create files that shouldn't benefit much from compression
  207. no_compression_files = [
  208. "README.md",
  209. "LICENSE",
  210. "docs/guide.txt",
  211. "config/settings.json",
  212. ]
  213. all_files = compression_files + no_compression_files
  214. for filename in all_files:
  215. filepath = os.path.join(repo.path, filename)
  216. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  217. with open(filepath, "w") as f:
  218. f.write(f"Content of {filename}\n")
  219. # Add files
  220. run_git_or_fail(["add", "."], cwd=repo.path)
  221. # Read the index
  222. index_path = os.path.join(repo.path, ".git", "index")
  223. with open(index_path, "rb") as f:
  224. entries, version, extensions = read_index_dict_with_version(f)
  225. self.assertEqual(version, 4)
  226. self.assertEqual(len(entries), len(all_files))
  227. # Verify all files are present
  228. for filename in all_files:
  229. self.assertIn(filename.encode(), entries)
  230. # Test that dulwich can write a compatible index
  231. with open(index_path + ".dulwich", "wb") as f:
  232. from dulwich.pack import SHA1Writer
  233. sha1_writer = SHA1Writer(f)
  234. write_index_dict(sha1_writer, entries, version=4, extensions=extensions)
  235. sha1_writer.close()
  236. # Verify the written index is the same size (for byte-for-byte compatibility)
  237. original_size = os.path.getsize(index_path)
  238. dulwich_size = os.path.getsize(index_path + ".dulwich")
  239. # For v4 format with proper compression, checksum, and extensions, sizes should match
  240. self.assertEqual(
  241. original_size,
  242. dulwich_size,
  243. f"Index sizes don't match: Git={original_size}, Dulwich={dulwich_size}",
  244. )
  245. def test_index_v4_with_extensions(self) -> None:
  246. """Test v4 index with various extensions."""
  247. require_git_version((2, 20, 0))
  248. repo = self._init_repo_with_manyfiles()
  249. # Create some files
  250. files = ["file1.txt", "file2.txt", "dir/file3.txt"]
  251. for filename in files:
  252. filepath = os.path.join(repo.path, filename)
  253. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  254. with open(filepath, "w") as f:
  255. f.write("content\n")
  256. # Add files
  257. run_git_or_fail(["add", "."], cwd=repo.path)
  258. # Enable untracked cache (creates UNTR extension)
  259. run_git_or_fail(["config", "core.untrackedCache", "true"], cwd=repo.path)
  260. run_git_or_fail(["status"], cwd=repo.path) # Trigger cache update
  261. # Read index with extensions
  262. index_path = os.path.join(repo.path, ".git", "index")
  263. with open(index_path, "rb") as f:
  264. entries, version, extensions = read_index_dict_with_version(f)
  265. self.assertEqual(version, 4)
  266. self.assertEqual(len(entries), len(files))
  267. # Test round-trip with extensions present
  268. index = Index(index_path)
  269. index.write()
  270. # Verify C Git can still read it
  271. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  272. git_files = set(output.strip().split(b"\n"))
  273. expected_files = {f.encode() for f in files}
  274. self.assertEqual(git_files, expected_files)
  275. def test_index_v4_empty_repository(self) -> None:
  276. """Test v4 index behavior with empty repository."""
  277. require_git_version((2, 20, 0))
  278. repo = self._init_repo_with_manyfiles()
  279. # Create empty commit to get an index file
  280. run_git_or_fail(["commit", "--allow-empty", "-m", "empty"], cwd=repo.path)
  281. # Read the empty index
  282. index_path = os.path.join(repo.path, ".git", "index")
  283. if os.path.exists(index_path):
  284. with open(index_path, "rb") as f:
  285. entries, version, extensions = read_index_dict_with_version(f)
  286. # Even empty indexes should be readable
  287. self.assertEqual(len(entries), 0)
  288. # Test writing empty index
  289. with open(index_path + ".dulwich", "wb") as f:
  290. from dulwich.pack import SHA1Writer
  291. sha1_writer = SHA1Writer(f)
  292. write_index_dict(
  293. sha1_writer, entries, version=version, extensions=extensions
  294. )
  295. sha1_writer.close()
  296. def test_index_v4_large_file_count(self) -> None:
  297. """Test v4 index with many files (stress test)."""
  298. require_git_version((2, 20, 0))
  299. repo = self._init_repo_with_manyfiles()
  300. # Create many files with similar paths to test compression
  301. files = []
  302. for i in range(50): # Reasonable number for CI
  303. filename = f"src/component_{i:03d}/index.js"
  304. files.append(filename)
  305. filepath = os.path.join(repo.path, filename)
  306. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  307. with open(filepath, "w") as f:
  308. f.write(f"// Component {i}\nexport default {{}};")
  309. # Add all files
  310. run_git_or_fail(["add", "."], cwd=repo.path)
  311. # Read index
  312. index_path = os.path.join(repo.path, ".git", "index")
  313. with open(index_path, "rb") as f:
  314. entries, version, extensions = read_index_dict_with_version(f)
  315. self.assertEqual(version, 4)
  316. self.assertEqual(len(entries), len(files))
  317. # Test dulwich can handle large indexes
  318. index = Index(index_path)
  319. index.write()
  320. # Verify all files are still present
  321. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  322. git_files = output.strip().split(b"\n")
  323. self.assertEqual(len(git_files), len(files))
  324. def test_index_v4_concurrent_modifications(self) -> None:
  325. """Test v4 index behavior with file modifications."""
  326. require_git_version((2, 20, 0))
  327. repo = self._init_repo_with_manyfiles()
  328. # Create initial files
  329. files = ["file1.txt", "file2.txt", "subdir/file3.txt"]
  330. for filename in files:
  331. filepath = os.path.join(repo.path, filename)
  332. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  333. with open(filepath, "w") as f:
  334. f.write("initial content\n")
  335. # Add files
  336. run_git_or_fail(["add", "."], cwd=repo.path)
  337. # Modify some files
  338. with open(os.path.join(repo.path, "file1.txt"), "w") as f:
  339. f.write("modified content\n")
  340. # Add new file
  341. with open(os.path.join(repo.path, "file4.txt"), "w") as f:
  342. f.write("new file\n")
  343. run_git_or_fail(["add", "file4.txt"], cwd=repo.path)
  344. # Test dulwich can read the updated index
  345. index_path = os.path.join(repo.path, ".git", "index")
  346. with open(index_path, "rb") as f:
  347. entries, version, extensions = read_index_dict_with_version(f)
  348. self.assertEqual(version, 4)
  349. self.assertEqual(len(entries), 4) # 3 original + 1 new
  350. # Verify specific files
  351. self.assertIn(b"file1.txt", entries)
  352. self.assertIn(b"file4.txt", entries)
  353. # Test round-trip
  354. index = Index(index_path)
  355. index.write()
  356. # Verify state is preserved
  357. output = run_git_or_fail(["ls-files"], cwd=repo.path)
  358. self.assertIn(b"file4.txt", output)
  359. def test_index_v4_with_merge_conflicts(self) -> None:
  360. """Test v4 index behavior with merge conflicts and staging."""
  361. require_git_version((2, 20, 0))
  362. repo = self._init_repo_with_manyfiles()
  363. # Create initial commit
  364. with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
  365. f.write("original content\n")
  366. with open(os.path.join(repo.path, "normal.txt"), "w") as f:
  367. f.write("normal file\n")
  368. run_git_or_fail(["add", "."], cwd=repo.path)
  369. run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
  370. # Create branch and modify file
  371. run_git_or_fail(["checkout", "-b", "feature"], cwd=repo.path)
  372. with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
  373. f.write("feature content\n")
  374. run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
  375. run_git_or_fail(["commit", "-m", "feature change"], cwd=repo.path)
  376. # Go back to main and make conflicting change
  377. run_git_or_fail(["checkout", "master"], cwd=repo.path)
  378. with open(os.path.join(repo.path, "conflict.txt"), "w") as f:
  379. f.write("master content\n")
  380. run_git_or_fail(["add", "conflict.txt"], cwd=repo.path)
  381. run_git_or_fail(["commit", "-m", "master change"], cwd=repo.path)
  382. # Try to merge (should create conflicts)
  383. run_git(["merge", "feature"], cwd=repo.path)
  384. # Read the index with conflicts
  385. index_path = os.path.join(repo.path, ".git", "index")
  386. if os.path.exists(index_path):
  387. with open(index_path, "rb") as f:
  388. entries, version, extensions = read_index_dict_with_version(f)
  389. self.assertEqual(version, 4)
  390. # Test dulwich can handle conflicted index
  391. index = Index(index_path)
  392. index.write()
  393. # Verify Git can still read it
  394. output = run_git_or_fail(["status", "--porcelain"], cwd=repo.path)
  395. self.assertIn(b"conflict.txt", output)
  396. def test_index_v4_boundary_filename_lengths(self) -> None:
  397. """Test v4 with boundary conditions for filename lengths."""
  398. require_git_version((2, 20, 0))
  399. repo = self._init_repo_with_manyfiles()
  400. # Test various boundary conditions
  401. boundary_files = [
  402. "", # Empty name (invalid, but test robustness)
  403. "x", # Single char
  404. "xx", # Two chars
  405. "x" * 255, # Max typical filename length
  406. "x" * 4095, # Max path length in many filesystems
  407. "a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z", # Deep nesting
  408. "file_with_" + "very_" * 50 + "long_name.txt", # Very long name
  409. ]
  410. valid_files = []
  411. for filename in boundary_files:
  412. if not filename: # Skip empty filename
  413. continue
  414. try:
  415. filepath = os.path.join(repo.path, filename)
  416. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  417. with open(filepath, "w") as f:
  418. f.write("Content\n")
  419. valid_files.append(filename)
  420. except (OSError, ValueError):
  421. # Skip files that can't be created on this system
  422. continue
  423. if valid_files:
  424. # Add files
  425. run_git_or_fail(["add", "."], cwd=repo.path)
  426. # Test reading
  427. index_path = os.path.join(repo.path, ".git", "index")
  428. with open(index_path, "rb") as f:
  429. entries, version, extensions = read_index_dict_with_version(f)
  430. self.assertEqual(version, 4)
  431. # Test round-trip
  432. index = Index(index_path)
  433. index.write()
  434. def test_index_v4_special_characters_and_encoding(self) -> None:
  435. """Test v4 with special characters and various encodings."""
  436. require_git_version((2, 20, 0))
  437. repo = self._init_repo_with_manyfiles()
  438. # Test files with special characters
  439. special_files = [
  440. "file with spaces.txt",
  441. "file\twith\ttabs.txt",
  442. "file-with-dashes.txt",
  443. "file_with_underscores.txt",
  444. "file.with.dots.txt",
  445. "UPPERCASE.TXT",
  446. "MixedCase.TxT",
  447. "file123numbers.txt",
  448. "file@#$%special.txt",
  449. "café.txt", # Unicode
  450. "файл.txt", # Cyrillic
  451. "文件.txt", # Chinese
  452. "🚀rocket.txt", # Emoji
  453. "file'with'quotes.txt",
  454. 'file"with"doublequotes.txt',
  455. "file[with]brackets.txt",
  456. "file(with)parens.txt",
  457. "file{with}braces.txt",
  458. ]
  459. valid_files = []
  460. for filename in special_files:
  461. try:
  462. filepath = os.path.join(repo.path, filename)
  463. with open(filepath, "w", encoding="utf-8") as f:
  464. f.write(f"Content of {filename}\n")
  465. valid_files.append(filename)
  466. except (OSError, UnicodeError):
  467. # Skip files that can't be created on this system
  468. continue
  469. if valid_files:
  470. # Add files
  471. run_git_or_fail(["add", "."], cwd=repo.path)
  472. # Test reading
  473. index_path = os.path.join(repo.path, ".git", "index")
  474. with open(index_path, "rb") as f:
  475. entries, version, extensions = read_index_dict_with_version(f)
  476. self.assertEqual(version, 4)
  477. self.assertGreater(len(entries), 0)
  478. # Test all valid files are present
  479. for filename in valid_files:
  480. filename_bytes = filename.encode("utf-8")
  481. self.assertIn(filename_bytes, entries)
  482. def test_index_v4_symlinks_and_special_modes(self) -> None:
  483. """Test v4 with symlinks and special file modes."""
  484. require_git_version((2, 20, 0))
  485. repo = self._init_repo_with_manyfiles()
  486. # Create regular file
  487. with open(os.path.join(repo.path, "regular.txt"), "w") as f:
  488. f.write("regular file\n")
  489. # Create executable file
  490. exec_path = os.path.join(repo.path, "executable.sh")
  491. with open(exec_path, "w") as f:
  492. f.write("#!/bin/bash\necho hello\n")
  493. os.chmod(exec_path, 0o755)
  494. # Create symlink (if supported)
  495. try:
  496. os.symlink("regular.txt", os.path.join(repo.path, "symlink.txt"))
  497. has_symlink = True
  498. except (OSError, NotImplementedError):
  499. has_symlink = False
  500. # Add files
  501. run_git_or_fail(["add", "."], cwd=repo.path)
  502. # Test reading
  503. index_path = os.path.join(repo.path, ".git", "index")
  504. with open(index_path, "rb") as f:
  505. entries, version, extensions = read_index_dict_with_version(f)
  506. self.assertEqual(version, 4)
  507. # Verify files with different modes
  508. self.assertIn(b"regular.txt", entries)
  509. self.assertIn(b"executable.sh", entries)
  510. if has_symlink:
  511. self.assertIn(b"symlink.txt", entries)
  512. # Test round-trip preserves modes
  513. index = Index(index_path)
  514. index.write()
  515. # Verify Git can read it
  516. output = run_git_or_fail(["ls-files", "-s"], cwd=repo.path)
  517. self.assertIn(b"regular.txt", output)
  518. self.assertIn(b"executable.sh", output)
  519. def test_index_v4_alternating_compression_patterns(self) -> None:
  520. """Test v4 with files that alternate between compressed/uncompressed."""
  521. require_git_version((2, 20, 0))
  522. repo = self._init_repo_with_manyfiles()
  523. # Create files that should create alternating compression patterns
  524. files = [
  525. # These should be uncompressed (no common prefix)
  526. "a.txt",
  527. "b.txt",
  528. "c.txt",
  529. # These should be compressed (common prefix)
  530. "common/path/file1.txt",
  531. "common/path/file2.txt",
  532. "common/path/file3.txt",
  533. # Back to uncompressed (different pattern)
  534. "different/structure/x.txt",
  535. "another/structure/y.txt",
  536. # More compression opportunities
  537. "src/main/Component1.java",
  538. "src/main/Component2.java",
  539. "src/test/Test1.java",
  540. "src/test/Test2.java",
  541. ]
  542. for filename in files:
  543. filepath = os.path.join(repo.path, filename)
  544. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  545. with open(filepath, "w") as f:
  546. f.write(f"Content of {filename}\n")
  547. # Add files
  548. run_git_or_fail(["add", "."], cwd=repo.path)
  549. # Test reading
  550. index_path = os.path.join(repo.path, ".git", "index")
  551. with open(index_path, "rb") as f:
  552. entries, version, extensions = read_index_dict_with_version(f)
  553. self.assertEqual(version, 4)
  554. self.assertEqual(len(entries), len(files))
  555. # Verify all files are present
  556. for filename in files:
  557. self.assertIn(filename.encode(), entries)
  558. # Test round-trip
  559. index = Index(index_path)
  560. index.write()
  561. def test_index_v4_git_submodules(self) -> None:
  562. """Test v4 index with Git submodules."""
  563. require_git_version((2, 20, 0))
  564. repo = self._init_repo_with_manyfiles()
  565. # Create a submodule directory structure
  566. submodule_dir = os.path.join(repo.path, "submodule")
  567. os.makedirs(submodule_dir)
  568. # Initialize a separate repo for the submodule
  569. run_git_or_fail(["init"], cwd=submodule_dir)
  570. with open(os.path.join(submodule_dir, "sub.txt"), "w") as f:
  571. f.write("submodule content\n")
  572. run_git_or_fail(["add", "sub.txt"], cwd=submodule_dir)
  573. run_git_or_fail(["commit", "-m", "submodule commit"], cwd=submodule_dir)
  574. # Add some regular files to main repo
  575. with open(os.path.join(repo.path, "main.txt"), "w") as f:
  576. f.write("main repo content\n")
  577. run_git_or_fail(["add", "main.txt"], cwd=repo.path)
  578. # Add submodule (this creates a gitlink entry)
  579. run_git_or_fail(["submodule", "add", "./submodule", "submodule"], cwd=repo.path)
  580. # Test reading index with submodule
  581. index_path = os.path.join(repo.path, ".git", "index")
  582. with open(index_path, "rb") as f:
  583. entries, version, extensions = read_index_dict_with_version(f)
  584. self.assertEqual(version, 4)
  585. # Should have main.txt, .gitmodules, and submodule gitlink
  586. self.assertIn(b"main.txt", entries)
  587. self.assertIn(b".gitmodules", entries)
  588. self.assertIn(b"submodule", entries)
  589. # Test round-trip
  590. index = Index(index_path)
  591. index.write()
  592. def test_index_v4_partial_staging(self) -> None:
  593. """Test v4 with partial file staging (git add -p simulation)."""
  594. require_git_version((2, 20, 0))
  595. repo = self._init_repo_with_manyfiles()
  596. # Create initial file
  597. filepath = os.path.join(repo.path, "partial.txt")
  598. with open(filepath, "w") as f:
  599. f.write("line1\nline2\nline3\n")
  600. run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
  601. run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
  602. # Modify the file
  603. with open(filepath, "w") as f:
  604. f.write("line1 modified\nline2\nline3 modified\n")
  605. # Stage only part of the changes (simulate git add -p)
  606. # This creates an interesting index state
  607. run_git_or_fail(["add", "partial.txt"], cwd=repo.path)
  608. # Make more changes
  609. with open(filepath, "w") as f:
  610. f.write("line1 modified\nline2 modified\nline3 modified\n")
  611. # Now we have staged and unstaged changes
  612. # Test reading this complex index state
  613. index_path = os.path.join(repo.path, ".git", "index")
  614. with open(index_path, "rb") as f:
  615. entries, version, extensions = read_index_dict_with_version(f)
  616. self.assertEqual(version, 4)
  617. self.assertIn(b"partial.txt", entries)
  618. # Test round-trip
  619. index = Index(index_path)
  620. index.write()
  621. def test_index_v4_with_gitattributes_and_ignore(self) -> None:
  622. """Test v4 with .gitattributes and .gitignore files."""
  623. require_git_version((2, 20, 0))
  624. repo = self._init_repo_with_manyfiles()
  625. # Create .gitignore
  626. with open(os.path.join(repo.path, ".gitignore"), "w") as f:
  627. f.write("*.tmp\n*.log\nbuild/\n")
  628. # Create .gitattributes
  629. with open(os.path.join(repo.path, ".gitattributes"), "w") as f:
  630. f.write("*.txt text\n*.bin binary\n")
  631. # Create various files
  632. files = [
  633. "regular.txt",
  634. "binary.bin",
  635. "script.sh",
  636. "config.json",
  637. "README.md",
  638. ]
  639. for filename in files:
  640. filepath = os.path.join(repo.path, filename)
  641. with open(filepath, "w") as f:
  642. f.write(f"Content of {filename}\n")
  643. # Create some files that should be ignored
  644. with open(os.path.join(repo.path, "temp.tmp"), "w") as f:
  645. f.write("temporary file\n")
  646. # Add files
  647. run_git_or_fail(["add", "."], cwd=repo.path)
  648. # Test reading
  649. index_path = os.path.join(repo.path, ".git", "index")
  650. with open(index_path, "rb") as f:
  651. entries, version, extensions = read_index_dict_with_version(f)
  652. self.assertEqual(version, 4)
  653. # Should have .gitignore, .gitattributes, and regular files
  654. self.assertIn(b".gitignore", entries)
  655. self.assertIn(b".gitattributes", entries)
  656. for filename in files:
  657. self.assertIn(filename.encode(), entries)
  658. # Should NOT have ignored files
  659. self.assertNotIn(b"temp.tmp", entries)
  660. def test_index_v4_stress_test_many_entries(self) -> None:
  661. """Stress test v4 with many entries in complex directory structure."""
  662. require_git_version((2, 20, 0))
  663. repo = self._init_repo_with_manyfiles()
  664. # Create a complex directory structure with many files
  665. dirs = [
  666. "src/main/java/com/example",
  667. "src/main/resources",
  668. "src/test/java/com/example",
  669. "docs/api",
  670. "docs/user",
  671. "scripts/build",
  672. "config/env",
  673. ]
  674. for dir_path in dirs:
  675. os.makedirs(os.path.join(repo.path, dir_path), exist_ok=True)
  676. # Create many files
  677. files = []
  678. for i in range(200): # Reasonable for CI
  679. if i % 7 == 0:
  680. filename = f"src/main/java/com/example/Service{i}.java"
  681. elif i % 7 == 1:
  682. filename = f"src/test/java/com/example/Test{i}.java"
  683. elif i % 7 == 2:
  684. filename = f"docs/api/page{i}.md"
  685. elif i % 7 == 3:
  686. filename = f"config/env/config{i}.properties"
  687. elif i % 7 == 4:
  688. filename = f"scripts/build/script{i}.sh"
  689. elif i % 7 == 5:
  690. filename = f"src/main/resources/resource{i}.txt"
  691. else:
  692. filename = f"file{i}.txt"
  693. files.append(filename)
  694. filepath = os.path.join(repo.path, filename)
  695. os.makedirs(os.path.dirname(filepath), exist_ok=True)
  696. with open(filepath, "w") as f:
  697. f.write(f"// File {i}\ncontent here\n")
  698. # Add files in batches to avoid command line length limits
  699. batch_size = 50
  700. for i in range(0, len(files), batch_size):
  701. batch = files[i : i + batch_size]
  702. run_git_or_fail(["add", *batch], cwd=repo.path)
  703. # Test reading large index
  704. index_path = os.path.join(repo.path, ".git", "index")
  705. with open(index_path, "rb") as f:
  706. entries, version, extensions = read_index_dict_with_version(f)
  707. self.assertEqual(version, 4)
  708. self.assertEqual(len(entries), len(files))
  709. # Verify some files are present
  710. for i in range(0, len(files), 20): # Check every 20th file
  711. filename = files[i]
  712. self.assertIn(filename.encode(), entries)
  713. def test_index_v4_rename_detection_scenario(self) -> None:
  714. """Test v4 with file renames (complex staging scenario)."""
  715. require_git_version((2, 20, 0))
  716. repo = self._init_repo_with_manyfiles()
  717. # Create initial files
  718. files = ["old1.txt", "old2.txt", "unchanged.txt"]
  719. for filename in files:
  720. filepath = os.path.join(repo.path, filename)
  721. with open(filepath, "w") as f:
  722. f.write(f"Content of {filename}\n")
  723. run_git_or_fail(["add", "."], cwd=repo.path)
  724. run_git_or_fail(["commit", "-m", "initial"], cwd=repo.path)
  725. # Rename files
  726. os.rename(
  727. os.path.join(repo.path, "old1.txt"), os.path.join(repo.path, "new1.txt")
  728. )
  729. os.rename(
  730. os.path.join(repo.path, "old2.txt"), os.path.join(repo.path, "new2.txt")
  731. )
  732. # Stage renames
  733. run_git_or_fail(["add", "-A"], cwd=repo.path)
  734. # Test reading index with renames
  735. index_path = os.path.join(repo.path, ".git", "index")
  736. with open(index_path, "rb") as f:
  737. entries, version, extensions = read_index_dict_with_version(f)
  738. self.assertEqual(version, 4)
  739. # Should have new names, not old names
  740. self.assertIn(b"new1.txt", entries)
  741. self.assertIn(b"new2.txt", entries)
  742. self.assertIn(b"unchanged.txt", entries)
  743. self.assertNotIn(b"old1.txt", entries)
  744. self.assertNotIn(b"old2.txt", entries)