test_check_ignore.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143
  1. # test_check_ignore.py -- Compatibility tests for git check-ignore
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for git check-ignore functionality."""
  22. import os
  23. import tempfile
  24. from dulwich import porcelain
  25. from dulwich.repo import Repo
  26. from .utils import CompatTestCase, run_git_or_fail
  27. class CheckIgnoreCompatTestCase(CompatTestCase):
  28. """Test git check-ignore compatibility between dulwich and git."""
  29. min_git_version = (1, 8, 5) # git check-ignore was added in 1.8.5
  def setUp(self) -> None:
      """Create a scratch directory containing a freshly initialised repository."""
      super().setUp()
      scratch = tempfile.mkdtemp()
      self.test_dir = scratch
      # Registered before the repository cleanup below; assuming the usual
      # unittest last-in-first-out cleanup order, the repository is closed
      # before its directory is removed.
      self.addCleanup(self._cleanup_test_dir)
      repo = Repo.init(scratch)
      self.repo = repo
      self.addCleanup(repo.close)
  def _cleanup_test_dir(self) -> None:
      """Delete the scratch directory created in setUp, including its contents."""
      from shutil import rmtree

      rmtree(self.test_dir)
  def _write_gitignore(self, content: str) -> None:
      """Write ``content`` to the top-level .gitignore of the test directory.

      Any existing .gitignore is replaced.
      """
      gitignore_path = os.path.join(self.test_dir, ".gitignore")
      # Encode explicitly as UTF-8: the patterns may contain non-ASCII
      # characters, and the locale's default encoding is not guaranteed to
      # be able to represent them (or to agree with how git's output is
      # decoded elsewhere in this test case).
      with open(gitignore_path, "w", encoding="utf-8") as f:
          f.write(content)
  def _create_file(self, path: str, content: str = "") -> None:
      """Create (or overwrite) a file below the test directory.

      Args:
        path: Location of the file, relative to the test directory.
        content: Text to store in the file; empty by default.

      Missing parent directories are created. An existing file at ``path``
      is truncated and rewritten.
      """
      full_path = os.path.join(self.test_dir, path)
      os.makedirs(os.path.dirname(full_path), exist_ok=True)
      # Explicit UTF-8 keeps the result independent of the locale and
      # consistent with how .gitignore files are written by this test case.
      with open(full_path, "w", encoding="utf-8") as f:
          f.write(content)
  def _create_dir(self, path: str) -> None:
      """Ensure the directory ``path`` (relative to the test directory) exists."""
      os.makedirs(os.path.join(self.test_dir, path), exist_ok=True)
  def _git_check_ignore(self, paths: list[str]) -> set[str]:
      """Return the entries of ``paths`` that ``git check-ignore`` reports."""
      command = ["-c", "core.quotePath=false", "check-ignore", *paths]
      try:
          raw_output = run_git_or_fail(command, cwd=self.test_dir)
      except AssertionError:
          # git check-ignore exits non-zero when none of the paths are
          # ignored; that is reported here as an empty result.
          # NOTE(review): this presumably also hides genuine git failures,
          # since they would surface as the same exception - confirm.
          return set()
      # The output carries one path per line.
      reported = set()
      for entry in raw_output.strip().split(b"\n"):
          if entry:
              reported.add(entry.decode("utf-8"))
      return reported
  def _dulwich_check_ignore(self, paths: list[str]) -> set[str]:
      """Run dulwich's check_ignore and return the ignored paths.

      Args:
        paths: Paths relative to the test directory, in the form they are
          also handed to git (a trailing slash marks a directory).

      Returns:
        The ignored paths, expressed where possible in the same form in
        which they were requested so they can be compared with git's output.
      """
      # dulwich is queried with absolute paths inside the test directory.
      abs_paths = [os.path.join(self.test_dir, path) for path in paths]
      ignored = porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)

      # Loop-invariant: the test directory with forward slashes and a
      # trailing separator, used to recognise absolute results.
      root_prefix = self.test_dir.replace("\\", "/") + "/"
      requested = set(paths)

      result = set()
      for path in ignored:
          # Normalize to forward slashes for comparison
          normalized_path = path.replace("\\", "/")
          if not normalized_path.startswith(root_prefix):
              # Already relative; only the separators needed normalizing.
              result.add(normalized_path)
              continue

          rel_path = normalized_path[len(root_prefix) :]
          if rel_path in requested:
              # Exact match: keeps "data" and "data/" distinct when both
              # forms were requested.
              result.add(rel_path)
              continue

          # Otherwise fall back to the first requested path that differs
          # only by trailing slashes, so the caller's spelling is preserved.
          bare = rel_path.rstrip("/")
          orig_path = next(
              (candidate for candidate in paths if candidate.rstrip("/") == bare),
              None,
          )
          result.add(orig_path if orig_path else rel_path)
      return result
  def _assert_ignore_match(self, paths: list[str]) -> None:
      """Fail unless git and dulwich agree on which of ``paths`` are ignored."""
      git_ignored = self._git_check_ignore(paths)
      dulwich_ignored = self._dulwich_check_ignore(paths)
      message = (
          f"Mismatch for paths {paths}: git={git_ignored}, dulwich={dulwich_ignored}"
      )
      self.assertEqual(git_ignored, dulwich_ignored, message)
  def test_issue_1203_directory_negation(self) -> None:
      """Issue #1203: ``data/**`` combined with the negation ``!data/*/``."""
      ignore_rules = "data/**\n!data/*/\n"
      self._write_gitignore(ignore_rules)
      self._create_file("data/test.dvc", "content")
      self._create_dir("data/subdir")

      # dulwich's own test for issue #1203 expects data/test.dvc to be
      # ignored while data/ and data/subdir/ are not. git check-ignore might
      # answer differently for the directories, so only the core case of the
      # issue - the file - is compared here.
      checked = ["data/test.dvc"]
      self._assert_ignore_match(checked)
  def test_basic_patterns(self) -> None:
      """Compare git and dulwich on plain ``*.ext`` patterns."""
      self._write_gitignore("*.tmp\n*.log\n")
      paths = ["test.tmp", "debug.log", "readme.txt"]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_directory_patterns(self) -> None:
      """Compare git and dulwich on directory-only patterns (trailing slash)."""
      self._write_gitignore("build/\nnode_modules/\n")
      paths = ["build/", "node_modules/", "build.txt"]
      for entry in paths:
          # A trailing slash marks an entry that must exist as a directory.
          if entry.endswith("/"):
              self._create_dir(entry.rstrip("/"))
          else:
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_issue_972_directory_pattern_with_slash(self) -> None:
      """Issue #972: the pattern ``/data/`` should cover both 'data' and 'data/'."""
      self._write_gitignore("/data/\n")
      self._create_dir("data")
      self._create_file("data/file.txt")

      # The directory is queried with and without its trailing slash.
      self._assert_ignore_match(["data", "data/", "data/file.txt"])
  def test_wildcard_patterns(self) -> None:
      """Compare git and dulwich on globs with a character class."""
      self._write_gitignore("*.py[cod]\n__pycache__/\n*.so\n")
      paths = ["test.pyc", "test.pyo", "test.pyd", "test.py", "__pycache__/"]
      for entry in paths:
          # A trailing slash marks an entry that must exist as a directory.
          if entry.endswith("/"):
              self._create_dir(entry.rstrip("/"))
          else:
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_negation_patterns(self) -> None:
      """Compare git and dulwich when a ``!`` pattern re-includes a file."""
      self._write_gitignore("*.log\n!important.log\n")
      paths = ["debug.log", "error.log", "important.log"]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_double_asterisk_patterns(self) -> None:
      """Compare git and dulwich on leading and embedded ``**`` patterns."""
      self._write_gitignore("**/temp\nvendor/**/cache\n")
      paths = [
          "temp",
          "src/temp",
          "deep/nested/temp",
          "vendor/lib/cache",
          "vendor/gem/deep/cache",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_subdirectory_gitignore(self) -> None:
      """Compare git and dulwich when a subdirectory has its own .gitignore."""
      # Rules at the repository root.
      self._write_gitignore("*.tmp\n")

      # Additional rules that live inside subdir/.
      self._create_dir("subdir")
      nested_rules_path = os.path.join(self.test_dir, "subdir", ".gitignore")
      with open(nested_rules_path, "w") as handle:
          handle.write("*.local\n!important.local\n")

      paths = [
          "test.tmp",
          "subdir/test.tmp",
          "subdir/config.local",
          "subdir/important.local",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_complex_directory_negation(self) -> None:
      """Compare git and dulwich on a negated subdirectory of an ignored one."""
      self._write_gitignore("dist/\n!dist/assets/\ndist/assets/*.tmp\n")
      self._create_dir("dist/assets")
      paths = [
          "dist/",
          "dist/main.js",
          "dist/assets/",
          "dist/assets/style.css",
          "dist/assets/temp.tmp",
      ]
      # Only the non-directory entries need creating; dist/assets exists
      # already and dist/ comes with it.
      for entry in paths:
          if not entry.endswith("/"):
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_leading_slash_patterns(self) -> None:
      """Compare git and dulwich on patterns anchored by a slash.

      Per the gitignore pattern rules, a separator at the beginning or in
      the middle of a pattern makes it relative to the directory of the
      .gitignore file, so both patterns here are anchored at the root.
      """
      self._write_gitignore("/root-only.txt\nsubdir/specific.txt\n")
      paths = [
          "root-only.txt",
          # Not at the root, so /root-only.txt should not apply.
          "deep/root-only.txt",
          "subdir/specific.txt",
          # The pattern contains a slash and is therefore anchored as well;
          # this nested copy should not be matched by it either.
          "deep/subdir/specific.txt",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_empty_directory_edge_case(self) -> None:
      """Compare git and dulwich on a negation inside an ignored directory."""
      self._write_gitignore("empty/\n!empty/keep\n")
      self._create_dir("empty")
      self._create_file("empty/keep", "keep this")
      self._assert_ignore_match(["empty/", "empty/keep"])
  def test_nested_wildcard_negation(self) -> None:
      """Compare git and dulwich on ``**`` patterns with stacked negations."""
      self._write_gitignore("docs/**\n!docs/*/\n!docs/**/*.md\n")
      paths = [
          "docs/readme.txt",  # intended: ignored
          "docs/guide.md",  # intended: not ignored
          "docs/api/",  # intended: not ignored
          "docs/api/index.md",  # intended: not ignored
          "docs/api/temp.txt",  # intended: ignored
      ]
      for entry in paths:
          # A trailing slash marks an entry that must exist as a directory.
          if entry.endswith("/"):
              self._create_dir(entry.rstrip("/"))
          else:
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_case_sensitivity(self) -> None:
      """Compare git and dulwich on names differing only in letter case."""
      self._write_gitignore("*.TMP\nREADME\n")
      # Each name appears once in lowercase and once in uppercase.
      paths = ["test.tmp", "test.TMP", "readme", "README"]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_unicode_filenames(self) -> None:
      """Compare git and dulwich on non-ASCII patterns and file names."""
      try:
          self._write_gitignore("тест*\n*.测试\n")
          paths = ["тест.txt", "файл.测试", "normal.txt"]
          for name in paths:
              self._create_file(name)
          self._assert_ignore_match(paths)
      except (UnicodeEncodeError, OSError):
          # Treated as the filesystem being unable to hold these names.
          self.skipTest("Filesystem doesn't support unicode filenames")
  def test_double_asterisk_edge_cases(self) -> None:
      """Compare git and dulwich on ``**`` at several depths."""
      self._write_gitignore("**/afile\ndir1/**/b\n**/*.tmp\n")
      paths = [
          # **/afile
          "afile",  # root level
          "dir/afile",  # one level deep
          "deep/nested/afile",  # several levels deep
          # dir1/**/b
          "dir1/b",  # direct child
          "dir1/subdir/b",  # one level below dir1/
          "dir1/deep/nested/b",  # several levels below dir1/
          "other/dir1/b",  # should not match: dir1/ is not at the start
          # **/*.tmp
          "test.tmp",  # root level
          "dir/test.tmp",  # one level deep
          "deep/nested/test.tmp",  # several levels deep
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_double_asterisk_with_negation(self) -> None:
      """Compare git and dulwich on ``**`` patterns mixed with negation."""
      rules = "**/build/**\n!**/build/assets/**\n**/build/assets/*.tmp\n"
      self._write_gitignore(rules)

      # The same build/ layout is repeated at three different depths.
      paths = [
          "build/main.js",
          "build/assets/style.css",
          "build/assets/temp.tmp",
          "src/build/app.js",
          "src/build/assets/logo.png",
          "src/build/assets/cache.tmp",
          "deep/nested/build/lib.js",
          "deep/nested/build/assets/icon.svg",
          "deep/nested/build/assets/debug.tmp",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_double_asterisk_middle_patterns(self) -> None:
      """Compare git and dulwich on ``**`` in the middle of a pattern."""
      self._write_gitignore("src/**/test/**\nlib/**/node_modules\n**/cache/**/temp\n")
      paths = [
          # src/**/test/**
          "src/test/unit.js",
          "src/components/test/unit.js",
          "src/deep/nested/test/integration.js",
          "other/src/test/unit.js",  # should not match
          # lib/**/node_modules
          "lib/node_modules/package.json",
          "lib/vendor/node_modules/package.json",
          "lib/deep/path/node_modules/package.json",
          "other/lib/node_modules/package.json",  # should not match
          # **/cache/**/temp
          "cache/temp",
          "cache/data/temp",
          "app/cache/temp",
          "app/cache/nested/temp",
          "deep/cache/very/nested/temp",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_multiple_double_asterisks(self) -> None:
      """Compare git and dulwich on patterns holding several ``**`` segments."""
      self._write_gitignore("**/**/test/**/*.js\n**/src/**/build/**/dist\n")
      paths = [
          # **/**/test/**/*.js
          "test/file.js",
          "a/test/file.js",
          "a/b/test/file.js",
          "test/c/file.js",
          "test/c/d/file.js",
          "a/b/test/c/d/file.js",
          "a/b/test/c/d/file.txt",  # different extension
          # **/src/**/build/**/dist
          "src/build/dist",
          "app/src/build/dist",
          "src/lib/build/dist",
          "src/build/prod/dist",
          "app/src/lib/build/prod/dist",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_double_asterisk_directory_traversal(self) -> None:
      """Compare git and dulwich on ``**`` patterns for dot-files and .git."""
      self._write_gitignore("**/.*\n!**/.gitkeep\n**/.git/**\n")
      paths = [
          # **/.* - hidden files at any level
          ".hidden",
          "dir/.hidden",
          "deep/nested/.hidden",
          ".gitkeep",  # should be negated
          "dir/.gitkeep",  # should be negated
          # **/.git/**
          # NOTE(review): the first two entries are written into the test
          # repository's own .git directory (config is overwritten with an
          # empty file) - confirm this is intended.
          ".git/config",
          ".git/objects/abc123",
          "submodule/.git/config",
          "deep/submodule/.git/refs/heads/master",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_double_asterisk_empty_segments(self) -> None:
      """Compare git and dulwich on ``**`` patterns with doubled separators."""
      # The doubled slashes probe how empty path segments are handled.
      self._write_gitignore("a/**//b\n**//**/test\nc/**/**/\n")
      paths = [
          "a/b",
          "a/x/b",
          "a/x/y/b",
          "test",
          "dir/test",
          "dir/nested/test",
          "c/file",
          "c/dir/file",
          "c/deep/nested/file",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_double_asterisk_root_patterns(self) -> None:
      """Compare git and dulwich on root-anchored ``**`` with negations."""
      self._write_gitignore("/**\n!/**/\n!/**/*.md\n/**/*.tmp\n")
      # Pattern explanation:
      # /**        - ignore everything at any depth
      # !/**/      - but do not ignore directories
      # !/**/*.md  - and do not ignore .md files
      # /**/*.tmp  - but do ignore files ending in .tmp; this does not affect
      #              backup.tmp.md, whose name ends in .md rather than .tmp
      paths = [
          "file.txt",
          "readme.md",
          "temp.tmp",
          "backup.tmp.md",  # edge case: contains .tmp but ends in .md
          "dir/",
          "dir/file.txt",
          "dir/guide.md",
          "dir/cache.tmp",
          "deep/nested/doc.md",
          "deep/nested/log.tmp",
      ]
      for entry in paths:
          # A trailing slash marks an entry that must exist as a directory.
          if entry.endswith("/"):
              self._create_dir(entry.rstrip("/"))
          else:
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_single_asterisk_patterns(self) -> None:
      """Compare git and dulwich on single ``*`` in various positions."""
      self._write_gitignore("src/*/build\n*.log\ntest*/\n*_backup\nlib/*\n*/temp/*\n")
      paths = [
          # src/*/build
          "src/app/build",
          "src/lib/build",
          "src/nested/deep/build",  # not matched: * spans one level only
          "other/src/app/build",  # not matched: src/ is not at the start
          # *.log
          "app.log",
          "error.log",
          "logs/debug.log",  # matched: no slash, so any depth
          "app.log.old",  # not matched: does not end in .log
          # test*/ - directories whose name starts with "test"
          "test/",
          "testing/",
          "test_data/",
          "test_file",  # not matched: a file, not a directory
          # *_backup
          "db_backup",
          "config_backup",
          "old_backup_file",  # not matched: _backup is not at the end
          # lib/*
          "lib/module.js",
          "lib/utils.py",
          # lib/* does not match this path itself (one level only), but it
          # does match the parent directory lib/nested, so the file can
          # still be reported as ignored.
          "lib/nested/deep.js",
          # */temp/*
          "app/temp/cache",
          "src/temp/logs",
          "deep/nested/temp/file",  # not matched: temp is nested too deep
          "temp/file",  # not matched: temp is at the root
      ]
      for entry in paths:
          # A trailing slash marks an entry that must exist as a directory.
          if entry.endswith("/"):
              self._create_dir(entry.rstrip("/"))
          else:
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_single_asterisk_edge_cases(self) -> None:
      """Compare git and dulwich on a catch-all ``*`` refined by later rules."""
      self._write_gitignore("*\n!*/\n!*.txt\n*.*.*\n")
      # Pattern explanation:
      # *      - ignore everything
      # !*/    - but do not ignore directories
      # !*.txt - and do not ignore .txt files
      # *.*.*  - but ignore names containing more than one dot
      paths = [
          "file",
          "readme.txt",
          "config.json",
          "archive.tar.gz",  # multiple dots
          "backup.sql.old",  # multiple dots
          "folder/",
          "folder/nested.txt",
          "folder/data.json",
      ]
      for entry in paths:
          # A trailing slash marks an entry that must exist as a directory.
          if entry.endswith("/"):
              self._create_dir(entry.rstrip("/"))
          else:
              self._create_file(entry)
      self._assert_ignore_match(paths)
  def test_single_asterisk_with_character_classes(self) -> None:
      """Compare git and dulwich on ``*`` combined with character classes."""
      self._write_gitignore("*.[oa]\n*~\n.*\n!.gitignore\n[Tt]emp*\n")
      paths = [
          # *.[oa] - object and archive files
          "main.o",
          "lib.a",
          "app.so",  # should not match
          "test.c",  # should not match
          # *~ - backup files
          "file~",
          "config~",
          "~file",  # should not match: ~ is at the start
          # .* with the !.gitignore negation
          ".hidden",
          ".secret",
          ".gitignore",  # should be negated
          # [Tt]emp* - case variations
          "temp_file",
          "Temp_data",
          "TEMP_LOG",  # should not match: second letter is not "e"
          "temporary",
      ]
      for name in paths:
          # .gitignore already exists with the patterns under test. Creating
          # it again would truncate it to an empty file, and the comparison
          # below would then run against no patterns at all.
          if name != ".gitignore":
              self._create_file(name)
      self._assert_ignore_match(paths)
  def test_mixed_single_double_asterisk_patterns(self) -> None:
      """Compare git and dulwich on patterns mixing ``*`` and ``**``."""
      self._write_gitignore(
          "src/**/test/*.js\n**/build/*\n*/cache/**\nlib/*/vendor/**/*.min.*\n"
      )
      paths = [
          # src/**/test/*.js - ** in the middle, * at the end
          "src/test/unit.js",
          "src/components/test/spec.js",
          "src/deep/nested/test/integration.js",
          "src/test/nested/unit.js",  # not matched: nested below test/
          "src/components/test/unit.ts",  # not matched: wrong extension
          # **/build/* - ** at the start, * at the end
          "build/app.js",
          "src/build/main.js",
          "deep/nested/build/lib.js",
          # **/build/* does not match this path itself, but it does match
          # the parent directory build/dist, so the file can still be
          # reported as ignored.
          "build/dist/app.js",
          # */cache/** - * at the start, ** at the end
          "app/cache/temp",
          "src/cache/data/file",
          "lib/cache/deep/nested/item",
          "nested/deep/cache/file",  # not matched: cache not at second level
          "cache/file",  # not matched: cache is at the root
          # lib/*/vendor/**/*.min.* - complex mixed pattern
          "lib/app/vendor/jquery.min.js",
          "lib/ui/vendor/bootstrap.min.css",
          "lib/core/vendor/deep/nested/lib.min.map",
          "lib/app/vendor/jquery.js",  # not matched: no .min. part
          "lib/nested/deep/vendor/lib.min.js",  # not matched: too deep before vendor
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_asterisk_pattern_overlaps(self) -> None:
      """Compare git and dulwich on overlapping ``*``/``**`` rules with negations."""
      self._write_gitignore(
          "**/*.tmp\n!src/**/*.tmp\nsrc/*/cache/*.tmp\n**/test/*\n!**/test/*.spec.*\n"
      )
      # Pattern explanation:
      # **/*.tmp           - ignore .tmp files anywhere
      # !src/**/*.tmp      - but not the ones under src/
      # src/*/cache/*.tmp  - except those in src/*/cache/, which are ignored again
      # **/test/*          - ignore everything directly inside a test directory
      # !**/test/*.spec.*  - but keep spec files inside test directories
      paths = [
          # .tmp files and the src/ negation
          "temp.tmp",  # intended: ignored
          "build/cache.tmp",  # intended: ignored
          "src/app.tmp",  # intended: not ignored (src negation)
          "src/lib/utils.tmp",  # intended: not ignored (src negation)
          "src/app/cache/data.tmp",  # intended: ignored (cache override)
          "src/lib/cache/temp.tmp",  # intended: ignored (cache override)
          # test directories and the spec negation
          "test/unit.js",  # intended: ignored
          "src/test/helper.js",  # intended: ignored
          "test/app.spec.js",  # intended: not ignored (spec negation)
          "src/test/lib.spec.ts",  # intended: not ignored (spec negation)
          # Not a direct child of test/, so **/test/* does not match the
          # file itself; its parent directory test/nested is matched,
          # though, so the file can still be reported as ignored.
          "test/nested/file.js",
      ]
      for name in paths:
          self._create_file(name)
      self._assert_ignore_match(paths)
  def test_asterisk_boundary_conditions(self) -> None:
      """Test boundary conditions between single and double asterisk patterns.

      Four rules are written to .gitignore: ``a/**/b/*``, ``c/**/**/d``,
      ``*/e/**/*`` and ``f/*/g/**``. Each path below is created as a file and
      the full list is then passed to ``_assert_ignore_match``; the notes
      describe how each path relates to the rule it targets.
      """
      self._write_gitignore(
          "a/**/b/*\n"
          "c/**/**/d\n"
          "*/e/**/*\n"
          "f/*/g/**\n"
      )

      paths = [
          # a/**/b/* : ** in the middle, * at the end
          "a/b/file",  # nothing between a and b
          "a/x/b/file",  # one level between a and b
          "a/x/y/b/file",  # several levels between a and b
          "a/b/nested/file",  # extra level after b
          # c/**/**/d : two consecutive ** segments
          "c/d",  # shortest possible path
          "c/x/d",  # one level before d
          "c/x/y/d",  # two levels before d
          "c/x/y/z/d",  # three levels before d
          # */e/**/* : * at the start, ** in the middle, * at the end
          "a/e/file",  # shortest possible path
          "x/e/nested/file",  # one level after e
          "y/e/deep/nested/file",  # several levels after e
          "nested/path/e/file",  # two levels before e
          # f/*/g/** : * in the middle, ** at the end
          "f/x/g/file",  # shortest possible path
          "f/y/g/nested/file",  # one level after g
          "f/z/g/deep/nested/file",  # several levels after g
          "f/nested/path/g/file",  # two levels between f and g
      ]
      for path in paths:
          self._create_file(path)

      self._assert_ignore_match(paths)
  def test_asterisk_edge_case_combinations(self) -> None:
      """Test really tricky edge cases with asterisk combinations.

      The .gitignore holds ``***``, ``**/*``, ``*/**``, ``*/*/``, ``**/*/*``
      and ``*/*/**``. Entries in the list below that end with ``/`` are
      created as directories (without the trailing slash); all others are
      created as files. The list, trailing slashes included, is then passed
      to ``_assert_ignore_match``.
      """
      self._write_gitignore(
          "***\n"
          "**/*\n"
          "*/**\n"
          "*/*/\n"
          "**/*/*\n"
          "*/*/**\n"
      )

      paths = [
          # aimed at ***
          "file1",
          "dir/file2",
          "deep/nested/file3",
          # aimed at **/*
          "path1/item1",
          "path2/sub/item2",
          # aimed at */**
          "single/file4",
          "single/nested/deep",
          # aimed at */*/ (directory-only rule)
          "level1/level2/",
          "dir1/dir2/",
          "path3/sub1/sub2/",  # three levels deep rather than two
          # aimed at **/*/*
          "test1/test2/test3",
          "deep/nested/item3/item4",
          "simple/item",  # only two path segments in total
          # aimed at */*/**
          "part1/part2/anything",
          "seg1/seg2/deep/nested",
      ]
      for path in paths:
          if path.endswith("/"):
              # Directory entry: create it without the trailing slash.
              self._create_dir(path[:-1])
          else:
              self._create_file(path)

      self._assert_ignore_match(paths)
  def test_asterisk_consecutive_patterns(self) -> None:
      """Test patterns with consecutive asterisks and unusual placement.

      Rules written to .gitignore: ``a*/b*``, ``*x*y*``, ``**z**``,
      ``**/.*/**`` and ``*.*./*``. Each path below is created as a file and
      the whole list is passed to ``_assert_ignore_match``; the notes
      describe the shape of each name relative to the rule it targets.
      """
      self._write_gitignore(
          "a*/b*\n"
          "*x*y*\n"
          "**z**\n"
          "**/.*/**\n"
          "*.*./*\n"
      )

      paths = [
          # a*/b*
          "a/b",  # both segments bare
          "app/build",  # both segments have suffixes
          "api/backup",  # both segments have suffixes
          "a/build",  # bare a, suffixed b
          "app/b",  # suffixed a, bare b
          "x/a/b",  # a is not the first segment
          # *x*y*
          "xy",  # just x followed by y
          "axby",  # x and y separated, with a prefix
          "prefixsuffyend",  # x and y surrounded by other text
          "xyz",  # x, y, then a trailing character
          "axy",  # x and y adjacent, with a prefix
          # **z**
          "z",  # bare z
          "az",  # z at the end
          "za",  # z at the start
          "aza",  # z in the middle
          "dir/z",  # z one level down
          "deep/nested/prefix_z_suffix",  # z inside a deeper name
          # **/.*/**
          "dir/.hidden/file",  # file inside a dot-directory
          "deep/nested/.secret/data",  # dot-directory at depth
          ".visible/file",  # dot-directory at the root
          "other/.config",  # dot-name with nothing below it
          # *.*./*
          "app.min.js/file",  # two dots in the directory name
          "lib.bundle.css/asset",  # two dots in the directory name
          "simple.js",  # single dot, no directory component
          "no.dots.here",  # two dots, no directory component
      ]
      for path in paths:
          self._create_file(path)

      self._assert_ignore_match(paths)
  def test_asterisk_escaping_and_special_chars(self) -> None:
      """Test asterisk patterns with special characters and potential escaping.

      Names that contain a literal asterisk are only created and checked
      when not running on Windows, where they are invalid filenames. All
      selected paths are created as files and passed to
      ``_assert_ignore_match``.
      """
      import sys

      # Rules, one per line:
      #   \*literal    - escaped (literal) asterisk at the start
      #   **/*.\*      - literal ".*" extension at any depth
      #   [*]bracket   - asterisk inside a bracket expression
      #   *\[escape\]  - escaped (literal) square brackets
      #   *.{tmp,log}  - brace syntax (may not be expanded in gitignore)
      self._write_gitignore(
          "\\*literal\n"
          "**/*.\\*\n"
          "[*]bracket\n"
          "*\\[escape\\]\n"
          "*.{tmp,log}\n"
      )

      # Names that are valid on every platform.
      paths = [
          "xliteral",  # no asterisk in the name
          "file.txt",  # ordinary extension
          "xbracket",  # ordinary character where the bracket rule has "*"
          "abracket",  # ordinary character where the bracket rule has "*"
          "test[escape]",  # literal brackets
          "prefix[escape]",  # literal brackets with another prefix
          "test[other]",  # different bracket contents
          "file.{tmp,log}",  # literal braces
          "test.tmp",  # would only match if braces were expanded
          "test.log",  # would only match if braces were expanded
          "test.{other}",  # different brace contents
      ]
      # Names with a literal asterisk, skipped on Windows.
      if sys.platform != "win32":
          paths += [
              "*literal",  # asterisk at the start
              "prefix*literal",  # asterisk in the middle
              "file.*",  # literal ".*" extension
              "dir/test.*",  # literal ".*" extension one level down
              "*bracket",  # asterisk where the bracket rule has "[*]"
          ]

      for path in paths:
          self._create_file(path)

      self._assert_ignore_match(paths)
  def test_quote_path_true_unicode_filenames(self) -> None:
      """Compare quoted check-ignore output of git and dulwich for unicode names.

      Writes two unicode rules, creates three files, and asserts that
      ``_git_check_ignore_quoted`` and ``_dulwich_check_ignore_quoted``
      return the same set. The test is skipped if any step raises
      ``UnicodeEncodeError`` or ``OSError``.
      """
      paths = ["тест.txt", "файл.测试", "normal.txt"]
      try:
          self._write_gitignore("тест*\n*.测试\n")
          for name in paths:
              self._create_file(name)

          # dulwich with quote_path=True should reproduce git's quoted output
          git_ignored = self._git_check_ignore_quoted(paths)
          dulwich_ignored = self._dulwich_check_ignore_quoted(paths)
          self.assertEqual(
              git_ignored,
              dulwich_ignored,
              msg=f"Mismatch for quoted paths {paths}: git={git_ignored}, dulwich={dulwich_ignored}",
          )
      except (UnicodeEncodeError, OSError):
          # Treated as "this filesystem cannot hold unicode filenames"
          self.skipTest("Filesystem doesn't support unicode filenames")
  def test_quote_path_consistency(self) -> None:
      """Check dulwich's quoted and unquoted check-ignore results against each other.

      Three unicode rules are written and four files created. The quoted and
      unquoted result sets must have the same size and must equal the
      hard-coded expectations below. The test is skipped if any step raises
      ``UnicodeEncodeError`` or ``OSError``.
      """
      paths = ["тест.txt", "файл.测试", "normal.txt", "mixed_тест.log"]
      # Double-quoted, octal-escaped spellings expected with quote_path=True
      expected_quoted = {
          '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"',
          '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"',
          '"mixed_\\321\\202\\320\\265\\321\\201\\321\\202.log"',
      }
      # Plain spellings expected without quoting
      expected_unquoted = {"тест.txt", "файл.测试", "mixed_тест.log"}

      try:
          self._write_gitignore("тест*\n*.测试\nmixed_тест*\n")
          for name in paths:
              self._create_file(name)

          quoted_ignored = self._dulwich_check_ignore_quoted(paths)
          unquoted_ignored = self._dulwich_check_ignore(paths)

          # Quoting must not change how many paths are reported
          self.assertEqual(
              len(quoted_ignored),
              len(unquoted_ignored),
              "Quote path setting should not change which files are ignored",
          )
          self.assertEqual(quoted_ignored, expected_quoted)
          self.assertEqual(unquoted_ignored, expected_unquoted)
      except (UnicodeEncodeError, OSError):
          # Treated as "this filesystem cannot hold unicode filenames"
          self.skipTest("Filesystem doesn't support unicode filenames")
  def _git_check_ignore_quoted(self, paths: list[str]) -> set[str]:
      """Run ``git check-ignore`` with default quoting on *paths*.

      Args:
        paths: Paths to check, passed to git as given, with the test
          directory as working directory.

      Returns:
        The non-empty output lines decoded as UTF-8, or an empty set when
        the git invocation raises ``AssertionError``.
      """
      try:
          # No quoting options are passed, so git's defaults apply
          # (core.quotePath=true by default).
          raw_output = run_git_or_fail(
              ["check-ignore", *paths],
              cwd=self.test_dir,
          )
      except AssertionError:
          # git check-ignore returns non-zero when no paths are ignored
          return set()

      # One reported path per line; drop empty lines.
      return {
          line.decode("utf-8") for line in raw_output.strip().split(b"\n") if line
      }
  def _dulwich_check_ignore_quoted(self, paths: list[str]) -> set[str]:
      """Run dulwich ``check_ignore`` with ``quote_path=True`` on *paths*.

      Each path is joined onto the test directory before being passed to
      ``porcelain.check_ignore``.

      Args:
        paths: Paths to check, relative to the test directory.

      Returns:
        The set of reported paths. A result that starts with
        ``<test_dir>/`` has that prefix removed and is replaced by the first
        requested path that equals it when trailing slashes are ignored
        (falling back to the stripped result); any other result is returned
        exactly as reported.
      """
      abs_paths = [os.path.join(self.test_dir, path) for path in paths]
      ignored = set(porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True))

      # Lookup from slash-normalised path to the spelling that was requested.
      # setdefault keeps the first occurrence, matching a first-match scan.
      requested_by_key: dict[str, str] = {}
      for requested_path in paths:
          requested_by_key.setdefault(requested_path.rstrip("/"), requested_path)

      prefix = self.test_dir + "/"
      result: set[str] = set()
      for path in ignored:
          if not path.startswith(prefix):
              # Not reported below the test directory: keep it verbatim.
              result.add(path)
              continue
          rel_path = path[len(prefix) :]
          # Prefer the requested spelling (e.g. with its trailing slash).
          result.add(requested_by_key.get(rel_path.rstrip("/")) or rel_path)
      return result