test_ignore.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. # test_ignore.py -- Tests for ignore files.
  2. # Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for ignore files."""
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from io import BytesIO
  27. from dulwich.ignore import (
  28. IgnoreFilter,
  29. IgnoreFilterManager,
  30. IgnoreFilterStack,
  31. Pattern,
  32. match_pattern,
  33. read_ignore_patterns,
  34. translate,
  35. )
  36. from dulwich.porcelain import _quote_path
  37. from dulwich.repo import Repo
  38. from . import TestCase
# (path, pattern) pairs for which MatchPatternTests.test_matches expects
# match_pattern(path, pattern) to be true.
POSITIVE_MATCH_TESTS = [
    (b"foo.c", b"*.c"),
    (b".c", b"*.c"),
    (b"foo/foo.c", b"*.c"),
    (b"foo/foo.c", b"foo.c"),
    (b"foo.c", b"/*.c"),
    (b"foo.c", b"/foo.c"),
    (b"foo.c", b"foo.c"),
    (b"foo.c", b"foo.[ch]"),
    # Patterns containing "**".
    (b"foo/bar/bla.c", b"foo/**"),
    (b"foo/bar/bla/blie.c", b"foo/**/blie.c"),
    (b"foo/bar/bla.c", b"**/bla.c"),
    (b"bla.c", b"**/bla.c"),
    (b"foo/bar", b"foo/**/bar"),
    (b"foo/bla/bar", b"foo/**/bar"),
    # Paths written with a trailing slash.
    (b"foo/bar/", b"bar/"),
    (b"foo/bar/", b"bar"),
    (b"foo/bar/something", b"foo/bar/*"),
]
# (path, pattern) pairs for which MatchPatternTests.test_no_matches expects
# match_pattern(path, pattern) to be false.
NEGATIVE_MATCH_TESTS = [
    (b"foo.c", b"foo.[dh]"),
    # Patterns with a leading slash applied to paths inside "foo/".
    (b"foo/foo.c", b"/foo.c"),
    (b"foo/foo.c", b"/*.c"),
    (b"foo/bar/", b"/bar/"),
    (b"foo/bar/", b"foo/bar/*"),
    # "?" is not expected to match the "/" separator.
    (b"foo/bar", b"foo?bar"),
]
  66. TRANSLATE_TESTS = [
  67. (b"*.c", b"(?ms)(.*/)?[^/]*\\.c/?\\Z"),
  68. (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
  69. (b"/*.c", b"(?ms)[^/]*\\.c/?\\Z"),
  70. (b"/foo.c", b"(?ms)foo\\.c/?\\Z"),
  71. (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
  72. (b"foo.[ch]", b"(?ms)(.*/)?foo\\.[ch]/?\\Z"),
  73. (b"bar/", b"(?ms)(.*/)?bar\\/\\Z"),
  74. (b"foo/**", b"(?ms)foo/.*/?\\Z"),
  75. (b"foo/**/blie.c", b"(?ms)foo/(?:[^/]+/)*blie\\.c/?\\Z"),
  76. (b"**/bla.c", b"(?ms)(.*/)?bla\\.c/?\\Z"),
  77. (b"foo/**/bar", b"(?ms)foo/(?:[^/]+/)*bar/?\\Z"),
  78. (b"foo/bar/*", b"(?ms)foo\\/bar\\/[^/]+/?\\Z"),
  79. (b"/foo\\[bar\\]", b"(?ms)foo\\[bar\\]/?\\Z"),
  80. (b"/foo[bar]", b"(?ms)foo[bar]/?\\Z"),
  81. (b"/foo[0-9]", b"(?ms)foo[0-9]/?\\Z"),
  82. ]
  83. class TranslateTests(TestCase):
  84. def test_translate(self) -> None:
  85. for pattern, regex in TRANSLATE_TESTS:
  86. if re.escape(b"/") == b"/":
  87. # Slash is no longer escaped in Python3.7, so undo the escaping
  88. # in the expected return value..
  89. regex = regex.replace(b"\\/", b"/")
  90. self.assertEqual(
  91. regex,
  92. translate(pattern),
  93. f"orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}",
  94. )
  95. class ReadIgnorePatterns(TestCase):
  96. def test_read_file(self) -> None:
  97. f = BytesIO(
  98. b"""
  99. # a comment
  100. \x20\x20
  101. # and an empty line:
  102. \\#not a comment
  103. !negative
  104. with trailing whitespace
  105. with escaped trailing whitespace\\
  106. """
  107. )
  108. self.assertEqual(
  109. list(read_ignore_patterns(f)),
  110. [
  111. b"\\#not a comment",
  112. b"!negative",
  113. b"with trailing whitespace",
  114. b"with escaped trailing whitespace ",
  115. ],
  116. )
  117. class MatchPatternTests(TestCase):
  118. def test_matches(self) -> None:
  119. for path, pattern in POSITIVE_MATCH_TESTS:
  120. self.assertTrue(
  121. match_pattern(path, pattern),
  122. f"path: {path!r}, pattern: {pattern!r}",
  123. )
  124. def test_no_matches(self) -> None:
  125. for path, pattern in NEGATIVE_MATCH_TESTS:
  126. self.assertFalse(
  127. match_pattern(path, pattern),
  128. f"path: {path!r}, pattern: {pattern!r}",
  129. )
  130. class IgnoreFilterTests(TestCase):
  131. def test_included(self) -> None:
  132. filter = IgnoreFilter([b"a.c", b"b.c"])
  133. self.assertTrue(filter.is_ignored(b"a.c"))
  134. self.assertIs(None, filter.is_ignored(b"c.c"))
  135. self.assertEqual([Pattern(b"a.c")], list(filter.find_matching(b"a.c")))
  136. self.assertEqual([], list(filter.find_matching(b"c.c")))
  137. def test_included_ignorecase(self) -> None:
  138. filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=False)
  139. self.assertTrue(filter.is_ignored(b"a.c"))
  140. self.assertFalse(filter.is_ignored(b"A.c"))
  141. filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=True)
  142. self.assertTrue(filter.is_ignored(b"a.c"))
  143. self.assertTrue(filter.is_ignored(b"A.c"))
  144. self.assertTrue(filter.is_ignored(b"A.C"))
  145. def test_excluded(self) -> None:
  146. filter = IgnoreFilter([b"a.c", b"b.c", b"!c.c"])
  147. self.assertFalse(filter.is_ignored(b"c.c"))
  148. self.assertIs(None, filter.is_ignored(b"d.c"))
  149. self.assertEqual([Pattern(b"!c.c")], list(filter.find_matching(b"c.c")))
  150. self.assertEqual([], list(filter.find_matching(b"d.c")))
  151. def test_include_exclude_include(self) -> None:
  152. filter = IgnoreFilter([b"a.c", b"!a.c", b"a.c"])
  153. self.assertTrue(filter.is_ignored(b"a.c"))
  154. self.assertEqual(
  155. [Pattern(b"a.c"), Pattern(b"!a.c"), Pattern(b"a.c")],
  156. list(filter.find_matching(b"a.c")),
  157. )
  158. def test_manpage(self) -> None:
  159. # A specific example from the gitignore manpage
  160. filter = IgnoreFilter([b"/*", b"!/foo", b"/foo/*", b"!/foo/bar"])
  161. self.assertTrue(filter.is_ignored(b"a.c"))
  162. self.assertTrue(filter.is_ignored(b"foo/blie"))
  163. self.assertFalse(filter.is_ignored(b"foo"))
  164. self.assertFalse(filter.is_ignored(b"foo/bar"))
  165. self.assertFalse(filter.is_ignored(b"foo/bar/"))
  166. self.assertFalse(filter.is_ignored(b"foo/bar/bloe"))
  167. def test_regex_special(self) -> None:
  168. # See https://github.com/dulwich/dulwich/issues/930#issuecomment-1026166429
  169. filter = IgnoreFilter([b"/foo\\[bar\\]", b"/foo"])
  170. self.assertTrue(filter.is_ignored("foo"))
  171. self.assertTrue(filter.is_ignored("foo[bar]"))
  172. def test_from_path_pathlib(self) -> None:
  173. import tempfile
  174. from pathlib import Path
  175. # Create a temporary .gitignore file
  176. with tempfile.NamedTemporaryFile(
  177. mode="w", suffix=".gitignore", delete=False
  178. ) as f:
  179. f.write("*.pyc\n__pycache__/\n")
  180. temp_path = f.name
  181. self.addCleanup(os.unlink, temp_path)
  182. # Test with pathlib.Path
  183. path_obj = Path(temp_path)
  184. ignore_filter = IgnoreFilter.from_path(path_obj)
  185. # Test that it loaded the patterns correctly
  186. self.assertTrue(ignore_filter.is_ignored("test.pyc"))
  187. self.assertTrue(ignore_filter.is_ignored("__pycache__/"))
  188. self.assertFalse(ignore_filter.is_ignored("test.py"))
  189. class IgnoreFilterStackTests(TestCase):
  190. def test_stack_first(self) -> None:
  191. filter1 = IgnoreFilter([b"[a].c", b"[b].c", b"![d].c"])
  192. filter2 = IgnoreFilter([b"[a].c", b"![b],c", b"[c].c", b"[d].c"])
  193. stack = IgnoreFilterStack([filter1, filter2])
  194. self.assertIs(True, stack.is_ignored(b"a.c"))
  195. self.assertIs(True, stack.is_ignored(b"b.c"))
  196. self.assertIs(True, stack.is_ignored(b"c.c"))
  197. self.assertIs(False, stack.is_ignored(b"d.c"))
  198. self.assertIs(None, stack.is_ignored(b"e.c"))
  199. class IgnoreFilterManagerTests(TestCase):
  200. def test_load_ignore(self) -> None:
  201. tmp_dir = tempfile.mkdtemp()
  202. self.addCleanup(shutil.rmtree, tmp_dir)
  203. repo = Repo.init(tmp_dir)
  204. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  205. f.write(b"/foo/bar\n")
  206. f.write(b"/dir2\n")
  207. f.write(b"/dir3/\n")
  208. os.mkdir(os.path.join(repo.path, "dir"))
  209. with open(os.path.join(repo.path, "dir", ".gitignore"), "wb") as f:
  210. f.write(b"/blie\n")
  211. with open(os.path.join(repo.path, "dir", "blie"), "wb") as f:
  212. f.write(b"IGNORED")
  213. p = os.path.join(repo.controldir(), "info", "exclude")
  214. with open(p, "wb") as f:
  215. f.write(b"/excluded\n")
  216. m = IgnoreFilterManager.from_repo(repo)
  217. self.assertTrue(m.is_ignored("dir/blie"))
  218. self.assertIs(None, m.is_ignored(os.path.join("dir", "bloe")))
  219. self.assertIs(None, m.is_ignored("dir"))
  220. self.assertTrue(m.is_ignored(os.path.join("foo", "bar")))
  221. self.assertTrue(m.is_ignored(os.path.join("excluded")))
  222. self.assertTrue(m.is_ignored(os.path.join("dir2", "fileinignoreddir")))
  223. self.assertFalse(m.is_ignored("dir3"))
  224. self.assertTrue(m.is_ignored("dir3/"))
  225. self.assertTrue(m.is_ignored("dir3/bla"))
  226. def test_nested_gitignores(self) -> None:
  227. tmp_dir = tempfile.mkdtemp()
  228. self.addCleanup(shutil.rmtree, tmp_dir)
  229. repo = Repo.init(tmp_dir)
  230. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  231. f.write(b"/*\n")
  232. f.write(b"!/foo\n")
  233. os.mkdir(os.path.join(repo.path, "foo"))
  234. with open(os.path.join(repo.path, "foo", ".gitignore"), "wb") as f:
  235. f.write(b"/bar\n")
  236. with open(os.path.join(repo.path, "foo", "bar"), "wb") as f:
  237. f.write(b"IGNORED")
  238. m = IgnoreFilterManager.from_repo(repo)
  239. self.assertTrue(m.is_ignored("foo/bar"))
  240. def test_load_ignore_ignorecase(self) -> None:
  241. tmp_dir = tempfile.mkdtemp()
  242. self.addCleanup(shutil.rmtree, tmp_dir)
  243. repo = Repo.init(tmp_dir)
  244. config = repo.get_config()
  245. config.set(b"core", b"ignorecase", True)
  246. config.write_to_path()
  247. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  248. f.write(b"/foo/bar\n")
  249. f.write(b"/dir\n")
  250. m = IgnoreFilterManager.from_repo(repo)
  251. self.assertTrue(m.is_ignored(os.path.join("dir", "blie")))
  252. self.assertTrue(m.is_ignored(os.path.join("DIR", "blie")))
  253. def test_ignored_contents(self) -> None:
  254. tmp_dir = tempfile.mkdtemp()
  255. self.addCleanup(shutil.rmtree, tmp_dir)
  256. repo = Repo.init(tmp_dir)
  257. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  258. f.write(b"a/*\n")
  259. f.write(b"!a/*.txt\n")
  260. m = IgnoreFilterManager.from_repo(repo)
  261. os.mkdir(os.path.join(repo.path, "a"))
  262. self.assertIs(None, m.is_ignored("a"))
  263. self.assertIs(None, m.is_ignored("a/"))
  264. self.assertFalse(m.is_ignored("a/b.txt"))
  265. self.assertTrue(m.is_ignored("a/c.dat"))
  266. def test_issue_1203_directory_negation(self) -> None:
  267. """Test for issue #1203: gitignore patterns with directory negation."""
  268. tmp_dir = tempfile.mkdtemp()
  269. self.addCleanup(shutil.rmtree, tmp_dir)
  270. repo = Repo.init(tmp_dir)
  271. # Create .gitignore with the patterns from the issue
  272. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  273. f.write(b"data/**\n")
  274. f.write(b"!data/*/\n")
  275. # Create directory structure
  276. os.makedirs(os.path.join(repo.path, "data", "subdir"))
  277. m = IgnoreFilterManager.from_repo(repo)
  278. # Test the expected behavior
  279. self.assertTrue(
  280. m.is_ignored("data/test.dvc")
  281. ) # File in data/ should be ignored
  282. self.assertFalse(m.is_ignored("data/")) # data/ directory should not be ignored
  283. self.assertTrue(
  284. m.is_ignored("data/subdir/")
  285. ) # Subdirectory should be ignored (matches Git behavior)
  286. class QuotePathTests(TestCase):
  287. """Tests for _quote_path function."""
  288. def test_ascii_paths(self) -> None:
  289. """Test that ASCII paths are not quoted."""
  290. self.assertEqual(_quote_path("file.txt"), "file.txt")
  291. self.assertEqual(_quote_path("dir/file.txt"), "dir/file.txt")
  292. self.assertEqual(_quote_path("path with spaces.txt"), "path with spaces.txt")
  293. def test_unicode_paths(self) -> None:
  294. """Test that unicode paths are quoted with C-style escapes."""
  295. # Russian characters
  296. self.assertEqual(
  297. _quote_path("тест.txt"), '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"'
  298. )
  299. # Chinese characters
  300. self.assertEqual(
  301. _quote_path("файл.测试"),
  302. '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"',
  303. )
  304. # Mixed ASCII and unicode
  305. self.assertEqual(
  306. _quote_path("test-тест.txt"),
  307. '"test-\\321\\202\\320\\265\\321\\201\\321\\202.txt"',
  308. )
  309. def test_special_characters(self) -> None:
  310. """Test that special characters are properly escaped."""
  311. # Quotes in filename
  312. self.assertEqual(
  313. _quote_path('file"with"quotes.txt'), '"file\\"with\\"quotes.txt"'
  314. )
  315. # Backslashes in filename
  316. self.assertEqual(
  317. _quote_path("file\\with\\backslashes.txt"),
  318. '"file\\\\with\\\\backslashes.txt"',
  319. )
  320. # Mixed special chars and unicode
  321. self.assertEqual(
  322. _quote_path('тест"файл.txt'),
  323. '"\\321\\202\\320\\265\\321\\201\\321\\202\\"\\321\\204\\320\\260\\320\\271\\320\\273.txt"',
  324. )
  325. def test_empty_and_edge_cases(self) -> None:
  326. """Test edge cases."""
  327. self.assertEqual(_quote_path(""), "")
  328. self.assertEqual(_quote_path("a"), "a") # Single ASCII char
  329. self.assertEqual(_quote_path("я"), '"\\321\\217"') # Single unicode char
  330. class CheckIgnoreQuotePathTests(TestCase):
  331. """Integration tests for check_ignore with quote_path parameter."""
  332. def setUp(self) -> None:
  333. self.test_dir = tempfile.mkdtemp()
  334. self.addCleanup(shutil.rmtree, self.test_dir)
  335. def test_quote_path_true_unicode_filenames(self) -> None:
  336. """Test that quote_path=True returns quoted unicode filenames."""
  337. from dulwich import porcelain
  338. # Create a repository
  339. repo = Repo.init(self.test_dir)
  340. self.addCleanup(repo.close)
  341. # Create .gitignore with unicode patterns
  342. gitignore_path = os.path.join(self.test_dir, ".gitignore")
  343. with open(gitignore_path, "w", encoding="utf-8") as f:
  344. f.write("тест*\n")
  345. f.write("*.测试\n")
  346. # Create unicode files
  347. test_files = ["тест.txt", "файл.测试", "normal.txt"]
  348. for filename in test_files:
  349. filepath = os.path.join(self.test_dir, filename)
  350. with open(filepath, "w", encoding="utf-8") as f:
  351. f.write("test content")
  352. # Test with quote_path=True (default)
  353. abs_paths = [os.path.join(self.test_dir, f) for f in test_files]
  354. ignored_quoted = set(
  355. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True)
  356. )
  357. # Test with quote_path=False
  358. ignored_unquoted = set(
  359. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)
  360. )
  361. # Verify quoted results
  362. expected_quoted = {
  363. '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"', # тест.txt
  364. '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"', # файл.测试
  365. }
  366. self.assertEqual(ignored_quoted, expected_quoted)
  367. # Verify unquoted results
  368. expected_unquoted = {"тест.txt", "файл.测试"}
  369. self.assertEqual(ignored_unquoted, expected_unquoted)
  370. def test_quote_path_ascii_filenames(self) -> None:
  371. """Test that ASCII filenames are unaffected by quote_path setting."""
  372. from dulwich import porcelain
  373. # Create a repository
  374. repo = Repo.init(self.test_dir)
  375. self.addCleanup(repo.close)
  376. # Create .gitignore
  377. gitignore_path = os.path.join(self.test_dir, ".gitignore")
  378. with open(gitignore_path, "w") as f:
  379. f.write("*.tmp\n")
  380. f.write("test*\n")
  381. # Create ASCII files
  382. test_files = ["test.txt", "file.tmp", "normal.txt"]
  383. for filename in test_files:
  384. filepath = os.path.join(self.test_dir, filename)
  385. with open(filepath, "w") as f:
  386. f.write("test content")
  387. # Test both settings
  388. abs_paths = [os.path.join(self.test_dir, f) for f in test_files]
  389. ignored_quoted = set(
  390. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True)
  391. )
  392. ignored_unquoted = set(
  393. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)
  394. )
  395. # Both should return the same results for ASCII filenames
  396. expected = {"test.txt", "file.tmp"}
  397. self.assertEqual(ignored_quoted, expected)
  398. self.assertEqual(ignored_unquoted, expected)