2
0

test_ignore.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. # test_ignore.py -- Tests for ignore files.
  2. # Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for ignore files."""
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from io import BytesIO
  27. from dulwich.ignore import (
  28. IgnoreFilter,
  29. IgnoreFilterManager,
  30. IgnoreFilterStack,
  31. Pattern,
  32. match_pattern,
  33. read_ignore_patterns,
  34. translate,
  35. )
  36. from dulwich.porcelain import _quote_path
  37. from dulwich.repo import Repo
  38. from . import TestCase
  39. POSITIVE_MATCH_TESTS = [
  40. (b"foo.c", b"*.c"),
  41. (b".c", b"*.c"),
  42. (b"foo/foo.c", b"*.c"),
  43. (b"foo/foo.c", b"foo.c"),
  44. (b"foo.c", b"/*.c"),
  45. (b"foo.c", b"/foo.c"),
  46. (b"foo.c", b"foo.c"),
  47. (b"foo.c", b"foo.[ch]"),
  48. (b"foo/bar/bla.c", b"foo/**"),
  49. (b"foo/bar/bla/blie.c", b"foo/**/blie.c"),
  50. (b"foo/bar/bla.c", b"**/bla.c"),
  51. (b"bla.c", b"**/bla.c"),
  52. (b"foo/bar", b"foo/**/bar"),
  53. (b"foo/bla/bar", b"foo/**/bar"),
  54. (b"foo/bar/", b"bar/"),
  55. (b"foo/bar/", b"bar"),
  56. (b"foo/bar/something", b"foo/bar/*"),
  57. ]
  58. NEGATIVE_MATCH_TESTS = [
  59. (b"foo.c", b"foo.[dh]"),
  60. (b"foo/foo.c", b"/foo.c"),
  61. (b"foo/foo.c", b"/*.c"),
  62. (b"foo/bar/", b"/bar/"),
  63. (b"foo/bar/", b"foo/bar/*"),
  64. (b"foo/bar", b"foo?bar"),
  65. ]
  66. TRANSLATE_TESTS = [
  67. (b"*.c", b"(?ms)(.*/)?[^/]*\\.c/?\\Z"),
  68. (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
  69. (b"/*.c", b"(?ms)[^/]*\\.c/?\\Z"),
  70. (b"/foo.c", b"(?ms)foo\\.c/?\\Z"),
  71. (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
  72. (b"foo.[ch]", b"(?ms)(.*/)?foo\\.[ch]/?\\Z"),
  73. (b"bar/", b"(?ms)(.*/)?bar\\/\\Z"),
  74. (b"foo/**", b"(?ms)foo/.*/?\\Z"),
  75. (b"foo/**/blie.c", b"(?ms)foo/(?:[^/]+/)*blie\\.c/?\\Z"),
  76. (b"**/bla.c", b"(?ms)(.*/)?bla\\.c/?\\Z"),
  77. (b"foo/**/bar", b"(?ms)foo/(?:[^/]+/)*bar/?\\Z"),
  78. (b"foo/bar/*", b"(?ms)foo\\/bar\\/[^/]+/?\\Z"),
  79. (b"/foo\\[bar\\]", b"(?ms)foo\\[bar\\]/?\\Z"),
  80. (b"/foo[bar]", b"(?ms)foo[bar]/?\\Z"),
  81. (b"/foo[0-9]", b"(?ms)foo[0-9]/?\\Z"),
  82. ]
  83. class TranslateTests(TestCase):
  84. def test_translate(self) -> None:
  85. for pattern, regex in TRANSLATE_TESTS:
  86. if re.escape(b"/") == b"/":
  87. # Slash is no longer escaped in Python3.7, so undo the escaping
  88. # in the expected return value..
  89. regex = regex.replace(b"\\/", b"/")
  90. self.assertEqual(
  91. regex,
  92. translate(pattern),
  93. f"orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}",
  94. )
  95. class ReadIgnorePatterns(TestCase):
  96. def test_read_file(self) -> None:
  97. f = BytesIO(
  98. b"""
  99. # a comment
  100. \x20\x20
  101. # and an empty line:
  102. \\#not a comment
  103. !negative
  104. with trailing whitespace
  105. with escaped trailing whitespace\\
  106. """
  107. )
  108. self.assertEqual(
  109. list(read_ignore_patterns(f)),
  110. [
  111. b"\\#not a comment",
  112. b"!negative",
  113. b"with trailing whitespace",
  114. b"with escaped trailing whitespace ",
  115. ],
  116. )
  117. class MatchPatternTests(TestCase):
  118. def test_matches(self) -> None:
  119. for path, pattern in POSITIVE_MATCH_TESTS:
  120. self.assertTrue(
  121. match_pattern(path, pattern),
  122. f"path: {path!r}, pattern: {pattern!r}",
  123. )
  124. def test_no_matches(self) -> None:
  125. for path, pattern in NEGATIVE_MATCH_TESTS:
  126. self.assertFalse(
  127. match_pattern(path, pattern),
  128. f"path: {path!r}, pattern: {pattern!r}",
  129. )
  130. class IgnoreFilterTests(TestCase):
  131. def test_included(self) -> None:
  132. filter = IgnoreFilter([b"a.c", b"b.c"])
  133. self.assertTrue(filter.is_ignored(b"a.c"))
  134. self.assertIs(None, filter.is_ignored(b"c.c"))
  135. self.assertEqual([Pattern(b"a.c")], list(filter.find_matching(b"a.c")))
  136. self.assertEqual([], list(filter.find_matching(b"c.c")))
  137. def test_included_ignorecase(self) -> None:
  138. filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=False)
  139. self.assertTrue(filter.is_ignored(b"a.c"))
  140. self.assertFalse(filter.is_ignored(b"A.c"))
  141. filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=True)
  142. self.assertTrue(filter.is_ignored(b"a.c"))
  143. self.assertTrue(filter.is_ignored(b"A.c"))
  144. self.assertTrue(filter.is_ignored(b"A.C"))
  145. def test_excluded(self) -> None:
  146. filter = IgnoreFilter([b"a.c", b"b.c", b"!c.c"])
  147. self.assertFalse(filter.is_ignored(b"c.c"))
  148. self.assertIs(None, filter.is_ignored(b"d.c"))
  149. self.assertEqual([Pattern(b"!c.c")], list(filter.find_matching(b"c.c")))
  150. self.assertEqual([], list(filter.find_matching(b"d.c")))
  151. def test_include_exclude_include(self) -> None:
  152. filter = IgnoreFilter([b"a.c", b"!a.c", b"a.c"])
  153. self.assertTrue(filter.is_ignored(b"a.c"))
  154. self.assertEqual(
  155. [Pattern(b"a.c"), Pattern(b"!a.c"), Pattern(b"a.c")],
  156. list(filter.find_matching(b"a.c")),
  157. )
  158. def test_manpage(self) -> None:
  159. # A specific example from the gitignore manpage
  160. filter = IgnoreFilter([b"/*", b"!/foo", b"/foo/*", b"!/foo/bar"])
  161. self.assertTrue(filter.is_ignored(b"a.c"))
  162. self.assertTrue(filter.is_ignored(b"foo/blie"))
  163. self.assertFalse(filter.is_ignored(b"foo"))
  164. self.assertFalse(filter.is_ignored(b"foo/bar"))
  165. self.assertFalse(filter.is_ignored(b"foo/bar/"))
  166. self.assertFalse(filter.is_ignored(b"foo/bar/bloe"))
  167. def test_regex_special(self) -> None:
  168. # See https://github.com/dulwich/dulwich/issues/930#issuecomment-1026166429
  169. filter = IgnoreFilter([b"/foo\\[bar\\]", b"/foo"])
  170. self.assertTrue(filter.is_ignored("foo"))
  171. self.assertTrue(filter.is_ignored("foo[bar]"))
  172. def test_from_path_pathlib(self) -> None:
  173. import tempfile
  174. from pathlib import Path
  175. # Create a temporary .gitignore file
  176. with tempfile.NamedTemporaryFile(
  177. mode="w", suffix=".gitignore", delete=False
  178. ) as f:
  179. f.write("*.pyc\n__pycache__/\n")
  180. temp_path = f.name
  181. try:
  182. # Test with pathlib.Path
  183. path_obj = Path(temp_path)
  184. ignore_filter = IgnoreFilter.from_path(path_obj)
  185. # Test that it loaded the patterns correctly
  186. self.assertTrue(ignore_filter.is_ignored("test.pyc"))
  187. self.assertTrue(ignore_filter.is_ignored("__pycache__/"))
  188. self.assertFalse(ignore_filter.is_ignored("test.py"))
  189. finally:
  190. # Clean up
  191. os.unlink(temp_path)
  192. class IgnoreFilterStackTests(TestCase):
  193. def test_stack_first(self) -> None:
  194. filter1 = IgnoreFilter([b"[a].c", b"[b].c", b"![d].c"])
  195. filter2 = IgnoreFilter([b"[a].c", b"![b],c", b"[c].c", b"[d].c"])
  196. stack = IgnoreFilterStack([filter1, filter2])
  197. self.assertIs(True, stack.is_ignored(b"a.c"))
  198. self.assertIs(True, stack.is_ignored(b"b.c"))
  199. self.assertIs(True, stack.is_ignored(b"c.c"))
  200. self.assertIs(False, stack.is_ignored(b"d.c"))
  201. self.assertIs(None, stack.is_ignored(b"e.c"))
  202. class IgnoreFilterManagerTests(TestCase):
  203. def test_load_ignore(self) -> None:
  204. tmp_dir = tempfile.mkdtemp()
  205. self.addCleanup(shutil.rmtree, tmp_dir)
  206. repo = Repo.init(tmp_dir)
  207. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  208. f.write(b"/foo/bar\n")
  209. f.write(b"/dir2\n")
  210. f.write(b"/dir3/\n")
  211. os.mkdir(os.path.join(repo.path, "dir"))
  212. with open(os.path.join(repo.path, "dir", ".gitignore"), "wb") as f:
  213. f.write(b"/blie\n")
  214. with open(os.path.join(repo.path, "dir", "blie"), "wb") as f:
  215. f.write(b"IGNORED")
  216. p = os.path.join(repo.controldir(), "info", "exclude")
  217. with open(p, "wb") as f:
  218. f.write(b"/excluded\n")
  219. m = IgnoreFilterManager.from_repo(repo)
  220. self.assertTrue(m.is_ignored("dir/blie"))
  221. self.assertIs(None, m.is_ignored(os.path.join("dir", "bloe")))
  222. self.assertIs(None, m.is_ignored("dir"))
  223. self.assertTrue(m.is_ignored(os.path.join("foo", "bar")))
  224. self.assertTrue(m.is_ignored(os.path.join("excluded")))
  225. self.assertTrue(m.is_ignored(os.path.join("dir2", "fileinignoreddir")))
  226. self.assertFalse(m.is_ignored("dir3"))
  227. self.assertTrue(m.is_ignored("dir3/"))
  228. self.assertTrue(m.is_ignored("dir3/bla"))
  229. def test_nested_gitignores(self) -> None:
  230. tmp_dir = tempfile.mkdtemp()
  231. self.addCleanup(shutil.rmtree, tmp_dir)
  232. repo = Repo.init(tmp_dir)
  233. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  234. f.write(b"/*\n")
  235. f.write(b"!/foo\n")
  236. os.mkdir(os.path.join(repo.path, "foo"))
  237. with open(os.path.join(repo.path, "foo", ".gitignore"), "wb") as f:
  238. f.write(b"/bar\n")
  239. with open(os.path.join(repo.path, "foo", "bar"), "wb") as f:
  240. f.write(b"IGNORED")
  241. m = IgnoreFilterManager.from_repo(repo)
  242. self.assertTrue(m.is_ignored("foo/bar"))
  243. def test_load_ignore_ignorecase(self) -> None:
  244. tmp_dir = tempfile.mkdtemp()
  245. self.addCleanup(shutil.rmtree, tmp_dir)
  246. repo = Repo.init(tmp_dir)
  247. config = repo.get_config()
  248. config.set(b"core", b"ignorecase", True)
  249. config.write_to_path()
  250. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  251. f.write(b"/foo/bar\n")
  252. f.write(b"/dir\n")
  253. m = IgnoreFilterManager.from_repo(repo)
  254. self.assertTrue(m.is_ignored(os.path.join("dir", "blie")))
  255. self.assertTrue(m.is_ignored(os.path.join("DIR", "blie")))
  256. def test_ignored_contents(self) -> None:
  257. tmp_dir = tempfile.mkdtemp()
  258. self.addCleanup(shutil.rmtree, tmp_dir)
  259. repo = Repo.init(tmp_dir)
  260. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  261. f.write(b"a/*\n")
  262. f.write(b"!a/*.txt\n")
  263. m = IgnoreFilterManager.from_repo(repo)
  264. os.mkdir(os.path.join(repo.path, "a"))
  265. self.assertIs(None, m.is_ignored("a"))
  266. self.assertIs(None, m.is_ignored("a/"))
  267. self.assertFalse(m.is_ignored("a/b.txt"))
  268. self.assertTrue(m.is_ignored("a/c.dat"))
  269. def test_issue_1203_directory_negation(self) -> None:
  270. """Test for issue #1203: gitignore patterns with directory negation."""
  271. tmp_dir = tempfile.mkdtemp()
  272. self.addCleanup(shutil.rmtree, tmp_dir)
  273. repo = Repo.init(tmp_dir)
  274. # Create .gitignore with the patterns from the issue
  275. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  276. f.write(b"data/**\n")
  277. f.write(b"!data/*/\n")
  278. # Create directory structure
  279. os.makedirs(os.path.join(repo.path, "data", "subdir"))
  280. m = IgnoreFilterManager.from_repo(repo)
  281. # Test the expected behavior
  282. self.assertTrue(
  283. m.is_ignored("data/test.dvc")
  284. ) # File in data/ should be ignored
  285. self.assertFalse(m.is_ignored("data/")) # data/ directory should not be ignored
  286. self.assertTrue(
  287. m.is_ignored("data/subdir/")
  288. ) # Subdirectory should be ignored (matches Git behavior)
  289. class QuotePathTests(TestCase):
  290. """Tests for _quote_path function."""
  291. def test_ascii_paths(self) -> None:
  292. """Test that ASCII paths are not quoted."""
  293. self.assertEqual(_quote_path("file.txt"), "file.txt")
  294. self.assertEqual(_quote_path("dir/file.txt"), "dir/file.txt")
  295. self.assertEqual(_quote_path("path with spaces.txt"), "path with spaces.txt")
  296. def test_unicode_paths(self) -> None:
  297. """Test that unicode paths are quoted with C-style escapes."""
  298. # Russian characters
  299. self.assertEqual(
  300. _quote_path("тест.txt"), '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"'
  301. )
  302. # Chinese characters
  303. self.assertEqual(
  304. _quote_path("файл.测试"),
  305. '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"',
  306. )
  307. # Mixed ASCII and unicode
  308. self.assertEqual(
  309. _quote_path("test-тест.txt"),
  310. '"test-\\321\\202\\320\\265\\321\\201\\321\\202.txt"',
  311. )
  312. def test_special_characters(self) -> None:
  313. """Test that special characters are properly escaped."""
  314. # Quotes in filename
  315. self.assertEqual(
  316. _quote_path('file"with"quotes.txt'), '"file\\"with\\"quotes.txt"'
  317. )
  318. # Backslashes in filename
  319. self.assertEqual(
  320. _quote_path("file\\with\\backslashes.txt"),
  321. '"file\\\\with\\\\backslashes.txt"',
  322. )
  323. # Mixed special chars and unicode
  324. self.assertEqual(
  325. _quote_path('тест"файл.txt'),
  326. '"\\321\\202\\320\\265\\321\\201\\321\\202\\"\\321\\204\\320\\260\\320\\271\\320\\273.txt"',
  327. )
  328. def test_empty_and_edge_cases(self) -> None:
  329. """Test edge cases."""
  330. self.assertEqual(_quote_path(""), "")
  331. self.assertEqual(_quote_path("a"), "a") # Single ASCII char
  332. self.assertEqual(_quote_path("я"), '"\\321\\217"') # Single unicode char
  333. class CheckIgnoreQuotePathTests(TestCase):
  334. """Integration tests for check_ignore with quote_path parameter."""
  335. def setUp(self) -> None:
  336. self.test_dir = tempfile.mkdtemp()
  337. self.addCleanup(shutil.rmtree, self.test_dir)
  338. def test_quote_path_true_unicode_filenames(self) -> None:
  339. """Test that quote_path=True returns quoted unicode filenames."""
  340. from dulwich import porcelain
  341. # Create a repository
  342. repo = Repo.init(self.test_dir)
  343. self.addCleanup(repo.close)
  344. # Create .gitignore with unicode patterns
  345. gitignore_path = os.path.join(self.test_dir, ".gitignore")
  346. with open(gitignore_path, "w", encoding="utf-8") as f:
  347. f.write("тест*\n")
  348. f.write("*.测试\n")
  349. # Create unicode files
  350. test_files = ["тест.txt", "файл.测试", "normal.txt"]
  351. for filename in test_files:
  352. filepath = os.path.join(self.test_dir, filename)
  353. with open(filepath, "w", encoding="utf-8") as f:
  354. f.write("test content")
  355. # Test with quote_path=True (default)
  356. abs_paths = [os.path.join(self.test_dir, f) for f in test_files]
  357. ignored_quoted = set(
  358. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True)
  359. )
  360. # Test with quote_path=False
  361. ignored_unquoted = set(
  362. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)
  363. )
  364. # Verify quoted results
  365. expected_quoted = {
  366. '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"', # тест.txt
  367. '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"', # файл.测试
  368. }
  369. self.assertEqual(ignored_quoted, expected_quoted)
  370. # Verify unquoted results
  371. expected_unquoted = {"тест.txt", "файл.测试"}
  372. self.assertEqual(ignored_unquoted, expected_unquoted)
  373. def test_quote_path_ascii_filenames(self) -> None:
  374. """Test that ASCII filenames are unaffected by quote_path setting."""
  375. from dulwich import porcelain
  376. # Create a repository
  377. repo = Repo.init(self.test_dir)
  378. self.addCleanup(repo.close)
  379. # Create .gitignore
  380. gitignore_path = os.path.join(self.test_dir, ".gitignore")
  381. with open(gitignore_path, "w") as f:
  382. f.write("*.tmp\n")
  383. f.write("test*\n")
  384. # Create ASCII files
  385. test_files = ["test.txt", "file.tmp", "normal.txt"]
  386. for filename in test_files:
  387. filepath = os.path.join(self.test_dir, filename)
  388. with open(filepath, "w") as f:
  389. f.write("test content")
  390. # Test both settings
  391. abs_paths = [os.path.join(self.test_dir, f) for f in test_files]
  392. ignored_quoted = set(
  393. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True)
  394. )
  395. ignored_unquoted = set(
  396. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)
  397. )
  398. # Both should return the same results for ASCII filenames
  399. expected = {"test.txt", "file.tmp"}
  400. self.assertEqual(ignored_quoted, expected)
  401. self.assertEqual(ignored_unquoted, expected)