2
0

test_ignore.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. # test_ignore.py -- Tests for ignore files.
  2. # Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for ignore files."""
  22. import os
  23. import re
  24. import shutil
  25. import tempfile
  26. from io import BytesIO
  27. from dulwich.ignore import (
  28. IgnoreFilter,
  29. IgnoreFilterManager,
  30. IgnoreFilterStack,
  31. Pattern,
  32. match_pattern,
  33. read_ignore_patterns,
  34. translate,
  35. )
  36. from dulwich.porcelain import _quote_path
  37. from dulwich.repo import Repo
  38. from . import TestCase
# (path, pattern) pairs for which match_pattern(path, pattern) is expected to
# be truthy; iterated by MatchPatternTests.test_matches.
POSITIVE_MATCH_TESTS = [
    (b"foo.c", b"*.c"),
    (b".c", b"*.c"),
    (b"foo/foo.c", b"*.c"),
    (b"foo/foo.c", b"foo.c"),
    # Patterns with a leading slash, matched against a top-level path.
    (b"foo.c", b"/*.c"),
    (b"foo.c", b"/foo.c"),
    (b"foo.c", b"foo.c"),
    (b"foo.c", b"foo.[ch]"),
    # Patterns containing "**".
    (b"foo/bar/bla.c", b"foo/**"),
    (b"foo/bar/bla/blie.c", b"foo/**/blie.c"),
    (b"foo/bar/bla.c", b"**/bla.c"),
    (b"bla.c", b"**/bla.c"),
    (b"foo/bar", b"foo/**/bar"),
    (b"foo/bla/bar", b"foo/**/bar"),
    # Paths written with a trailing slash.
    (b"foo/bar/", b"bar/"),
    (b"foo/bar/", b"bar"),
    (b"foo/bar/something", b"foo/bar/*"),
]
# (path, pattern) pairs for which match_pattern(path, pattern) is expected to
# be falsy; iterated by MatchPatternTests.test_no_matches.
NEGATIVE_MATCH_TESTS = [
    (b"foo.c", b"foo.[dh]"),
    # Leading-slash patterns checked against paths below a subdirectory.
    (b"foo/foo.c", b"/foo.c"),
    (b"foo/foo.c", b"/*.c"),
    (b"foo/bar/", b"/bar/"),
    (b"foo/bar/", b"foo/bar/*"),
    # "?" in the pattern is expected not to stand in for the "/" in the path.
    (b"foo/bar", b"foo?bar"),
]
# (pattern, expected regex) pairs compared against translate(pattern) by
# TranslateTests.test_translate. Some expectations spell the slash as "\/";
# the test rewrites those to "/" when re.escape() does not escape "/".
TRANSLATE_TESTS = [
    (b"*.c", b"(?ms)(.*/)?[^/]*\\.c/?\\Z"),
    (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
    (b"/*.c", b"(?ms)[^/]*\\.c/?\\Z"),
    (b"/foo.c", b"(?ms)foo\\.c/?\\Z"),
    # NOTE(review): identical to the second entry above.
    (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
    (b"foo.[ch]", b"(?ms)(.*/)?foo\\.[ch]/?\\Z"),
    (b"bar/", b"(?ms)(.*/)?bar\\/\\Z"),
    (b"foo/**", b"(?ms)foo/.*/?\\Z"),
    (b"foo/**/blie.c", b"(?ms)foo/(?:[^/]+/)*blie\\.c/?\\Z"),
    (b"**/bla.c", b"(?ms)(.*/)?bla\\.c/?\\Z"),
    (b"foo/**/bar", b"(?ms)foo/(?:[^/]+/)*bar/?\\Z"),
    (b"foo/bar/*", b"(?ms)foo\\/bar\\/[^/]+/?\\Z"),
    # Bracket handling: escaped brackets, a literal set, and a range.
    (b"/foo\\[bar\\]", b"(?ms)foo\\[bar\\]/?\\Z"),
    (b"/foo[bar]", b"(?ms)foo[bar]/?\\Z"),
    (b"/foo[0-9]", b"(?ms)foo[0-9]/?\\Z"),
]
  83. class TranslateTests(TestCase):
  84. def test_translate(self) -> None:
  85. for pattern, regex in TRANSLATE_TESTS:
  86. if re.escape(b"/") == b"/":
  87. # Slash is no longer escaped in Python3.7, so undo the escaping
  88. # in the expected return value..
  89. regex = regex.replace(b"\\/", b"/")
  90. self.assertEqual(
  91. regex,
  92. translate(pattern),
  93. f"orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}",
  94. )
  95. class ReadIgnorePatterns(TestCase):
  96. def test_read_file(self) -> None:
  97. f = BytesIO(
  98. b"""
  99. # a comment
  100. \x20\x20
  101. # and an empty line:
  102. \\#not a comment
  103. !negative
  104. with trailing whitespace
  105. with escaped trailing whitespace\\
  106. """
  107. )
  108. self.assertEqual(
  109. list(read_ignore_patterns(f)),
  110. [
  111. b"\\#not a comment",
  112. b"!negative",
  113. b"with trailing whitespace",
  114. b"with escaped trailing whitespace ",
  115. ],
  116. )
  117. class MatchPatternTests(TestCase):
  118. def test_matches(self) -> None:
  119. for path, pattern in POSITIVE_MATCH_TESTS:
  120. self.assertTrue(
  121. match_pattern(path, pattern),
  122. f"path: {path!r}, pattern: {pattern!r}",
  123. )
  124. def test_no_matches(self) -> None:
  125. for path, pattern in NEGATIVE_MATCH_TESTS:
  126. self.assertFalse(
  127. match_pattern(path, pattern),
  128. f"path: {path!r}, pattern: {pattern!r}",
  129. )
  130. class IgnoreFilterTests(TestCase):
  131. def test_included(self) -> None:
  132. filter = IgnoreFilter([b"a.c", b"b.c"])
  133. self.assertTrue(filter.is_ignored(b"a.c"))
  134. self.assertIs(None, filter.is_ignored(b"c.c"))
  135. self.assertEqual([Pattern(b"a.c")], list(filter.find_matching(b"a.c")))
  136. self.assertEqual([], list(filter.find_matching(b"c.c")))
  137. def test_included_ignorecase(self) -> None:
  138. filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=False)
  139. self.assertTrue(filter.is_ignored(b"a.c"))
  140. self.assertFalse(filter.is_ignored(b"A.c"))
  141. filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=True)
  142. self.assertTrue(filter.is_ignored(b"a.c"))
  143. self.assertTrue(filter.is_ignored(b"A.c"))
  144. self.assertTrue(filter.is_ignored(b"A.C"))
  145. def test_excluded(self) -> None:
  146. filter = IgnoreFilter([b"a.c", b"b.c", b"!c.c"])
  147. self.assertFalse(filter.is_ignored(b"c.c"))
  148. self.assertIs(None, filter.is_ignored(b"d.c"))
  149. self.assertEqual([Pattern(b"!c.c")], list(filter.find_matching(b"c.c")))
  150. self.assertEqual([], list(filter.find_matching(b"d.c")))
  151. def test_include_exclude_include(self) -> None:
  152. filter = IgnoreFilter([b"a.c", b"!a.c", b"a.c"])
  153. self.assertTrue(filter.is_ignored(b"a.c"))
  154. self.assertEqual(
  155. [Pattern(b"a.c"), Pattern(b"!a.c"), Pattern(b"a.c")],
  156. list(filter.find_matching(b"a.c")),
  157. )
  158. def test_manpage(self) -> None:
  159. # A specific example from the gitignore manpage
  160. filter = IgnoreFilter([b"/*", b"!/foo", b"/foo/*", b"!/foo/bar"])
  161. self.assertTrue(filter.is_ignored(b"a.c"))
  162. self.assertTrue(filter.is_ignored(b"foo/blie"))
  163. self.assertFalse(filter.is_ignored(b"foo"))
  164. self.assertFalse(filter.is_ignored(b"foo/bar"))
  165. self.assertFalse(filter.is_ignored(b"foo/bar/"))
  166. self.assertFalse(filter.is_ignored(b"foo/bar/bloe"))
  167. def test_regex_special(self) -> None:
  168. # See https://github.com/dulwich/dulwich/issues/930#issuecomment-1026166429
  169. filter = IgnoreFilter([b"/foo\\[bar\\]", b"/foo"])
  170. self.assertTrue(filter.is_ignored("foo"))
  171. self.assertTrue(filter.is_ignored("foo[bar]"))
  172. class IgnoreFilterStackTests(TestCase):
  173. def test_stack_first(self) -> None:
  174. filter1 = IgnoreFilter([b"[a].c", b"[b].c", b"![d].c"])
  175. filter2 = IgnoreFilter([b"[a].c", b"![b],c", b"[c].c", b"[d].c"])
  176. stack = IgnoreFilterStack([filter1, filter2])
  177. self.assertIs(True, stack.is_ignored(b"a.c"))
  178. self.assertIs(True, stack.is_ignored(b"b.c"))
  179. self.assertIs(True, stack.is_ignored(b"c.c"))
  180. self.assertIs(False, stack.is_ignored(b"d.c"))
  181. self.assertIs(None, stack.is_ignored(b"e.c"))
  182. class IgnoreFilterManagerTests(TestCase):
  183. def test_load_ignore(self) -> None:
  184. tmp_dir = tempfile.mkdtemp()
  185. self.addCleanup(shutil.rmtree, tmp_dir)
  186. repo = Repo.init(tmp_dir)
  187. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  188. f.write(b"/foo/bar\n")
  189. f.write(b"/dir2\n")
  190. f.write(b"/dir3/\n")
  191. os.mkdir(os.path.join(repo.path, "dir"))
  192. with open(os.path.join(repo.path, "dir", ".gitignore"), "wb") as f:
  193. f.write(b"/blie\n")
  194. with open(os.path.join(repo.path, "dir", "blie"), "wb") as f:
  195. f.write(b"IGNORED")
  196. p = os.path.join(repo.controldir(), "info", "exclude")
  197. with open(p, "wb") as f:
  198. f.write(b"/excluded\n")
  199. m = IgnoreFilterManager.from_repo(repo)
  200. self.assertTrue(m.is_ignored("dir/blie"))
  201. self.assertIs(None, m.is_ignored(os.path.join("dir", "bloe")))
  202. self.assertIs(None, m.is_ignored("dir"))
  203. self.assertTrue(m.is_ignored(os.path.join("foo", "bar")))
  204. self.assertTrue(m.is_ignored(os.path.join("excluded")))
  205. self.assertTrue(m.is_ignored(os.path.join("dir2", "fileinignoreddir")))
  206. self.assertFalse(m.is_ignored("dir3"))
  207. self.assertTrue(m.is_ignored("dir3/"))
  208. self.assertTrue(m.is_ignored("dir3/bla"))
  209. def test_nested_gitignores(self) -> None:
  210. tmp_dir = tempfile.mkdtemp()
  211. self.addCleanup(shutil.rmtree, tmp_dir)
  212. repo = Repo.init(tmp_dir)
  213. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  214. f.write(b"/*\n")
  215. f.write(b"!/foo\n")
  216. os.mkdir(os.path.join(repo.path, "foo"))
  217. with open(os.path.join(repo.path, "foo", ".gitignore"), "wb") as f:
  218. f.write(b"/bar\n")
  219. with open(os.path.join(repo.path, "foo", "bar"), "wb") as f:
  220. f.write(b"IGNORED")
  221. m = IgnoreFilterManager.from_repo(repo)
  222. self.assertTrue(m.is_ignored("foo/bar"))
  223. def test_load_ignore_ignorecase(self) -> None:
  224. tmp_dir = tempfile.mkdtemp()
  225. self.addCleanup(shutil.rmtree, tmp_dir)
  226. repo = Repo.init(tmp_dir)
  227. config = repo.get_config()
  228. config.set(b"core", b"ignorecase", True)
  229. config.write_to_path()
  230. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  231. f.write(b"/foo/bar\n")
  232. f.write(b"/dir\n")
  233. m = IgnoreFilterManager.from_repo(repo)
  234. self.assertTrue(m.is_ignored(os.path.join("dir", "blie")))
  235. self.assertTrue(m.is_ignored(os.path.join("DIR", "blie")))
  236. def test_ignored_contents(self) -> None:
  237. tmp_dir = tempfile.mkdtemp()
  238. self.addCleanup(shutil.rmtree, tmp_dir)
  239. repo = Repo.init(tmp_dir)
  240. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  241. f.write(b"a/*\n")
  242. f.write(b"!a/*.txt\n")
  243. m = IgnoreFilterManager.from_repo(repo)
  244. os.mkdir(os.path.join(repo.path, "a"))
  245. self.assertIs(None, m.is_ignored("a"))
  246. self.assertIs(None, m.is_ignored("a/"))
  247. self.assertFalse(m.is_ignored("a/b.txt"))
  248. self.assertTrue(m.is_ignored("a/c.dat"))
  249. def test_issue_1203_directory_negation(self) -> None:
  250. """Test for issue #1203: gitignore patterns with directory negation."""
  251. tmp_dir = tempfile.mkdtemp()
  252. self.addCleanup(shutil.rmtree, tmp_dir)
  253. repo = Repo.init(tmp_dir)
  254. # Create .gitignore with the patterns from the issue
  255. with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
  256. f.write(b"data/**\n")
  257. f.write(b"!data/*/\n")
  258. # Create directory structure
  259. os.makedirs(os.path.join(repo.path, "data", "subdir"))
  260. m = IgnoreFilterManager.from_repo(repo)
  261. # Test the expected behavior
  262. self.assertTrue(
  263. m.is_ignored("data/test.dvc")
  264. ) # File in data/ should be ignored
  265. self.assertFalse(m.is_ignored("data/")) # data/ directory should not be ignored
  266. self.assertTrue(
  267. m.is_ignored("data/subdir/")
  268. ) # Subdirectory should be ignored (matches Git behavior)
  269. class QuotePathTests(TestCase):
  270. """Tests for _quote_path function."""
  271. def test_ascii_paths(self) -> None:
  272. """Test that ASCII paths are not quoted."""
  273. self.assertEqual(_quote_path("file.txt"), "file.txt")
  274. self.assertEqual(_quote_path("dir/file.txt"), "dir/file.txt")
  275. self.assertEqual(_quote_path("path with spaces.txt"), "path with spaces.txt")
  276. def test_unicode_paths(self) -> None:
  277. """Test that unicode paths are quoted with C-style escapes."""
  278. # Russian characters
  279. self.assertEqual(
  280. _quote_path("тест.txt"), '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"'
  281. )
  282. # Chinese characters
  283. self.assertEqual(
  284. _quote_path("файл.测试"),
  285. '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"',
  286. )
  287. # Mixed ASCII and unicode
  288. self.assertEqual(
  289. _quote_path("test-тест.txt"),
  290. '"test-\\321\\202\\320\\265\\321\\201\\321\\202.txt"',
  291. )
  292. def test_special_characters(self) -> None:
  293. """Test that special characters are properly escaped."""
  294. # Quotes in filename
  295. self.assertEqual(
  296. _quote_path('file"with"quotes.txt'), '"file\\"with\\"quotes.txt"'
  297. )
  298. # Backslashes in filename
  299. self.assertEqual(
  300. _quote_path("file\\with\\backslashes.txt"),
  301. '"file\\\\with\\\\backslashes.txt"',
  302. )
  303. # Mixed special chars and unicode
  304. self.assertEqual(
  305. _quote_path('тест"файл.txt'),
  306. '"\\321\\202\\320\\265\\321\\201\\321\\202\\"\\321\\204\\320\\260\\320\\271\\320\\273.txt"',
  307. )
  308. def test_empty_and_edge_cases(self) -> None:
  309. """Test edge cases."""
  310. self.assertEqual(_quote_path(""), "")
  311. self.assertEqual(_quote_path("a"), "a") # Single ASCII char
  312. self.assertEqual(_quote_path("я"), '"\\321\\217"') # Single unicode char
  313. class CheckIgnoreQuotePathTests(TestCase):
  314. """Integration tests for check_ignore with quote_path parameter."""
  315. def setUp(self) -> None:
  316. self.test_dir = tempfile.mkdtemp()
  317. self.addCleanup(shutil.rmtree, self.test_dir)
  318. def test_quote_path_true_unicode_filenames(self) -> None:
  319. """Test that quote_path=True returns quoted unicode filenames."""
  320. from dulwich import porcelain
  321. # Create a repository
  322. repo = Repo.init(self.test_dir)
  323. self.addCleanup(repo.close)
  324. # Create .gitignore with unicode patterns
  325. gitignore_path = os.path.join(self.test_dir, ".gitignore")
  326. with open(gitignore_path, "w", encoding="utf-8") as f:
  327. f.write("тест*\n")
  328. f.write("*.测试\n")
  329. # Create unicode files
  330. test_files = ["тест.txt", "файл.测试", "normal.txt"]
  331. for filename in test_files:
  332. filepath = os.path.join(self.test_dir, filename)
  333. with open(filepath, "w", encoding="utf-8") as f:
  334. f.write("test content")
  335. # Test with quote_path=True (default)
  336. abs_paths = [os.path.join(self.test_dir, f) for f in test_files]
  337. ignored_quoted = set(
  338. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True)
  339. )
  340. # Test with quote_path=False
  341. ignored_unquoted = set(
  342. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)
  343. )
  344. # Verify quoted results
  345. expected_quoted = {
  346. '"\\321\\202\\320\\265\\321\\201\\321\\202.txt"', # тест.txt
  347. '"\\321\\204\\320\\260\\320\\271\\320\\273.\\346\\265\\213\\350\\257\\225"', # файл.测试
  348. }
  349. self.assertEqual(ignored_quoted, expected_quoted)
  350. # Verify unquoted results
  351. expected_unquoted = {"тест.txt", "файл.测试"}
  352. self.assertEqual(ignored_unquoted, expected_unquoted)
  353. def test_quote_path_ascii_filenames(self) -> None:
  354. """Test that ASCII filenames are unaffected by quote_path setting."""
  355. from dulwich import porcelain
  356. # Create a repository
  357. repo = Repo.init(self.test_dir)
  358. self.addCleanup(repo.close)
  359. # Create .gitignore
  360. gitignore_path = os.path.join(self.test_dir, ".gitignore")
  361. with open(gitignore_path, "w") as f:
  362. f.write("*.tmp\n")
  363. f.write("test*\n")
  364. # Create ASCII files
  365. test_files = ["test.txt", "file.tmp", "normal.txt"]
  366. for filename in test_files:
  367. filepath = os.path.join(self.test_dir, filename)
  368. with open(filepath, "w") as f:
  369. f.write("test content")
  370. # Test both settings
  371. abs_paths = [os.path.join(self.test_dir, f) for f in test_files]
  372. ignored_quoted = set(
  373. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=True)
  374. )
  375. ignored_unquoted = set(
  376. porcelain.check_ignore(self.test_dir, abs_paths, quote_path=False)
  377. )
  378. # Both should return the same results for ASCII filenames
  379. expected = {"test.txt", "file.tmp"}
  380. self.assertEqual(ignored_quoted, expected)
  381. self.assertEqual(ignored_unquoted, expected)