ignore.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
# Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
"""Parsing of gitignore files.

For details of the matching rules, see https://git-scm.com/docs/gitignore

Important: When checking if directories are ignored, include a trailing slash in the path.
For example, use "dir/" instead of "dir" to check if a directory is ignored.
"""
import errno
import os.path
import re
from collections.abc import Iterable
from contextlib import suppress
from typing import TYPE_CHECKING, BinaryIO, Optional, Union

if TYPE_CHECKING:
    from .repo import Repo

from .config import Config, get_xdg_config_home_path
  33. def _pattern_to_str(pattern: Union["Pattern", bytes, str]) -> str:
  34. """Convert a pattern to string, handling both Pattern objects and raw patterns."""
  35. if hasattr(pattern, "pattern"):
  36. pattern_bytes = pattern.pattern
  37. else:
  38. pattern_bytes = pattern
  39. return pattern_bytes.decode() if isinstance(pattern_bytes, bytes) else pattern_bytes
  40. def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
  41. """Check if a parent directory exclusion prevents negation patterns from taking effect.
  42. Args:
  43. path: Path to check
  44. matching_patterns: List of Pattern objects that matched the path
  45. Returns:
  46. True if parent exclusion applies (negation should be ineffective), False otherwise
  47. """
  48. # Find the final negation pattern that would include this file
  49. final_negation_pattern = None
  50. for pattern in reversed(matching_patterns):
  51. if not pattern.is_exclude: # is_exclude=False means negation/inclusion
  52. final_negation_pattern = pattern
  53. break
  54. if not final_negation_pattern:
  55. return False # No negation to check
  56. final_pattern_str = _pattern_to_str(final_negation_pattern)
  57. # Check each exclusion pattern to see if it excludes a parent directory
  58. for pattern in matching_patterns:
  59. if not pattern.is_exclude: # Skip negations
  60. continue
  61. pattern_str = _pattern_to_str(pattern)
  62. if _pattern_excludes_parent(pattern_str, path, final_pattern_str):
  63. return True
  64. return False # No parent exclusion applies
  65. def _pattern_excludes_parent(
  66. pattern_str: str, path: str, final_pattern_str: str
  67. ) -> bool:
  68. """Check if a pattern excludes a parent directory of the given path."""
  69. # Case 1: Direct directory exclusion (pattern ending with /)
  70. if pattern_str.endswith("/"):
  71. excluded_dir = pattern_str[:-1] # Remove trailing /
  72. return "/" in path and path.startswith(excluded_dir + "/")
  73. # Case 2: Recursive exclusion patterns (**/dir/**)
  74. if pattern_str.startswith("**/") and pattern_str.endswith("/**"):
  75. dir_name = pattern_str[3:-3] # Remove **/ and /**
  76. return dir_name != "" and ("/" + dir_name + "/") in ("/" + path)
  77. # Case 3: Directory glob patterns (dir/**)
  78. if pattern_str.endswith("/**") and not pattern_str.startswith("**/"):
  79. dir_prefix = pattern_str[:-3] # Remove /**
  80. if path.startswith(dir_prefix + "/"):
  81. # Check if this is a nested path (more than one level under dir_prefix)
  82. remaining_path = path[len(dir_prefix + "/") :]
  83. if "/" in remaining_path:
  84. # This is a nested path - parent directory exclusion applies
  85. # BUT only for directory negations, not file negations
  86. return final_pattern_str.endswith("/")
  87. return False
  88. def _translate_segment(segment: bytes) -> bytes:
  89. """Translate a single path segment to regex, following Git rules exactly."""
  90. if segment == b"*":
  91. return b"[^/]+"
  92. res = b""
  93. i, n = 0, len(segment)
  94. while i < n:
  95. c = segment[i : i + 1]
  96. i += 1
  97. if c == b"*":
  98. res += b"[^/]*"
  99. elif c == b"?":
  100. res += b"[^/]"
  101. elif c == b"\\":
  102. if i < n:
  103. res += re.escape(segment[i : i + 1])
  104. i += 1
  105. else:
  106. res += re.escape(c)
  107. elif c == b"[":
  108. j = i
  109. if j < n and segment[j : j + 1] == b"!":
  110. j += 1
  111. if j < n and segment[j : j + 1] == b"]":
  112. j += 1
  113. while j < n and segment[j : j + 1] != b"]":
  114. j += 1
  115. if j >= n:
  116. res += b"\\["
  117. else:
  118. stuff = segment[i:j].replace(b"\\", b"\\\\")
  119. i = j + 1
  120. if stuff.startswith(b"!"):
  121. stuff = b"^" + stuff[1:]
  122. elif stuff.startswith(b"^"):
  123. stuff = b"\\" + stuff
  124. res += b"[" + stuff + b"]"
  125. else:
  126. res += re.escape(c)
  127. return res
  128. def _handle_double_asterisk(segments: list[bytes], i: int) -> tuple[bytes, bool]:
  129. """Handle ** segment processing, returns (regex_part, skip_next)."""
  130. # Check if ** is at end
  131. remaining = segments[i + 1 :]
  132. if all(s == b"" for s in remaining):
  133. # ** at end - matches everything
  134. return b".*", False
  135. # Check if next segment is also **
  136. if i + 1 < len(segments) and segments[i + 1] == b"**":
  137. # Consecutive ** segments
  138. # Check if this ends with a directory pattern (trailing /)
  139. remaining_after_next = segments[i + 2 :]
  140. is_dir_pattern = (
  141. len(remaining_after_next) == 1 and remaining_after_next[0] == b""
  142. )
  143. if is_dir_pattern:
  144. # Pattern like c/**/**/ - requires at least one intermediate directory
  145. return b"[^/]+/(?:[^/]+/)*", True
  146. else:
  147. # Pattern like c/**/**/d - allows zero intermediate directories
  148. return b"(?:[^/]+/)*", True
  149. else:
  150. # ** in middle - handle differently depending on what follows
  151. if i == 0:
  152. # ** at start - any prefix
  153. return b"(?:.*/)??", False
  154. else:
  155. # ** in middle - match zero or more complete directory segments
  156. return b"(?:[^/]+/)*", False
  157. def _handle_leading_patterns(pat: bytes, res: bytes) -> tuple[bytes, bytes]:
  158. """Handle leading patterns like ``/**/``, ``**/``, or ``/``."""
  159. if pat.startswith(b"/**/"):
  160. # Leading /** is same as **
  161. return pat[4:], b"(.*/)?"
  162. elif pat.startswith(b"**/"):
  163. # Leading **/
  164. return pat[3:], b"(.*/)?"
  165. elif pat.startswith(b"/"):
  166. # Leading / means relative to .gitignore location
  167. return pat[1:], b""
  168. else:
  169. return pat, b""
  170. def translate(pat: bytes) -> bytes:
  171. """Translate a gitignore pattern to a regular expression following Git rules exactly."""
  172. res = b"(?ms)"
  173. # Check for invalid patterns with // - Git treats these as broken patterns
  174. if b"//" in pat:
  175. # Pattern with // doesn't match anything in Git
  176. return b"(?!.*)" # Negative lookahead - matches nothing
  177. # Don't normalize consecutive ** patterns - Git treats them specially
  178. # c/**/**/ requires at least one intermediate directory
  179. # So we keep the pattern as-is
  180. # Handle patterns with no slashes (match at any level)
  181. if b"/" not in pat[:-1]: # No slash except possibly at end
  182. res += b"(.*/)?"
  183. # Handle leading patterns
  184. pat, prefix_added = _handle_leading_patterns(pat, res)
  185. if prefix_added:
  186. res += prefix_added
  187. # Process the rest of the pattern
  188. if pat == b"**":
  189. res += b".*"
  190. else:
  191. segments = pat.split(b"/")
  192. i = 0
  193. while i < len(segments):
  194. segment = segments[i]
  195. # Add slash separator (except for first segment)
  196. if i > 0 and segments[i - 1] != b"**":
  197. res += re.escape(b"/")
  198. if segment == b"**":
  199. regex_part, skip_next = _handle_double_asterisk(segments, i)
  200. res += regex_part
  201. if regex_part == b".*": # End of pattern
  202. break
  203. if skip_next:
  204. i += 1
  205. else:
  206. res += _translate_segment(segment)
  207. i += 1
  208. # Add optional trailing slash for files
  209. if not pat.endswith(b"/"):
  210. res += b"/?"
  211. return res + b"\\Z"
  212. def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]:
  213. """Read a git ignore file.
  214. Args:
  215. f: File-like object to read from
  216. Returns: List of patterns
  217. """
  218. for line in f:
  219. line = line.rstrip(b"\r\n")
  220. # Ignore blank lines, they're used for readability.
  221. if not line.strip():
  222. continue
  223. if line.startswith(b"#"):
  224. # Comment
  225. continue
  226. # Trailing spaces are ignored unless they are quoted with a backslash.
  227. while line.endswith(b" ") and not line.endswith(b"\\ "):
  228. line = line[:-1]
  229. line = line.replace(b"\\ ", b" ")
  230. yield line
  231. def match_pattern(path: bytes, pattern: bytes, ignorecase: bool = False) -> bool:
  232. """Match a gitignore-style pattern against a path.
  233. Args:
  234. path: Path to match
  235. pattern: Pattern to match
  236. ignorecase: Whether to do case-sensitive matching
  237. Returns:
  238. bool indicating whether the pattern matched
  239. """
  240. return Pattern(pattern, ignorecase).match(path)
  241. class Pattern:
  242. """A single ignore pattern."""
  243. def __init__(self, pattern: bytes, ignorecase: bool = False) -> None:
  244. self.pattern = pattern
  245. self.ignorecase = ignorecase
  246. # Handle negation
  247. if pattern.startswith(b"!"):
  248. self.is_exclude = False
  249. pattern = pattern[1:]
  250. else:
  251. # Handle escaping of ! and # at start only
  252. if (
  253. pattern.startswith(b"\\")
  254. and len(pattern) > 1
  255. and pattern[1:2] in (b"!", b"#")
  256. ):
  257. pattern = pattern[1:]
  258. self.is_exclude = True
  259. # Check if this is a directory-only pattern
  260. self.is_directory_only = pattern.endswith(b"/")
  261. flags = 0
  262. if self.ignorecase:
  263. flags = re.IGNORECASE
  264. self._re = re.compile(translate(pattern), flags)
  265. def __bytes__(self) -> bytes:
  266. return self.pattern
  267. def __str__(self) -> str:
  268. return os.fsdecode(self.pattern)
  269. def __eq__(self, other: object) -> bool:
  270. return (
  271. isinstance(other, type(self))
  272. and self.pattern == other.pattern
  273. and self.ignorecase == other.ignorecase
  274. )
  275. def __repr__(self) -> str:
  276. return f"{type(self).__name__}({self.pattern!r}, {self.ignorecase!r})"
  277. def match(self, path: bytes) -> bool:
  278. """Try to match a path against this ignore pattern.
  279. Args:
  280. path: Path to match (relative to ignore location)
  281. Returns: boolean
  282. """
  283. if self._re.match(path):
  284. return True
  285. # Special handling for directory patterns that exclude files under them
  286. if self.is_directory_only and self.is_exclude:
  287. # For exclusion directory patterns, also match files under the directory
  288. if not path.endswith(b"/"):
  289. # This is a file - check if it's under any directory that matches the pattern
  290. path_dir = path.rsplit(b"/", 1)[0] + b"/"
  291. if len(path.split(b"/")) > 1 and self._re.match(path_dir):
  292. return True
  293. return False
  294. class IgnoreFilter:
  295. """Filter to apply gitignore patterns.
  296. Important: When checking if directories are ignored, include a trailing slash.
  297. For example, use is_ignored("dir/") instead of is_ignored("dir").
  298. """
  299. def __init__(
  300. self,
  301. patterns: Iterable[bytes],
  302. ignorecase: bool = False,
  303. path: Optional[str] = None,
  304. ) -> None:
  305. self._patterns: list[Pattern] = []
  306. self._ignorecase = ignorecase
  307. self._path = path
  308. for pattern in patterns:
  309. self.append_pattern(pattern)
  310. def append_pattern(self, pattern: bytes) -> None:
  311. """Add a pattern to the set."""
  312. self._patterns.append(Pattern(pattern, self._ignorecase))
  313. def find_matching(self, path: Union[bytes, str]) -> Iterable[Pattern]:
  314. """Yield all matching patterns for path.
  315. Args:
  316. path: Path to match
  317. Returns:
  318. Iterator over iterators
  319. """
  320. if not isinstance(path, bytes):
  321. path = os.fsencode(path)
  322. for pattern in self._patterns:
  323. if pattern.match(path):
  324. yield pattern
  325. def is_ignored(self, path: Union[bytes, str]) -> Optional[bool]:
  326. """Check whether a path is ignored using Git-compliant logic.
  327. For directories, include a trailing slash.
  328. Returns: status is None if file is not mentioned, True if it is
  329. included, False if it is explicitly excluded.
  330. """
  331. matching_patterns = list(self.find_matching(path))
  332. if not matching_patterns:
  333. return None
  334. # Basic rule: last matching pattern wins
  335. last_pattern = matching_patterns[-1]
  336. result = last_pattern.is_exclude
  337. # Apply Git's parent directory exclusion rule for negations
  338. if not result: # Only applies to inclusions (negations)
  339. result = self._apply_parent_exclusion_rule(
  340. path.decode() if isinstance(path, bytes) else path, matching_patterns
  341. )
  342. return result
  343. def _apply_parent_exclusion_rule(
  344. self, path: str, matching_patterns: list[Pattern]
  345. ) -> bool:
  346. """Apply Git's parent directory exclusion rule.
  347. "It is not possible to re-include a file if a parent directory of that file is excluded."
  348. """
  349. return _check_parent_exclusion(path, matching_patterns)
  350. @classmethod
  351. def from_path(
  352. cls, path: Union[str, os.PathLike], ignorecase: bool = False
  353. ) -> "IgnoreFilter":
  354. with open(path, "rb") as f:
  355. return cls(read_ignore_patterns(f), ignorecase, path=str(path))
  356. def __repr__(self) -> str:
  357. path = getattr(self, "_path", None)
  358. if path is not None:
  359. return f"{type(self).__name__}.from_path({path!r})"
  360. else:
  361. return f"<{type(self).__name__}>"
  362. class IgnoreFilterStack:
  363. """Check for ignore status in multiple filters."""
  364. def __init__(self, filters: list[IgnoreFilter]) -> None:
  365. self._filters = filters
  366. def is_ignored(self, path: str) -> Optional[bool]:
  367. """Check whether a path is explicitly included or excluded in ignores.
  368. Args:
  369. path: Path to check
  370. Returns:
  371. None if the file is not mentioned, True if it is included,
  372. False if it is explicitly excluded.
  373. """
  374. status = None
  375. for filter in self._filters:
  376. status = filter.is_ignored(path)
  377. if status is not None:
  378. return status
  379. return status
  380. def default_user_ignore_filter_path(config: Config) -> str:
  381. """Return default user ignore filter path.
  382. Args:
  383. config: A Config object
  384. Returns:
  385. Path to a global ignore file
  386. """
  387. try:
  388. value = config.get((b"core",), b"excludesFile")
  389. assert isinstance(value, bytes)
  390. return value.decode(encoding="utf-8")
  391. except KeyError:
  392. pass
  393. return get_xdg_config_home_path("git", "ignore")
  394. class IgnoreFilterManager:
  395. """Ignore file manager with Git-compliant behavior.
  396. Important: When checking if directories are ignored, include a trailing slash.
  397. For example, use is_ignored("dir/") instead of is_ignored("dir").
  398. """
  399. def __init__(
  400. self,
  401. top_path: str,
  402. global_filters: list[IgnoreFilter],
  403. ignorecase: bool,
  404. ) -> None:
  405. self._path_filters: dict[str, Optional[IgnoreFilter]] = {}
  406. self._top_path = top_path
  407. self._global_filters = global_filters
  408. self._ignorecase = ignorecase
  409. def __repr__(self) -> str:
  410. return f"{type(self).__name__}({self._top_path}, {self._global_filters!r}, {self._ignorecase!r})"
  411. def _load_path(self, path: str) -> Optional[IgnoreFilter]:
  412. try:
  413. return self._path_filters[path]
  414. except KeyError:
  415. pass
  416. p = os.path.join(self._top_path, path, ".gitignore")
  417. try:
  418. self._path_filters[path] = IgnoreFilter.from_path(p, self._ignorecase)
  419. except (FileNotFoundError, NotADirectoryError):
  420. self._path_filters[path] = None
  421. except OSError as e:
  422. # On Windows, opening a path that contains a symlink can fail with
  423. # errno 22 (Invalid argument) when the symlink points outside the repo
  424. if e.errno == 22:
  425. self._path_filters[path] = None
  426. else:
  427. raise
  428. return self._path_filters[path]
  429. def find_matching(self, path: str) -> Iterable[Pattern]:
  430. """Find matching patterns for path.
  431. Args:
  432. path: Path to check
  433. Returns:
  434. Iterator over Pattern instances
  435. """
  436. if os.path.isabs(path):
  437. raise ValueError(f"{path} is an absolute path")
  438. filters = [(0, f) for f in self._global_filters]
  439. if os.path.sep != "/":
  440. path = path.replace(os.path.sep, "/")
  441. parts = path.split("/")
  442. matches = []
  443. for i in range(len(parts) + 1):
  444. dirname = "/".join(parts[:i])
  445. for s, f in filters:
  446. relpath = "/".join(parts[s:i])
  447. if i < len(parts):
  448. # Paths leading up to the final part are all directories,
  449. # so need a trailing slash.
  450. relpath += "/"
  451. matches += list(f.find_matching(relpath))
  452. ignore_filter = self._load_path(dirname)
  453. if ignore_filter is not None:
  454. filters.insert(0, (i, ignore_filter))
  455. return iter(matches)
  456. def is_ignored(self, path: str) -> Optional[bool]:
  457. """Check whether a path is explicitly included or excluded in ignores.
  458. Args:
  459. path: Path to check. For directories, the path should end with '/'.
  460. Returns:
  461. None if the file is not mentioned, True if it is included,
  462. False if it is explicitly excluded.
  463. """
  464. matches = list(self.find_matching(path))
  465. if not matches:
  466. return None
  467. # Standard behavior - last matching pattern wins
  468. result = matches[-1].is_exclude
  469. # Apply Git's parent directory exclusion rule for negations
  470. if not result: # Only check if we would include due to negation
  471. result = _check_parent_exclusion(path, matches)
  472. # Apply special case for issue #1203: directory traversal with ** patterns
  473. if result and path.endswith("/"):
  474. result = self._apply_directory_traversal_rule(path, matches)
  475. return result
  476. def _apply_directory_traversal_rule(self, path: str, matches: list) -> bool:
  477. """Apply directory traversal rule for issue #1203.
  478. If a directory would be ignored by a ** pattern, but there are negation
  479. patterns for its subdirectories, then the directory itself should not
  480. be ignored (to allow traversal).
  481. """
  482. # Get the last pattern that determined the result
  483. last_excluding_pattern = None
  484. for match in matches:
  485. if match.is_exclude:
  486. last_excluding_pattern = match
  487. if last_excluding_pattern and (
  488. last_excluding_pattern.pattern.endswith(b"**")
  489. or b"**" in last_excluding_pattern.pattern
  490. ):
  491. # Check if subdirectories would be unignored
  492. test_subdir = path + "test/"
  493. test_matches = list(self.find_matching(test_subdir))
  494. if test_matches:
  495. # Use standard logic for test case - last matching pattern wins
  496. test_result = test_matches[-1].is_exclude
  497. if test_result is False:
  498. return False
  499. return True # Keep original result
  500. @classmethod
  501. def from_repo(cls, repo: "Repo") -> "IgnoreFilterManager":
  502. """Create a IgnoreFilterManager from a repository.
  503. Args:
  504. repo: Repository object
  505. Returns:
  506. A `IgnoreFilterManager` object
  507. """
  508. global_filters = []
  509. for p in [
  510. os.path.join(repo.controldir(), "info", "exclude"),
  511. default_user_ignore_filter_path(repo.get_config_stack()),
  512. ]:
  513. with suppress(OSError):
  514. global_filters.append(IgnoreFilter.from_path(os.path.expanduser(p)))
  515. config = repo.get_config_stack()
  516. ignorecase = config.get_boolean((b"core"), (b"ignorecase"), False)
  517. return cls(repo.path, global_filters, ignorecase)