ignore.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. # Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
  2. #
  3. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  4. # General Public License as published by the Free Software Foundation; version 2.0
  5. # or (at your option) any later version. You can redistribute it and/or
  6. # modify it under the terms of either of these two licenses.
  7. #
  8. # Unless required by applicable law or agreed to in writing, software
  9. # distributed under the License is distributed on an "AS IS" BASIS,
  10. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. # See the License for the specific language governing permissions and
  12. # limitations under the License.
  13. #
  14. # You should have received a copy of the licenses; if not, see
  15. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  16. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  17. # License, Version 2.0.
  18. #
  19. """Parsing of gitignore files.
  20. For details of the matching rules, see https://git-scm.com/docs/gitignore
  21. """
  22. import os.path
  23. import re
  24. from typing import (
  25. BinaryIO,
  26. Iterable,
  27. List,
  28. Optional,
  29. TYPE_CHECKING,
  30. Dict,
  31. Union,
  32. )
  33. if TYPE_CHECKING:
  34. from dulwich.repo import Repo
  35. from dulwich.config import get_xdg_config_home_path, Config
  36. def _translate_segment(segment: bytes) -> bytes:
  37. if segment == b"*":
  38. return b"[^/]+"
  39. res = b""
  40. i, n = 0, len(segment)
  41. while i < n:
  42. c = segment[i : i + 1]
  43. i = i + 1
  44. if c == b"*":
  45. res += b"[^/]*"
  46. elif c == b"?":
  47. res += b"[^/]"
  48. elif c == b"\\":
  49. res += re.escape(segment[i : i + 1])
  50. i += 1
  51. elif c == b"[":
  52. j = i
  53. if j < n and segment[j : j + 1] == b"!":
  54. j = j + 1
  55. if j < n and segment[j : j + 1] == b"]":
  56. j = j + 1
  57. while j < n and segment[j : j + 1] != b"]":
  58. j = j + 1
  59. if j >= n:
  60. res += b"\\["
  61. else:
  62. stuff = segment[i:j].replace(b"\\", b"\\\\")
  63. i = j + 1
  64. if stuff.startswith(b"!"):
  65. stuff = b"^" + stuff[1:]
  66. elif stuff.startswith(b"^"):
  67. stuff = b"\\" + stuff
  68. res += b"[" + stuff + b"]"
  69. else:
  70. res += re.escape(c)
  71. return res
  72. def translate(pat: bytes) -> bytes:
  73. """Translate a shell PATTERN to a regular expression.
  74. There is no way to quote meta-characters.
  75. Originally copied from fnmatch in Python 2.7, but modified for Dulwich
  76. to cope with features in Git ignore patterns.
  77. """
  78. res = b"(?ms)"
  79. if b"/" not in pat[:-1]:
  80. # If there's no slash, this is a filename-based match
  81. res += b"(.*/)?"
  82. if pat.startswith(b"**/"):
  83. # Leading **/
  84. pat = pat[2:]
  85. res += b"(.*/)?"
  86. if pat.startswith(b"/"):
  87. pat = pat[1:]
  88. for i, segment in enumerate(pat.split(b"/")):
  89. if segment == b"**":
  90. res += b"(/.*)?"
  91. continue
  92. else:
  93. res += (re.escape(b"/") if i > 0 else b"") + _translate_segment(segment)
  94. if not pat.endswith(b"/"):
  95. res += b"/?"
  96. return res + b"\\Z"
  97. def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]:
  98. """Read a git ignore file.
  99. Args:
  100. f: File-like object to read from
  101. Returns: List of patterns
  102. """
  103. for line in f:
  104. line = line.rstrip(b"\r\n")
  105. # Ignore blank lines, they're used for readability.
  106. if not line.strip():
  107. continue
  108. if line.startswith(b"#"):
  109. # Comment
  110. continue
  111. # Trailing spaces are ignored unless they are quoted with a backslash.
  112. while line.endswith(b" ") and not line.endswith(b"\\ "):
  113. line = line[:-1]
  114. line = line.replace(b"\\ ", b" ")
  115. yield line
  116. def match_pattern(path: bytes, pattern: bytes, ignorecase: bool = False) -> bool:
  117. """Match a gitignore-style pattern against a path.
  118. Args:
  119. path: Path to match
  120. pattern: Pattern to match
  121. ignorecase: Whether to do case-sensitive matching
  122. Returns:
  123. bool indicating whether the pattern matched
  124. """
  125. return Pattern(pattern, ignorecase).match(path)
  126. class Pattern(object):
  127. """A single ignore pattern."""
  128. def __init__(self, pattern: bytes, ignorecase: bool = False):
  129. self.pattern = pattern
  130. self.ignorecase = ignorecase
  131. if pattern[0:1] == b"!":
  132. self.is_exclude = False
  133. pattern = pattern[1:]
  134. else:
  135. if pattern[0:1] == b"\\":
  136. pattern = pattern[1:]
  137. self.is_exclude = True
  138. flags = 0
  139. if self.ignorecase:
  140. flags = re.IGNORECASE
  141. self._re = re.compile(translate(pattern), flags)
  142. def __bytes__(self) -> bytes:
  143. return self.pattern
  144. def __str__(self) -> str:
  145. return os.fsdecode(self.pattern)
  146. def __eq__(self, other: object) -> bool:
  147. return (
  148. isinstance(other, type(self))
  149. and self.pattern == other.pattern
  150. and self.ignorecase == other.ignorecase
  151. )
  152. def __repr__(self) -> str:
  153. return "%s(%r, %r)" % (
  154. type(self).__name__,
  155. self.pattern,
  156. self.ignorecase,
  157. )
  158. def match(self, path: bytes) -> bool:
  159. """Try to match a path against this ignore pattern.
  160. Args:
  161. path: Path to match (relative to ignore location)
  162. Returns: boolean
  163. """
  164. return bool(self._re.match(path))
  165. class IgnoreFilter(object):
  166. def __init__(self, patterns: Iterable[bytes], ignorecase: bool = False, path=None):
  167. self._patterns: List[Pattern] = []
  168. self._ignorecase = ignorecase
  169. self._path = path
  170. for pattern in patterns:
  171. self.append_pattern(pattern)
  172. def append_pattern(self, pattern: bytes) -> None:
  173. """Add a pattern to the set."""
  174. self._patterns.append(Pattern(pattern, self._ignorecase))
  175. def find_matching(self, path: Union[bytes, str]) -> Iterable[Pattern]:
  176. """Yield all matching patterns for path.
  177. Args:
  178. path: Path to match
  179. Returns:
  180. Iterator over iterators
  181. """
  182. if not isinstance(path, bytes):
  183. path = os.fsencode(path)
  184. for pattern in self._patterns:
  185. if pattern.match(path):
  186. yield pattern
  187. def is_ignored(self, path: bytes) -> Optional[bool]:
  188. """Check whether a path is ignored.
  189. For directories, include a trailing slash.
  190. Returns: status is None if file is not mentioned, True if it is
  191. included, False if it is explicitly excluded.
  192. """
  193. status = None
  194. for pattern in self.find_matching(path):
  195. status = pattern.is_exclude
  196. return status
  197. @classmethod
  198. def from_path(cls, path, ignorecase: bool = False) -> "IgnoreFilter":
  199. with open(path, "rb") as f:
  200. return cls(read_ignore_patterns(f), ignorecase, path=path)
  201. def __repr__(self) -> str:
  202. path = getattr(self, "_path", None)
  203. if path is not None:
  204. return "%s.from_path(%r)" % (type(self).__name__, path)
  205. else:
  206. return "<%s>" % (type(self).__name__)
  207. class IgnoreFilterStack(object):
  208. """Check for ignore status in multiple filters."""
  209. def __init__(self, filters):
  210. self._filters = filters
  211. def is_ignored(self, path: str) -> Optional[bool]:
  212. """Check whether a path is explicitly included or excluded in ignores.
  213. Args:
  214. path: Path to check
  215. Returns:
  216. None if the file is not mentioned, True if it is included,
  217. False if it is explicitly excluded.
  218. """
  219. status = None
  220. for filter in self._filters:
  221. status = filter.is_ignored(path)
  222. if status is not None:
  223. return status
  224. return status
  225. def default_user_ignore_filter_path(config: Config) -> str:
  226. """Return default user ignore filter path.
  227. Args:
  228. config: A Config object
  229. Returns:
  230. Path to a global ignore file
  231. """
  232. try:
  233. value = config.get((b"core",), b"excludesFile")
  234. assert isinstance(value, bytes)
  235. return value.decode(encoding="utf-8")
  236. except KeyError:
  237. pass
  238. return get_xdg_config_home_path("git", "ignore")
  239. class IgnoreFilterManager(object):
  240. """Ignore file manager."""
  241. def __init__(
  242. self,
  243. top_path: str,
  244. global_filters: List[IgnoreFilter],
  245. ignorecase: bool,
  246. ):
  247. self._path_filters: Dict[str, Optional[IgnoreFilter]] = {}
  248. self._top_path = top_path
  249. self._global_filters = global_filters
  250. self._ignorecase = ignorecase
  251. def __repr__(self) -> str:
  252. return "%s(%s, %r, %r)" % (
  253. type(self).__name__,
  254. self._top_path,
  255. self._global_filters,
  256. self._ignorecase,
  257. )
  258. def _load_path(self, path: str) -> Optional[IgnoreFilter]:
  259. try:
  260. return self._path_filters[path]
  261. except KeyError:
  262. pass
  263. p = os.path.join(self._top_path, path, ".gitignore")
  264. try:
  265. self._path_filters[path] = IgnoreFilter.from_path(p, self._ignorecase)
  266. except IOError:
  267. self._path_filters[path] = None
  268. return self._path_filters[path]
  269. def find_matching(self, path: str) -> Iterable[Pattern]:
  270. """Find matching patterns for path.
  271. Args:
  272. path: Path to check
  273. Returns:
  274. Iterator over Pattern instances
  275. """
  276. if os.path.isabs(path):
  277. raise ValueError("%s is an absolute path" % path)
  278. filters = [(0, f) for f in self._global_filters]
  279. if os.path.sep != "/":
  280. path = path.replace(os.path.sep, "/")
  281. parts = path.split("/")
  282. matches = []
  283. for i in range(len(parts) + 1):
  284. dirname = "/".join(parts[:i])
  285. for s, f in filters:
  286. relpath = "/".join(parts[s:i])
  287. if i < len(parts):
  288. # Paths leading up to the final part are all directories,
  289. # so need a trailing slash.
  290. relpath += "/"
  291. matches += list(f.find_matching(relpath))
  292. ignore_filter = self._load_path(dirname)
  293. if ignore_filter is not None:
  294. filters.insert(0, (i, ignore_filter))
  295. return iter(matches)
  296. def is_ignored(self, path: str) -> Optional[bool]:
  297. """Check whether a path is explicitly included or excluded in ignores.
  298. Args:
  299. path: Path to check
  300. Returns:
  301. None if the file is not mentioned, True if it is included,
  302. False if it is explicitly excluded.
  303. """
  304. matches = list(self.find_matching(path))
  305. if matches:
  306. return matches[-1].is_exclude
  307. return None
  308. @classmethod
  309. def from_repo(cls, repo: "Repo") -> "IgnoreFilterManager":
  310. """Create a IgnoreFilterManager from a repository.
  311. Args:
  312. repo: Repository object
  313. Returns:
  314. A `IgnoreFilterManager` object
  315. """
  316. global_filters = []
  317. for p in [
  318. os.path.join(repo.controldir(), "info", "exclude"),
  319. default_user_ignore_filter_path(repo.get_config_stack()),
  320. ]:
  321. try:
  322. global_filters.append(IgnoreFilter.from_path(os.path.expanduser(p)))
  323. except IOError:
  324. pass
  325. config = repo.get_config_stack()
  326. ignorecase = config.get_boolean((b"core"), (b"ignorecase"), False)
  327. return cls(repo.path, global_filters, ignorecase)