ignore.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. # Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
  2. #
  3. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  4. # General Public License as published by the Free Software Foundation; version 2.0
  5. # or (at your option) any later version. You can redistribute it and/or
  6. # modify it under the terms of either of these two licenses.
  7. #
  8. # Unless required by applicable law or agreed to in writing, software
  9. # distributed under the License is distributed on an "AS IS" BASIS,
  10. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. # See the License for the specific language governing permissions and
  12. # limitations under the License.
  13. #
  14. # You should have received a copy of the licenses; if not, see
  15. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  16. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  17. # License, Version 2.0.
  18. #
  19. """Parsing of gitignore files.
  20. For details of the matching rules, see https://git-scm.com/docs/gitignore
  21. """
  22. import os.path
  23. import re
  24. import sys
  25. def _translate_segment(segment):
  26. if segment == b"*":
  27. return b'[^/]+'
  28. res = b""
  29. i, n = 0, len(segment)
  30. while i < n:
  31. c = segment[i:i+1]
  32. i = i+1
  33. if c == b'*':
  34. res += b'[^/]*'
  35. elif c == b'?':
  36. res += b'[^/]'
  37. elif c == b'[':
  38. j = i
  39. if j < n and segment[j:j+1] == b'!':
  40. j = j+1
  41. if j < n and segment[j:j+1] == b']':
  42. j = j+1
  43. while j < n and segment[j:j+1] != b']':
  44. j = j+1
  45. if j >= n:
  46. res += b'\\['
  47. else:
  48. stuff = segment[i:j].replace(b'\\', b'\\\\')
  49. i = j+1
  50. if stuff.startswith(b'!'):
  51. stuff = b'^' + stuff[1:]
  52. elif stuff.startswith(b'^'):
  53. stuff = b'\\' + stuff
  54. res += b'[' + stuff + b']'
  55. else:
  56. res += re.escape(c)
  57. return res
  58. def translate(pat):
  59. """Translate a shell PATTERN to a regular expression.
  60. There is no way to quote meta-characters.
  61. Originally copied from fnmatch in Python 2.7, but modified for Dulwich
  62. to cope with features in Git ignore patterns.
  63. """
  64. res = b'(?ms)'
  65. if b'/' not in pat[:-1]:
  66. # If there's no slash, this is a filename-based match
  67. res += b'(.*/)?'
  68. if pat.startswith(b'**/'):
  69. # Leading **/
  70. pat = pat[2:]
  71. res += b'(.*/)?'
  72. if pat.startswith(b'/'):
  73. pat = pat[1:]
  74. for i, segment in enumerate(pat.split(b'/')):
  75. if segment == b'**':
  76. res += b'(/.*)?'
  77. continue
  78. else:
  79. res += ((re.escape(b'/') if i > 0 else b'') +
  80. _translate_segment(segment))
  81. if not pat.endswith(b'/'):
  82. res += b'/?'
  83. return res + b'\\Z'
  84. def read_ignore_patterns(f):
  85. """Read a git ignore file.
  86. Args:
  87. f: File-like object to read from
  88. Returns: List of patterns
  89. """
  90. for line in f:
  91. line = line.rstrip(b"\r\n")
  92. # Ignore blank lines, they're used for readability.
  93. if not line:
  94. continue
  95. if line.startswith(b'#'):
  96. # Comment
  97. continue
  98. # Trailing spaces are ignored unless they are quoted with a backslash.
  99. while line.endswith(b' ') and not line.endswith(b'\\ '):
  100. line = line[:-1]
  101. line = line.replace(b'\\ ', b' ')
  102. yield line
  103. def match_pattern(path, pattern, ignorecase=False):
  104. """Match a gitignore-style pattern against a path.
  105. Args:
  106. path: Path to match
  107. pattern: Pattern to match
  108. ignorecase: Whether to do case-sensitive matching
  109. Returns:
  110. bool indicating whether the pattern matched
  111. """
  112. return Pattern(pattern, ignorecase).match(path)
  113. class Pattern(object):
  114. """A single ignore pattern."""
  115. def __init__(self, pattern, ignorecase=False):
  116. self.pattern = pattern
  117. self.ignorecase = ignorecase
  118. if pattern[0:1] == b'!':
  119. self.is_exclude = False
  120. pattern = pattern[1:]
  121. else:
  122. if pattern[0:1] == b'\\':
  123. pattern = pattern[1:]
  124. self.is_exclude = True
  125. flags = 0
  126. if self.ignorecase:
  127. flags = re.IGNORECASE
  128. self._re = re.compile(translate(pattern), flags)
  129. def __bytes__(self):
  130. return self.pattern
  131. def __str__(self):
  132. return self.pattern.decode(sys.getfilesystemencoding())
  133. def __eq__(self, other):
  134. return (type(self) == type(other) and
  135. self.pattern == other.pattern and
  136. self.ignorecase == other.ignorecase)
  137. def __repr__(self):
  138. return "%s(%s, %r)" % (
  139. type(self).__name__, self.pattern, self.ignorecase)
  140. def match(self, path):
  141. """Try to match a path against this ignore pattern.
  142. Args:
  143. path: Path to match (relative to ignore location)
  144. Returns: boolean
  145. """
  146. return bool(self._re.match(path))
  147. class IgnoreFilter(object):
  148. def __init__(self, patterns, ignorecase=False):
  149. self._patterns = []
  150. self._ignorecase = ignorecase
  151. for pattern in patterns:
  152. self.append_pattern(pattern)
  153. def append_pattern(self, pattern):
  154. """Add a pattern to the set."""
  155. self._patterns.append(Pattern(pattern, self._ignorecase))
  156. def find_matching(self, path):
  157. """Yield all matching patterns for path.
  158. Args:
  159. path: Path to match
  160. Returns:
  161. Iterator over iterators
  162. """
  163. if not isinstance(path, bytes):
  164. path = path.encode(sys.getfilesystemencoding())
  165. for pattern in self._patterns:
  166. if pattern.match(path):
  167. yield pattern
  168. def is_ignored(self, path):
  169. """Check whether a path is ignored.
  170. For directories, include a trailing slash.
  171. Returns: status is None if file is not mentioned, True if it is
  172. included, False if it is explicitly excluded.
  173. """
  174. status = None
  175. for pattern in self.find_matching(path):
  176. status = pattern.is_exclude
  177. return status
  178. @classmethod
  179. def from_path(cls, path, ignorecase=False):
  180. with open(path, 'rb') as f:
  181. ret = cls(read_ignore_patterns(f), ignorecase)
  182. ret._path = path
  183. return ret
  184. def __repr__(self):
  185. if getattr(self, '_path', None) is None:
  186. return "<%s>" % (type(self).__name__)
  187. else:
  188. return "%s.from_path(%r)" % (type(self).__name__, self._path)
  189. class IgnoreFilterStack(object):
  190. """Check for ignore status in multiple filters."""
  191. def __init__(self, filters):
  192. self._filters = filters
  193. def is_ignored(self, path):
  194. """Check whether a path is explicitly included or excluded in ignores.
  195. Args:
  196. path: Path to check
  197. Returns:
  198. None if the file is not mentioned, True if it is included,
  199. False if it is explicitly excluded.
  200. """
  201. status = None
  202. for filter in self._filters:
  203. status = filter.is_ignored(path)
  204. if status is not None:
  205. return status
  206. return status
  207. def default_user_ignore_filter_path(config):
  208. """Return default user ignore filter path.
  209. Args:
  210. config: A Config object
  211. Returns:
  212. Path to a global ignore file
  213. """
  214. try:
  215. return config.get((b'core', ), b'excludesFile')
  216. except KeyError:
  217. pass
  218. xdg_config_home = os.environ.get(
  219. "XDG_CONFIG_HOME", os.path.expanduser("~/.config/"),
  220. )
  221. return os.path.join(xdg_config_home, 'git', 'ignore')
  222. class IgnoreFilterManager(object):
  223. """Ignore file manager."""
  224. def __init__(self, top_path, global_filters, ignorecase):
  225. self._path_filters = {}
  226. self._top_path = top_path
  227. self._global_filters = global_filters
  228. self._ignorecase = ignorecase
  229. def __repr__(self):
  230. return "%s(%s, %r, %r)" % (
  231. type(self).__name__, self._top_path,
  232. self._global_filters,
  233. self._ignorecase)
  234. def _load_path(self, path):
  235. try:
  236. return self._path_filters[path]
  237. except KeyError:
  238. pass
  239. p = os.path.join(self._top_path, path, '.gitignore')
  240. try:
  241. self._path_filters[path] = IgnoreFilter.from_path(
  242. p, self._ignorecase)
  243. except IOError:
  244. self._path_filters[path] = None
  245. return self._path_filters[path]
  246. def find_matching(self, path):
  247. """Find matching patterns for path.
  248. Stops after the first ignore file with matches.
  249. Args:
  250. path: Path to check
  251. Returns:
  252. Iterator over Pattern instances
  253. """
  254. if os.path.isabs(path):
  255. raise ValueError('%s is an absolute path' % path)
  256. filters = [(0, f) for f in self._global_filters]
  257. if os.path.sep != '/':
  258. path = path.replace(os.path.sep, '/')
  259. parts = path.split('/')
  260. for i in range(len(parts)+1):
  261. dirname = '/'.join(parts[:i])
  262. for s, f in filters:
  263. relpath = '/'.join(parts[s:i])
  264. if i < len(parts):
  265. # Paths leading up to the final part are all directories,
  266. # so need a trailing slash.
  267. relpath += '/'
  268. matches = list(f.find_matching(relpath))
  269. if matches:
  270. return iter(matches)
  271. ignore_filter = self._load_path(dirname)
  272. if ignore_filter is not None:
  273. filters.insert(0, (i, ignore_filter))
  274. return iter([])
  275. def is_ignored(self, path):
  276. """Check whether a path is explicitly included or excluded in ignores.
  277. Args:
  278. path: Path to check
  279. Returns:
  280. None if the file is not mentioned, True if it is included,
  281. False if it is explicitly excluded.
  282. """
  283. matches = list(self.find_matching(path))
  284. if matches:
  285. return matches[-1].is_exclude
  286. return None
  287. @classmethod
  288. def from_repo(cls, repo):
  289. """Create a IgnoreFilterManager from a repository.
  290. Args:
  291. repo: Repository object
  292. Returns:
  293. A `IgnoreFilterManager` object
  294. """
  295. global_filters = []
  296. for p in [
  297. os.path.join(repo.controldir(), 'info', 'exclude'),
  298. default_user_ignore_filter_path(repo.get_config_stack())]:
  299. try:
  300. global_filters.append(IgnoreFilter.from_path(p))
  301. except IOError:
  302. pass
  303. config = repo.get_config_stack()
  304. ignorecase = config.get_boolean((b'core'), (b'ignorecase'), False)
  305. return cls(repo.path, global_filters, ignorecase)