#!/usr/bin/env python
# This script aims to help developers locate forms and view code that needs to
# use the new CSRF protection in Django 1.2. It tries to find all the code that
# may need the steps described in the CSRF documentation. It does not modify
# any code directly, it merely attempts to locate it. Developers should be
# aware of its limitations, described below.
#
# For each template that contains at least one POST form, the following info is printed:
#
# <Absolute path to template>
#   AKA: <Aliases (relative to template directory/directories that contain it)>
#   POST forms: <Number of POST forms>
#   With token: <Number of POST forms with the CSRF token already added>
#   Without token:
#     <File name and line number of form without token>
#
#   Searching for:
#     <Template names that need to be searched for in view code
#      (includes templates that 'include' current template)>
#
#   Found:
#     <File name and line number of any view code found>
#
# The format used allows this script to be used in Emacs grep mode:
#   M-x grep
#   Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs
# Limitations
# ===========
#
# - All templates must be stored on disk in '.html' or '.htm' files.
#   (extensions configurable below)
#
# - All Python code must be stored on disk in '.py' files. (extensions
#   configurable below)
#
# - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/'
#   directory in apps specified in INSTALLED_APPS. Non-file based template
#   loaders are out of the picture, because there is no way to ask them to
#   return all templates.
#
# - It's impossible to programmatically determine which forms should and should
#   not have the token added. The developer must decide when to do this,
#   ensuring that the token is only added to internally targeted forms.
#
# - It's impossible to programmatically work out when a template is used. The
#   attempts to trace back to view functions are guesses, and could easily fail
#   in the following ways:
#
#   * If the 'include' template tag is used with a variable
#     i.e. {% include tname %} where tname is a variable containing the actual
#     template name, rather than {% include "my_template.html" %}.
#
#   * If the template name has been built up by view code instead of as a simple
#     string. For example, generic views and the admin both do this. (These
#     apps are both contrib and both use RequestContext already, as it happens).
#
#   * If the 'ssi' tag (or any template tag other than 'include') is used to
#     include the template in another template.
#
# - All templates belonging to apps referenced in INSTALLED_APPS will be
#   searched, which may include third party apps or Django contrib. In some
#   cases, this will be a good thing, because even if the templates of these
#   apps have been fixed by someone else, your own view code may reference the
#   same template and may need to be updated.
#
#   You may, however, wish to comment out some entries in INSTALLED_APPS or
#   TEMPLATE_DIRS before running this script.
# Improvements to this script are welcome!
  69. # Configuration
  70. # =============
  71. TEMPLATE_EXTENSIONS = [
  72. ".html",
  73. ".htm",
  74. ]
  75. PYTHON_SOURCE_EXTENSIONS = [
  76. ".py",
  77. ]
  78. TEMPLATE_ENCODING = "UTF-8"
  79. PYTHON_ENCODING = "UTF-8"
# Method
# ======
# Find templates:
#  - template dirs
#  - installed apps
#
# Search for POST forms
#  - Work out what the name of the template is, as it would appear in an
#    'include' or get_template() call. This can be done by comparing template
#    filename to all template dirs. Some templates can have more than one
#    'name' e.g. if a directory and one of its child directories are both in
#    TEMPLATE_DIRS. This is actually a common hack used for
#    overriding-and-extending admin templates.
#
# For each POST form,
# - see if it already contains '{% csrf_token %}' immediately after <form>
# - work back to the view function(s):
#   - First, see if the form is included in any other templates, then
#     recursively compile a list of affected templates.
#   - Find any code function that references that template. This is just a
#     brute force text search that can easily return false positives
#     and fail to find real instances.
from argparse import ArgumentParser
import io
import os
import re
import sys
  106. DESCRIPTION = """This tool helps to locate forms that need CSRF tokens added and the
  107. corresponding view code. This processing is NOT fool proof, and you should read
  108. the help contained in the script itself. Also, this script may need configuring
  109. (by editing the script) before use."""
  110. _POST_FORM_RE = \
  111. re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE)
  112. _FORM_CLOSE_RE = re.compile(r'</form\s*>')
  113. _TOKEN_RE = re.compile('\{% csrf_token')
  114. def get_template_dirs():
  115. """
  116. Returns a set of all directories that contain project templates.
  117. """
  118. from django.conf import settings
  119. dirs = set()
  120. if ('django.template.loaders.filesystem.load_template_source' in settings.TEMPLATE_LOADERS
  121. or 'django.template.loaders.filesystem.Loader' in settings.TEMPLATE_LOADERS):
  122. dirs.update(map(unicode, settings.TEMPLATE_DIRS))
  123. if ('django.template.loaders.app_directories.load_template_source' in settings.TEMPLATE_LOADERS
  124. or 'django.template.loaders.app_directories.Loader' in settings.TEMPLATE_LOADERS):
  125. from django.template.loaders.app_directories import app_template_dirs
  126. dirs.update(app_template_dirs)
  127. return dirs
  128. def make_template_info(filename, root_dirs):
  129. """
  130. Creates a Template object for a filename, calculating the possible
  131. relative_filenames from the supplied filename and root template directories
  132. """
  133. return Template(filename,
  134. [filename[len(d) + 1:] for d in root_dirs if filename.startswith(d)])
  135. class Template(object):
  136. def __init__(self, absolute_filename, relative_filenames):
  137. self.absolute_filename, self.relative_filenames = absolute_filename, relative_filenames
  138. def content(self):
  139. try:
  140. return self._content
  141. except AttributeError:
  142. with open(self.absolute_filename) as fd:
  143. try:
  144. content = fd.read().decode(TEMPLATE_ENCODING)
  145. except UnicodeDecodeError as e:
  146. message = '%s in %s' % (
  147. e[4], self.absolute_filename.encode('UTF-8', 'ignore'))
  148. raise UnicodeDecodeError(*(e.args[:4] + (message,)))
  149. self._content = content
  150. return content
  151. content = property(content)
  152. def post_form_info(self):
  153. """
  154. Get information about any POST forms in the template.
  155. Returns [(linenumber, csrf_token added)]
  156. """
  157. forms = {}
  158. form_line = 0
  159. for ln, line in enumerate(self.content.split("\n")):
  160. if not form_line and _POST_FORM_RE.search(line):
  161. # record the form with no CSRF token yet
  162. form_line = ln + 1
  163. forms[form_line] = False
  164. if form_line and _TOKEN_RE.search(line):
  165. # found the CSRF token
  166. forms[form_line] = True
  167. form_line = 0
  168. if form_line and _FORM_CLOSE_RE.search(line):
  169. # no token found by form closing tag
  170. form_line = 0
  171. return forms.items()
  172. def includes_template(self, t):
  173. """
  174. Returns true if this template includes template 't' (via {% include %})
  175. """
  176. for r in t.relative_filenames:
  177. if re.search(r'\{%\s*include\s+(\'|")' + re.escape(r) + r'(\1)\s*%\}', self.content):
  178. return True
  179. return False
  180. def related_templates(self):
  181. """
  182. Returns all templates that include this one, recursively. (starting
  183. with this one)
  184. """
  185. try:
  186. return self._related_templates
  187. except AttributeError:
  188. pass
  189. retval = set([self])
  190. for t in self.all_templates:
  191. if t.includes_template(self):
  192. # If two templates mutually include each other, directly or
  193. # indirectly, we have a problem here...
  194. retval = retval.union(t.related_templates())
  195. self._related_templates = retval
  196. return retval
  197. def __repr__(self):
  198. return repr(self.absolute_filename)
  199. def __eq__(self, other):
  200. return self.absolute_filename == other.absolute_filename
  201. def __hash__(self):
  202. return hash(self.absolute_filename)
  203. def get_templates(dirs):
  204. """
  205. Returns all files in dirs that have template extensions, as Template
  206. objects.
  207. """
  208. templates = set()
  209. for root in dirs:
  210. for (dirpath, dirnames, filenames) in os.walk(root):
  211. for f in filenames:
  212. if len([True for e in TEMPLATE_EXTENSIONS if f.endswith(e)]) > 0:
  213. t = make_template_info(os.path.join(dirpath, f), dirs)
  214. # templates need to be able to search others:
  215. t.all_templates = templates
  216. templates.add(t)
  217. return templates
  218. def get_python_code(paths):
  219. """
  220. Returns all Python code, as a list of tuples, each one being:
  221. (filename, list of lines)
  222. """
  223. retval = []
  224. for p in paths:
  225. if not os.path.isdir(p):
  226. raise Exception("'%s' is not a directory." % p)
  227. for (dirpath, dirnames, filenames) in os.walk(p):
  228. for f in filenames:
  229. if len([True for e in PYTHON_SOURCE_EXTENSIONS if f.endswith(e)]) > 0:
  230. fn = os.path.join(dirpath, f)
  231. with open(fn) as fd:
  232. content = [l.decode(PYTHON_ENCODING) for l in fd.readlines()]
  233. retval.append((fn, content))
  234. return retval
  235. def search_python_list(python_code, template_names):
  236. """
  237. Searches python code for a list of template names.
  238. Returns a list of tuples, each one being:
  239. (filename, line number)
  240. """
  241. retval = set()
  242. for tn in template_names:
  243. retval.update(search_python(python_code, tn))
  244. return sorted(retval)
  245. def search_python(python_code, template_name):
  246. """
  247. Searches Python code for a template name.
  248. Returns a list of tuples, each one being:
  249. (filename, line number)
  250. """
  251. retval = []
  252. for fn, content in python_code:
  253. for ln, line in enumerate(content):
  254. if ((u'"%s"' % template_name) in line) or \
  255. ((u"'%s'" % template_name) in line):
  256. retval.append((fn, ln + 1))
  257. return retval
  258. def main(pythonpaths):
  259. template_dirs = get_template_dirs()
  260. templates = get_templates(template_dirs)
  261. python_code = get_python_code(pythonpaths)
  262. for t in templates:
  263. # Logic
  264. form_matches = t.post_form_info()
  265. num_post_forms = len(form_matches)
  266. form_lines_without_token = [ln for (ln, has_token) in form_matches if not has_token]
  267. if num_post_forms == 0:
  268. continue
  269. to_search = [rf for rt in t.related_templates() for rf in rt.relative_filenames]
  270. found = search_python_list(python_code, to_search)
  271. # Display:
  272. print(t.absolute_filename)
  273. for r in t.relative_filenames:
  274. print(" AKA %s" % r)
  275. print(" POST forms: %s" % num_post_forms)
  276. print(" With token: %s" % (num_post_forms - len(form_lines_without_token)))
  277. if form_lines_without_token:
  278. print(" Without token:")
  279. for ln in form_lines_without_token:
  280. print("%s:%d:" % (t.absolute_filename, ln))
  281. print('')
  282. print(" Searching for:")
  283. for r in to_search:
  284. print(" " + r)
  285. print('')
  286. print(" Found:")
  287. if len(found) == 0:
  288. print(" Nothing")
  289. else:
  290. for fn, ln in found:
  291. print("%s:%d:" % (fn, ln))
  292. print('')
  293. print("----")
  294. if __name__ == '__main__':
  295. parser = ArgumentParser(description=DESCRIPTION)
  296. parser.add_argument('files', nargs='*', help='Paths can be specified as relative paths.')
  297. parser.add_argument("--settings", help="Dotted path to settings file")
  298. options = parser.parse_args()
  299. if len(options.files) == 0:
  300. parser.print_help()
  301. sys.exit(1)
  302. if options.settings is None:
  303. if os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
  304. print("You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter")
  305. sys.exit(1)
  306. else:
  307. os.environ["DJANGO_SETTINGS_MODULE"] = settings
  308. main(options.files)