123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356 |
- #!/usr/bin/env python
- # This script aims to help developers locate forms and view code that needs to
- # use the new CSRF protection in Django 1.2. It tries to find all the code that
- # may need the steps described in the CSRF documentation. It does not modify
- # any code directly, it merely attempts to locate it. Developers should be
- # aware of its limitations, described below.
- #
- # For each template that contains at least one POST form, the following info is printed:
- #
- # <Absolute path to template>
- # AKA: <Aliases (relative to template directory/directories that contain it)>
- # POST forms: <Number of POST forms>
- # With token: <Number of POST forms with the CSRF token already added>
- # Without token:
- # <File name and line number of form without token>
- #
- # Searching for:
- # <Template names that need to be searched for in view code
- # (includes templates that 'include' current template)>
- #
- # Found:
- # <File name and line number of any view code found>
- #
- # The format used allows this script to be used in Emacs grep mode:
- # M-x grep
- # Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs
- # Limitations
- # ===========
- #
- # - All templates must be stored on disk in '.html' or '.htm' files.
- # (extensions configurable below)
- #
- # - All Python code must be stored on disk in '.py' files. (extensions
- # configurable below)
- #
- # - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/'
- # directory in apps specified in INSTALLED_APPS. Non-file based template
- # loaders are out of the picture, because there is no way to ask them to
- # return all templates.
- #
- # - It's impossible to programmatically determine which forms should and should
- # not have the token added. The developer must decide when to do this,
- # ensuring that the token is only added to internally targeted forms.
- #
- # - It's impossible to programmatically work out when a template is used. The
- # attempts to trace back to view functions are guesses, and could easily fail
- # in the following ways:
- #
- # * If the 'include' template tag is used with a variable
- # i.e. {% include tname %} where tname is a variable containing the actual
- # template name, rather than {% include "my_template.html" %}.
- #
- # * If the template name has been built up by view code instead of as a simple
- # string. For example, generic views and the admin both do this. (These
- # apps are both contrib and both use RequestContext already, as it happens).
- #
- # * If the 'ssl' tag (or any template tag other than 'include') is used to
- # include the template in another template.
- #
- # - All templates belonging to apps referenced in INSTALLED_APPS will be
- # searched, which may include third party apps or Django contrib. In some
- # cases, this will be a good thing, because even if the templates of these
- # apps have been fixed by someone else, your own view code may reference the
- # same template and may need to be updated.
- #
- # You may, however, wish to comment out some entries in INSTALLED_APPS or
- # TEMPLATE_DIRS before running this script.
- # Improvements to this script are welcome!
- # Configuration
- # =============
- TEMPLATE_EXTENSIONS = [
- ".html",
- ".htm",
- ]
- PYTHON_SOURCE_EXTENSIONS = [
- ".py",
- ]
- TEMPLATE_ENCODING = "UTF-8"
- PYTHON_ENCODING = "UTF-8"
- # Method
- # ======
- # Find templates:
- # - template dirs
- # - installed apps
- #
- # Search for POST forms
- # - Work out what the name of the template is, as it would appear in an
- # 'include' or get_template() call. This can be done by comparing template
- # filename to all template dirs. Some templates can have more than one
- # 'name' e.g. if a directory and one of its child directories are both in
- # TEMPLATE_DIRS. This is actually a common hack used for
- # overriding-and-extending admin templates.
- #
- # For each POST form,
- # - see if it already contains '{% csrf_token %}' immediately after <form>
- # - work back to the view function(s):
- # - First, see if the form is included in any other templates, then
- # recursively compile a list of affected templates.
- # - Find any code function that references that template. This is just a
- # brute force text search that can easily return false positives
- # and fail to find real instances.
- from argparse import ArgumentParser
- import os
- import sys
- import re
- DESCRIPTION = """This tool helps to locate forms that need CSRF tokens added and the
- corresponding view code. This processing is NOT fool proof, and you should read
- the help contained in the script itself. Also, this script may need configuring
- (by editing the script) before use."""
- _POST_FORM_RE = \
- re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE)
- _FORM_CLOSE_RE = re.compile(r'</form\s*>')
- _TOKEN_RE = re.compile('\{% csrf_token')
- def get_template_dirs():
- """
- Returns a set of all directories that contain project templates.
- """
- from django.conf import settings
- dirs = set()
- if ('django.template.loaders.filesystem.load_template_source' in settings.TEMPLATE_LOADERS
- or 'django.template.loaders.filesystem.Loader' in settings.TEMPLATE_LOADERS):
- dirs.update(map(unicode, settings.TEMPLATE_DIRS))
- if ('django.template.loaders.app_directories.load_template_source' in settings.TEMPLATE_LOADERS
- or 'django.template.loaders.app_directories.Loader' in settings.TEMPLATE_LOADERS):
- from django.template.loaders.app_directories import app_template_dirs
- dirs.update(app_template_dirs)
- return dirs
- def make_template_info(filename, root_dirs):
- """
- Creates a Template object for a filename, calculating the possible
- relative_filenames from the supplied filename and root template directories
- """
- return Template(filename,
- [filename[len(d) + 1:] for d in root_dirs if filename.startswith(d)])
- class Template(object):
- def __init__(self, absolute_filename, relative_filenames):
- self.absolute_filename, self.relative_filenames = absolute_filename, relative_filenames
- def content(self):
- try:
- return self._content
- except AttributeError:
- with open(self.absolute_filename) as fd:
- try:
- content = fd.read().decode(TEMPLATE_ENCODING)
- except UnicodeDecodeError as e:
- message = '%s in %s' % (
- e[4], self.absolute_filename.encode('UTF-8', 'ignore'))
- raise UnicodeDecodeError(*(e.args[:4] + (message,)))
- self._content = content
- return content
- content = property(content)
- def post_form_info(self):
- """
- Get information about any POST forms in the template.
- Returns [(linenumber, csrf_token added)]
- """
- forms = {}
- form_line = 0
- for ln, line in enumerate(self.content.split("\n")):
- if not form_line and _POST_FORM_RE.search(line):
- # record the form with no CSRF token yet
- form_line = ln + 1
- forms[form_line] = False
- if form_line and _TOKEN_RE.search(line):
- # found the CSRF token
- forms[form_line] = True
- form_line = 0
- if form_line and _FORM_CLOSE_RE.search(line):
- # no token found by form closing tag
- form_line = 0
- return forms.items()
- def includes_template(self, t):
- """
- Returns true if this template includes template 't' (via {% include %})
- """
- for r in t.relative_filenames:
- if re.search(r'\{%\s*include\s+(\'|")' + re.escape(r) + r'(\1)\s*%\}', self.content):
- return True
- return False
- def related_templates(self):
- """
- Returns all templates that include this one, recursively. (starting
- with this one)
- """
- try:
- return self._related_templates
- except AttributeError:
- pass
- retval = set([self])
- for t in self.all_templates:
- if t.includes_template(self):
- # If two templates mutually include each other, directly or
- # indirectly, we have a problem here...
- retval = retval.union(t.related_templates())
- self._related_templates = retval
- return retval
- def __repr__(self):
- return repr(self.absolute_filename)
- def __eq__(self, other):
- return self.absolute_filename == other.absolute_filename
- def __hash__(self):
- return hash(self.absolute_filename)
- def get_templates(dirs):
- """
- Returns all files in dirs that have template extensions, as Template
- objects.
- """
- templates = set()
- for root in dirs:
- for (dirpath, dirnames, filenames) in os.walk(root):
- for f in filenames:
- if len([True for e in TEMPLATE_EXTENSIONS if f.endswith(e)]) > 0:
- t = make_template_info(os.path.join(dirpath, f), dirs)
- # templates need to be able to search others:
- t.all_templates = templates
- templates.add(t)
- return templates
- def get_python_code(paths):
- """
- Returns all Python code, as a list of tuples, each one being:
- (filename, list of lines)
- """
- retval = []
- for p in paths:
- if not os.path.isdir(p):
- raise Exception("'%s' is not a directory." % p)
- for (dirpath, dirnames, filenames) in os.walk(p):
- for f in filenames:
- if len([True for e in PYTHON_SOURCE_EXTENSIONS if f.endswith(e)]) > 0:
- fn = os.path.join(dirpath, f)
- with open(fn) as fd:
- content = [l.decode(PYTHON_ENCODING) for l in fd.readlines()]
- retval.append((fn, content))
- return retval
- def search_python_list(python_code, template_names):
- """
- Searches python code for a list of template names.
- Returns a list of tuples, each one being:
- (filename, line number)
- """
- retval = set()
- for tn in template_names:
- retval.update(search_python(python_code, tn))
- return sorted(retval)
- def search_python(python_code, template_name):
- """
- Searches Python code for a template name.
- Returns a list of tuples, each one being:
- (filename, line number)
- """
- retval = []
- for fn, content in python_code:
- for ln, line in enumerate(content):
- if ((u'"%s"' % template_name) in line) or \
- ((u"'%s'" % template_name) in line):
- retval.append((fn, ln + 1))
- return retval
- def main(pythonpaths):
- template_dirs = get_template_dirs()
- templates = get_templates(template_dirs)
- python_code = get_python_code(pythonpaths)
- for t in templates:
- # Logic
- form_matches = t.post_form_info()
- num_post_forms = len(form_matches)
- form_lines_without_token = [ln for (ln, has_token) in form_matches if not has_token]
- if num_post_forms == 0:
- continue
- to_search = [rf for rt in t.related_templates() for rf in rt.relative_filenames]
- found = search_python_list(python_code, to_search)
- # Display:
- print(t.absolute_filename)
- for r in t.relative_filenames:
- print(" AKA %s" % r)
- print(" POST forms: %s" % num_post_forms)
- print(" With token: %s" % (num_post_forms - len(form_lines_without_token)))
- if form_lines_without_token:
- print(" Without token:")
- for ln in form_lines_without_token:
- print("%s:%d:" % (t.absolute_filename, ln))
- print('')
- print(" Searching for:")
- for r in to_search:
- print(" " + r)
- print('')
- print(" Found:")
- if len(found) == 0:
- print(" Nothing")
- else:
- for fn, ln in found:
- print("%s:%d:" % (fn, ln))
- print('')
- print("----")
- if __name__ == '__main__':
- parser = ArgumentParser(description=DESCRIPTION)
- parser.add_argument('files', nargs='*', help='Paths can be specified as relative paths.')
- parser.add_argument("--settings", help="Dotted path to settings file")
- options = parser.parse_args()
- if len(options.files) == 0:
- parser.print_help()
- sys.exit(1)
- if options.settings is None:
- if os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
- print("You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter")
- sys.exit(1)
- else:
- os.environ["DJANGO_SETTINGS_MODULE"] = settings
- main(options.files)
|