123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
- from __future__ import unicode_literals
- import codecs
- import logging
- import sys
- from io import BytesIO
- from threading import Lock
- import warnings
- from django import http
- from django.conf import settings
- from django.core import signals
- from django.core.handlers import base
- from django.core.urlresolvers import set_script_prefix
- from django.utils import datastructures
- from django.utils.encoding import force_str, force_text
- from django.utils import six
- # For backwards compatibility -- lots of code uses this in the wild!
- from django.http.response import REASON_PHRASES as STATUS_CODE_TEXT # NOQA
# Logger used for request-level messages emitted by this module.
logger = logging.getLogger('django.request')

# encode() and decode() expect the charset to be a native string, hence the
# explicit str() wrappers (this module enables unicode_literals).
ISO_8859_1 = str('iso-8859-1')
UTF_8 = str('utf-8')
class LimitedStream(object):
    """
    Wrap another stream so that reads never go past a fixed number of bytes.

    ``stream`` only needs a ``read(size)`` method. ``limit`` is the total
    number of bytes that may be consumed from it; a limit of zero or less
    means nothing can be read. Data fetched ahead of time by ``readline()``
    is kept in ``self.buffer`` and is served first by later calls.

    For both ``read()`` and ``readline()`` a ``size`` of ``None`` or any
    negative number means "no size restriction other than the limit".
    """

    def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
        self.stream = stream
        self.remaining = limit
        self.buffer = b''
        # Kept on the instance; none of the methods in this class consult it.
        self.buf_size = buf_size

    def _read_limited(self, size=None):
        """
        Read up to ``size`` bytes from the wrapped stream, never exceeding
        the number of bytes still allowed, and update that allowance.
        """
        if size is None or size < 0 or size > self.remaining:
            size = self.remaining
        # ``<=`` rather than ``==``: a negative value must never reach the
        # wrapped stream, where it would mean "read everything" and bypass
        # the limit.
        if size <= 0:
            return b''
        result = self.stream.read(size)
        self.remaining -= len(result)
        return result

    def read(self, size=None):
        """
        Return up to ``size`` bytes, draining buffered data first.

        With ``size`` of ``None`` or a negative number, return everything
        that is buffered plus everything still allowed by the limit.
        """
        if size is None or size < 0:
            result = self.buffer + self._read_limited()
            self.buffer = b''
        elif size < len(self.buffer):
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        else:  # size >= len(self.buffer)
            result = self.buffer + self._read_limited(size - len(self.buffer))
            self.buffer = b''
        return result

    def readline(self, size=None):
        """
        Return one line (including its trailing newline, if any) of at most
        ``size`` bytes. ``readline(0)`` returns ``b''`` without consuming
        anything; ``None`` or a negative ``size`` means no length cap.
        """
        if size is not None and size < 0:
            size = None

        # Fill the buffer until it holds a newline, reaches ``size`` bytes,
        # or the underlying stream / limit is exhausted.
        while b'\n' not in self.buffer and \
                (size is None or len(self.buffer) < size):
            if size is None:
                chunk = self._read_limited()
            else:
                # Here len(self.buffer) < size, so the argument is positive.
                chunk = self._read_limited(size - len(self.buffer))
            if not chunk:
                break
            self.buffer += chunk

        sio = BytesIO(self.buffer)
        if size is None:
            line = sio.readline()
        else:
            line = sio.readline(size)
        # Whatever follows the returned line stays buffered for later calls.
        self.buffer = sio.read()
        return line
class WSGIRequest(http.HttpRequest):
    """
    HTTP request built from a WSGI ``environ`` dictionary.

    The environ doubles as ``META``. ``GET``, ``POST``, ``COOKIES``, ``FILES``
    and ``REQUEST`` are computed on first access and cached on the instance.
    """

    def __init__(self, environ):
        script_name = get_script_name(environ)
        # PATH_INFO can be present yet empty (for example when the
        # SCRIPT_NAME URL is requested without a trailing slash). Behave as
        # though '/' had been requested; forcing the path like this is not
        # pretty but should be harmless.
        path_info = get_path_info(environ) or '/'

        self.environ = environ
        self.path_info = path_info
        script_part = script_name.rstrip('/')
        path_part = path_info.lstrip('/')
        self.path = '%s/%s' % (script_part, path_part)

        # META is the very same dict object as environ, so the two item
        # assignments below also modify the environ that was passed in.
        self.META = environ
        self.META['PATH_INFO'] = path_info
        self.META['SCRIPT_NAME'] = script_name
        self.method = environ['REQUEST_METHOD'].upper()

        # Adopt the charset from the Content-Type header only when Python
        # knows a codec by that name.
        raw_content_type = environ.get('CONTENT_TYPE', '')
        _, content_params = self._parse_content_type(raw_content_type)
        if 'charset' in content_params:
            charset = content_params['charset']
            try:
                codecs.lookup(charset)
            except LookupError:
                pass
            else:
                self.encoding = charset
        self._post_parse_error = False

        # A missing or non-numeric CONTENT_LENGTH is treated as zero.
        try:
            content_length = int(environ.get('CONTENT_LENGTH'))
        except (ValueError, TypeError):
            content_length = 0
        body = self.environ['wsgi.input']
        self._stream = LimitedStream(body, content_length)
        self._read_started = False
        self.resolver_match = None

    def _get_scheme(self):
        """Return the ``wsgi.url_scheme`` entry of the environ, if any."""
        return self.environ.get('wsgi.url_scheme')

    def _parse_content_type(self, ctype):
        """
        Media Types parsing according to RFC 2616, section 3.7.

        Returns the data type and parameters. For example:
        Input: "text/plain; charset=iso-8859-1"
        Output: ('text/plain', {'charset': 'iso-8859-1'})
        """
        content_type, _, raw_params = ctype.partition(';')
        # Each ';'-separated piece is split at its first '='. A piece without
        # '=' maps its stripped text to an empty string.
        split_params = (
            piece.strip().partition('=') for piece in raw_params.split(';')
        )
        content_params = {name: value for name, _sep, value in split_params}
        return content_type, content_params

    def _get_request(self):
        """Return the merged POST/GET mapping, warning about its deprecation."""
        warnings.warn('`request.REQUEST` is deprecated, use `request.GET` or '
                      '`request.POST` instead.', PendingDeprecationWarning, 2)
        if hasattr(self, '_request'):
            return self._request
        self._request = datastructures.MergeDict(self.POST, self.GET)
        return self._request

    def _get_get(self):
        """Return the query-string parameters, parsing them on first use."""
        if hasattr(self, '_get'):
            return self._get
        # The WSGI spec says 'QUERY_STRING' may be absent.
        query_bytes = get_bytes_from_wsgi(self.environ, 'QUERY_STRING', '')
        self._get = http.QueryDict(query_bytes, encoding=self._encoding)
        return self._get

    def _set_get(self, get):
        self._get = get

    def _get_post(self):
        """Return the POST data, loading POST and FILES on first use."""
        if hasattr(self, '_post'):
            return self._post
        self._load_post_and_files()
        return self._post

    def _set_post(self, post):
        self._post = post

    def _get_cookies(self):
        """Return the parsed Cookie header, parsing it on first use."""
        if hasattr(self, '_cookies'):
            return self._cookies
        cookie_header = get_str_from_wsgi(self.environ, 'HTTP_COOKIE', '')
        self._cookies = http.parse_cookie(cookie_header)
        return self._cookies

    def _set_cookies(self, cookies):
        self._cookies = cookies

    def _get_files(self):
        """Return the uploaded files, loading POST and FILES on first use."""
        if hasattr(self, '_files'):
            return self._files
        self._load_post_and_files()
        return self._files

    GET = property(_get_get, _set_get)
    POST = property(_get_post, _set_post)
    COOKIES = property(_get_cookies, _set_cookies)
    FILES = property(_get_files)
    REQUEST = property(_get_request)
class WSGIHandler(base.BaseHandler):
    """
    WSGI application callable: turns an ``environ`` into a request object,
    obtains a response for it and reports status and headers through
    ``start_response``.
    """

    # Serialises the one-time middleware loading performed in __call__.
    initLock = Lock()
    request_class = WSGIRequest

    def __call__(self, environ, start_response):
        # Middleware is set up lazily on the first call because settings
        # weren't available any earlier.
        if self._request_middleware is None:
            with self.initLock:
                try:
                    # Re-check under the lock: another caller may have
                    # finished loading in the meantime.
                    if self._request_middleware is None:
                        self.load_middleware()
                except:
                    # Deliberately bare: whatever went wrong, discard the
                    # partially loaded middleware, then re-raise unchanged.
                    self._request_middleware = None
                    raise

        set_script_prefix(get_script_name(environ))
        signals.request_started.send(sender=self.__class__)

        try:
            request = self.request_class(environ)
        except UnicodeDecodeError:
            # The request could not be decoded: log it and answer with 400.
            logger.warning(
                'Bad Request (UnicodeDecodeError)',
                exc_info=sys.exc_info(),
                extra={'status_code': 400},
            )
            response = http.HttpResponseBadRequest()
        else:
            response = self.get_response(request)

        response._handler_class = self.__class__

        status = '%s %s' % (response.status_code, response.reason_phrase)
        # Header names and values are passed through str() before being
        # handed to start_response.
        response_headers = [
            (str(name), str(value)) for name, value in response.items()
        ]
        response_headers.extend(
            (str('Set-Cookie'), str(cookie.output(header='')))
            for cookie in response.cookies.values()
        )
        start_response(force_str(status), response_headers)
        return response
def get_path_info(environ):
    """
    Return the request's PATH_INFO, decoded from UTF-8 to a unicode string.

    A missing PATH_INFO is treated as '/'.
    """
    # It'd be better to implement URI-to-IRI decoding, see #19508.
    return get_bytes_from_wsgi(environ, 'PATH_INFO', '/').decode(UTF_8)
def get_script_name(environ):
    """
    Returns the equivalent of the HTTP request's SCRIPT_NAME environment
    variable. If Apache mod_rewrite has been used, returns what would have been
    the script name prior to any rewriting (so it's the script name as seen
    from the client's perspective), unless the FORCE_SCRIPT_NAME setting is
    set (to anything).

    The result is a unicode string: either FORCE_SCRIPT_NAME converted to
    text, or a value taken from the environ and decoded as UTF-8.
    """
    if settings.FORCE_SCRIPT_NAME is not None:
        return force_text(settings.FORCE_SCRIPT_NAME)

    # If Apache's mod_rewrite had a whack at the URL, Apache set either
    # SCRIPT_URL or REDIRECT_URL to the full resource URL before applying any
    # rewrites. Unfortunately not every Web server (lighttpd!) passes this
    # information through all the time, so FORCE_SCRIPT_NAME, above, is still
    # needed.
    script_url = get_bytes_from_wsgi(environ, 'SCRIPT_URL', '')
    if not script_url:
        script_url = get_bytes_from_wsgi(environ, 'REDIRECT_URL', '')

    if script_url:
        path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '')
        # Only strip the PATH_INFO suffix when there is one: with an empty
        # PATH_INFO the slice would be script_url[:-0], i.e. script_url[:0],
        # which throws away the entire URL instead of keeping it.
        if path_info:
            script_name = script_url[:-len(path_info)]
        else:
            script_name = script_url
    else:
        script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')

    # It'd be better to implement URI-to-IRI decoding, see #19508.
    return script_name.decode(UTF_8)
def get_bytes_from_wsgi(environ, key, default):
    """
    Look up ``key`` in the WSGI environ and return its value as bytes,
    falling back to ``default`` when the key is absent.

    key and default should be str objects. Under Python 2 they may also be
    unicode objects provided they only contain ASCII characters.
    """
    value = environ.get(str(key), str(default))
    if six.PY2:
        # Returned as found in the environ.
        return value
    # Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
    # decoded with ISO-8859-1. This is wrong for Django websites where UTF-8
    # is the default. Re-encode to recover the original bytestring.
    return value.encode(ISO_8859_1)
def get_str_from_wsgi(environ, key, default):
    """
    Get a value from the WSGI environ dictionary as str.

    key and default should be str objects. Under Python 2 they may also be
    unicode objects provided they only contain ASCII characters.

    Under Python 2 the value is returned as found in the environ. Under
    Python 3 the original bytes are recovered and decoded as UTF-8; byte
    sequences that are not valid UTF-8 are replaced with U+FFFD instead of
    raising UnicodeDecodeError, so a malformed header (e.g. Cookie) cannot
    make the lookup fail.
    """
    value = get_bytes_from_wsgi(environ, key, default)
    if six.PY2:
        return value
    return value.decode(UTF_8, errors='replace')
|