wsgi.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. from __future__ import unicode_literals
  2. import codecs
  3. import logging
  4. import sys
  5. from io import BytesIO
  6. from threading import Lock
  7. import warnings
  8. from django import http
  9. from django.conf import settings
  10. from django.core import signals
  11. from django.core.handlers import base
  12. from django.core.urlresolvers import set_script_prefix
  13. from django.utils import datastructures
  14. from django.utils.encoding import force_str, force_text
  15. from django.utils import six
  16. # For backwards compatibility -- lots of code uses this in the wild!
  17. from django.http.response import REASON_PHRASES as STATUS_CODE_TEXT # NOQA
  18. logger = logging.getLogger('django.request')
  19. # encode() and decode() expect the charset to be a native string.
  20. ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
  21. class LimitedStream(object):
  22. '''
  23. LimitedStream wraps another stream in order to not allow reading from it
  24. past specified amount of bytes.
  25. '''
  26. def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
  27. self.stream = stream
  28. self.remaining = limit
  29. self.buffer = b''
  30. self.buf_size = buf_size
  31. def _read_limited(self, size=None):
  32. if size is None or size > self.remaining:
  33. size = self.remaining
  34. if size == 0:
  35. return b''
  36. result = self.stream.read(size)
  37. self.remaining -= len(result)
  38. return result
  39. def read(self, size=None):
  40. if size is None:
  41. result = self.buffer + self._read_limited()
  42. self.buffer = b''
  43. elif size < len(self.buffer):
  44. result = self.buffer[:size]
  45. self.buffer = self.buffer[size:]
  46. else: # size >= len(self.buffer)
  47. result = self.buffer + self._read_limited(size - len(self.buffer))
  48. self.buffer = b''
  49. return result
  50. def readline(self, size=None):
  51. while b'\n' not in self.buffer and \
  52. (size is None or len(self.buffer) < size):
  53. if size:
  54. # since size is not None here, len(self.buffer) < size
  55. chunk = self._read_limited(size - len(self.buffer))
  56. else:
  57. chunk = self._read_limited()
  58. if not chunk:
  59. break
  60. self.buffer += chunk
  61. sio = BytesIO(self.buffer)
  62. if size:
  63. line = sio.readline(size)
  64. else:
  65. line = sio.readline()
  66. self.buffer = sio.read()
  67. return line
  68. class WSGIRequest(http.HttpRequest):
  69. def __init__(self, environ):
  70. script_name = get_script_name(environ)
  71. path_info = get_path_info(environ)
  72. if not path_info:
  73. # Sometimes PATH_INFO exists, but is empty (e.g. accessing
  74. # the SCRIPT_NAME URL without a trailing slash). We really need to
  75. # operate as if they'd requested '/'. Not amazingly nice to force
  76. # the path like this, but should be harmless.
  77. path_info = '/'
  78. self.environ = environ
  79. self.path_info = path_info
  80. self.path = '%s/%s' % (script_name.rstrip('/'), path_info.lstrip('/'))
  81. self.META = environ
  82. self.META['PATH_INFO'] = path_info
  83. self.META['SCRIPT_NAME'] = script_name
  84. self.method = environ['REQUEST_METHOD'].upper()
  85. _, content_params = self._parse_content_type(environ.get('CONTENT_TYPE', ''))
  86. if 'charset' in content_params:
  87. try:
  88. codecs.lookup(content_params['charset'])
  89. except LookupError:
  90. pass
  91. else:
  92. self.encoding = content_params['charset']
  93. self._post_parse_error = False
  94. try:
  95. content_length = int(environ.get('CONTENT_LENGTH'))
  96. except (ValueError, TypeError):
  97. content_length = 0
  98. self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
  99. self._read_started = False
  100. self.resolver_match = None
  101. def _get_scheme(self):
  102. return self.environ.get('wsgi.url_scheme')
  103. def _parse_content_type(self, ctype):
  104. """
  105. Media Types parsing according to RFC 2616, section 3.7.
  106. Returns the data type and parameters. For example:
  107. Input: "text/plain; charset=iso-8859-1"
  108. Output: ('text/plain', {'charset': 'iso-8859-1'})
  109. """
  110. content_type, _, params = ctype.partition(';')
  111. content_params = {}
  112. for parameter in params.split(';'):
  113. k, _, v = parameter.strip().partition('=')
  114. content_params[k] = v
  115. return content_type, content_params
  116. def _get_request(self):
  117. warnings.warn('`request.REQUEST` is deprecated, use `request.GET` or '
  118. '`request.POST` instead.', PendingDeprecationWarning, 2)
  119. if not hasattr(self, '_request'):
  120. self._request = datastructures.MergeDict(self.POST, self.GET)
  121. return self._request
  122. def _get_get(self):
  123. if not hasattr(self, '_get'):
  124. # The WSGI spec says 'QUERY_STRING' may be absent.
  125. raw_query_string = get_bytes_from_wsgi(self.environ, 'QUERY_STRING', '')
  126. self._get = http.QueryDict(raw_query_string, encoding=self._encoding)
  127. return self._get
  128. def _set_get(self, get):
  129. self._get = get
  130. def _get_post(self):
  131. if not hasattr(self, '_post'):
  132. self._load_post_and_files()
  133. return self._post
  134. def _set_post(self, post):
  135. self._post = post
  136. def _get_cookies(self):
  137. if not hasattr(self, '_cookies'):
  138. raw_cookie = get_str_from_wsgi(self.environ, 'HTTP_COOKIE', '')
  139. self._cookies = http.parse_cookie(raw_cookie)
  140. return self._cookies
  141. def _set_cookies(self, cookies):
  142. self._cookies = cookies
  143. def _get_files(self):
  144. if not hasattr(self, '_files'):
  145. self._load_post_and_files()
  146. return self._files
  147. GET = property(_get_get, _set_get)
  148. POST = property(_get_post, _set_post)
  149. COOKIES = property(_get_cookies, _set_cookies)
  150. FILES = property(_get_files)
  151. REQUEST = property(_get_request)
  152. class WSGIHandler(base.BaseHandler):
  153. initLock = Lock()
  154. request_class = WSGIRequest
  155. def __call__(self, environ, start_response):
  156. # Set up middleware if needed. We couldn't do this earlier, because
  157. # settings weren't available.
  158. if self._request_middleware is None:
  159. with self.initLock:
  160. try:
  161. # Check that middleware is still uninitialised.
  162. if self._request_middleware is None:
  163. self.load_middleware()
  164. except:
  165. # Unload whatever middleware we got
  166. self._request_middleware = None
  167. raise
  168. set_script_prefix(get_script_name(environ))
  169. signals.request_started.send(sender=self.__class__)
  170. try:
  171. request = self.request_class(environ)
  172. except UnicodeDecodeError:
  173. logger.warning('Bad Request (UnicodeDecodeError)',
  174. exc_info=sys.exc_info(),
  175. extra={
  176. 'status_code': 400,
  177. }
  178. )
  179. response = http.HttpResponseBadRequest()
  180. else:
  181. response = self.get_response(request)
  182. response._handler_class = self.__class__
  183. status = '%s %s' % (response.status_code, response.reason_phrase)
  184. response_headers = [(str(k), str(v)) for k, v in response.items()]
  185. for c in response.cookies.values():
  186. response_headers.append((str('Set-Cookie'), str(c.output(header=''))))
  187. start_response(force_str(status), response_headers)
  188. return response
  189. def get_path_info(environ):
  190. """
  191. Returns the HTTP request's PATH_INFO as a unicode string.
  192. """
  193. path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
  194. # It'd be better to implement URI-to-IRI decoding, see #19508.
  195. return path_info.decode(UTF_8)
  196. def get_script_name(environ):
  197. """
  198. Returns the equivalent of the HTTP request's SCRIPT_NAME environment
  199. variable. If Apache mod_rewrite has been used, returns what would have been
  200. the script name prior to any rewriting (so it's the script name as seen
  201. from the client's perspective), unless the FORCE_SCRIPT_NAME setting is
  202. set (to anything).
  203. """
  204. if settings.FORCE_SCRIPT_NAME is not None:
  205. return force_text(settings.FORCE_SCRIPT_NAME)
  206. # If Apache's mod_rewrite had a whack at the URL, Apache set either
  207. # SCRIPT_URL or REDIRECT_URL to the full resource URL before applying any
  208. # rewrites. Unfortunately not every Web server (lighttpd!) passes this
  209. # information through all the time, so FORCE_SCRIPT_NAME, above, is still
  210. # needed.
  211. script_url = get_bytes_from_wsgi(environ, 'SCRIPT_URL', '')
  212. if not script_url:
  213. script_url = get_bytes_from_wsgi(environ, 'REDIRECT_URL', '')
  214. if script_url:
  215. path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '')
  216. script_name = script_url[:-len(path_info)]
  217. else:
  218. script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')
  219. # It'd be better to implement URI-to-IRI decoding, see #19508.
  220. return script_name.decode(UTF_8)
  221. def get_bytes_from_wsgi(environ, key, default):
  222. """
  223. Get a value from the WSGI environ dictionary as bytes.
  224. key and default should be str objects. Under Python 2 they may also be
  225. unicode objects provided they only contain ASCII characters.
  226. """
  227. value = environ.get(str(key), str(default))
  228. # Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
  229. # decoded with ISO-8859-1. This is wrong for Django websites where UTF-8
  230. # is the default. Re-encode to recover the original bytestring.
  231. return value if six.PY2 else value.encode(ISO_8859_1)
  232. def get_str_from_wsgi(environ, key, default):
  233. """
  234. Get a value from the WSGI environ dictionary as bytes.
  235. key and default should be str objects. Under Python 2 they may also be
  236. unicode objects provided they only contain ASCII characters.
  237. """
  238. value = environ.get(str(key), str(default))
  239. # Same comment as above
  240. return value if six.PY2 else value.encode(ISO_8859_1).decode(UTF_8)