web.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. # web.py -- WSGI smart-http server
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # or (at your option) any later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """HTTP server for dulwich that implements the git smart HTTP protocol."""
  19. from cStringIO import StringIO
  20. import os
  21. import re
  22. import time
  23. try:
  24. from urlparse import parse_qs
  25. except ImportError:
  26. from dulwich.misc import parse_qs
  27. from dulwich import log_utils
  28. from dulwich.protocol import (
  29. ReceivableProtocol,
  30. )
  31. from dulwich.server import (
  32. ReceivePackHandler,
  33. UploadPackHandler,
  34. DEFAULT_HANDLERS,
  35. )
  36. logger = log_utils.getLogger(__name__)
  37. # HTTP status strings
  38. HTTP_OK = '200 OK'
  39. HTTP_NOT_FOUND = '404 Not Found'
  40. HTTP_FORBIDDEN = '403 Forbidden'
  41. def date_time_string(timestamp=None):
  42. # Based on BaseHTTPServer.py in python2.5
  43. weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
  44. months = [None,
  45. 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
  46. 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  47. if timestamp is None:
  48. timestamp = time.time()
  49. year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
  50. return '%s, %02d %3s %4d %02d:%02d:%02d GMD' % (
  51. weekdays[wd], day, months[month], year, hh, mm, ss)
  52. def url_prefix(mat):
  53. """Extract the URL prefix from a regex match.
  54. :param mat: A regex match object.
  55. :returns: The URL prefix, defined as the text before the match in the
  56. original string. Normalized to start with one leading slash and end with
  57. zero.
  58. """
  59. return '/' + mat.string[:mat.start()].strip('/')
  60. def get_repo(backend, mat):
  61. """Get a Repo instance for the given backend and URL regex match."""
  62. return backend.open_repository(url_prefix(mat))
  63. def send_file(req, f, content_type):
  64. """Send a file-like object to the request output.
  65. :param req: The HTTPGitRequest object to send output to.
  66. :param f: An open file-like object to send; will be closed.
  67. :param content_type: The MIME type for the file.
  68. :yield: The contents of the file.
  69. """
  70. if f is None:
  71. yield req.not_found('File not found')
  72. return
  73. try:
  74. req.respond(HTTP_OK, content_type)
  75. while True:
  76. data = f.read(10240)
  77. if not data:
  78. break
  79. yield data
  80. f.close()
  81. except IOError:
  82. f.close()
  83. yield req.not_found('Error reading file')
  84. except:
  85. f.close()
  86. raise
  87. def _url_to_path(url):
  88. return url.replace('/', os.path.sep)
  89. def get_text_file(req, backend, mat):
  90. req.nocache()
  91. path = _url_to_path(mat.group())
  92. logger.info('Sending plain text file %s', path)
  93. return send_file(req, get_repo(backend, mat).get_named_file(path),
  94. 'text/plain')
  95. def get_loose_object(req, backend, mat):
  96. sha = mat.group(1) + mat.group(2)
  97. logger.info('Sending loose object %s', sha)
  98. object_store = get_repo(backend, mat).object_store
  99. if not object_store.contains_loose(sha):
  100. yield req.not_found('Object not found')
  101. return
  102. try:
  103. data = object_store[sha].as_legacy_object()
  104. except IOError:
  105. yield req.not_found('Error reading object')
  106. req.cache_forever()
  107. req.respond(HTTP_OK, 'application/x-git-loose-object')
  108. yield data
  109. def get_pack_file(req, backend, mat):
  110. req.cache_forever()
  111. path = _url_to_path(mat.group())
  112. logger.info('Sending pack file %s', path)
  113. return send_file(req, get_repo(backend, mat).get_named_file(path),
  114. 'application/x-git-packed-objects')
  115. def get_idx_file(req, backend, mat):
  116. req.cache_forever()
  117. path = _url_to_path(mat.group())
  118. logger.info('Sending pack file %s', path)
  119. return send_file(req, get_repo(backend, mat).get_named_file(path),
  120. 'application/x-git-packed-objects-toc')
  121. def get_info_refs(req, backend, mat):
  122. params = parse_qs(req.environ['QUERY_STRING'])
  123. service = params.get('service', [None])[0]
  124. if service and not req.dumb:
  125. handler_cls = req.handlers.get(service, None)
  126. if handler_cls is None:
  127. yield req.forbidden('Unsupported service %s' % service)
  128. return
  129. req.nocache()
  130. req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
  131. output = StringIO()
  132. proto = ReceivableProtocol(StringIO().read, output.write)
  133. handler = handler_cls(backend, [url_prefix(mat)], proto,
  134. stateless_rpc=True, advertise_refs=True)
  135. handler.proto.write_pkt_line('# service=%s\n' % service)
  136. handler.proto.write_pkt_line(None)
  137. handler.handle()
  138. yield output.getvalue()
  139. else:
  140. # non-smart fallback
  141. # TODO: select_getanyfile() (see http-backend.c)
  142. req.nocache()
  143. req.respond(HTTP_OK, 'text/plain')
  144. logger.info('Emulating dumb info/refs')
  145. repo = get_repo(backend, mat)
  146. refs = repo.get_refs()
  147. for name in sorted(refs.iterkeys()):
  148. # get_refs() includes HEAD as a special case, but we don't want to
  149. # advertise it
  150. if name == 'HEAD':
  151. continue
  152. sha = refs[name]
  153. o = repo[sha]
  154. if not o:
  155. continue
  156. yield '%s\t%s\n' % (sha, name)
  157. peeled_sha = repo.get_peeled(name)
  158. if peeled_sha != sha:
  159. yield '%s\t%s^{}\n' % (peeled_sha, name)
  160. def get_info_packs(req, backend, mat):
  161. req.nocache()
  162. req.respond(HTTP_OK, 'text/plain')
  163. logger.info('Emulating dumb info/packs')
  164. for pack in get_repo(backend, mat).object_store.packs:
  165. yield 'P pack-%s.pack\n' % pack.name()
  166. class _LengthLimitedFile(object):
  167. """Wrapper class to limit the length of reads from a file-like object.
  168. This is used to ensure EOF is read from the wsgi.input object once
  169. Content-Length bytes are read. This behavior is required by the WSGI spec
  170. but not implemented in wsgiref as of 2.5.
  171. """
  172. def __init__(self, input, max_bytes):
  173. self._input = input
  174. self._bytes_avail = max_bytes
  175. def read(self, size=-1):
  176. if self._bytes_avail <= 0:
  177. return ''
  178. if size == -1 or size > self._bytes_avail:
  179. size = self._bytes_avail
  180. self._bytes_avail -= size
  181. return self._input.read(size)
  182. # TODO: support more methods as necessary
  183. def handle_service_request(req, backend, mat):
  184. service = mat.group().lstrip('/')
  185. logger.info('Handling service request for %s', service)
  186. handler_cls = req.handlers.get(service, None)
  187. if handler_cls is None:
  188. yield req.forbidden('Unsupported service %s' % service)
  189. return
  190. req.nocache()
  191. req.respond(HTTP_OK, 'application/x-%s-response' % service)
  192. output = StringIO()
  193. input = req.environ['wsgi.input']
  194. # This is not necessary if this app is run from a conforming WSGI server.
  195. # Unfortunately, there's no way to tell that at this point.
  196. # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
  197. # content-length
  198. if 'CONTENT_LENGTH' in req.environ:
  199. input = _LengthLimitedFile(input, int(req.environ['CONTENT_LENGTH']))
  200. proto = ReceivableProtocol(input.read, output.write)
  201. handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
  202. handler.handle()
  203. yield output.getvalue()
  204. class HTTPGitRequest(object):
  205. """Class encapsulating the state of a single git HTTP request.
  206. :ivar environ: the WSGI environment for the request.
  207. """
  208. def __init__(self, environ, start_response, dumb=False, handlers=None):
  209. self.environ = environ
  210. self.dumb = dumb
  211. self.handlers = handlers and handlers or DEFAULT_HANDLERS
  212. self._start_response = start_response
  213. self._cache_headers = []
  214. self._headers = []
  215. def add_header(self, name, value):
  216. """Add a header to the response."""
  217. self._headers.append((name, value))
  218. def respond(self, status=HTTP_OK, content_type=None, headers=None):
  219. """Begin a response with the given status and other headers."""
  220. if headers:
  221. self._headers.extend(headers)
  222. if content_type:
  223. self._headers.append(('Content-Type', content_type))
  224. self._headers.extend(self._cache_headers)
  225. self._start_response(status, self._headers)
  226. def not_found(self, message):
  227. """Begin a HTTP 404 response and return the text of a message."""
  228. self._cache_headers = []
  229. logger.info('Not found: %s', message)
  230. self.respond(HTTP_NOT_FOUND, 'text/plain')
  231. return message
  232. def forbidden(self, message):
  233. """Begin a HTTP 403 response and return the text of a message."""
  234. self._cache_headers = []
  235. logger.info('Forbidden: %s', message)
  236. self.respond(HTTP_FORBIDDEN, 'text/plain')
  237. return message
  238. def nocache(self):
  239. """Set the response to never be cached by the client."""
  240. self._cache_headers = [
  241. ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
  242. ('Pragma', 'no-cache'),
  243. ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
  244. ]
  245. def cache_forever(self):
  246. """Set the response to be cached forever by the client."""
  247. now = time.time()
  248. self._cache_headers = [
  249. ('Date', date_time_string(now)),
  250. ('Expires', date_time_string(now + 31536000)),
  251. ('Cache-Control', 'public, max-age=31536000'),
  252. ]
  253. class HTTPGitApplication(object):
  254. """Class encapsulating the state of a git WSGI application.
  255. :ivar backend: the Backend object backing this application
  256. """
  257. services = {
  258. ('GET', re.compile('/HEAD$')): get_text_file,
  259. ('GET', re.compile('/info/refs$')): get_info_refs,
  260. ('GET', re.compile('/objects/info/alternates$')): get_text_file,
  261. ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
  262. ('GET', re.compile('/objects/info/packs$')): get_info_packs,
  263. ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
  264. ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
  265. ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,
  266. ('POST', re.compile('/git-upload-pack$')): handle_service_request,
  267. ('POST', re.compile('/git-receive-pack$')): handle_service_request,
  268. }
  269. def __init__(self, backend, dumb=False, handlers=None):
  270. self.backend = backend
  271. self.dumb = dumb
  272. self.handlers = handlers
  273. def __call__(self, environ, start_response):
  274. path = environ['PATH_INFO']
  275. method = environ['REQUEST_METHOD']
  276. req = HTTPGitRequest(environ, start_response, dumb=self.dumb,
  277. handlers=self.handlers)
  278. # environ['QUERY_STRING'] has qs args
  279. handler = None
  280. for smethod, spath in self.services.iterkeys():
  281. if smethod != method:
  282. continue
  283. mat = spath.search(path)
  284. if mat:
  285. handler = self.services[smethod, spath]
  286. break
  287. if handler is None:
  288. return req.not_found('Sorry, that method is not supported')
  289. return handler(req, self.backend, mat)