web.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. # web.py -- WSGI smart-http server
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # or (at your option) any later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """HTTP server for dulwich that implements the git smart HTTP protocol."""
  19. from cStringIO import StringIO
  20. import re
  21. import time
  22. import urlparse
  23. from dulwich.server import (
  24. ReceivePackHandler,
  25. UploadPackHandler,
  26. )
# HTTP status lines handed to the WSGI start_response callable by the
# request helpers in this module.
HTTP_OK = '200 OK'
HTTP_NOT_FOUND = '404 Not Found'
HTTP_FORBIDDEN = '403 Forbidden'
  30. def date_time_string(self, timestamp=None):
  31. # Based on BaseHTTPServer.py in python2.5
  32. weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
  33. months = [None,
  34. 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
  35. 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  36. if timestamp is None:
  37. timestamp = time.time()
  38. year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
  39. return '%s, %02d %3s %4d %02d:%02d:%02d GMD' % (
  40. weekdays[wd], day, months[month], year, hh, mm, ss)
  41. def send_file(req, f, content_type):
  42. """Send a file-like object to the request output.
  43. :param req: The HTTPGitRequest object to send output to.
  44. :param f: An open file-like object to send; will be closed.
  45. :param content_type: The MIME type for the file.
  46. :yield: The contents of the file.
  47. """
  48. if f is None:
  49. yield req.not_found('File not found')
  50. return
  51. try:
  52. try:
  53. req.respond(HTTP_OK, content_type)
  54. while True:
  55. data = f.read(10240)
  56. if not data:
  57. break
  58. yield data
  59. except IOError:
  60. yield req.not_found('Error reading file')
  61. finally:
  62. f.close()
  63. def get_text_file(req, backend, mat):
  64. req.nocache()
  65. return send_file(req, backend.repo.get_named_file(mat.group()),
  66. 'text/plain')
  67. def get_loose_object(req, backend, mat):
  68. sha = mat.group(1) + mat.group(2)
  69. object_store = backend.object_store
  70. if not object_store.contains_loose(sha):
  71. yield req.not_found('Object not found')
  72. return
  73. try:
  74. data = object_store[sha].as_legacy_object()
  75. except IOError:
  76. yield req.not_found('Error reading object')
  77. req.cache_forever()
  78. req.respond(HTTP_OK, 'application/x-git-loose-object')
  79. yield data
  80. def get_pack_file(req, backend, mat):
  81. req.cache_forever()
  82. return send_file(req, backend.repo.get_named_file(mat.group()),
  83. 'application/x-git-packed-objects')
  84. def get_idx_file(req, backend, mat):
  85. req.cache_forever()
  86. return send_file(req, backend.repo.get_named_file(mat.group()),
  87. 'application/x-git-packed-objects-toc')
# Maps a git service name to the handler class that serves it.  Used by
# get_info_refs and handle_service_request when no explicit ``services``
# mapping is passed in.
default_services = {'git-upload-pack': UploadPackHandler,
                    'git-receive-pack': ReceivePackHandler}
  90. def get_info_refs(req, backend, mat, services=None):
  91. if services is None:
  92. services = default_services
  93. params = urlparse.parse_qs(req.environ['QUERY_STRING'])
  94. service = params.get('service', [None])[0]
  95. if service and not req.dumb:
  96. handler_cls = services.get(service, None)
  97. if handler_cls is None:
  98. yield req.forbidden('Unsupported service %s' % service)
  99. return
  100. req.nocache()
  101. req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
  102. output = StringIO()
  103. dummy_input = StringIO() # GET request, handler doesn't need to read
  104. handler = handler_cls(backend, dummy_input.read, output.write,
  105. stateless_rpc=True, advertise_refs=True)
  106. handler.proto.write_pkt_line('# service=%s\n' % service)
  107. handler.proto.write_pkt_line(None)
  108. handler.handle()
  109. yield output.getvalue()
  110. else:
  111. # non-smart fallback
  112. # TODO: select_getanyfile() (see http-backend.c)
  113. req.nocache()
  114. req.respond(HTTP_OK, 'text/plain')
  115. refs = backend.get_refs()
  116. for name in sorted(refs.iterkeys()):
  117. # get_refs() includes HEAD as a special case, but we don't want to
  118. # advertise it
  119. if name == 'HEAD':
  120. continue
  121. sha = refs[name]
  122. o = backend.repo[sha]
  123. if not o:
  124. continue
  125. yield '%s\t%s\n' % (sha, name)
  126. peeled_sha = backend.repo.get_peeled(name)
  127. if peeled_sha != sha:
  128. yield '%s\t%s^{}\n' % (peeled_sha, name)
  129. def get_info_packs(req, backend, mat):
  130. req.nocache()
  131. req.respond(HTTP_OK, 'text/plain')
  132. for pack in backend.object_store.packs:
  133. yield 'P pack-%s.pack\n' % pack.name()
  134. class _LengthLimitedFile(object):
  135. """Wrapper class to limit the length of reads from a file-like object.
  136. This is used to ensure EOF is read from the wsgi.input object once
  137. Content-Length bytes are read. This behavior is required by the WSGI spec
  138. but not implemented in wsgiref as of 2.5.
  139. """
  140. def __init__(self, input, max_bytes):
  141. self._input = input
  142. self._bytes_avail = max_bytes
  143. def read(self, size=-1):
  144. if self._bytes_avail <= 0:
  145. return ''
  146. if size == -1 or size > self._bytes_avail:
  147. size = self._bytes_avail
  148. self._bytes_avail -= size
  149. return self._input.read(size)
  150. # TODO: support more methods as necessary
  151. def handle_service_request(req, backend, mat, services=None):
  152. if services is None:
  153. services = default_services
  154. service = mat.group().lstrip('/')
  155. handler_cls = services.get(service, None)
  156. if handler_cls is None:
  157. yield req.forbidden('Unsupported service %s' % service)
  158. return
  159. req.nocache()
  160. req.respond(HTTP_OK, 'application/x-%s-response' % service)
  161. output = StringIO()
  162. input = req.environ['wsgi.input']
  163. # This is not necessary if this app is run from a conforming WSGI server.
  164. # Unfortunately, there's no way to tell that at this point.
  165. # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
  166. # content-length
  167. if 'CONTENT_LENGTH' in req.environ:
  168. input = _LengthLimitedFile(input, int(req.environ['CONTENT_LENGTH']))
  169. handler = handler_cls(backend, input.read, output.write, stateless_rpc=True)
  170. handler.handle()
  171. yield output.getvalue()
  172. class HTTPGitRequest(object):
  173. """Class encapsulating the state of a single git HTTP request.
  174. :ivar environ: the WSGI environment for the request.
  175. """
  176. def __init__(self, environ, start_response, dumb=False):
  177. self.environ = environ
  178. self.dumb = dumb
  179. self._start_response = start_response
  180. self._cache_headers = []
  181. self._headers = []
  182. def add_header(self, name, value):
  183. """Add a header to the response."""
  184. self._headers.append((name, value))
  185. def respond(self, status=HTTP_OK, content_type=None, headers=None):
  186. """Begin a response with the given status and other headers."""
  187. if headers:
  188. self._headers.extend(headers)
  189. if content_type:
  190. self._headers.append(('Content-Type', content_type))
  191. self._headers.extend(self._cache_headers)
  192. self._start_response(status, self._headers)
  193. def not_found(self, message):
  194. """Begin a HTTP 404 response and return the text of a message."""
  195. self._cache_headers = []
  196. self.respond(HTTP_NOT_FOUND, 'text/plain')
  197. return message
  198. def forbidden(self, message):
  199. """Begin a HTTP 403 response and return the text of a message."""
  200. self._cache_headers = []
  201. self.respond(HTTP_FORBIDDEN, 'text/plain')
  202. return message
  203. def nocache(self):
  204. """Set the response to never be cached by the client."""
  205. self._cache_headers = [
  206. ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
  207. ('Pragma', 'no-cache'),
  208. ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
  209. ]
  210. def cache_forever(self):
  211. """Set the response to be cached forever by the client."""
  212. now = time.time()
  213. self._cache_headers = [
  214. ('Date', date_time_string(now)),
  215. ('Expires', date_time_string(now + 31536000)),
  216. ('Cache-Control', 'public, max-age=31536000'),
  217. ]
  218. class HTTPGitApplication(object):
  219. """Class encapsulating the state of a git WSGI application.
  220. :ivar backend: the Backend object backing this application
  221. """
  222. services = {
  223. ('GET', re.compile('/HEAD$')): get_text_file,
  224. ('GET', re.compile('/info/refs$')): get_info_refs,
  225. ('GET', re.compile('/objects/info/alternates$')): get_text_file,
  226. ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
  227. ('GET', re.compile('/objects/info/packs$')): get_info_packs,
  228. ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
  229. ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
  230. ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,
  231. ('POST', re.compile('/git-upload-pack$')): handle_service_request,
  232. ('POST', re.compile('/git-receive-pack$')): handle_service_request,
  233. }
  234. def __init__(self, backend, dumb=False):
  235. self.backend = backend
  236. self.dumb = dumb
  237. def __call__(self, environ, start_response):
  238. path = environ['PATH_INFO']
  239. method = environ['REQUEST_METHOD']
  240. req = HTTPGitRequest(environ, start_response, self.dumb)
  241. # environ['QUERY_STRING'] has qs args
  242. handler = None
  243. for smethod, spath in self.services.iterkeys():
  244. if smethod != method:
  245. continue
  246. mat = spath.search(path)
  247. if mat:
  248. handler = self.services[smethod, spath]
  249. break
  250. if handler is None:
  251. return req.not_found('Sorry, that method is not supported')
  252. return handler(req, self.backend, mat)