Browse Source

Merge gzip filter support.

Jelmer Vernooij 13 năm trước cách đây
mục cha
commit
66f5e4182d
4 tập tin đã thay đổi với 122 bổ sung và 29 xóa
  1. 4 0
      NEWS
  2. 8 3
      dulwich/tests/compat/test_web.py
  3. 59 15
      dulwich/tests/test_web.py
  4. 51 11
      dulwich/web.py

+ 4 - 0
NEWS

@@ -22,6 +22,10 @@
   * Additional arguments to get_transport_and_path are now passed
     on to the constructor of the transport. (Sam Vilain)
 
+  * The WSGI server now transparently handles when a git client submits data
+    using Content-Encoding: gzip.
+    (David Blewett, Jelmer Vernooij)
+
 0.8.3	2012-01-21
 
  FEATURES

+ 8 - 3
dulwich/tests/compat/test_web.py

@@ -34,6 +34,7 @@ from dulwich.tests import (
     SkipTest,
     )
 from dulwich.web import (
+    make_wsgi_chain,
     HTTPGitApplication,
     HTTPGitRequestHandler,
     )
@@ -101,8 +102,12 @@ class SmartWebTestCase(WebTests, CompatTestCase):
         self.assertFalse('side-band-64k' in caps)
 
     def _make_app(self, backend):
-        app = HTTPGitApplication(backend, handlers=self._handlers())
-        self._check_app(app)
+        app = make_wsgi_chain(backend, handlers=self._handlers())
+        to_check = app
+        # peel back layers until we're at the base application
+        while not issubclass(to_check.__class__, HTTPGitApplication):
+            to_check = to_check.app
+        self._check_app(to_check)
         return app
 
 
@@ -125,7 +130,7 @@ class DumbWebTestCase(WebTests, CompatTestCase):
     """Test cases for dumb HTTP server."""
 
     def _make_app(self, backend):
-        return HTTPGitApplication(backend, dumb=True)
+        return make_wsgi_chain(backend, dumb=True)
 
     def test_push_to_dulwich(self):
         # Note: remove this if dumb pushing is supported

+ 59 - 15
dulwich/tests/test_web.py

@@ -19,6 +19,7 @@
 """Tests for the Git HTTP server."""
 
 from cStringIO import StringIO
+import gzip
 import re
 import os
 
@@ -44,6 +45,7 @@ from dulwich.web import (
     HTTP_NOT_FOUND,
     HTTP_FORBIDDEN,
     HTTP_ERROR,
+    GunzipFilter,
     send_file,
     get_text_file,
     get_loose_object,
@@ -419,19 +421,61 @@ class HTTPGitApplicationTestCase(TestCase):
         super(HTTPGitApplicationTestCase, self).setUp()
         self._app = HTTPGitApplication('backend')
 
-    def test_call(self):
-        def test_handler(req, backend, mat):
-            # tests interface used by all handlers
-            self.assertEqual(environ, req.environ)
-            self.assertEqual('backend', backend)
-            self.assertEqual('/foo', mat.group(0))
-            return 'output'
-
-        self._app.services = {
-          ('GET', re.compile('/foo$')): test_handler,
+        self._environ = {
+            'PATH_INFO': '/foo',
+            'REQUEST_METHOD': 'GET',
         }
-        environ = {
-          'PATH_INFO': '/foo',
-          'REQUEST_METHOD': 'GET',
-          }
-        self.assertEqual('output', self._app(environ, None))
+
+    def _test_handler(self, req, backend, mat):
+        # tests interface used by all handlers
+        self.assertEquals(self._environ, req.environ)
+        self.assertEquals('backend', backend)
+        self.assertEquals('/foo', mat.group(0))
+        return 'output'
+
+    def _add_handler(self, app):
+        req = self._environ['REQUEST_METHOD']
+        app.services = {
+          (req, re.compile('/foo$')): self._test_handler,
+        }
+
+    def test_call(self):
+        self._add_handler(self._app)
+        self.assertEquals('output', self._app(self._environ, None))
+
+
+class GunzipTestCase(HTTPGitApplicationTestCase):
+    """TestCase for testing the GunzipFilter, ensuring the wsgi.input
+    is correctly decompressed and headers are corrected.
+    """
+
+    def setUp(self):
+        super(GunzipTestCase, self).setUp()
+        self._app = GunzipFilter(self._app)
+        self._environ['HTTP_CONTENT_ENCODING'] = 'gzip'
+        self._environ['REQUEST_METHOD'] = 'POST'
+
+    def _get_zstream(self, text):
+        zstream = StringIO()
+        zfile = gzip.GzipFile(fileobj=zstream, mode='w')
+        zfile.write(text)
+        zfile.close()
+        return zstream
+
+    def test_call(self):
+        self._add_handler(self._app.app)
+        orig = self.__class__.__doc__
+        zstream = self._get_zstream(orig)
+        zlength = zstream.tell()
+        zstream.seek(0)
+        self.assertLess(zlength, len(orig))
+        self.assertEquals(self._environ['HTTP_CONTENT_ENCODING'], 'gzip')
+        self._environ['CONTENT_LENGTH'] = zlength
+        self._environ['wsgi.input'] = zstream
+        app_output = self._app(self._environ, None)
+        buf = self._environ['wsgi.input']
+        self.assertIsNot(buf, zstream)
+        buf.seek(0)
+        self.assertEquals(orig, buf.read())
+        self.assertIs(None, self._environ.get('CONTENT_LENGTH'))
+        self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ)

+ 51 - 11
dulwich/web.py

@@ -19,6 +19,7 @@
 """HTTP server for dulwich that implements the git smart HTTP protocol."""
 
 from cStringIO import StringIO
+import gzip
 import os
 import re
 import sys
@@ -225,16 +226,7 @@ def handle_service_request(req, backend, mat):
         return
     req.nocache()
     write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
-
-    input = req.environ['wsgi.input']
-    # This is not necessary if this app is run from a conforming WSGI server.
-    # Unfortunately, there's no way to tell that at this point.
-    # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
-    # content-length
-    content_length = req.environ.get('CONTENT_LENGTH', '')
-    if content_length:
-        input = _LengthLimitedFile(input, int(content_length))
-    proto = ReceivableProtocol(input.read, write)
+    proto = ReceivableProtocol(req.environ['wsgi.input'].read, write)
     handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
     handler.handle()
 
@@ -352,6 +344,54 @@ class HTTPGitApplication(object):
         return handler(req, self.backend, mat)
 
 
+class GunzipFilter(object):
+    """WSGI middleware that unzips gzip-encoded requests before
+    passing on to the underlying application.
+    """
+
+    def __init__(self, application):
+        self.app = application
+
+    def __call__(self, environ, start_response):
+        if environ.get('HTTP_CONTENT_ENCODING', '') == 'gzip':
+            zlength = int(environ.get('CONTENT_LENGTH', '0'))
+            environ.pop('HTTP_CONTENT_ENCODING')
+            if 'CONTENT_LENGTH' in environ:
+                del environ['CONTENT_LENGTH']
+            environ['wsgi.input'] = gzip.GzipFile(filename=None,
+                fileobj=environ['wsgi.input'], mode='r')
+        return self.app(environ, start_response)
+
+
+class LimitedInputFilter(object):
+    """WSGI middleware that limits the input length of a request to that
+    specified in Content-Length.
+    """
+
+    def __init__(self, application):
+        self.app = application
+
+    def __call__(self, environ, start_response):
+        # This is not necessary if this app is run from a conforming WSGI
+        # server. Unfortunately, there's no way to tell that at this point.
+        # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
+        # content-length
+        content_length = environ.get('CONTENT_LENGTH', '')
+        if content_length:
+            environ['wsgi.input'] = _LengthLimitedFile(
+                environ['wsgi.input'], int(content_length))
+        return self.app(environ, start_response)
+
+
+def make_wsgi_chain(backend, dumb=False, handlers=None):
+    """Factory function to create an instance of HTTPGitApplication,
+    correctly wrapped with needed middleware.
+    """
+    app = HTTPGitApplication(backend, dumb, handlers)
+    wrapped_app = GunzipFilter(LimitedInputFilter(app))
+    return wrapped_app
+
+
 # The reference server implementation is based on wsgiref, which is not
 # distributed with python 2.4. If wsgiref is not present, users will not be able
 # to use the HTTP server without a little extra work.
@@ -388,7 +428,7 @@ try:
 
         log_utils.default_logging_config()
         backend = DictBackend({'/': Repo(gitdir)})
-        app = HTTPGitApplication(backend)
+        app = make_wsgi_chain(backend)
         server = make_server(listen_addr, port, app,
                              handler_class=HTTPGitRequestHandler)
         logger.info('Listening for HTTP connections on %s:%d', listen_addr,