2
0
Эх сурвалжийг харах

Added WSGI middleware to handle transparent gzip decoding
(dulwich.web.GunzipFilter).
Added WSGI middleware to handle ensuring the wsgi.input is correctly
limited to the Content-Length value (dulwich.web.LimitedInputFilter).
Added a factory function (dulwich.web.make_wsgi_chain) to create an
instance of HTTPGitApplication, wrapped in the WSGI middleware.
Added tests to ensure the gzip decoder functions correctly.

David Blewett 13 жил өмнө
parent
commit
88d4a8aae3

+ 4 - 0
NEWS

@@ -22,6 +22,10 @@
   * Additional arguments to get_transport_and_path are now passed
     on to the constructor of the transport. (Sam Vilain)
 
+  * The WSGI server now transparently handles when a git client submits data
+    using Content-Encoding: gzip.
+    (David Blewett)
+
 0.8.3	2012-01-21
 
  FEATURES

+ 95 - 0
dulwich/gzip.py

@@ -0,0 +1,95 @@
+# gzip.py -- Implementation of gzip decoder, using the consumer pattern.
+# GzipConsumer Copyright (C) 1995-2010 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its associated
+# documentation, you agree that you have read, understood, and will comply with
+# the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and its
+# associated documentation for any purpose and without fee is hereby granted,
+# provided that the above copyright notice appears in all copies, and that both
+# that copyright notice and this permission notice appear in supporting
+# documentation, and that the name of Secret Labs AB or the author not be used in
+# advertising or publicity pertaining to distribution of the software without
+# specific, written prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
+# NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT
+# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+# SOFTWARE.
+
+"""Implementation of gzip decoder, using the consumer pattern."""
+
+from cStringIO import StringIO
+
+class StringConsumer(object):
+
+    def __init__(self):
+        self._data = StringIO()
+
+    def feed(self, data):
+        self._data.write(data)
+
+    def close(self):
+        # We don't want to close the underlying StringIO instance
+        return self._data
+
+# The below courtesy of Fredrik Lundh
+# http://effbot.org/zone/consumer-gzip.htm
+class GzipConsumer(object):
+    """Consumer class to provide gzip decoding on the fly.
+    The consumer acts like a filter, passing decoded data on to another
+    consumer object.
+    """
+    def __init__(self, consumer=None):
+        if consumer is None:
+            consumer = StringConsumer()
+        self._consumer = consumer
+        self._decoder = None
+        self._data = ''
+
+    def feed(self, data):
+        if self._decoder is None:
+            # check if we have a full gzip header
+            data = self._data + data
+            try:
+                i = 10
+                flag = ord(data[3])
+                if flag & 4: # extra
+                    x = ord(data[i]) + 256*ord(data[i+1])
+                    i = i + 2 + x
+                if flag & 8: # filename
+                    while ord(data[i]):
+                        i = i + 1
+                    i = i + 1
+                if flag & 16: # comment
+                    while ord(data[i]):
+                        i = i + 1
+                    i = i + 1
+                if flag & 2: # crc
+                    i = i + 2
+                if len(data) < i:
+                    raise IndexError('not enough data')
+                if data[:3] != '\x1f\x8b\x08':
+                    raise IOError('invalid gzip data')
+                data = data[i:]
+            except IndexError:
+                self.__data = data
+                return # need more data
+            import zlib
+            self._data = ''
+            self._decoder = zlib.decompressobj(-zlib.MAX_WBITS)
+        data = self._decoder.decompress(data)
+        if data:
+            self._consumer.feed(data)
+
+    def close(self):
+        if self._decoder:
+            data = self._decoder.flush()
+            if data:
+                self._consumer.feed(data)
+        return self._consumer.close()
+

+ 8 - 3
dulwich/tests/compat/test_web.py

@@ -34,6 +34,7 @@ from dulwich.tests import (
     SkipTest,
     )
 from dulwich.web import (
+    make_wsgi_chain,
     HTTPGitApplication,
     HTTPGitRequestHandler,
     )
@@ -101,8 +102,12 @@ class SmartWebTestCase(WebTests, CompatTestCase):
         self.assertFalse('side-band-64k' in caps)
 
     def _make_app(self, backend):
-        app = HTTPGitApplication(backend, handlers=self._handlers())
-        self._check_app(app)
+        app = make_wsgi_chain(backend, handlers=self._handlers())
+        to_check = app
+        # peel back layers until we're at the base application
+        while not issubclass(to_check.__class__, HTTPGitApplication):
+            to_check = to_check.app
+        self._check_app(to_check)
         return app
 
 
@@ -125,7 +130,7 @@ class DumbWebTestCase(WebTests, CompatTestCase):
     """Test cases for dumb HTTP server."""
 
     def _make_app(self, backend):
-        return HTTPGitApplication(backend, dumb=True)
+        return make_wsgi_chain(backend, dumb=True)
 
     def test_push_to_dulwich(self):
         # Note: remove this if dumb pushing is supported

+ 60 - 15
dulwich/tests/test_web.py

@@ -19,6 +19,7 @@
 """Tests for the Git HTTP server."""
 
 from cStringIO import StringIO
+import gzip
 import re
 import os
 
@@ -44,6 +45,7 @@ from dulwich.web import (
     HTTP_NOT_FOUND,
     HTTP_FORBIDDEN,
     HTTP_ERROR,
+    GunzipFilter,
     send_file,
     get_text_file,
     get_loose_object,
@@ -419,19 +421,62 @@ class HTTPGitApplicationTestCase(TestCase):
         super(HTTPGitApplicationTestCase, self).setUp()
         self._app = HTTPGitApplication('backend')
 
-    def test_call(self):
-        def test_handler(req, backend, mat):
-            # tests interface used by all handlers
-            self.assertEqual(environ, req.environ)
-            self.assertEqual('backend', backend)
-            self.assertEqual('/foo', mat.group(0))
-            return 'output'
-
-        self._app.services = {
-          ('GET', re.compile('/foo$')): test_handler,
+        self._environ = {
+            'PATH_INFO': '/foo',
+            'REQUEST_METHOD': 'GET',
         }
-        environ = {
-          'PATH_INFO': '/foo',
-          'REQUEST_METHOD': 'GET',
-          }
-        self.assertEqual('output', self._app(environ, None))
+
+    def _test_handler(self, req, backend, mat):
+        # tests interface used by all handlers
+        self.assertEquals(self._environ, req.environ)
+        self.assertEquals('backend', backend)
+        self.assertEquals('/foo', mat.group(0))
+        return 'output'
+
+    def _add_handler(self, app):
+        req = self._environ['REQUEST_METHOD']
+        app.services = {
+          (req, re.compile('/foo$')): self._test_handler,
+        }
+
+    def test_call(self):
+        self._add_handler(self._app)
+        self.assertEquals('output', self._app(self._environ, None))
+
+
+class GunzipTestCase(HTTPGitApplicationTestCase):
+    """TestCase for testing the GunzipFilter, ensuring the wsgi.input
+    is correctly decompressed and headers are corrected.
+    """
+
+    def setUp(self):
+        super(GunzipTestCase, self).setUp()
+        self._app = GunzipFilter(self._app)
+        self._environ['HTTP_CONTENT_ENCODING'] = 'gzip'
+        self._environ['REQUEST_METHOD'] = 'POST'
+
+    def _get_zstream(self, text):
+        zstream = StringIO()
+        zfile = gzip.GzipFile(fileobj=zstream, mode='w')
+        zfile.write(text)
+        zfile.close()
+        return zstream
+
+    def test_call(self):
+        self._add_handler(self._app.app)
+        orig = self.__class__.__doc__
+        zstream = self._get_zstream(orig)
+        zlength = zstream.tell()
+        zstream.seek(0)
+        self.assertLess(zlength, len(orig))
+        self.assertEquals(self._environ['HTTP_CONTENT_ENCODING'],
+                          'gzip')
+        self._environ['CONTENT_LENGTH'] = zlength
+        self._environ['wsgi.input'] = zstream
+        app_output = self._app(self._environ, None)
+        buf = self._environ['wsgi.input']
+        self.assertIsNot(buf, zstream)
+        buf.seek(0)
+        self.assertEquals(orig, buf.read())
+        self.assertLess(zlength, int(self._environ['CONTENT_LENGTH']))
+        self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ)

+ 61 - 11
dulwich/web.py

@@ -225,16 +225,7 @@ def handle_service_request(req, backend, mat):
         return
     req.nocache()
     write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
-
-    input = req.environ['wsgi.input']
-    # This is not necessary if this app is run from a conforming WSGI server.
-    # Unfortunately, there's no way to tell that at this point.
-    # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
-    # content-length
-    content_length = req.environ.get('CONTENT_LENGTH', '')
-    if content_length:
-        input = _LengthLimitedFile(input, int(content_length))
-    proto = ReceivableProtocol(input.read, write)
+    proto = ReceivableProtocol(req.environ['wsgi.input'].read, write)
     handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
     handler.handle()
 
@@ -352,6 +343,65 @@ class HTTPGitApplication(object):
         return handler(req, self.backend, mat)
 
 
+class GunzipFilter(object):
+    """WSGI middleware that unzips gzip-encoded requests before
+    passing on to the underlying application.
+    """
+
+    def __init__(self, application):
+        self.app = application
+
+    def __call__(self, environ, start_response):
+        if environ.get('HTTP_CONTENT_ENCODING', '') == 'gzip':
+            from dulwich.gzip import GzipConsumer
+            # Note, we decompress everything in wsgi.input
+            # so that anything further in the chain sees
+            # a regular stream, and all relevant HTTP headers
+            # are updated
+            zlength = int(environ.get('CONTENT_LENGTH', '0'))
+            consumer = GzipConsumer()
+            consumer.feed(environ['wsgi.input'].read(zlength))
+            buf = consumer.close()
+            environ.pop('HTTP_CONTENT_ENCODING')
+
+            environ['CONTENT_LENGTH'] = str(buf.tell())
+            buf.seek(0)
+            environ['wsgi.input'] = buf
+
+        return self.app(environ, start_response)
+
+
+class LimitedInputFilter(object):
+    """WSGI middleware that limits the input length of a request to that
+    specified in Content-Length.
+    """
+
+    def __init__(self, application):
+        self.app = application
+
+    def __call__(self, environ, start_response):
+        # This is not necessary if this app is run from a conforming WSGI
+        # server. Unfortunately, there's no way to tell that at this point.
+        # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
+        # content-length
+        content_length = environ.get('CONTENT_LENGTH', '')
+        if content_length:
+            input = environ['wsgi.input']
+            environ['wsgi.input'] = _LengthLimitedFile(input,
+                                                       int(content_length))
+
+        return self.app(environ, start_response)
+
+
+def make_wsgi_chain(backend, dumb=False, handlers=None):
+    """Factory function to create an instance of HTTPGitApplication,
+    correctly wrapped with needed middleware.
+    """
+    app = HTTPGitApplication(backend, dumb, handlers)
+    wrapped_app = GunzipFilter(LimitedInputFilter(app))
+    return wrapped_app
+
+
 # The reference server implementation is based on wsgiref, which is not
 # distributed with python 2.4. If wsgiref is not present, users will not be able
 # to use the HTTP server without a little extra work.
@@ -388,7 +438,7 @@ try:
 
         log_utils.default_logging_config()
         backend = DictBackend({'/': Repo(gitdir)})
-        app = HTTPGitApplication(backend)
+        app = make_wsgi_chain(backend)
         server = make_server(listen_addr, port, app,
                              handler_class=HTTPGitRequestHandler)
         logger.info('Listening for HTTP connections on %s:%d', listen_addr,