فهرست منبع

Added WSGI middleware to handle transparent gzip decoding
(dulwich.web.GunzipFilter).
Added WSGI middleware to ensure that wsgi.input is correctly
limited to the Content-Length value (dulwich.web.LimitedInputFilter).
Added a factory function (dulwich.web.make_wsgi_chain) to create an
instance of HTTPGitApplication, wrapped in the WSGI middleware.
Added tests to ensure the gzip decoder functions correctly.

David Blewett 13 سال پیش
والد
کامیت
4dfcf0288b
5 فایلهای تغییر یافته به همراه 228 افزوده شده و 29 حذف شده
  1. 4 0
      NEWS
  2. 95 0
      dulwich/gzip.py
  3. 8 3
      dulwich/tests/compat/test_web.py
  4. 60 15
      dulwich/tests/test_web.py
  5. 61 11
      dulwich/web.py

+ 4 - 0
NEWS

@@ -22,6 +22,10 @@
   * Additional arguments to get_transport_and_path are now passed
     on to the constructor of the transport. (Sam Vilain)
 
+  * The WSGI server now transparently decodes request bodies that a git
+    client submits with Content-Encoding: gzip.
+    (David Blewett)
+
 0.8.3	2012-01-21
 
  FEATURES

+ 95 - 0
dulwich/gzip.py

@@ -0,0 +1,95 @@
+# gzip.py -- Implementation of gzip decoder, using the consumer pattern.
+# GzipConsumer Copyright (C) 1995-2010 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its associated
+# documentation, you agree that you have read, understood, and will comply with
+# the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and its
+# associated documentation for any purpose and without fee is hereby granted,
+# provided that the above copyright notice appears in all copies, and that both
+# that copyright notice and this permission notice appear in supporting
+# documentation, and that the name of Secret Labs AB or the author not be used in
+# advertising or publicity pertaining to distribution of the software without
+# specific, written prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
+# NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT
+# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+# SOFTWARE.
+
+"""Implementation of gzip decoder, using the consumer pattern."""
+
+from cStringIO import StringIO
+
+class StringConsumer(object):
+
+    def __init__(self):
+        self._data = StringIO()
+
+    def feed(self, data):
+        self._data.write(data)
+
+    def close(self):
+        # We don't want to close the underlying StringIO instance
+        return self._data
+
+# The below courtesy of Fredrik Lundh
+# http://effbot.org/zone/consumer-gzip.htm
+class GzipConsumer(object):
+    """Consumer class to provide gzip decoding on the fly.
+    The consumer acts like a filter, passing decoded data on to another
+    consumer object.
+    """
+    def __init__(self, consumer=None):
+        if consumer is None:
+            consumer = StringConsumer()
+        self._consumer = consumer
+        self._decoder = None
+        self._data = ''
+
+    def feed(self, data):
+        if self._decoder is None:
+            # check if we have a full gzip header
+            data = self._data + data
+            try:
+                i = 10
+                flag = ord(data[3])
+                if flag & 4: # extra
+                    x = ord(data[i]) + 256*ord(data[i+1])
+                    i = i + 2 + x
+                if flag & 8: # filename
+                    while ord(data[i]):
+                        i = i + 1
+                    i = i + 1
+                if flag & 16: # comment
+                    while ord(data[i]):
+                        i = i + 1
+                    i = i + 1
+                if flag & 2: # crc
+                    i = i + 2
+                if len(data) < i:
+                    raise IndexError('not enough data')
+                if data[:3] != '\x1f\x8b\x08':
+                    raise IOError('invalid gzip data')
+                data = data[i:]
+            except IndexError:
+                self.__data = data
+                return # need more data
+            import zlib
+            self._data = ''
+            self._decoder = zlib.decompressobj(-zlib.MAX_WBITS)
+        data = self._decoder.decompress(data)
+        if data:
+            self._consumer.feed(data)
+
+    def close(self):
+        if self._decoder:
+            data = self._decoder.flush()
+            if data:
+                self._consumer.feed(data)
+        return self._consumer.close()
+

+ 8 - 3
dulwich/tests/compat/test_web.py

@@ -34,6 +34,7 @@ from dulwich.tests import (
     SkipTest,
     )
 from dulwich.web import (
+    make_wsgi_chain,
     HTTPGitApplication,
     HTTPGitRequestHandler,
     )
@@ -101,8 +102,12 @@ class SmartWebTestCase(WebTests, CompatTestCase):
         self.assertFalse('side-band-64k' in caps)
 
     def _make_app(self, backend):
-        app = HTTPGitApplication(backend, handlers=self._handlers())
-        self._check_app(app)
+        app = make_wsgi_chain(backend, handlers=self._handlers())
+        to_check = app
+        # peel back layers until we're at the base application
+        while not issubclass(to_check.__class__, HTTPGitApplication):
+            to_check = to_check.app
+        self._check_app(to_check)
         return app
 
 
@@ -125,7 +130,7 @@ class DumbWebTestCase(WebTests, CompatTestCase):
     """Test cases for dumb HTTP server."""
 
     def _make_app(self, backend):
-        return HTTPGitApplication(backend, dumb=True)
+        return make_wsgi_chain(backend, dumb=True)
 
     def test_push_to_dulwich(self):
         # Note: remove this if dumb pushing is supported

+ 60 - 15
dulwich/tests/test_web.py

@@ -19,6 +19,7 @@
 """Tests for the Git HTTP server."""
 
 from cStringIO import StringIO
+import gzip
 import re
 import os
 
@@ -44,6 +45,7 @@ from dulwich.web import (
     HTTP_NOT_FOUND,
     HTTP_FORBIDDEN,
     HTTP_ERROR,
+    GunzipFilter,
     send_file,
     get_text_file,
     get_loose_object,
@@ -419,19 +421,62 @@ class HTTPGitApplicationTestCase(TestCase):
         super(HTTPGitApplicationTestCase, self).setUp()
         self._app = HTTPGitApplication('backend')
 
-    def test_call(self):
-        def test_handler(req, backend, mat):
-            # tests interface used by all handlers
-            self.assertEqual(environ, req.environ)
-            self.assertEqual('backend', backend)
-            self.assertEqual('/foo', mat.group(0))
-            return 'output'
-
-        self._app.services = {
-          ('GET', re.compile('/foo$')): test_handler,
+        self._environ = {
+            'PATH_INFO': '/foo',
+            'REQUEST_METHOD': 'GET',
         }
-        environ = {
-          'PATH_INFO': '/foo',
-          'REQUEST_METHOD': 'GET',
-          }
-        self.assertEqual('output', self._app(environ, None))
+
+    def _test_handler(self, req, backend, mat):
+        # tests interface used by all handlers
+        self.assertEquals(self._environ, req.environ)
+        self.assertEquals('backend', backend)
+        self.assertEquals('/foo', mat.group(0))
+        return 'output'
+
+    def _add_handler(self, app):
+        req = self._environ['REQUEST_METHOD']
+        app.services = {
+          (req, re.compile('/foo$')): self._test_handler,
+        }
+
+    def test_call(self):
+        self._add_handler(self._app)
+        self.assertEquals('output', self._app(self._environ, None))
+
+
+class GunzipTestCase(HTTPGitApplicationTestCase):
+    """TestCase for testing the GunzipFilter, ensuring the wsgi.input
+    is correctly decompressed and headers are corrected.
+    """
+
+    def setUp(self):
+        super(GunzipTestCase, self).setUp()
+        self._app = GunzipFilter(self._app)
+        self._environ['HTTP_CONTENT_ENCODING'] = 'gzip'
+        self._environ['REQUEST_METHOD'] = 'POST'
+
+    def _get_zstream(self, text):
+        zstream = StringIO()
+        zfile = gzip.GzipFile(fileobj=zstream, mode='w')
+        zfile.write(text)
+        zfile.close()
+        return zstream
+
+    def test_call(self):
+        self._add_handler(self._app.app)
+        orig = self.__class__.__doc__
+        zstream = self._get_zstream(orig)
+        zlength = zstream.tell()
+        zstream.seek(0)
+        self.assertLess(zlength, len(orig))
+        self.assertEquals(self._environ['HTTP_CONTENT_ENCODING'],
+                          'gzip')
+        self._environ['CONTENT_LENGTH'] = zlength
+        self._environ['wsgi.input'] = zstream
+        app_output = self._app(self._environ, None)
+        buf = self._environ['wsgi.input']
+        self.assertIsNot(buf, zstream)
+        buf.seek(0)
+        self.assertEquals(orig, buf.read())
+        self.assertLess(zlength, int(self._environ['CONTENT_LENGTH']))
+        self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ)

+ 61 - 11
dulwich/web.py

@@ -225,16 +225,7 @@ def handle_service_request(req, backend, mat):
         return
     req.nocache()
     write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
-
-    input = req.environ['wsgi.input']
-    # This is not necessary if this app is run from a conforming WSGI server.
-    # Unfortunately, there's no way to tell that at this point.
-    # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
-    # content-length
-    content_length = req.environ.get('CONTENT_LENGTH', '')
-    if content_length:
-        input = _LengthLimitedFile(input, int(content_length))
-    proto = ReceivableProtocol(input.read, write)
+    proto = ReceivableProtocol(req.environ['wsgi.input'].read, write)
     handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
     handler.handle()
 
@@ -352,6 +343,65 @@ class HTTPGitApplication(object):
         return handler(req, self.backend, mat)
 
 
+class GunzipFilter(object):
+    """WSGI middleware that unzips gzip-encoded requests before
+    passing on to the underlying application.
+    """
+
+    def __init__(self, application):
+        self.app = application
+
+    def __call__(self, environ, start_response):
+        if environ.get('HTTP_CONTENT_ENCODING', '') == 'gzip':
+            from dulwich.gzip import GzipConsumer
+            # Note, we decompress everything in wsgi.input
+            # so that anything further in the chain sees
+            # a regular stream, and all relevant HTTP headers
+            # are updated
+            zlength = int(environ.get('CONTENT_LENGTH', '0'))
+            consumer = GzipConsumer()
+            consumer.feed(environ['wsgi.input'].read(zlength))
+            buf = consumer.close()
+            environ.pop('HTTP_CONTENT_ENCODING')
+
+            environ['CONTENT_LENGTH'] = str(buf.tell())
+            buf.seek(0)
+            environ['wsgi.input'] = buf
+
+        return self.app(environ, start_response)
+
+
+class LimitedInputFilter(object):
+    """WSGI middleware that limits the input length of a request to that
+    specified in Content-Length.
+    """
+
+    def __init__(self, application):
+        self.app = application
+
+    def __call__(self, environ, start_response):
+        # This is not necessary if this app is run from a conforming WSGI
+        # server. Unfortunately, there's no way to tell that at this point.
+        # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
+        # content-length
+        content_length = environ.get('CONTENT_LENGTH', '')
+        if content_length:
+            input = environ['wsgi.input']
+            environ['wsgi.input'] = _LengthLimitedFile(input,
+                                                       int(content_length))
+
+        return self.app(environ, start_response)
+
+
+def make_wsgi_chain(backend, dumb=False, handlers=None):
+    """Factory function to create an instance of HTTPGitApplication,
+    correctly wrapped with needed middleware.
+    """
+    app = HTTPGitApplication(backend, dumb, handlers)
+    wrapped_app = GunzipFilter(LimitedInputFilter(app))
+    return wrapped_app
+
+
 # The reference server implementation is based on wsgiref, which is not
 # distributed with python 2.4. If wsgiref is not present, users will not be able
 # to use the HTTP server without a little extra work.
@@ -388,7 +438,7 @@ try:
 
         log_utils.default_logging_config()
         backend = DictBackend({'/': Repo(gitdir)})
-        app = HTTPGitApplication(backend)
+        app = make_wsgi_chain(backend)
         server = make_server(listen_addr, port, app,
                              handler_class=HTTPGitRequestHandler)
         logger.info('Listening for HTTP connections on %s:%d', listen_addr,