Browse Source

Fix read_zlib_chunks to avoid appending junk data.

Adding junk data to the end of data fed to the zlib.decompressobj
creates the potential for zlib errors if a read doesn't fill the entire
buffer. This implementation is cleaner, and only requires the assumption
that there is some data in the read source after the zlib stream. Since
compressed streams in packfiles are always followed by either a new
packed object or the SHA-1 trailer, this assumption should be safe.

Added more robust tests for zlib reading with various buffer sizes.

Change-Id: I813d30997edf3e0c924eceac05675e0eb121b96c
Dave Borowitz 15 năm trước cách đây
mục cha
commit
85c2b302c7
2 tập tin đã thay đổi với 89 bổ sung18 xóa
  1. 28 14
      dulwich/pack.py
  2. 61 4
      dulwich/tests/test_pack.py

+ 28 - 14
dulwich/pack.py

@@ -84,23 +84,37 @@ def take_msb_bytes(read):
     return ret
 
 
-def read_zlib_chunks(read, buffer_size=4096):
-    """Read chunks of zlib data from a buffer.
-    
-    :param read: Read function
-    :return: Tuple with list of chunks, length of 
-        compressed data length and unused read data
+def read_zlib_chunks(read_some, dec_size, buffer_size=4096):
+    """Read zlib data from a buffer.
+
+    This function requires that the buffer have additional data following the
+    compressed data, which is guaranteed to be the case for git pack files.
+
+    :param read_some: Read function that returns at least one byte, but may
+        return less than the requested size
+    :param dec_size: Expected size of the decompressed buffer
+    :param buffer_size: Size of the read buffer
+    :return: Tuple with list of chunks, length of compressed data length and
+        and unused read data.
+    :raise zlib.error: if a decompression error occurred.
     """
+    if dec_size <= -1:
+        raise ValueError("non-negative zlib data stream size expected")
     obj = zlib.decompressobj()
     ret = []
     fed = 0
+    size = 0
     while obj.unused_data == "":
-        add = read(buffer_size)
-        if len(add) < buffer_size:
-            add += "Z"
+        add = read_some(buffer_size)
+        if not add:
+            raise zlib.error("EOF before end of zlib stream")
         fed += len(add)
-        ret.append(obj.decompress(add))
-    comp_len = fed-len(obj.unused_data)
+        decomp = obj.decompress(add)
+        size += len(decomp)
+        ret.append(decomp)
+    if size != dec_size:
+        raise zlib.error("decompressed data does not match expected size")
+    comp_len = fed - len(obj.unused_data)
     return ret, comp_len, obj.unused_data
 
 
@@ -441,17 +455,17 @@ def unpack_object(read):
             delta_base_offset += 1
             delta_base_offset <<= 7
             delta_base_offset += (byte & 0x7f)
-        uncomp, comp_len, unused = read_zlib_chunks(read)
+        uncomp, comp_len, unused = read_zlib_chunks(read, size)
         assert size == chunks_length(uncomp)
         return type, (delta_base_offset, uncomp), comp_len+raw_base, unused
     elif type == 7: # ref delta
         basename = read(20)
         raw_base += 20
-        uncomp, comp_len, unused = read_zlib_chunks(read)
+        uncomp, comp_len, unused = read_zlib_chunks(read, size)
         assert size == chunks_length(uncomp)
         return type, (basename, uncomp), comp_len+raw_base, unused
     else:
-        uncomp, comp_len, unused = read_zlib_chunks(read)
+        uncomp, comp_len, unused = read_zlib_chunks(read, size)
         assert chunks_length(uncomp) == size
         return type, uncomp, comp_len+raw_base, unused
 

+ 61 - 4
dulwich/tests/test_pack.py

@@ -24,6 +24,7 @@
 from cStringIO import StringIO
 import os
 import unittest
+import zlib
 
 from dulwich.objects import (
     Tree,
@@ -277,11 +278,67 @@ class TestPackIndexWritingv2(unittest.TestCase, BaseTestPackIndexWriting):
         self._expected_version = 2
         self._write_fn = write_pack_index_v2
 
-TEST_COMP1 = """\x78\x9c\x9d\x8e\xc1\x0a\xc2\x30\x10\x44\xef\xf9\x8a\xbd\xa9\x08\x92\x86\xb4\x26\x20\xe2\xd9\x83\x78\xf2\xbe\x49\x37\xb5\xa5\x69\xca\x36\xf5\xfb\x4d\xfd\x04\x67\x6e\x33\xcc\xf0\x32\x13\x81\xc6\x16\x8d\xa9\xbd\xad\x6c\xe3\x8a\x03\x4a\x73\xd6\xda\xd5\xa6\x51\x2e\x58\x65\x6c\x13\xbc\x94\x4a\xcc\xc8\x34\x65\x78\xa4\x89\x04\xae\xf9\x9d\x18\xee\x34\x46\x62\x78\x11\x4f\x29\xf5\x03\x5c\x86\x5f\x70\x5b\x30\x3a\x3c\x25\xee\xae\x50\xa9\xf2\x60\xa4\xaa\x34\x1c\x65\x91\xf0\x29\xc6\x3e\x67\xfa\x6f\x2d\x9e\x9c\x3e\x7d\x4b\xc0\x34\x8f\xe8\x29\x6e\x48\xa1\xa0\xc4\x88\xf3\xfe\xb0\x5b\x20\x85\xb0\x50\x06\xe4\x6e\xdd\xca\xd3\x17\x26\xfa\x49\x23"""
 
+class ReadZlibTests(unittest.TestCase):
 
-class ZlibTests(unittest.TestCase):
+    decomp = (
+      'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
+      'parent None\n'
+      'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
+      'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
+      '\n'
+      "Provide replacement for mmap()'s offset argument.")
+    comp = zlib.compress(decomp)
+    extra = 'nextobject'
+
+    def setUp(self):
+        self.read = StringIO(self.comp + self.extra).read
+
+    def test_decompress_size(self):
+        good_decomp_len = len(self.decomp)
+        self.assertRaises(ValueError, read_zlib_chunks, self.read, -1)
+        self.assertRaises(zlib.error, read_zlib_chunks, self.read,
+                          good_decomp_len - 1)
+        self.assertRaises(zlib.error, read_zlib_chunks, self.read,
+                          good_decomp_len + 1)
+
+    def test_decompress_truncated(self):
+        read = StringIO(self.comp[:10]).read
+        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
+
+        read = StringIO(self.comp).read
+        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
+
+    def test_decompress_empty(self):
+        comp = zlib.compress('')
+        read = StringIO(comp + self.extra).read
+        decomp, comp_len, unused_data = read_zlib_chunks(read, 0)
+        self.assertEqual('', ''.join(decomp))
+        self.assertEqual(len(comp), comp_len)
+        self.assertNotEquals('', unused_data)
+        self.assertEquals(self.extra, unused_data + read())
+
+    def _do_decompress_test(self, buffer_size):
+        decomp, comp_len, unused_data = read_zlib_chunks(
+          self.read, len(self.decomp), buffer_size=buffer_size)
+        self.assertEquals(self.decomp, ''.join(decomp))
+        self.assertEquals(len(self.comp), comp_len)
+        self.assertNotEquals('', unused_data)
+        self.assertEquals(self.extra, unused_data + self.read())
 
     def test_simple_decompress(self):
-        self.assertEquals((["tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument."], 158, 'Z'), 
-        read_zlib_chunks(StringIO(TEST_COMP1).read, 229))
+        self._do_decompress_test(4096)
+
+    # These buffer sizes are not intended to be realistic, but rather simulate
+    # larger buffer sizes that may end at various places.
+    def test_decompress_buffer_size_1(self):
+        self._do_decompress_test(1)
+
+    def test_decompress_buffer_size_2(self):
+        self._do_decompress_test(2)
+
+    def test_decompress_buffer_size_3(self):
+        self._do_decompress_test(3)
+
+    def test_decompress_buffer_size_4(self):
+        self._do_decompress_test(4)