Browse Source

pack: Add option to include compressed data in UnpackedObjects.

Change-Id: I877a7815e50fe32e616861d166f217d7883ea515
Dave Borowitz 13 years ago
parent
commit
0214e566f0
2 changed files with 28 additions and 6 deletions
  1. 21 4
      dulwich/pack.py
  2. 7 2
      dulwich/tests/test_pack.py

+ 21 - 4
dulwich/pack.py

@@ -126,6 +126,7 @@ class UnpackedObject(object):
       'obj_chunks',     # Decompressed and delta-resolved chunks.
       'obj_chunks',     # Decompressed and delta-resolved chunks.
       'pack_type_num',  # Type of this object in the pack (may be a delta).
       'pack_type_num',  # Type of this object in the pack (may be a delta).
       'delta_base',     # Delta base offset or SHA.
       'delta_base',     # Delta base offset or SHA.
+      'comp_chunks',    # Compressed object chunks.
       'comp_len',       # Compressed length of this object.
       'comp_len',       # Compressed length of this object.
       'decomp_chunks',  # Decompressed object chunks.
       'decomp_chunks',  # Decompressed object chunks.
       'decomp_len',     # Decompressed length of this object.
       'decomp_len',     # Decompressed length of this object.
@@ -138,6 +139,7 @@ class UnpackedObject(object):
         self.offset = None
         self.offset = None
         self.pack_type_num = pack_type_num
         self.pack_type_num = pack_type_num
         self.delta_base = delta_base
         self.delta_base = delta_base
+        self.comp_chunks = None
         self.comp_len = None
         self.comp_len = None
         self.decomp_chunks = []
         self.decomp_chunks = []
         self.decomp_len = decomp_len
         self.decomp_len = decomp_len
@@ -187,7 +189,8 @@ class UnpackedObject(object):
 _ZLIB_BUFSIZE = 4096
 _ZLIB_BUFSIZE = 4096
 
 
 
 
-def read_zlib_chunks(read_some, unpacked, buffer_size=_ZLIB_BUFSIZE):
+def read_zlib_chunks(read_some, unpacked, include_comp=False,
+                     buffer_size=_ZLIB_BUFSIZE):
     """Read zlib data from a buffer.
     """Read zlib data from a buffer.
 
 
     This function requires that the buffer have additional data following the
     This function requires that the buffer have additional data following the
@@ -199,10 +202,12 @@ def read_zlib_chunks(read_some, unpacked, buffer_size=_ZLIB_BUFSIZE):
         attr is not None, the CRC32 of the compressed bytes will be computed
         attr is not None, the CRC32 of the compressed bytes will be computed
         using this starting CRC32.
         using this starting CRC32.
         After this function, will have the following attrs set:
         After this function, will have the following attrs set:
+            comp_chunks    (if include_comp is True)
             comp_len
             comp_len
             decomp_chunks
             decomp_chunks
             decomp_len
             decomp_len
             crc32
             crc32
+    :param include_comp: If True, include compressed data in the result.
     :param buffer_size: Size of the read buffer.
     :param buffer_size: Size of the read buffer.
     :return: Leftover unused data from the decompression.
     :return: Leftover unused data from the decompression.
     :raise zlib.error: if a decompression error occurred.
     :raise zlib.error: if a decompression error occurred.
@@ -211,6 +216,7 @@ def read_zlib_chunks(read_some, unpacked, buffer_size=_ZLIB_BUFSIZE):
         raise ValueError('non-negative zlib data stream size expected')
         raise ValueError('non-negative zlib data stream size expected')
     decomp_obj = zlib.decompressobj()
     decomp_obj = zlib.decompressobj()
 
 
+    comp_chunks = []
     decomp_chunks = unpacked.decomp_chunks
     decomp_chunks = unpacked.decomp_chunks
     decomp_len = 0
     decomp_len = 0
     comp_len = 0
     comp_len = 0
@@ -220,6 +226,7 @@ def read_zlib_chunks(read_some, unpacked, buffer_size=_ZLIB_BUFSIZE):
         add = read_some(buffer_size)
         add = read_some(buffer_size)
         if not add:
         if not add:
             raise zlib.error('EOF before end of zlib stream')
             raise zlib.error('EOF before end of zlib stream')
+        comp_chunks.append(add)
         comp_len += len(add)
         comp_len += len(add)
         decomp = decomp_obj.decompress(add)
         decomp = decomp_obj.decompress(add)
         decomp_len += len(decomp)
         decomp_len += len(decomp)
@@ -230,6 +237,8 @@ def read_zlib_chunks(read_some, unpacked, buffer_size=_ZLIB_BUFSIZE):
             comp_len -= left
             comp_len -= left
             if crc32 is not None:
             if crc32 is not None:
                 crc32 = binascii.crc32(add[:-left], crc32)
                 crc32 = binascii.crc32(add[:-left], crc32)
+            if include_comp:
+                comp_chunks[-1] = add[:-left]
             break
             break
         elif crc32 is not None:
         elif crc32 is not None:
             crc32 = binascii.crc32(add, crc32)
             crc32 = binascii.crc32(add, crc32)
@@ -241,6 +250,8 @@ def read_zlib_chunks(read_some, unpacked, buffer_size=_ZLIB_BUFSIZE):
 
 
     unpacked.comp_len = comp_len
     unpacked.comp_len = comp_len
     unpacked.crc32 = crc32
     unpacked.crc32 = crc32
+    if include_comp:
+        unpacked.comp_chunks = comp_chunks
     return unused
     return unused
 
 
 
 
@@ -643,7 +654,7 @@ def chunks_length(chunks):
 
 
 
 
 def unpack_object(read_all, read_some=None, compute_crc32=False,
 def unpack_object(read_all, read_some=None, compute_crc32=False,
-                  zlib_bufsize=_ZLIB_BUFSIZE):
+                  include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE):
     """Unpack a Git object.
     """Unpack a Git object.
 
 
     :param read_all: Read function that blocks until the number of requested
     :param read_all: Read function that blocks until the number of requested
@@ -652,6 +663,7 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
         return the number of bytes requested.
         return the number of bytes requested.
     :param compute_crc32: If True, compute the CRC32 of the compressed data. If
     :param compute_crc32: If True, compute the CRC32 of the compressed data. If
         False, the returned CRC32 will be None.
         False, the returned CRC32 will be None.
+    :param include_comp: If True, include compressed data in the result.
     :param zlib_bufsize: An optional buffer size for zlib operations.
     :param zlib_bufsize: An optional buffer size for zlib operations.
     :return: A tuple of (unpacked, unused), where unused is the unused data
     :return: A tuple of (unpacked, unused), where unused is the unused data
         leftover from decompression, and unpacked is an UnpackedObject with the
         leftover from decompression, and unpacked is an UnpackedObject with the
@@ -659,6 +671,7 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
             obj_chunks     (for non-delta types)
             obj_chunks     (for non-delta types)
             pack_type_num
             pack_type_num
             delta_base     (for delta types)
             delta_base     (for delta types)
+            comp_chunks    (if include_comp is True)
             comp_len
             comp_len
             decomp_chunks
             decomp_chunks
             decomp_len
             decomp_len
@@ -697,7 +710,8 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
         delta_base = None
         delta_base = None
 
 
     unpacked = UnpackedObject(type_num, delta_base, size, crc32)
     unpacked = UnpackedObject(type_num, delta_base, size, crc32)
-    unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize)
+    unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
+                              include_comp=include_comp)
     unpacked.comp_len += raw_base
     unpacked.comp_len += raw_base
     return unpacked, unused
     return unpacked, unused
 
 
@@ -1170,6 +1184,7 @@ class DeltaChainIterator(object):
         obj_chunks
         obj_chunks
         pack_type_num
         pack_type_num
         delta_base     (for delta types)
         delta_base     (for delta types)
+        comp_chunks    (if _include_comp is True)
         comp_len
         comp_len
         decomp_chunks
         decomp_chunks
         decomp_len
         decomp_len
@@ -1177,6 +1192,7 @@ class DeltaChainIterator(object):
     """
     """
 
 
     _compute_crc32 = False
     _compute_crc32 = False
+    _include_comp = False
 
 
     def __init__(self, file_obj, resolve_ext_ref=None):
     def __init__(self, file_obj, resolve_ext_ref=None):
         self._file = file_obj
         self._file = file_obj
@@ -1248,7 +1264,8 @@ class DeltaChainIterator(object):
     def _resolve_object(self, offset, obj_type_num, base_chunks):
     def _resolve_object(self, offset, obj_type_num, base_chunks):
         self._file.seek(offset)
         self._file.seek(offset)
         unpacked, _ = unpack_object(
         unpacked, _ = unpack_object(
-          self._file.read, compute_crc32=self._compute_crc32)
+          self._file.read, include_comp=self._include_comp,
+          compute_crc32=self._compute_crc32)
         unpacked.offset = offset
         unpacked.offset = offset
         if base_chunks is None:
         if base_chunks is None:
             assert unpacked.pack_type_num == obj_type_num
             assert unpacked.pack_type_num == obj_type_num

+ 7 - 2
dulwich/tests/test_pack.py

@@ -612,9 +612,9 @@ class ReadZlibTests(TestCase):
         read_zlib_chunks(self.read, self.unpacked)
         read_zlib_chunks(self.read, self.unpacked)
         self.assertEquals(None, self.unpacked.crc32)
         self.assertEquals(None, self.unpacked.crc32)
 
 
-    def _do_decompress_test(self, buffer_size):
+    def _do_decompress_test(self, buffer_size, **kwargs):
         unused = read_zlib_chunks(self.read, self.unpacked,
         unused = read_zlib_chunks(self.read, self.unpacked,
-                                  buffer_size=buffer_size)
+                                  buffer_size=buffer_size, **kwargs)
         self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
         self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
         self.assertEquals(len(self.comp), self.unpacked.comp_len)
         self.assertEquals(len(self.comp), self.unpacked.comp_len)
         self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
         self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
@@ -623,6 +623,7 @@ class ReadZlibTests(TestCase):
 
 
     def test_simple_decompress(self):
     def test_simple_decompress(self):
         self._do_decompress_test(4096)
         self._do_decompress_test(4096)
+        self.assertEqual(None, self.unpacked.comp_chunks)
 
 
     # These buffer sizes are not intended to be realistic, but rather simulate
     # These buffer sizes are not intended to be realistic, but rather simulate
     # larger buffer sizes that may end at various places.
     # larger buffer sizes that may end at various places.
@@ -638,6 +639,10 @@ class ReadZlibTests(TestCase):
     def test_decompress_buffer_size_4(self):
     def test_decompress_buffer_size_4(self):
         self._do_decompress_test(4)
         self._do_decompress_test(4)
 
 
+    def test_decompress_include_comp(self):
+        self._do_decompress_test(4096, include_comp=True)
+        self.assertEqual(self.comp, ''.join(self.unpacked.comp_chunks))
+
 
 
 class DeltifyTests(TestCase):
 class DeltifyTests(TestCase):