浏览代码

pack: Remove comp_len from UnpackedObject.

This was only ever used for offset calculation, which can be done
instead by actually backing up over the unused data (more closely
matching the comment).

Also, despite being called comp_len, it included the length of the
(uncompressed) header, which could be confusing. For now, callers
that really need it can compute it from comp_chunks. (We could add
back comp_len and/or header_len in the future if it turns out to be
useful.)

Change-Id: I902d2f1817c1ad18ac45a086aaee429eea41bd36
Dave Borowitz 13 年之前
父节点
当前提交
9105602440
共有 2 个文件被更改,包括 11 次插入28 次删除
  1. 9 19
      dulwich/pack.py
  2. 2 9
      dulwich/tests/test_pack.py

+ 9 - 19
dulwich/pack.py

@@ -127,7 +127,6 @@ class UnpackedObject(object):
       'pack_type_num',  # Type of this object in the pack (may be a delta).
       'pack_type_num',  # Type of this object in the pack (may be a delta).
       'delta_base',     # Delta base offset or SHA.
       'delta_base',     # Delta base offset or SHA.
       'comp_chunks',    # Compressed object chunks.
       'comp_chunks',    # Compressed object chunks.
-      'comp_len',       # Compressed length of this object.
       'decomp_chunks',  # Decompressed object chunks.
       'decomp_chunks',  # Decompressed object chunks.
       'decomp_len',     # Decompressed length of this object.
       'decomp_len',     # Decompressed length of this object.
       'crc32',          # CRC32.
       'crc32',          # CRC32.
@@ -140,7 +139,6 @@ class UnpackedObject(object):
         self.pack_type_num = pack_type_num
         self.pack_type_num = pack_type_num
         self.delta_base = delta_base
         self.delta_base = delta_base
         self.comp_chunks = None
         self.comp_chunks = None
-        self.comp_len = None
         self.decomp_chunks = []
         self.decomp_chunks = []
         self.decomp_len = decomp_len
         self.decomp_len = decomp_len
         self.crc32 = crc32
         self.crc32 = crc32
@@ -203,7 +201,6 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
         using this starting CRC32.
         using this starting CRC32.
         After this function, will have the following attrs set:
         After this function, will have the following attrs set:
             comp_chunks    (if include_comp is True)
             comp_chunks    (if include_comp is True)
-            comp_len
             decomp_chunks
             decomp_chunks
             decomp_len
             decomp_len
             crc32
             crc32
@@ -219,7 +216,6 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
     comp_chunks = []
     comp_chunks = []
     decomp_chunks = unpacked.decomp_chunks
     decomp_chunks = unpacked.decomp_chunks
     decomp_len = 0
     decomp_len = 0
-    comp_len = 0
     crc32 = unpacked.crc32
     crc32 = unpacked.crc32
 
 
     while True:
     while True:
@@ -227,14 +223,12 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
         if not add:
         if not add:
             raise zlib.error('EOF before end of zlib stream')
             raise zlib.error('EOF before end of zlib stream')
         comp_chunks.append(add)
         comp_chunks.append(add)
-        comp_len += len(add)
         decomp = decomp_obj.decompress(add)
         decomp = decomp_obj.decompress(add)
         decomp_len += len(decomp)
         decomp_len += len(decomp)
         decomp_chunks.append(decomp)
         decomp_chunks.append(decomp)
         unused = decomp_obj.unused_data
         unused = decomp_obj.unused_data
         if unused:
         if unused:
             left = len(unused)
             left = len(unused)
-            comp_len -= left
             if crc32 is not None:
             if crc32 is not None:
                 crc32 = binascii.crc32(add[:-left], crc32)
                 crc32 = binascii.crc32(add[:-left], crc32)
             if include_comp:
             if include_comp:
@@ -248,7 +242,6 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
     if decomp_len != unpacked.decomp_len:
     if decomp_len != unpacked.decomp_len:
         raise zlib.error('decompressed data does not match expected size')
         raise zlib.error('decompressed data does not match expected size')
 
 
-    unpacked.comp_len = comp_len
     unpacked.crc32 = crc32
     unpacked.crc32 = crc32
     if include_comp:
     if include_comp:
         unpacked.comp_chunks = comp_chunks
         unpacked.comp_chunks = comp_chunks
@@ -672,7 +665,6 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
             pack_type_num
             pack_type_num
             delta_base     (for delta types)
             delta_base     (for delta types)
             comp_chunks    (if include_comp is True)
             comp_chunks    (if include_comp is True)
-            comp_len
             decomp_chunks
             decomp_chunks
             decomp_len
             decomp_len
             crc32          (if compute_crc32 is True)
             crc32          (if compute_crc32 is True)
@@ -712,7 +704,6 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
     unpacked = UnpackedObject(type_num, delta_base, size, crc32)
     unpacked = UnpackedObject(type_num, delta_base, size, crc32)
     unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
     unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
                               include_comp=include_comp)
                               include_comp=include_comp)
-    unpacked.comp_len += raw_base
     return unpacked, unused
     return unpacked, unused
 
 
 
 
@@ -817,7 +808,6 @@ class PackStreamReader(object):
             obj_type_num
             obj_type_num
             obj_chunks (for non-delta types)
             obj_chunks (for non-delta types)
             delta_base (for delta types)
             delta_base (for delta types)
-            comp_len
             decomp_chunks
             decomp_chunks
             decomp_len
             decomp_len
             crc32 (if compute_crc32 is True)
             crc32 (if compute_crc32 is True)
@@ -1052,27 +1042,28 @@ class PackData(object):
         return type, chunks
         return type, chunks
 
 
     def iterobjects(self, progress=None, compute_crc32=True):
     def iterobjects(self, progress=None, compute_crc32=True):
-        offset = self._header_size
+        self._file.seek(self._header_size)
         for i in xrange(1, self._num_objects + 1):
         for i in xrange(1, self._num_objects + 1):
-            self._file.seek(offset)  # Back up over unused data.
-            unpacked, _ = unpack_object(
+            offset = self._file.tell()
+            unpacked, unused = unpack_object(
               self._file.read, compute_crc32=compute_crc32)
               self._file.read, compute_crc32=compute_crc32)
             if progress is not None:
             if progress is not None:
                 progress(i, self._num_objects)
                 progress(i, self._num_objects)
             yield (offset, unpacked.pack_type_num, unpacked._obj(),
             yield (offset, unpacked.pack_type_num, unpacked._obj(),
                    unpacked.crc32)
                    unpacked.crc32)
-            offset += unpacked.comp_len
+            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
 
 
     def _iter_unpacked(self):
     def _iter_unpacked(self):
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
         # return type.
         # return type.
-        offset = self._header_size
+        self._file.seek(self._header_size)
         for _ in xrange(self._num_objects):
         for _ in xrange(self._num_objects):
-            self._file.seek(offset)  # Back up over unused data.
-            unpacked, _ = unpack_object(self._file.read, compute_crc32=False)
+            offset = self._file.tell()
+            unpacked, unused = unpack_object(
+              self._file.read, compute_crc32=False)
             unpacked.offset = offset
             unpacked.offset = offset
             yield unpacked
             yield unpacked
-            offset += unpacked.comp_len
+            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
 
 
     def iterentries(self, progress=None):
     def iterentries(self, progress=None):
         """Yield entries summarizing the contents of this pack.
         """Yield entries summarizing the contents of this pack.
@@ -1185,7 +1176,6 @@ class DeltaChainIterator(object):
         pack_type_num
         pack_type_num
         delta_base     (for delta types)
         delta_base     (for delta types)
         comp_chunks    (if _include_comp is True)
         comp_chunks    (if _include_comp is True)
-        comp_len
         decomp_chunks
         decomp_chunks
         decomp_len
         decomp_len
         crc32          (if _compute_crc32 is True)
         crc32          (if _compute_crc32 is True)

+ 2 - 9
dulwich/tests/test_pack.py

@@ -441,7 +441,6 @@ class WritePackTests(TestCase):
         self.assertEqual(Blob.type_num, unpacked.pack_type_num)
         self.assertEqual(Blob.type_num, unpacked.pack_type_num)
         self.assertEqual(Blob.type_num, unpacked.obj_type_num)
         self.assertEqual(Blob.type_num, unpacked.obj_type_num)
         self.assertEqual(['blob'], unpacked.decomp_chunks)
         self.assertEqual(['blob'], unpacked.decomp_chunks)
-        self.assertEqual(comp_len, unpacked.comp_len)
         self.assertEqual(crc32, unpacked.crc32)
         self.assertEqual(crc32, unpacked.crc32)
         self.assertEqual('x', unused)
         self.assertEqual('x', unused)
 
 
@@ -603,7 +602,6 @@ class ReadZlibTests(TestCase):
         read = StringIO(comp + self.extra).read
         read = StringIO(comp + self.extra).read
         unused = read_zlib_chunks(read, unpacked)
         unused = read_zlib_chunks(read, unpacked)
         self.assertEqual('', ''.join(unpacked.decomp_chunks))
         self.assertEqual('', ''.join(unpacked.decomp_chunks))
-        self.assertEqual(len(comp), unpacked.comp_len)
         self.assertNotEquals('', unused)
         self.assertNotEquals('', unused)
         self.assertEquals(self.extra, unused + read())
         self.assertEquals(self.extra, unused + read())
 
 
@@ -616,7 +614,6 @@ class ReadZlibTests(TestCase):
         unused = read_zlib_chunks(self.read, self.unpacked,
         unused = read_zlib_chunks(self.read, self.unpacked,
                                   buffer_size=buffer_size, **kwargs)
                                   buffer_size=buffer_size, **kwargs)
         self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
         self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
-        self.assertEquals(len(self.comp), self.unpacked.comp_len)
         self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
         self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
         self.assertNotEquals('', unused)
         self.assertNotEquals('', unused)
         self.assertEquals(self.extra, unused + self.read())
         self.assertEquals(self.extra, unused + self.read())
@@ -691,8 +688,6 @@ class TestPackStreamReader(TestCase):
         self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
         self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
         self.assertEqual(None, unpacked_blob.delta_base)
         self.assertEqual(None, unpacked_blob.delta_base)
         self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
         self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
-        self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
-                         unpacked_blob.comp_len)
         self.assertEqual(entries[0][4], unpacked_blob.crc32)
         self.assertEqual(entries[0][4], unpacked_blob.crc32)
 
 
         self.assertEqual(entries[1][0], unpacked_delta.offset)
         self.assertEqual(entries[1][0], unpacked_delta.offset)
@@ -700,10 +695,8 @@ class TestPackStreamReader(TestCase):
         self.assertEqual(None, unpacked_delta.obj_type_num)
         self.assertEqual(None, unpacked_delta.obj_type_num)
         self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
         self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
                          unpacked_delta.delta_base)
                          unpacked_delta.delta_base)
-        self.assertEqual(create_delta('blob', 'blob1'),
-                         ''.join(unpacked_delta.decomp_chunks))
-        self.assertEqual(len(f.getvalue()) - 20 - unpacked_delta.offset,
-                         unpacked_delta.comp_len)
+        delta = create_delta('blob', 'blob1')
+        self.assertEqual(delta, ''.join(unpacked_delta.decomp_chunks))
         self.assertEqual(entries[1][4], unpacked_delta.crc32)
         self.assertEqual(entries[1][4], unpacked_delta.crc32)
 
 
     def test_read_objects_buffered(self):
     def test_read_objects_buffered(self):