Browse Source

Rewrite PackData.resolve_object to iterate rather than recurse.

Delta chains can get long, and recursion doesn't buy us much here.
William Grant 9 năm trước cách đây
mục cha
commit
4589d24832
1 tập tin đã thay đổi với 42 bổ sung27 xóa
  1. 42 27
      dulwich/pack.py

+ 42 - 27
dulwich/pack.py

@@ -1061,33 +1061,48 @@ class PackData(object):
 
         :return: Tuple with object type and contents.
         """
-        if type not in DELTA_TYPES:
-            return type, obj
-
-        if get_ref is None:
-            get_ref = self.get_ref
-        if type == OFS_DELTA:
-            (delta_offset, delta) = obj
-            # TODO: clean up asserts and replace with nicer error messages
-            assert isinstance(offset, int) or isinstance(offset, long)
-            assert isinstance(delta_offset, int) or isinstance(offset, long)
-            base_offset = offset-delta_offset
-            type, base_obj = self.get_object_at(base_offset)
-            assert isinstance(type, int)
-        elif type == REF_DELTA:
-            (basename, delta) = obj
-            assert isinstance(basename, bytes) and len(basename) == 20
-            base_offset, type, base_obj = get_ref(basename)
-            assert isinstance(type, int)
-        type, base_chunks = self.resolve_object(base_offset, type, base_obj)
-        chunks = apply_delta(base_chunks, delta)
-        # TODO(dborowitz): This can result in poor performance if large base
-        # objects are separated from deltas in the pack. We should reorganize
-        # so that we apply deltas to all objects in a chain one after the other
-        # to optimize cache performance.
-        if offset is not None:
-            self._offset_cache[offset] = type, chunks
-        return type, chunks
+        # Walk down the delta chain, building a stack of deltas to reach
+        # the requested object.
+        base_offset = offset
+        base_type = type
+        base_obj = obj
+        delta_stack = []
+        while base_type in DELTA_TYPES:
+            prev_offset = base_offset
+            if get_ref is None:
+                get_ref = self.get_ref
+            if base_type == OFS_DELTA:
+                (delta_offset, delta) = base_obj
+                # TODO: clean up asserts and replace with nicer error messages
+                assert (
+                    isinstance(base_offset, int)
+                    or isinstance(base_offset, long))
+                assert (
+                    isinstance(delta_offset, int)
+                    or isinstance(base_offset, long))
+                base_offset = base_offset - delta_offset
+                base_type, base_obj = self.get_object_at(base_offset)
+                assert isinstance(base_type, int)
+            elif base_type == REF_DELTA:
+                (basename, delta) = base_obj
+                assert isinstance(basename, bytes) and len(basename) == 20
+                base_offset, base_type, base_obj = get_ref(basename)
+                assert isinstance(base_type, int)
+            delta_stack.append((prev_offset, base_type, delta))
+
+        # Now grab the base object (mustn't be a delta) and apply the
+        # deltas all the way up the stack.
+        chunks = base_obj
+        for prev_offset, delta_type, delta in reversed(delta_stack):
+            chunks = apply_delta(chunks, delta)
+            # TODO(dborowitz): This can result in poor performance if
+            # large base objects are separated from deltas in the pack.
+            # We should reorganize so that we apply deltas to all
+            # objects in a chain one after the other to optimize cache
+            # performance.
+            if prev_offset is not None:
+                self._offset_cache[prev_offset] = base_type, chunks
+        return base_type, chunks
 
     def iterobjects(self, progress=None, compute_crc32=True):
         self._file.seek(self._header_size)