|
@@ -51,6 +51,9 @@ from dulwich.errors import (
|
|
ApplyDeltaError,
|
|
ApplyDeltaError,
|
|
ChecksumMismatch,
|
|
ChecksumMismatch,
|
|
)
|
|
)
|
|
|
|
+from dulwich.lru_cache import (
|
|
|
|
+ LRUSizeCache,
|
|
|
|
+ )
|
|
from dulwich.objects import (
|
|
from dulwich.objects import (
|
|
ShaFile,
|
|
ShaFile,
|
|
hex_to_sha,
|
|
hex_to_sha,
|
|
@@ -93,7 +96,7 @@ def iter_sha1(iter):
|
|
return sha1.hexdigest()
|
|
return sha1.hexdigest()
|
|
|
|
|
|
|
|
|
|
-MAX_MMAP_SIZE = 256 * 1024 * 1024
|
|
|
|
|
|
+MAX_MMAP_SIZE = 1024 * 1024 * 1024
|
|
|
|
|
|
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
|
|
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
|
|
"""Simple wrapper for mmap() which always supports the offset parameter.
|
|
"""Simple wrapper for mmap() which always supports the offset parameter.
|
|
@@ -134,28 +137,6 @@ def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
|
|
return ArraySkipper(mem, offset)
|
|
return ArraySkipper(mem, offset)
|
|
|
|
|
|
|
|
|
|
-def resolve_object(offset, type, obj, get_ref, get_offset):
|
|
|
|
- """Resolve an object, possibly resolving deltas when necessary."""
|
|
|
|
- if not type in (6, 7): # Not a delta
|
|
|
|
- return type, obj
|
|
|
|
-
|
|
|
|
- if type == 6: # offset delta
|
|
|
|
- (delta_offset, delta) = obj
|
|
|
|
- assert isinstance(delta_offset, int)
|
|
|
|
- assert isinstance(delta, str)
|
|
|
|
- offset = offset-delta_offset
|
|
|
|
- type, base_obj = get_offset(offset)
|
|
|
|
- assert isinstance(type, int)
|
|
|
|
- elif type == 7: # ref delta
|
|
|
|
- (basename, delta) = obj
|
|
|
|
- assert isinstance(basename, str) and len(basename) == 20
|
|
|
|
- assert isinstance(delta, str)
|
|
|
|
- type, base_obj = get_ref(basename)
|
|
|
|
- assert isinstance(type, int)
|
|
|
|
- type, base_text = resolve_object(offset, type, base_obj, get_ref, get_offset)
|
|
|
|
- return type, apply_delta(base_text, delta)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
class PackIndex(object):
|
|
class PackIndex(object):
|
|
"""An index in to a packfile.
|
|
"""An index in to a packfile.
|
|
|
|
|
|
@@ -367,6 +348,13 @@ def unpack_object(map):
|
|
return type, uncomp, comp_len+raw_base
|
|
return type, uncomp, comp_len+raw_base
|
|
|
|
|
|
|
|
|
|
|
|
+def compute_object_size((num, obj)):
|
|
|
|
+ if num in (6, 7):
|
|
|
|
+ return len(obj[1])
|
|
|
|
+ assert isinstance(obj, str)
|
|
|
|
+ return len(obj)
|
|
|
|
+
|
|
|
|
+
|
|
class PackData(object):
|
|
class PackData(object):
|
|
"""The data contained in a packfile.
|
|
"""The data contained in a packfile.
|
|
|
|
|
|
@@ -409,6 +397,8 @@ class PackData(object):
|
|
self._header_size = 12
|
|
self._header_size = 12
|
|
assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size)
|
|
assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size)
|
|
self._read_header()
|
|
self._read_header()
|
|
|
|
+ self._offset_cache = LRUSizeCache(1024*1024*100,
|
|
|
|
+ compute_size=compute_object_size)
|
|
|
|
|
|
def _read_header(self):
|
|
def _read_header(self):
|
|
f = open(self._filename, 'rb')
|
|
f = open(self._filename, 'rb')
|
|
@@ -432,6 +422,39 @@ class PackData(object):
|
|
return make_sha(map[:-20]).digest()
|
|
return make_sha(map[:-20]).digest()
|
|
finally:
|
|
finally:
|
|
f.close()
|
|
f.close()
|
|
|
|
+
|
|
|
|
+ def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
|
|
|
|
+ """Resolve an object, possibly resolving deltas when necessary.
|
|
|
|
+
|
|
|
|
+ :return: Tuple with object type and contents.
|
|
|
|
+ """
|
|
|
|
+ if not type in (6, 7): # Not a delta
|
|
|
|
+ return type, obj
|
|
|
|
+
|
|
|
|
+ if get_offset is None:
|
|
|
|
+ get_offset = self.get_object_at
|
|
|
|
+
|
|
|
|
+ if type == 6: # offset delta
|
|
|
|
+ (delta_offset, delta) = obj
|
|
|
|
+ assert isinstance(delta_offset, int)
|
|
|
|
+ assert isinstance(delta, str)
|
|
|
|
+ base_offset = offset-delta_offset
|
|
|
|
+ type, base_obj = get_offset(base_offset)
|
|
|
|
+ assert isinstance(type, int)
|
|
|
|
+ elif type == 7: # ref delta
|
|
|
|
+ (basename, delta) = obj
|
|
|
|
+ assert isinstance(basename, str) and len(basename) == 20
|
|
|
|
+ assert isinstance(delta, str)
|
|
|
|
+ type, base_obj = get_ref(basename)
|
|
|
|
+ assert isinstance(type, int)
|
|
|
|
+ # Can't be a ofs delta, as we wouldn't know the base offset
|
|
|
|
+ assert type != 6
|
|
|
|
+ base_offset = None
|
|
|
|
+ type, base_text = self.resolve_object(base_offset, type, base_obj, get_ref)
|
|
|
|
+ if base_offset is not None:
|
|
|
|
+ self._offset_cache[base_offset] = type, base_text
|
|
|
|
+ ret = (type, apply_delta(base_text, delta))
|
|
|
|
+ return ret
|
|
|
|
|
|
def iterobjects(self):
|
|
def iterobjects(self):
|
|
offset = self._header_size
|
|
offset = self._header_size
|
|
@@ -446,7 +469,6 @@ class PackData(object):
|
|
|
|
|
|
def iterentries(self, ext_resolve_ref=None):
|
|
def iterentries(self, ext_resolve_ref=None):
|
|
found = {}
|
|
found = {}
|
|
- at = {}
|
|
|
|
postponed = defaultdict(list)
|
|
postponed = defaultdict(list)
|
|
class Postpone(Exception):
|
|
class Postpone(Exception):
|
|
"""Raised to postpone delta resolving."""
|
|
"""Raised to postpone delta resolving."""
|
|
@@ -463,13 +485,11 @@ class PackData(object):
|
|
todo = list(self.iterobjects())
|
|
todo = list(self.iterobjects())
|
|
while todo:
|
|
while todo:
|
|
(offset, type, obj, crc32) = todo.pop(0)
|
|
(offset, type, obj, crc32) = todo.pop(0)
|
|
- at[offset] = (type, obj)
|
|
|
|
assert isinstance(offset, int)
|
|
assert isinstance(offset, int)
|
|
assert isinstance(type, int)
|
|
assert isinstance(type, int)
|
|
assert isinstance(obj, tuple) or isinstance(obj, str)
|
|
assert isinstance(obj, tuple) or isinstance(obj, str)
|
|
try:
|
|
try:
|
|
- type, obj = resolve_object(offset, type, obj, get_ref_text,
|
|
|
|
- at.__getitem__)
|
|
|
|
|
|
+ type, obj = self.resolve_object(offset, type, obj, get_ref_text)
|
|
except Postpone, (sha, ):
|
|
except Postpone, (sha, ):
|
|
postponed[sha].append((offset, type, obj))
|
|
postponed[sha].append((offset, type, obj))
|
|
else:
|
|
else:
|
|
@@ -507,6 +527,8 @@ class PackData(object):
|
|
then the packfile can be asked directly for that object using this
|
|
then the packfile can be asked directly for that object using this
|
|
function.
|
|
function.
|
|
"""
|
|
"""
|
|
|
|
+ if offset in self._offset_cache:
|
|
|
|
+ return self._offset_cache[offset]
|
|
assert isinstance(offset, long) or isinstance(offset, int),\
|
|
assert isinstance(offset, long) or isinstance(offset, int),\
|
|
"offset was %r" % offset
|
|
"offset was %r" % offset
|
|
assert offset >= self._header_size
|
|
assert offset >= self._header_size
|
|
@@ -516,7 +538,8 @@ class PackData(object):
|
|
f = open(self._filename, 'rb')
|
|
f = open(self._filename, 'rb')
|
|
try:
|
|
try:
|
|
map = simple_mmap(f, offset, size-offset)
|
|
map = simple_mmap(f, offset, size-offset)
|
|
- return unpack_object(map)[:2]
|
|
|
|
|
|
+ ret = unpack_object(map)[:2]
|
|
|
|
+ return ret
|
|
finally:
|
|
finally:
|
|
f.close()
|
|
f.close()
|
|
|
|
|
|
@@ -884,8 +907,7 @@ class Pack(object):
|
|
if isinstance(offset, long):
|
|
if isinstance(offset, long):
|
|
offset = int(offset)
|
|
offset = int(offset)
|
|
assert isinstance(offset, int)
|
|
assert isinstance(offset, int)
|
|
- return resolve_object(offset, type, obj, resolve_ref,
|
|
|
|
- self.data.get_object_at)
|
|
|
|
|
|
+ return self.data.resolve_object(offset, type, obj, resolve_ref)
|
|
|
|
|
|
def __getitem__(self, sha1):
|
|
def __getitem__(self, sha1):
|
|
"""Retrieve the specified SHA1."""
|
|
"""Retrieve the specified SHA1."""
|
|
@@ -899,9 +921,7 @@ class Pack(object):
|
|
for offset, type, obj, crc32 in self.data.iterobjects():
|
|
for offset, type, obj, crc32 in self.data.iterobjects():
|
|
assert isinstance(offset, int)
|
|
assert isinstance(offset, int)
|
|
yield ShaFile.from_raw_string(
|
|
yield ShaFile.from_raw_string(
|
|
- *resolve_object(offset, type, obj,
|
|
|
|
- get_raw,
|
|
|
|
- self.data.get_object_at))
|
|
|
|
|
|
+ *self.data.resolve_object(offset, type, obj, get_raw))
|
|
|
|
|
|
|
|
|
|
def load_packs(path):
|
|
def load_packs(path):
|