Prechádzať zdrojové kódy

merge support for use with in-memory packs.

Jelmer Vernooij 15 rokov pred
rodič
commit
6196883bb2
2 zmenil súbory, kde vykonal 83 pridanie a 68 odobranie
  1. 81 67
      dulwich/pack.py
  2. 2 1
      dulwich/tests/test_pack.py

+ 81 - 67
dulwich/pack.py

@@ -72,24 +72,21 @@ supports_mmap_offset = (sys.version_info[0] >= 3 or
         (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
         (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
 
 
 
 
-def take_msb_bytes(map, offset):
+def take_msb_bytes(read):
     """Read bytes marked with most significant bit.
     """Read bytes marked with most significant bit.
     
     
-    :param map: The buffer.
-    :param offset: Offset in the buffer at which to start reading.
+    :param read: Read function
     """
     """
     ret = []
     ret = []
     while len(ret) == 0 or ret[-1] & 0x80:
     while len(ret) == 0 or ret[-1] & 0x80:
-        ret.append(ord(map[offset]))
-        offset += 1
+        ret.append(ord(read(1)))
     return ret
     return ret
 
 
 
 
-def read_zlib_chunks(data, offset):
+def read_zlib_chunks(read, buffer_size=4096):
     """Read chunks of zlib data from a buffer.
     """Read chunks of zlib data from a buffer.
     
     
-    :param data: Buffer to read from
-    :param offset: Offset at which to start reading
+    :param read: Read function
     :return: Tuple with list of chunks and length of 
     :return: Tuple with list of chunks and length of 
         compressed data length
         compressed data length
     """
     """
@@ -97,9 +94,8 @@ def read_zlib_chunks(data, offset):
     ret = []
     ret = []
     fed = 0
     fed = 0
     while obj.unused_data == "":
     while obj.unused_data == "":
-        base = offset+fed
-        add = data[base:base+1024]
-        if len(add) < 1024:
+        add = read(buffer_size)
+        if len(add) < buffer_size:
             add += "Z"
             add += "Z"
         fed += len(add)
         fed += len(add)
         ret.append(obj.decompress(add))
         ret.append(obj.decompress(add))
@@ -107,15 +103,14 @@ def read_zlib_chunks(data, offset):
     return ret, comp_len
     return ret, comp_len
 
 
 
 
-def read_zlib(data, offset, dec_size):
+def read_zlib(read, dec_size):
     """Read zlib-compressed data from a buffer.
     """Read zlib-compressed data from a buffer.
     
     
-    :param data: Buffer
-    :param offset: Offset in the buffer at which to read
+    :param read: Read function
     :param dec_size: Size of the decompressed buffer
     :param dec_size: Size of the decompressed buffer
     :return: Uncompressed buffer and compressed buffer length.
     :return: Uncompressed buffer and compressed buffer length.
     """
     """
-    ret, comp_len = read_zlib_chunks(data, offset)
+    ret, comp_len = read_zlib_chunks(read)
     x = "".join(ret)
     x = "".join(ret)
     assert len(x) == dec_size
     assert len(x) == dec_size
     return x, comp_len
     return x, comp_len
@@ -133,35 +128,31 @@ def iter_sha1(iter):
     return sha1.hexdigest()
     return sha1.hexdigest()
 
 
 
 
-def simple_mmap(f, offset, size):
-    """Simple wrapper for mmap() which always supports the offset parameter.
+def load_pack_index(path):
+    """Load an index file by path.
 
 
-    :param f: File object.
-    :param offset: Offset in the file, from the beginning of the file.
-    :param size: Size of the mmap'ed area
-    :param access: Access mechanism.
-    :return: MMAP'd area.
+    :param filename: Path to the index file
     """
     """
-    mem = mmap.mmap(f.fileno(), size+offset, access=mmap.ACCESS_READ)
-    return mem, offset
+    f = GitFile(path, 'rb')
+    return load_pack_index_file(path, f)
 
 
 
 
-def load_pack_index(filename):
-    """Load an index file by path.
+def load_pack_index_file(path, f):
+    """Load an index file from a file-like object.
 
 
-    :param filename: Path to the index file
+    :param path: Path for the index file
+    :param f: File-like object
     """
     """
-    f = GitFile(filename, 'rb')
     if f.read(4) == '\377tOc':
     if f.read(4) == '\377tOc':
         version = struct.unpack(">L", f.read(4))[0]
         version = struct.unpack(">L", f.read(4))[0]
         if version == 2:
         if version == 2:
             f.seek(0)
             f.seek(0)
-            return PackIndex2(filename, file=f)
+            return PackIndex2(path, file=f)
         else:
         else:
             raise KeyError("Unknown pack index format %d" % version)
             raise KeyError("Unknown pack index format %d" % version)
     else:
     else:
         f.seek(0)
         f.seek(0)
-        return PackIndex1(filename, file=f)
+        return PackIndex1(path, file=f)
 
 
 
 
 def bisect_find_sha(start, end, sha, unpack_name):
 def bisect_find_sha(start, end, sha, unpack_name):
@@ -201,7 +192,7 @@ class PackIndex(object):
     the start and end offset and then bisect in to find if the value is present.
     the start and end offset and then bisect in to find if the value is present.
     """
     """
   
   
-    def __init__(self, filename, file=None):
+    def __init__(self, filename, file=None, size=None):
         """Create a pack index object.
         """Create a pack index object.
     
     
         Provide it with the name of the index file to consider, and it will map
         Provide it with the name of the index file to consider, and it will map
@@ -210,13 +201,23 @@ class PackIndex(object):
         self._filename = filename
         self._filename = filename
         # Take the size now, so it can be checked each time we map the file to
         # Take the size now, so it can be checked each time we map the file to
         # ensure that it hasn't changed.
         # ensure that it hasn't changed.
-        self._size = os.path.getsize(filename)
         if file is None:
         if file is None:
             self._file = GitFile(filename, 'rb')
             self._file = GitFile(filename, 'rb')
         else:
         else:
             self._file = file
             self._file = file
-        self._contents, map_offset = simple_mmap(self._file, 0, self._size)
-        assert map_offset == 0
+        fileno = getattr(self._file, 'fileno', None)
+        if fileno is not None:
+            fd = self._file.fileno()
+            if size is None:
+                self._size = os.fstat(fd).st_size
+            else:
+                self._size = size
+            self._contents = mmap.mmap(fd, self._size,
+                access=mmap.ACCESS_READ)
+        else:
+            self._file.seek(0)
+            self._contents = self._file.read()
+            self._size = len(self._contents)
   
   
     def __eq__(self, other):
     def __eq__(self, other):
         if not isinstance(other, PackIndex):
         if not isinstance(other, PackIndex):
@@ -347,8 +348,8 @@ class PackIndex(object):
 class PackIndex1(PackIndex):
 class PackIndex1(PackIndex):
     """Version 1 Pack Index."""
     """Version 1 Pack Index."""
 
 
-    def __init__(self, filename, file=None):
-        PackIndex.__init__(self, filename, file)
+    def __init__(self, filename, file=None, size=None):
+        PackIndex.__init__(self, filename, file, size)
         self.version = 1
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
         self._fan_out_table = self._read_fan_out_table(0)
 
 
@@ -373,8 +374,8 @@ class PackIndex1(PackIndex):
 class PackIndex2(PackIndex):
 class PackIndex2(PackIndex):
     """Version 2 Pack Index."""
     """Version 2 Pack Index."""
 
 
-    def __init__(self, filename, file=None):
-        PackIndex.__init__(self, filename, file)
+    def __init__(self, filename, file=None, size=None):
+        PackIndex.__init__(self, filename, file, size)
         assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
         assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
         (self.version, ) = unpack_from(">L", self._contents, 4)
         (self.version, ) = unpack_from(">L", self._contents, 4)
         assert self.version == 2, "Version was %d" % self.version
         assert self.version == 2, "Version was %d" % self.version
@@ -414,36 +415,37 @@ def read_pack_header(f):
     return (version, num_objects)
     return (version, num_objects)
 
 
 
 
-def unpack_object(map, offset=0):
+def unpack_object(read):
     """Unpack a Git object.
     """Unpack a Git object.
 
 
     :return: tuple with type, uncompressed data and compressed size
     :return: tuple with type, uncompressed data and compressed size
     """
     """
-    bytes = take_msb_bytes(map, offset)
+    bytes = take_msb_bytes(read)
     type = (bytes[0] >> 4) & 0x07
     type = (bytes[0] >> 4) & 0x07
     size = bytes[0] & 0x0f
     size = bytes[0] & 0x0f
     for i, byte in enumerate(bytes[1:]):
     for i, byte in enumerate(bytes[1:]):
         size += (byte & 0x7f) << ((i * 7) + 4)
         size += (byte & 0x7f) << ((i * 7) + 4)
     raw_base = len(bytes)
     raw_base = len(bytes)
     if type == 6: # offset delta
     if type == 6: # offset delta
-        bytes = take_msb_bytes(map, raw_base + offset)
+        bytes = take_msb_bytes(read)
+        raw_base += len(bytes)
         assert not (bytes[-1] & 0x80)
         assert not (bytes[-1] & 0x80)
         delta_base_offset = bytes[0] & 0x7f
         delta_base_offset = bytes[0] & 0x7f
         for byte in bytes[1:]:
         for byte in bytes[1:]:
             delta_base_offset += 1
             delta_base_offset += 1
             delta_base_offset <<= 7
             delta_base_offset <<= 7
             delta_base_offset += (byte & 0x7f)
             delta_base_offset += (byte & 0x7f)
-        raw_base+=len(bytes)
-        uncomp, comp_len = read_zlib(map, offset + raw_base, size)
+        uncomp, comp_len = read_zlib(read, size)
         assert size == len(uncomp)
         assert size == len(uncomp)
         return type, (delta_base_offset, uncomp), comp_len+raw_base
         return type, (delta_base_offset, uncomp), comp_len+raw_base
     elif type == 7: # ref delta
     elif type == 7: # ref delta
-        basename = map[offset+raw_base:offset+raw_base+20]
-        uncomp, comp_len = read_zlib(map, offset+raw_base+20, size)
+        basename = map.read(20)
+        raw_base += 20
+        uncomp, comp_len = read_zlib(read, size)
         assert size == len(uncomp)
         assert size == len(uncomp)
-        return type, (basename, uncomp), comp_len+raw_base+20
+        return type, (basename, uncomp), comp_len+raw_base
     else:
     else:
-        uncomp, comp_len = read_zlib(map, offset+raw_base, size)
+        uncomp, comp_len = read_zlib(read, size)
         assert len(uncomp) == size
         assert len(uncomp) == size
         return type, uncomp, comp_len+raw_base
         return type, uncomp, comp_len+raw_base
 
 
@@ -484,7 +486,7 @@ class PackData(object):
     It will all just throw a zlib or KeyError.
     It will all just throw a zlib or KeyError.
     """
     """
   
   
-    def __init__(self, filename):
+    def __init__(self, filename, file=None, size=None):
         """Create a PackData object that represents the pack in the given filename.
         """Create a PackData object that represents the pack in the given filename.
     
     
         The file must exist and stay readable until the object is disposed of. It
         The file must exist and stay readable until the object is disposed of. It
@@ -494,14 +496,28 @@ class PackData(object):
         mmap implementation is flawed.
         mmap implementation is flawed.
         """
         """
         self._filename = filename
         self._filename = filename
-        self._size = os.path.getsize(filename)
+        if size is None:
+            self._size = os.path.getsize(filename)
+        else:
+            self._size = size
         self._header_size = 12
         self._header_size = 12
         assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size)
         assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size)
-        self._file = GitFile(self._filename, 'rb')
+        if file is None:
+            self._file = GitFile(self._filename, 'rb')
+        else:
+            self._file = file
         self._read_header()
         self._read_header()
         self._offset_cache = LRUSizeCache(1024*1024*20, 
         self._offset_cache = LRUSizeCache(1024*1024*20, 
             compute_size=_compute_object_size)
             compute_size=_compute_object_size)
 
 
+    @classmethod
+    def from_file(cls, file, size):
+        return cls(str(file), file=file, size=size)
+
+    @classmethod
+    def from_path(cls, path):
+        return cls(filename=path)
+
     def close(self):
     def close(self):
         self._file.close()
         self._file.close()
   
   
@@ -519,11 +535,14 @@ class PackData(object):
 
 
         :return: 20-byte binary SHA1 digest
         :return: 20-byte binary SHA1 digest
         """
         """
-        map, map_offset = simple_mmap(self._file, 0, self._size - 20)
-        try:
-            return make_sha(map[map_offset:self._size-20]).digest()
-        finally:
-            map.close()
+        s = make_sha()
+        self._file.seek(0)
+        todo = self._size - 20
+        while todo > 0:
+            x = self._file.read(min(todo, 1<<16))
+            s.update(x)
+            todo -= len(x)
+        return s.digest()
 
 
     def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
     def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
         """Resolve an object, possibly resolving deltas when necessary.
         """Resolve an object, possibly resolving deltas when necessary.
@@ -566,10 +585,7 @@ class PackData(object):
                 self.i = 0
                 self.i = 0
                 self.offset = pack._header_size
                 self.offset = pack._header_size
                 self.num = len(pack)
                 self.num = len(pack)
-                self.map, _ = simple_mmap(pack._file, 0, pack._size)
-
-            def __del__(self):
-                self.map.close()
+                self.map = pack._file
 
 
             def __iter__(self):
             def __iter__(self):
                 return self
                 return self
@@ -580,8 +596,10 @@ class PackData(object):
             def next(self):
             def next(self):
                 if self.i == self.num:
                 if self.i == self.num:
                     raise StopIteration
                     raise StopIteration
-                (type, obj, total_size) = unpack_object(self.map, self.offset)
-                crc32 = zlib.crc32(self.map[self.offset:self.offset+total_size]) & 0xffffffff
+                self.map.seek(self.offset)
+                (type, obj, total_size) = unpack_object(self.map.read)
+                self.map.seek(self.offset)
+                crc32 = zlib.crc32(self.map.read(total_size)) & 0xffffffff
                 ret = (self.offset, type, obj, crc32)
                 ret = (self.offset, type, obj, crc32)
                 self.offset += total_size
                 self.offset += total_size
                 if progress:
                 if progress:
@@ -705,12 +723,8 @@ class PackData(object):
         assert isinstance(offset, long) or isinstance(offset, int),\
         assert isinstance(offset, long) or isinstance(offset, int),\
                 "offset was %r" % offset
                 "offset was %r" % offset
         assert offset >= self._header_size
         assert offset >= self._header_size
-        map, map_offset = simple_mmap(self._file, offset, self._size-offset)
-        try:
-            ret = unpack_object(map, map_offset)[:2]
-            return ret
-        finally:
-            map.close()
+        self._file.seek(offset)
+        return unpack_object(self._file.read)[:2]
 
 
 
 
 class SHA1Reader(object):
 class SHA1Reader(object):

+ 2 - 1
dulwich/tests/test_pack.py

@@ -21,6 +21,7 @@
 """Tests for Dulwich packs."""
 """Tests for Dulwich packs."""
 
 
 
 
+from cStringIO import StringIO
 import os
 import os
 import unittest
 import unittest
 
 
@@ -283,5 +284,5 @@ class ZlibTests(unittest.TestCase):
 
 
     def test_simple_decompress(self):
     def test_simple_decompress(self):
         self.assertEquals(("tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument.", 158), 
         self.assertEquals(("tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument.", 158), 
-        read_zlib(TEST_COMP1, 0, 229))
+        read_zlib(StringIO(TEST_COMP1).read, 229))