Browse Source

Add helper code for applying deltas.

Jelmer Vernooij 16 years ago
parent
commit
8182d23a9a
2 changed files with 81 additions and 15 deletions
  1. 2 1
      dulwich/objects.py
  2. 79 14
      dulwich/pack.py

+ 2 - 1
dulwich/objects.py

@@ -52,6 +52,7 @@ def sha_to_hex(sha):
          len(hexsha)
          len(hexsha)
   return hexsha
   return hexsha
 
 
+
 class ShaFile(object):
 class ShaFile(object):
   """A git SHA file."""
   """A git SHA file."""
 
 
@@ -228,7 +229,7 @@ class Tree(ShaFile):
     return tree
     return tree
 
 
   def entries(self):
   def entries(self):
-    """Reutrn a list of tuples describing the tree entries"""
+    """Return a list of tuples describing the tree entries"""
     return self._entries
     return self._entries
 
 
   def _parse_text(self):
   def _parse_text(self):

+ 79 - 14
dulwich/pack.py

@@ -113,10 +113,6 @@ class PackIndex(object):
   the start and end offset and then bisect in to find if the value is present.
   the start and end offset and then bisect in to find if the value is present.
   """
   """
 
 
-  PACK_INDEX_HEADER_SIZE = 0x100 * 4
-  sha_bytes = 20
-  record_size = sha_bytes + 4
-
   def __init__(self, filename):
   def __init__(self, filename):
     """Create a pack index object.
     """Create a pack index object.
 
 
@@ -155,7 +151,7 @@ class PackIndex(object):
           CRC32 checksum (if known)."""
           CRC32 checksum (if known)."""
     if self.version == 1:
     if self.version == 1:
         (offset, name) = struct.unpack_from(">L20s", self._contents, 
         (offset, name) = struct.unpack_from(">L20s", self._contents, 
-            self.PACK_INDEX_HEADER_SIZE + (i * self.record_size))
+            (0x100 * 4) + (i * 24))
         return (name, offset, None)
         return (name, offset, None)
     else:
     else:
         return (self._unpack_name(i), self._unpack_offset(i), 
         return (self._unpack_name(i), self._unpack_offset(i), 
@@ -288,7 +284,7 @@ class PackData(object):
     self._filename = filename
     self._filename = filename
     assert os.path.exists(filename), "%s is not a packfile" % filename
     assert os.path.exists(filename), "%s is not a packfile" % filename
     self._size = os.path.getsize(filename)
     self._size = os.path.getsize(filename)
-    self._read_header()
+    self._header_size = self._read_header()
 
 
   def _read_header(self):
   def _read_header(self):
     f = open(self._filename, 'rb')
     f = open(self._filename, 'rb')
@@ -302,6 +298,7 @@ class PackData(object):
     (version,) = struct.unpack_from(">L", header, 4)
     (version,) = struct.unpack_from(">L", header, 4)
     assert version in (2, 3), "Version was %d" % version
     assert version in (2, 3), "Version was %d" % version
     (self._num_objects,) = struct.unpack_from(">L", header, 8)
     (self._num_objects,) = struct.unpack_from(">L", header, 8)
+    return 12 # Header size
 
 
   def __len__(self):
   def __len__(self):
       """Returns the number of objects in this pack."""
       """Returns the number of objects in this pack."""
@@ -315,6 +312,16 @@ class PackData(object):
     finally:
     finally:
         f.close()
         f.close()
 
 
+  def iterentries(self):
+    """Yields (name, offset, crc32 checksum)."""
+    offset = self._header_size
+    f = open(self._filename, 'rb')
+    f.close()
+    for i in len(self):
+        map = simple_mmap(f, offset, self._size-offset)
+        (type, raw, size, total_size) = self._unpack_object(map)
+        offset += total_size
+
   def check(self):
   def check(self):
     return (self.calculate_checksum() == self._stored_checksum)
     return (self.calculate_checksum() == self._stored_checksum)
 
 
@@ -332,11 +339,11 @@ class PackData(object):
     f = open(self._filename, 'rb')
     f = open(self._filename, 'rb')
     try:
     try:
       map = simple_mmap(f, offset, size-offset)
       map = simple_mmap(f, offset, size-offset)
-      return self._get_object_at(map)
+      return self._unpack_object(map)[:3]
     finally:
     finally:
       f.close()
       f.close()
 
 
-  def _get_object_at(self, map):
+  def _unpack_object(self, map):
     first_byte = ord(map[0])
     first_byte = ord(map[0])
     sign_extend = first_byte & 0x80
     sign_extend = first_byte & 0x80
     type = (first_byte >> 4) & 0x07
     type = (first_byte >> 4) & 0x07
@@ -349,7 +356,7 @@ class PackData(object):
       size += size_part << ((cur_offset * 7) + 4)
       size += size_part << ((cur_offset * 7) + 4)
       cur_offset += 1
       cur_offset += 1
     raw_base = cur_offset+1
     raw_base = cur_offset+1
-    return type, map[raw_base:], size
+    return type, map[raw_base:], size, cur_offset+size
 
 
 
 
 class SHA1Writer(object):
 class SHA1Writer(object):
@@ -415,6 +422,62 @@ def write_pack_index_v1(filename, entries, pack_checksum):
     f.close()
     f.close()
 
 
 
 
def apply_delta(src_buf, delta):
    """Apply a git binary delta to a source buffer.

    Based on the similar function in git's patch-delta.c.

    The delta starts with two variable-length integers (7 bits per byte,
    least significant group first, high bit set on every byte but the last):
    the expected size of the source and the size of the result. They are
    followed by a sequence of opcodes:

    * high bit set: copy from the source. Bits 0x01-0x08 say which of the
      four offset bytes follow, bits 0x10-0x40 which of the three size
      bytes follow; a size of 0 means 0x10000.
    * 1-127: insert that many literal bytes, which follow the opcode.
    * 0: invalid.

    :param src_buf: The base object contents (byte string or buffer).
    :param delta: The delta to apply (byte string or buffer).
    :return: The reconstructed contents as a byte string.
    :raise AssertionError: If the delta is truncated, contains opcode 0,
        refers to data outside the source, or does not produce exactly the
        number of bytes announced in its header.
    """
    # bytearray yields integers on indexing on both Python 2 and 3.
    data = bytearray(delta)
    end = len(data)

    def read_size(pos):
        """Decode one variable-length size; return (size, next position)."""
        size = 0
        shift = 0
        while True:
            if pos >= end:
                raise AssertionError("truncated delta header")
            byte = data[pos]
            pos += 1
            size |= (byte & 0x7f) << shift
            shift += 7
            if not byte & 0x80:
                return size, pos

    def read_operand(cmd, pos, flags):
        """Assemble a little-endian operand from the bytes flagged in cmd."""
        value = 0
        for flag, shift in flags:
            if cmd & flag:
                if pos >= end:
                    raise AssertionError("truncated copy opcode")
                value |= data[pos] << shift
                pos += 1
        return value, pos

    src_size, pos = read_size(0)
    dest_size, pos = read_size(pos)
    # patch-delta.c rejects a delta made for a different base; without this
    # check an out-of-range copy would be silently truncated by slicing.
    if src_size != len(src_buf):
        raise AssertionError("source size mismatch")

    out = bytearray()
    while pos < end:
        cmd = data[pos]
        pos += 1
        if cmd & 0x80:
            cp_off, pos = read_operand(
                cmd, pos, ((0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)))
            cp_size, pos = read_operand(
                cmd, pos, ((0x10, 0), (0x20, 8), (0x40, 16)))
            if cp_size == 0:
                cp_size = 0x10000
            if cp_off + cp_size > src_size or cp_size > dest_size:
                raise AssertionError("copy out of bounds")
            out += src_buf[cp_off:cp_off + cp_size]
            dest_size -= cp_size
        elif cmd != 0:
            if cmd > dest_size or pos + cmd > end:
                raise AssertionError("insert out of bounds")
            out += data[pos:pos + cmd]
            pos += cmd
            dest_size -= cmd
        else:
            raise AssertionError("Invalid opcode 0")

    if dest_size != 0:
        raise AssertionError("dest size not empty")

    return bytes(out)
+
+
 def write_pack_index_v2(filename, entries, pack_checksum):
 def write_pack_index_v2(filename, entries, pack_checksum):
     """Write a new pack index file.
     """Write a new pack index file.
 
 
@@ -475,8 +538,7 @@ class Pack(object):
         """Check whether this pack contains a particular SHA1."""
         """Check whether this pack contains a particular SHA1."""
         return (self._idx.object_index(sha1) is not None)
         return (self._idx.object_index(sha1) is not None)
 
 
-    def __getitem__(self, sha1):
-        """Retrieve the specified SHA1."""
+    def _get_text(self, sha1):
         offset = self._idx.object_index(sha1)
         offset = self._idx.object_index(sha1)
         if offset is None:
         if offset is None:
             raise KeyError(sha1)
             raise KeyError(sha1)
@@ -501,6 +563,9 @@ class Pack(object):
             # thing then just use it.
             # thing then just use it.
             uncomp = _decompress(raw)
             uncomp = _decompress(raw)
             assert len(uncomp) == size
             assert len(uncomp) == size
-            obj = ShaFile.from_raw_string(type, uncomp)
-            return obj
-        return 
+            return type, uncomp
+
+    def __getitem__(self, sha1):
+        """Retrieve the specified SHA1."""
+        type, uncomp = self._get_text(sha1)
+        return ShaFile.from_raw_string(type, uncomp)