15 éve · 89ec0c85bb
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -1,17 +1,17 @@
 
				 # pack.py -- For dealing wih packed git objects.
			
 
				 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
			
 
				 # Copryight (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
			
 
				-# 
			
 
				+#
			
 
				 # This program is free software; you can redistribute it and/or
			
 
				 # modify it under the terms of the GNU General Public License
			
 
				 # as published by the Free Software Foundation; version 2
			
 
				 # of the License or (at your option) a later version.
			
 
				-# 
			
 
				+#
			
 
				 # This program is distributed in the hope that it will be useful,
			
 
				 # but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				 # GNU General Public License for more details.
			
 
				-# 
			
 
				+#
			
 
				 # You should have received a copy of the GNU General Public License
			
 
				 # along with this program; if not, write to the Free Software
			
 
				 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
			
@@ -75,7 +75,7 @@ supports_mmap_offset = (sys.version_info[0] >= 3 or
 
				 
			
 
				 def take_msb_bytes(read):
			
 
				     """Read bytes marked with most significant bit.
			
 
				-    
			
 
				+
			
 
				     :param read: Read function
			
 
				     """
			
 
				     ret = []
			
@@ -84,29 +84,42 @@ def take_msb_bytes(read):
 
				     return ret
			
 
				 
			
 
				 
			
 
				-def read_zlib_chunks(read, buffer_size=4096):
			
 
				-    """Read chunks of zlib data from a buffer.
			
 
				-    
			
 
				-    :param read: Read function
			
 
				-    :return: Tuple with list of chunks, length of 
			
 
				-        compressed data length and unused read data
			
 
				+def read_zlib_chunks(read_some, dec_size, buffer_size=4096):
			
 
				+    """Read zlib data from a buffer.
			
 
				+
			
 
				+    This function requires that the buffer have additional data following the
			
 
				+    compressed data, which is guaranteed to be the case for git pack files.
			
 
				+
			
 
				+    :param read_some: Read function that returns at least one byte, but may
			
 
				+        return less than the requested size
			
 
				+    :param dec_size: Expected size of the decompressed buffer
			
 
				+    :param buffer_size: Size of the read buffer
			
 
				+    :return: Tuple with list of chunks, length of compressed data and
			
 
				+        unused read data.
			
 
				+    :raise zlib.error: if a decompression error occurred.
			
 
				     """
			
 
				+    if dec_size <= -1:
			
 
				+        raise ValueError("non-negative zlib data stream size expected")
			
 
				     obj = zlib.decompressobj()
			
 
				     ret = []
			
 
				     fed = 0
			
 
				+    size = 0
			
 
				     while obj.unused_data == "":
			
 
				-        add = read(buffer_size)
			
 
				-        if len(add) < buffer_size:
			
 
				-            add += "Z"
			
 
				+        add = read_some(buffer_size)
			
 
				+        if not add:
			
 
				+            raise zlib.error("EOF before end of zlib stream")
			
 
				         fed += len(add)
			
 
				-        ret.append(obj.decompress(add))
			
 
				-    comp_len = fed-len(obj.unused_data)
			
 
				+        decomp = obj.decompress(add)
			
 
				+        size += len(decomp)
			
 
				+        ret.append(decomp)
			
 
				+    if size != dec_size:
			
 
				+        raise zlib.error("decompressed data does not match expected size")
			
 
				+    comp_len = fed - len(obj.unused_data)
			
 
				     return ret, comp_len, obj.unused_data
			
 
				 
			
 
				-
			
 
				 def iter_sha1(iter):
			
 
				     """Return the hexdigest of the SHA1 over a set of names.
			
 
				-    
			
 
				+
			
 
				     :param iter: Iterator over string objects
			
 
				     :return: 40-byte hex sha1 digest
			
 
				     """
			
@@ -120,6 +133,7 @@ def load_pack_index(path):
 
				     """Load an index file by path.
			
 
				 
			
 
				     :param filename: Path to the index file
			
 
				+    :return: A PackIndex loaded from the given path
			
 
				     """
			
 
				     f = GitFile(path, 'rb')
			
 
				     return load_pack_index_file(path, f)
			
@@ -149,6 +163,7 @@ def load_pack_index_file(path, f):
 
				 
			
 
				     :param path: Path for the index file
			
 
				     :param f: File-like object
			
 
				+    :return: A PackIndex loaded from the given file
			
 
				     """
			
 
				     contents, size = _load_file_contents(f)
			
 
				     if contents[:4] == '\377tOc':
			
@@ -164,7 +179,7 @@ def load_pack_index_file(path, f):
 
				 
			
 
				 def bisect_find_sha(start, end, sha, unpack_name):
			
 
				     """Find a SHA in a data blob with sorted SHAs.
			
 
				-    
			
 
				+
			
 
				     :param start: Start index of range to search
			
 
				     :param end: End index of range to search
			
 
				     :param sha: Sha to find
			
@@ -187,10 +202,10 @@ def bisect_find_sha(start, end, sha, unpack_name):
 
				 
			
 
				 class PackIndex(object):
			
 
				     """An index in to a packfile.
			
 
				-  
			
 
				+
			
 
				     Given a sha id of an object a pack index can tell you the location in the
			
 
				     packfile of that object if it has it.
			
 
				-  
			
 
				+
			
 
				     To do the loop it opens the file, and indexes first 256 4 byte groups
			
 
				     with the first byte of the sha id. The value in the four byte group indexed
			
 
				     is the end of the group that shares the same starting byte. Subtract one
			
@@ -198,10 +213,10 @@ class PackIndex(object):
 
				     The values are sorted by sha id within the group, so do the math to find
			
 
				     the start and end offset and then bisect in to find if the value is present.
			
 
				     """
			
 
				-  
			
 
				+
			
 
				     def __init__(self, filename, file=None, contents=None, size=None):
			
 
				         """Create a pack index object.
			
 
				-    
			
 
				+
			
 
				         Provide it with the name of the index file to consider, and it will map
			
 
				         it whenever required.
			
 
				         """
			
@@ -216,14 +231,14 @@ class PackIndex(object):
 
				             self._contents, self._size = _load_file_contents(file, size)
			
 
				         else:
			
 
				             self._contents, self._size = (contents, size)
			
 
				-  
			
 
				+
			
 
				     def __eq__(self, other):
			
 
				         if not isinstance(other, PackIndex):
			
 
				             return False
			
 
				-    
			
 
				+
			
 
				         if self._fan_out_table != other._fan_out_table:
			
 
				             return False
			
 
				-    
			
 
				+
			
 
				         for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
			
 
				                                                  other.iterentries()):
			
 
				             if name1 != name2:
			
@@ -232,25 +247,25 @@ class PackIndex(object):
 
				 
			
 
				     def __ne__(self, other):
			
 
				         return not self.__eq__(other)
			
 
				-  
			
 
				+
			
 
				     def close(self):
			
 
				         self._file.close()
			
 
				-  
			
 
				+
			
 
				     def __len__(self):
			
 
				         """Return the number of entries in this pack index."""
			
 
				         return self._fan_out_table[-1]
			
 
				-  
			
 
				+
			
 
				     def _unpack_entry(self, i):
			
 
				         """Unpack the i-th entry in the index file.
			
 
				-    
			
 
				-        :return: Tuple with object name (SHA), offset in pack file and 
			
 
				-              CRC32 checksum (if known)."""
			
 
				+
			
 
				+        :return: Tuple with object name (SHA), offset in pack file and CRC32
			
 
				+            checksum (if known)."""
			
 
				         raise NotImplementedError(self._unpack_entry)
			
 
				-  
			
 
				+
			
 
				     def _unpack_name(self, i):
			
 
				         """Unpack the i-th name from the index file."""
			
 
				         raise NotImplementedError(self._unpack_name)
			
 
				-  
			
 
				+
			
 
				     def _unpack_offset(self, i):
			
 
				         """Unpack the i-th object offset from the index file."""
			
 
				         raise NotImplementedError(self._unpack_offset)
			
@@ -258,43 +273,43 @@ class PackIndex(object):
 
				     def _unpack_crc32_checksum(self, i):
			
 
				         """Unpack the crc32 checksum for the i-th object from the index file."""
			
 
				         raise NotImplementedError(self._unpack_crc32_checksum)
			
 
				-  
			
 
				+
			
 
				     def __iter__(self):
			
 
				         """Iterate over the SHAs in this pack."""
			
 
				         return imap(sha_to_hex, self._itersha())
			
 
				-  
			
 
				+
			
 
				     def _itersha(self):
			
 
				         for i in range(len(self)):
			
 
				             yield self._unpack_name(i)
			
 
				-  
			
 
				+
			
 
				     def objects_sha1(self):
			
 
				         """Return the hex SHA1 over all the shas of all objects in this pack.
			
 
				-        
			
 
				+
			
 
				         :note: This is used for the filename of the pack.
			
 
				         """
			
 
				         return iter_sha1(self._itersha())
			
 
				-  
			
 
				+
			
 
				     def iterentries(self):
			
 
				         """Iterate over the entries in this pack index.
			
 
				-       
			
 
				+
			
 
				         Will yield tuples with object name, offset in packfile and crc32
			
 
				         checksum.
			
 
				         """
			
 
				         for i in range(len(self)):
			
 
				             yield self._unpack_entry(i)
			
 
				-  
			
 
				+
			
 
				     def _read_fan_out_table(self, start_offset):
			
 
				         ret = []
			
 
				         for i in range(0x100):
			
 
				-            ret.append(struct.unpack(">L",
			
 
				-                self._contents[start_offset+i*4:start_offset+(i+1)*4])[0])
			
 
				+            fanout_entry = self._contents[start_offset+i*4:start_offset+(i+1)*4]
			
 
				+            ret.append(struct.unpack(">L", fanout_entry)[0])
			
 
				         return ret
			
 
				-  
			
 
				+
			
 
				     def check(self):
			
 
				         """Check that the stored checksum matches the actual checksum."""
			
 
				         # TODO: Check pack contents, too
			
 
				         return self.calculate_checksum() == self.get_stored_checksum()
			
 
				-  
			
 
				+
			
 
				     def calculate_checksum(self):
			
 
				         """Calculate the SHA1 checksum over this pack index.
			
 
				 
			
@@ -304,21 +319,21 @@ class PackIndex(object):
 
				 
			
 
				     def get_pack_checksum(self):
			
 
				         """Return the SHA1 checksum stored for the corresponding packfile.
			
 
				-        
			
 
				+
			
 
				         :return: 20-byte binary digest
			
 
				         """
			
 
				         return str(self._contents[-40:-20])
			
 
				-  
			
 
				+
			
 
				     def get_stored_checksum(self):
			
 
				         """Return the SHA1 checksum stored for this index.
			
 
				-        
			
 
				+
			
 
				         :return: 20-byte binary digest
			
 
				         """
			
 
				         return str(self._contents[-20:])
			
 
				-  
			
 
				+
			
 
				     def object_index(self, sha):
			
 
				         """Return the index in to the corresponding packfile for the object.
			
 
				-    
			
 
				+
			
 
				         Given the name of an object it will return the offset that object
			
 
				         lives at within the corresponding pack file. If the pack file doesn't
			
 
				         have the object then None will be returned.
			
@@ -326,10 +341,10 @@ class PackIndex(object):
 
				         if len(sha) == 40:
			
 
				             sha = hex_to_sha(sha)
			
 
				         return self._object_index(sha)
			
 
				-  
			
 
				+
			
 
				     def _object_index(self, sha):
			
 
				         """See object_index.
			
 
				-        
			
 
				+
			
 
				         :param sha: A *binary* SHA string. (20 characters long)_
			
 
				         """
			
 
				         assert len(sha) == 20
			
@@ -343,7 +358,6 @@ class PackIndex(object):
 
				         if i is None:
			
 
				             raise KeyError(sha)
			
 
				         return self._unpack_offset(i)
			
 
				-            
			
 
				 
			
 
				 
			
 
				 class PackIndex1(PackIndex):
			
@@ -355,22 +369,22 @@ class PackIndex1(PackIndex):
 
				         self._fan_out_table = self._read_fan_out_table(0)
			
 
				 
			
 
				     def _unpack_entry(self, i):
			
 
				-        (offset, name) = unpack_from(">L20s", self._contents, 
			
 
				-            (0x100 * 4) + (i * 24))
			
 
				+        (offset, name) = unpack_from(">L20s", self._contents,
			
 
				+                                     (0x100 * 4) + (i * 24))
			
 
				         return (name, offset, None)
			
 
				- 
			
 
				+
			
 
				     def _unpack_name(self, i):
			
 
				         offset = (0x100 * 4) + (i * 24) + 4
			
 
				         return self._contents[offset:offset+20]
			
 
				-  
			
 
				+
			
 
				     def _unpack_offset(self, i):
			
 
				         offset = (0x100 * 4) + (i * 24)
			
 
				         return unpack_from(">L", self._contents, offset)[0]
			
 
				-  
			
 
				+
			
 
				     def _unpack_crc32_checksum(self, i):
			
 
				         # Not stored in v1 index files
			
 
				-        return None 
			
 
				-  
			
 
				+        return None
			
 
				+
			
 
				 
			
 
				 class PackIndex2(PackIndex):
			
 
				     """Version 2 Pack Index."""
			
@@ -383,32 +397,32 @@ class PackIndex2(PackIndex):
 
				         self._fan_out_table = self._read_fan_out_table(8)
			
 
				         self._name_table_offset = 8 + 0x100 * 4
			
 
				         self._crc32_table_offset = self._name_table_offset + 20 * len(self)
			
 
				-        self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
			
 
				+        self._pack_offset_table_offset = (self._crc32_table_offset +
			
 
				+                                          4 * len(self))
			
 
				 
			
 
				     def _unpack_entry(self, i):
			
 
				-        return (self._unpack_name(i), self._unpack_offset(i), 
			
 
				+        return (self._unpack_name(i), self._unpack_offset(i),
			
 
				                 self._unpack_crc32_checksum(i))
			
 
				- 
			
 
				+
			
 
				     def _unpack_name(self, i):
			
 
				         offset = self._name_table_offset + i * 20
			
 
				         return self._contents[offset:offset+20]
			
 
				-  
			
 
				+
			
 
				     def _unpack_offset(self, i):
			
 
				         offset = self._pack_offset_table_offset + i * 4
			
 
				         return unpack_from(">L", self._contents, offset)[0]
			
 
				-  
			
 
				+
			
 
				     def _unpack_crc32_checksum(self, i):
			
 
				-        return unpack_from(">L", self._contents, 
			
 
				+        return unpack_from(">L", self._contents,
			
 
				                           self._crc32_table_offset + i * 4)[0]
			
 
				-  
			
 
				 
			
 
				 
			
 
				-def read_pack_header(f):
			
 
				+def read_pack_header(read):
			
 
				     """Read the header of a pack file.
			
 
				 
			
 
				-    :param f: File-like object to read from
			
 
				+    :param read: Read function
			
 
				     """
			
 
				-    header = f.read(12)
			
 
				+    header = read(12)
			
 
				     assert header[:4] == "PACK"
			
 
				     (version,) = unpack_from(">L", header, 4)
			
 
				     assert version in (2, 3), "Version was %d" % version
			
@@ -420,20 +434,25 @@ def chunks_length(chunks):
 
				     return sum(imap(len, chunks))
			
 
				 
			
 
				 
			
 
				-def unpack_object(read):
			
 
				+def unpack_object(read_all, read_some=None):
			
 
				     """Unpack a Git object.
			
 
				 
			
 
				-    :return: tuple with type, uncompressed data as chunks, compressed size and 
			
 
				-        tail data
			
 
				+    :param read_all: Read function that blocks until the number of requested
			
 
				+        bytes are read.
			
 
				+    :param read_some: Read function that returns at least one byte, but may not
			
 
				+        return the number of bytes requested.
			
 
				+    :return: tuple with type, uncompressed data, compressed size and tail data.
			
 
				     """
			
 
				-    bytes = take_msb_bytes(read)
			
 
				+    if read_some is None:
			
 
				+        read_some = read_all
			
 
				+    bytes = take_msb_bytes(read_all)
			
 
				     type = (bytes[0] >> 4) & 0x07
			
 
				     size = bytes[0] & 0x0f
			
 
				     for i, byte in enumerate(bytes[1:]):
			
 
				         size += (byte & 0x7f) << ((i * 7) + 4)
			
 
				     raw_base = len(bytes)
			
 
				     if type == 6: # offset delta
			
 
				-        bytes = take_msb_bytes(read)
			
 
				+        bytes = take_msb_bytes(read_all)
			
 
				         raw_base += len(bytes)
			
 
				         assert not (bytes[-1] & 0x80)
			
 
				         delta_base_offset = bytes[0] & 0x7f
			
@@ -441,24 +460,23 @@ def unpack_object(read):
 
				             delta_base_offset += 1
			
 
				             delta_base_offset <<= 7
			
 
				             delta_base_offset += (byte & 0x7f)
			
 
				-        uncomp, comp_len, unused = read_zlib_chunks(read)
			
 
				+        uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
			
 
				         assert size == chunks_length(uncomp)
			
 
				         return type, (delta_base_offset, uncomp), comp_len+raw_base, unused
			
 
				     elif type == 7: # ref delta
			
 
				-        basename = read(20)
			
 
				+        basename = read_all(20)
			
 
				         raw_base += 20
			
 
				-        uncomp, comp_len, unused = read_zlib_chunks(read)
			
 
				+        uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
			
 
				         assert size == chunks_length(uncomp)
			
 
				         return type, (basename, uncomp), comp_len+raw_base, unused
			
 
				     else:
			
 
				-        uncomp, comp_len, unused = read_zlib_chunks(read)
			
 
				+        uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
			
 
				         assert chunks_length(uncomp) == size
			
 
				         return type, uncomp, comp_len+raw_base, unused
			
 
				 
			
 
				 
			
 
				 def _compute_object_size((num, obj)):
			
 
				-    """Compute the size of a unresolved object for use with LRUSizeCache.
			
 
				-    """
			
 
				+    """Compute the size of a unresolved object for use with LRUSizeCache."""
			
 
				     if num in (6, 7):
			
 
				         return chunks_length(obj[1])
			
 
				     return chunks_length(obj)
			
@@ -466,38 +484,37 @@ def _compute_object_size((num, obj)):
 
				 
			
 
				 class PackData(object):
			
 
				     """The data contained in a packfile.
			
 
				-  
			
 
				+
			
 
				     Pack files can be accessed both sequentially for exploding a pack, and
			
 
				     directly with the help of an index to retrieve a specific object.
			
 
				-  
			
 
				+
			
 
				     The objects within are either complete or a delta aginst another.
			
 
				-  
			
 
				+
			
 
				     The header is variable length. If the MSB of each byte is set then it
			
 
				     indicates that the subsequent byte is still part of the header.
			
 
				     For the first byte the next MS bits are the type, which tells you the type
			
 
				     of object, and whether it is a delta. The LS byte is the lowest bits of the
			
 
				     size. For each subsequent byte the LS 7 bits are the next MS bits of the
			
 
				     size, i.e. the last byte of the header contains the MS bits of the size.
			
 
				-  
			
 
				+
			
 
				     For the complete objects the data is stored as zlib deflated data.
			
 
				     The size in the header is the uncompressed object size, so to uncompress
			
 
				     you need to just keep feeding data to zlib until you get an object back,
			
 
				     or it errors on bad data. This is done here by just giving the complete
			
 
				     buffer from the start of the deflated object on. This is bad, but until I
			
 
				     get mmap sorted out it will have to do.
			
 
				-  
			
 
				+
			
 
				     Currently there are no integrity checks done. Also no attempt is made to
			
 
				     try and detect the delta case, or a request for an object at the wrong
			
 
				     position.  It will all just throw a zlib or KeyError.
			
 
				     """
			
 
				-  
			
 
				+
			
 
				     def __init__(self, filename, file=None, size=None):
			
 
				-        """Create a PackData object that represents the pack in the given
			
 
				-        filename.
			
 
				-    
			
 
				+        """Create a PackData object representing the pack in the given filename.
			
 
				+
			
 
				         The file must exist and stay readable until the object is disposed of.
			
 
				         It must also stay the same size. It will be mapped whenever needed.
			
 
				-    
			
 
				+
			
 
				         Currently there is a restriction on the size of the pack as the python
			
 
				         mmap implementation is flawed.
			
 
				         """
			
@@ -508,8 +525,8 @@ class PackData(object):
 
				             self._file = GitFile(self._filename, 'rb')
			
 
				         else:
			
 
				             self._file = file
			
 
				-        (version, self._num_objects) = read_pack_header(self._file)
			
 
				-        self._offset_cache = LRUSizeCache(1024*1024*20, 
			
 
				+        (version, self._num_objects) = read_pack_header(self._file.read)
			
 
				+        self._offset_cache = LRUSizeCache(1024*1024*20,
			
 
				             compute_size=_compute_object_size)
			
 
				 
			
 
				     @classmethod
			
@@ -527,13 +544,16 @@ class PackData(object):
 
				         if self._size is not None:
			
 
				             return self._size
			
 
				         self._size = os.path.getsize(self._filename)
			
 
				-        assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (self._filename, self._size, self._header_size)
			
 
				+        if self._size < self._header_size:
			
 
				+            errmsg = ("%s is too small for a packfile (%d < %d)" %
			
 
				+                      (self._filename, self._size, self._header_size))
			
 
				+            raise AssertionError(errmsg)
			
 
				         return self._size
			
 
				-  
			
 
				+
			
 
				     def __len__(self):
			
 
				         """Returns the number of objects in this pack."""
			
 
				         return self._num_objects
			
 
				-  
			
 
				+
			
 
				     def calculate_checksum(self):
			
 
				         """Calculate the checksum for this pack.
			
 
				 
			
@@ -550,7 +570,7 @@ class PackData(object):
 
				 
			
 
				     def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
			
 
				         """Resolve an object, possibly resolving deltas when necessary.
			
 
				-        
			
 
				+
			
 
				         :return: Tuple with object type and contents.
			
 
				         """
			
 
				         if type not in (6, 7): # Not a delta
			
@@ -558,7 +578,7 @@ class PackData(object):
 
				 
			
 
				         if get_offset is None:
			
 
				             get_offset = self.get_object_at
			
 
				-      
			
 
				+
			
 
				         if type == 6: # offset delta
			
 
				             (delta_offset, delta) = obj
			
 
				             assert isinstance(delta_offset, int)
			
@@ -574,15 +594,15 @@ class PackData(object):
 
				             assert type != 6
			
 
				             base_offset = None
			
 
				         type, base_chunks = self.resolve_object(base_offset, type, base_obj,
			
 
				-            get_ref)
			
 
				+                                                get_ref)
			
 
				         if base_offset is not None:
			
 
				             self._offset_cache[base_offset] = type, base_chunks
			
 
				         return (type, apply_delta(base_chunks, delta))
			
 
				-  
			
 
				+
			
 
				     def iterobjects(self, progress=None):
			
 
				 
			
 
				         class ObjectIterator(object):
			
 
				-            
			
 
				+
			
 
				             def __init__(self, pack):
			
 
				                 self.i = 0
			
 
				                 self.offset = pack._header_size
			
@@ -594,7 +614,7 @@ class PackData(object):
 
				 
			
 
				             def __len__(self):
			
 
				                 return self.num
			
 
				-            
			
 
				+
			
 
				             def next(self):
			
 
				                 if self.i == self.num:
			
 
				                     raise StopIteration
			
@@ -609,7 +629,7 @@ class PackData(object):
 
				                 self.i+=1
			
 
				                 return ret
			
 
				         return ObjectIterator(self)
			
 
				-  
			
 
				+
			
 
				     def iterentries(self, ext_resolve_ref=None, progress=None):
			
 
				         """Yield entries summarizing the contents of this pack.
			
 
				 
			
@@ -624,7 +644,7 @@ class PackData(object):
 
				         postponed = defaultdict(list)
			
 
				         class Postpone(Exception):
			
 
				             """Raised to postpone delta resolving."""
			
 
				-          
			
 
				+
			
 
				         def get_ref_text(sha):
			
 
				             assert len(sha) == 20
			
 
				             if sha in found:
			
@@ -653,20 +673,20 @@ class PackData(object):
 
				                 extra.extend(postponed.get(sha, []))
			
 
				         if postponed:
			
 
				             raise KeyError([sha_to_hex(h) for h in postponed.keys()])
			
 
				-  
			
 
				+
			
 
				     def sorted_entries(self, resolve_ext_ref=None, progress=None):
			
 
				         """Return entries in this pack, sorted by SHA.
			
 
				 
			
 
				-        :param ext_resolve_ref: Optional function to resolve base
			
 
				+        :param resolve_ext_ref: Optional function to resolve base
			
 
				             objects (in case this is a thin pack)
			
 
				         :param progress: Progress function, called with current and
			
 
				-            total object count.
			
 
				+            total object count
			
 
				         :return: List of tuples with (sha, offset, crc32)
			
 
				         """
			
 
				         ret = list(self.iterentries(resolve_ext_ref, progress=progress))
			
 
				         ret.sort()
			
 
				         return ret
			
 
				-  
			
 
				+
			
 
				     def create_index_v1(self, filename, resolve_ext_ref=None, progress=None):
			
 
				         """Create a version 1 file for this data file.
			
 
				 
			
@@ -677,7 +697,7 @@ class PackData(object):
 
				         """
			
 
				         entries = self.sorted_entries(resolve_ext_ref, progress=progress)
			
 
				         write_pack_index_v1(filename, entries, self.calculate_checksum())
			
 
				-  
			
 
				+
			
 
				     def create_index_v2(self, filename, resolve_ext_ref=None, progress=None):
			
 
				         """Create a version 2 index file for this data file.
			
 
				 
			
@@ -704,19 +724,19 @@ class PackData(object):
 
				             self.create_index_v2(filename, resolve_ext_ref, progress)
			
 
				         else:
			
 
				             raise ValueError("unknown index format %d" % version)
			
 
				-  
			
 
				+
			
 
				     def get_stored_checksum(self):
			
 
				         """Return the expected checksum stored in this pack."""
			
 
				         self._file.seek(self._get_size()-20)
			
 
				         return self._file.read(20)
			
 
				-  
			
 
				+
			
 
				     def check(self):
			
 
				         """Check the consistency of this pack."""
			
 
				         return (self.calculate_checksum() == self.get_stored_checksum())
			
 
				-  
			
 
				+
			
 
				     def get_object_at(self, offset):
			
 
				         """Given an offset in to the packfile return the object that is there.
			
 
				-    
			
 
				+
			
 
				         Using the associated index the location of an object can be looked up,
			
 
				         and then the packfile can be asked directly for that object using this
			
 
				         function.
			
@@ -731,8 +751,7 @@ class PackData(object):
 
				 
			
 
				 
			
 
				 class SHA1Reader(object):
			
 
				-    """Wrapper around a file-like object that remembers the SHA1 of 
			
 
				-    the data read from it."""
			
 
				+    """Wrapper around a file-like object that remembers the SHA1 of its data."""
			
 
				 
			
 
				     def __init__(self, f):
			
 
				         self.f = f
			
@@ -756,9 +775,8 @@ class SHA1Reader(object):
 
				 
			
 
				 
			
 
				 class SHA1Writer(object):
			
 
				-    """Wrapper around a file-like object that remembers the SHA1 of 
			
 
				-    the data written to it."""
			
 
				-    
			
 
				+    """Wrapper around a file-like object that remembers the SHA1 of its data."""
			
 
				+
			
 
				     def __init__(self, f):
			
 
				         self.f = f
			
 
				         self.sha1 = make_sha("")
			
@@ -786,7 +804,8 @@ def write_pack_object(f, type, object):
 
				     """Write pack object to a file.
			
 
				 
			
 
				     :param f: File to write to
			
 
				-    :param o: Object to write
			
 
				+    :param type: Numeric type of the object
			
 
				+    :param object: Object to write
			
 
				     :return: Tuple with offset at which the object was written, and crc32
			
 
				     """
			
 
				     offset = f.tell()
			
@@ -851,8 +870,8 @@ def write_pack_data(f, objects, num_objects, window=10):
 
				     for obj, path in recency:
			
 
				         magic.append( (obj.type_num, path, 1, -obj.raw_length(), obj) )
			
 
				     magic.sort()
			
 
				-    # Build a map of objects and their index in magic - so we can find preceeding objects
			
 
				-    # to diff against
			
 
				+    # Build a map of objects and their index in magic - so we can find
			
 
				+    # preceding objects to diff against
			
 
				     offs = {}
			
 
				     for i in range(len(magic)):
			
 
				         offs[magic[i][4]] = i
			
@@ -968,7 +987,7 @@ def create_delta(base_buf, target_buf):
 
				 
			
 
				 def apply_delta(src_buf, delta):
			
 
				     """Based on the similar function in git's patch-delta.c.
			
 
				-    
			
 
				+
			
 
				     :param src_buf: Source buffer
			
 
				     :param delta: Delta instructions
			
 
				     """
			
@@ -999,17 +1018,17 @@ def apply_delta(src_buf, delta):
 
				         if cmd & 0x80:
			
 
				             cp_off = 0
			
 
				             for i in range(4):
			
 
				-                if cmd & (1 << i): 
			
 
				+                if cmd & (1 << i):
			
 
				                     x = ord(delta[index])
			
 
				                     index += 1
			
 
				                     cp_off |= x << (i * 8)
			
 
				             cp_size = 0
			
 
				             for i in range(3):
			
 
				-                if cmd & (1 << (4+i)): 
			
 
				+                if cmd & (1 << (4+i)):
			
 
				                     x = ord(delta[index])
			
 
				                     index += 1
			
 
				                     cp_size |= x << (i * 8)
			
 
				-            if cp_size == 0: 
			
 
				+            if cp_size == 0:
			
 
				                 cp_size = 0x10000
			
 
				             if (cp_off + cp_size < cp_size or
			
 
				                 cp_off + cp_size > src_size or
			
@@ -1021,7 +1040,7 @@ def apply_delta(src_buf, delta):
 
				             index += cmd
			
 
				         else:
			
 
				             raise ApplyDeltaError("Invalid opcode 0")
			
 
				-    
			
 
				+
			
 
				     if index != delta_length:
			
 
				         raise ApplyDeltaError("delta not empty: %r" % delta[index:])
			
 
				 
			
@@ -1094,7 +1113,7 @@ class Pack(object):
 
				             idx_stored_checksum = self.index.get_pack_checksum()
			
 
				             data_stored_checksum = self._data.get_stored_checksum()
			
 
				             if idx_stored_checksum != data_stored_checksum:
			
 
				-                raise ChecksumMismatch(sha_to_hex(idx_stored_checksum), 
			
 
				+                raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
			
 
				                                        sha_to_hex(data_stored_checksum))
			
 
				         return self._data
			
 
				 
			
--- a/dulwich/protocol.py
+++ b/dulwich/protocol.py
@@ -19,6 +19,8 @@
 
				 
			
 
				 """Generic functions for talking the git smart server protocol."""
			
 
				 
			
 
				+from cStringIO import StringIO
			
 
				+import os
			
 
				 import socket
			
 
				 
			
 
				 from dulwich.errors import (
			
@@ -162,6 +164,112 @@ class Protocol(object):
 
				         return cmd, args[:-1].split(chr(0))
			
 
				 
			
 
				 
			
 
				+_RBUFSIZE = 8192  # Default read buffer size.
			
 
				+
			
 
				+
			
 
				+class ReceivableProtocol(Protocol):
			
 
				+    """Variant of Protocol that allows reading up to a size without blocking.
			
 
				+
			
 
				+    This class has a recv() method that behaves like socket.recv() in addition
			
 
				+    to a read() method.
			
 
				+
			
 
				+    If you want to read n bytes from the wire and block until exactly n bytes
			
 
				+    (or EOF) are read, use read(n). If you want to read at most n bytes from the
			
 
				+    wire but don't care if you get less, use recv(n). Note that recv(n) will
			
 
				+    still block until at least one byte is read.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, recv, write, report_activity=None, rbufsize=_RBUFSIZE):
			
 
				+        super(ReceivableProtocol, self).__init__(self.read, write,
			
 
				+                                                report_activity)
			
 
				+        self._recv = recv
			
 
				+        self._rbuf = StringIO()
			
 
				+        self._rbufsize = rbufsize
			
 
				+
			
 
				+    def read(self, size):
			
 
				+        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
			
 
				+        # with the following modifications:
			
 
				+        #  - omit the size <= 0 branch
			
 
				+        #  - seek back to start rather than 0 in case some buffer has been
			
 
				+        #    consumed.
			
 
				+        #  - use os.SEEK_END instead of the magic number.
			
 
				+        # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
			
 
				+        # Licensed under the Python Software Foundation License.
			
 
				+        # TODO: see if buffer is more efficient than cStringIO.
			
 
				+        assert size > 0
			
 
				+
			
 
				+        # Our use of StringIO rather than lists of string objects returned by
			
 
				+        # recv() minimizes memory usage and fragmentation that occurs when
			
 
				+        # rbufsize is large compared to the typical return value of recv().
			
 
				+        buf = self._rbuf
			
 
				+        start = buf.tell()
			
 
				+        buf.seek(0, os.SEEK_END)
			
 
				+        # buffer may have been partially consumed by recv()
			
 
				+        buf_len = buf.tell() - start
			
 
				+        if buf_len >= size:
			
 
				+            # Already have size bytes in our buffer?  Extract and return.
			
 
				+            buf.seek(start)
			
 
				+            rv = buf.read(size)
			
 
				+            self._rbuf = StringIO()
			
 
				+            self._rbuf.write(buf.read())
			
 
				+            self._rbuf.seek(0)
			
 
				+            return rv
			
 
				+
			
 
				+        self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf.
			
 
				+        while True:
			
 
				+            left = size - buf_len
			
 
				+            # recv() will malloc the amount of memory given as its
			
 
				+            # parameter even though it often returns much less data
			
 
				+            # than that.  The returned data string is short lived
			
 
				+            # as we copy it into a StringIO and free it.  This avoids
			
 
				+            # fragmentation issues on many platforms.
			
 
				+            data = self._recv(left)
			
 
				+            if not data:
			
 
				+                break
			
 
				+            n = len(data)
			
 
				+            if n == size and not buf_len:
			
 
				+                # Shortcut.  Avoid buffer data copies when:
			
 
				+                # - We have no data in our buffer.
			
 
				+                # AND
			
 
				+                # - Our call to recv returned exactly the
			
 
				+                #   number of bytes we were asked to read.
			
 
				+                return data
			
 
				+            if n == left:
			
 
				+                buf.write(data)
			
 
				+                del data  # explicit free
			
 
				+                break
			
 
				+            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
			
 
				+            buf.write(data)
			
 
				+            buf_len += n
			
 
				+            del data  # explicit free
			
 
				+            #assert buf_len == buf.tell()
			
 
				+        buf.seek(start)
			
 
				+        return buf.read()
			
 
				+
			
 
				+    def recv(self, size):
			
 
				+        assert size > 0
			
 
				+
			
 
				+        buf = self._rbuf
			
 
				+        start = buf.tell()
			
 
				+        buf.seek(0, os.SEEK_END)
			
 
				+        buf_len = buf.tell()
			
 
				+        buf.seek(start)
			
 
				+
			
 
				+        left = buf_len - start
			
 
				+        if not left:
			
 
				+            # only read from the wire if our read buffer is exhausted
			
 
				+            data = self._recv(self._rbufsize)
			
 
				+            if len(data) == size:
			
 
				+                # shortcut: skip the buffer if we read exactly size bytes
			
 
				+                return data
			
 
				+            buf = StringIO()
			
 
				+            buf.write(data)
			
 
				+            buf.seek(0)
			
 
				+            del data  # explicit free
			
 
				+            self._rbuf = buf
			
 
				+        return buf.read(size)
			
 
				+
			
 
				+
			
 
				 def extract_capabilities(text):
			
 
				     """Extract a capabilities list from a string, if present.
			
 
				 
			
@@ -171,7 +279,7 @@ def extract_capabilities(text):
 
				     if not "\0" in text:
			
 
				         return text, []
			
 
				     text, capabilities = text.rstrip().split("\0")
			
 
				-    return (text, capabilities.split(" "))
			
 
				+    return (text, capabilities.strip().split(" "))
			
 
				 
			
 
				 
			
 
				 def extract_want_line_capabilities(text):
			
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -684,6 +684,8 @@ class BaseRepo(object):
 
				         :return: iterator over objects, with __len__ implemented
			
 
				         """
			
 
				         wants = determine_wants(self.get_refs())
			
 
				+        if not wants:
			
 
				+            return []
			
 
				         haves = self.object_store.find_common_revisions(graph_walker)
			
 
				         return self.object_store.iter_shas(
			
 
				             self.object_store.find_missing_objects(haves, wants, progress,
			
--- a/dulwich/server.py
+++ b/dulwich/server.py
@@ -27,19 +27,27 @@ Documentation/technical directory in the cgit distribution, and in particular:
 
				 
			
 
				 
			
 
				 import collections
			
 
				+from cStringIO import StringIO
			
 
				+import socket
			
 
				 import SocketServer
			
 
				+import zlib
			
 
				 
			
 
				 from dulwich.errors import (
			
 
				     ApplyDeltaError,
			
 
				     ChecksumMismatch,
			
 
				     GitProtocolError,
			
 
				     )
			
 
				+from dulwich.misc import (
			
 
				+    make_sha,
			
 
				+    )
			
 
				 from dulwich.objects import (
			
 
				     hex_to_sha,
			
 
				+    sha_to_hex,
			
 
				     )
			
 
				 from dulwich.protocol import (
			
 
				-    Protocol,
			
 
				     ProtocolFile,
			
 
				+    Protocol,
			
 
				+    ReceivableProtocol,
			
 
				     TCP_GIT_PORT,
			
 
				     ZERO_SHA,
			
 
				     extract_capabilities,
			
@@ -50,6 +58,8 @@ from dulwich.protocol import (
 
				     ack_type,
			
 
				     )
			
 
				 from dulwich.pack import (
			
 
				+    read_pack_header,
			
 
				+    unpack_object,
			
 
				     write_pack_data,
			
 
				     )
			
 
				 
			
@@ -102,6 +112,105 @@ class BackendRepo(object):
 
				         raise NotImplementedError
			
 
				 
			
 
				 
			
 
				+class PackStreamVerifier(object):
			
 
				+    """Class to verify a pack stream as it is being read.
			
 
				+
			
 
				+    The pack is read from a ReceivableProtocol using read() or recv() as
			
 
				+    appropriate and written out to the given file-like object.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, proto, outfile):
			
 
				+        self.proto = proto
			
 
				+        self.outfile = outfile
			
 
				+        self.sha = make_sha()
			
 
				+        self._rbuf = StringIO()
			
 
				+        # trailer is a deque to avoid memory allocation on small reads
			
 
				+        self._trailer = collections.deque()
			
 
				+
			
 
				+    def _read(self, read, size):
			
 
				+        """Read up to size bytes using the given callback.
			
 
				+
			
 
				+        As a side effect, update the verifier's hash (excluding the last 20
			
 
				+        bytes read) and write through to the output file.
			
 
				+
			
 
				+        :param read: The read callback to read from.
			
 
				+        :param size: The maximum number of bytes to read; the particular
			
 
				+            behavior is callback-specific.
			
 
				+        """
			
 
				+        data = read(size)
			
 
				+
			
 
				+        # maintain a trailer of the last 20 bytes we've read
			
 
				+        n = len(data)
			
 
				+        tn = len(self._trailer)
			
 
				+        if n >= 20:
			
 
				+            to_pop = tn
			
 
				+            to_add = 20
			
 
				+        else:
			
 
				+            to_pop = max(n + tn - 20, 0)
			
 
				+            to_add = n
			
 
				+        for _ in xrange(to_pop):
			
 
				+            self.sha.update(self._trailer.popleft())
			
 
				+        self._trailer.extend(data[-to_add:])
			
 
				+
			
 
				+        # hash everything but the trailer
			
 
				+        self.sha.update(data[:-to_add])
			
 
				+        self.outfile.write(data)
			
 
				+        return data
			
 
				+
			
 
				+    def _buf_len(self):
			
 
				+        buf = self._rbuf
			
 
				+        start = buf.tell()
			
 
				+        buf.seek(0, 2)
			
 
				+        end = buf.tell()
			
 
				+        buf.seek(start)
			
 
				+        return end - start
			
 
				+
			
 
				+    def read(self, size):
			
 
				+        """Read, blocking until size bytes are read."""
			
 
				+        buf_len = self._buf_len()
			
 
				+        if buf_len >= size:
			
 
				+            return self._rbuf.read(size)
			
 
				+        buf_data = self._rbuf.read()
			
 
				+        self._rbuf = StringIO()
			
 
				+        return buf_data + self._read(self.proto.read, size - buf_len)
			
 
				+
			
 
				+    def recv(self, size):
			
 
				+        """Read up to size bytes, blocking until one byte is read."""
			
 
				+        buf_len = self._buf_len()
			
 
				+        if buf_len:
			
 
				+            data = self._rbuf.read(size)
			
 
				+            if size >= buf_len:
			
 
				+                self._rbuf = StringIO()
			
 
				+            return data
			
 
				+        return self._read(self.proto.recv, size)
			
 
				+
			
 
				+    def verify(self):
			
 
				+        """Verify a pack stream and write it to the output file.
			
 
				+
			
 
				+        :raise AssertionError: if there is an error in the pack format.
			
 
				+        :raise ChecksumMismatch: if the checksum of the pack contents does not
			
 
				+            match the checksum in the pack trailer.
			
 
				+        :raise socket.error: if an error occurred reading from the socket.
			
 
				+        :raise zlib.error: if an error occurred during zlib decompression.
			
 
				+        :raise IOError: if an error occurred writing to the output file.
			
 
				+        """
			
 
				+        _, num_objects = read_pack_header(self.read)
			
 
				+        for i in xrange(num_objects):
			
 
				+            type, _, _, unused = unpack_object(self.read, self.recv)
			
 
				+
			
 
				+            # prepend any unused data to current read buffer
			
 
				+            buf = StringIO()
			
 
				+            buf.write(unused)
			
 
				+            buf.write(self._rbuf.read())
			
 
				+            buf.seek(0)
			
 
				+            self._rbuf = buf
			
 
				+
			
 
				+        pack_sha = sha_to_hex(''.join([c for c in self._trailer]))
			
 
				+        calculated_sha = self.sha.hexdigest()
			
 
				+        if pack_sha != calculated_sha:
			
 
				+            raise ChecksumMismatch(pack_sha, calculated_sha)
			
 
				+
			
 
				+
			
 
				 class DictBackend(Backend):
			
 
				     """Trivial backend that looks up Git repositories in a dictionary."""
			
 
				 
			
@@ -116,9 +225,9 @@ class DictBackend(Backend):
 
				 class Handler(object):
			
 
				     """Smart protocol command handler base class."""
			
 
				 
			
 
				-    def __init__(self, backend, read, write):
			
 
				+    def __init__(self, backend, proto):
			
 
				         self.backend = backend
			
 
				-        self.proto = Protocol(read, write)
			
 
				+        self.proto = proto
			
 
				         self._client_capabilities = None
			
 
				 
			
 
				     def capability_line(self):
			
@@ -157,9 +266,9 @@ class Handler(object):
 
				 class UploadPackHandler(Handler):
			
 
				     """Protocol handler for uploading a pack to the server."""
			
 
				 
			
 
				-    def __init__(self, backend, args, read, write,
			
 
				+    def __init__(self, backend, args, proto,
			
 
				                  stateless_rpc=False, advertise_refs=False):
			
 
				-        Handler.__init__(self, backend, read, write)
			
 
				+        Handler.__init__(self, backend, proto)
			
 
				         self.repo = backend.open_repository(args[0])
			
 
				         self._graph_walker = None
			
 
				         self.stateless_rpc = stateless_rpc
			
@@ -521,9 +630,9 @@ class MultiAckDetailedGraphWalkerImpl(object):
 
				 class ReceivePackHandler(Handler):
			
 
				     """Protocol handler for downloading a pack from the client."""
			
 
				 
			
 
				-    def __init__(self, backend, args, read, write,
			
 
				+    def __init__(self, backend, args, proto,
			
 
				                  stateless_rpc=False, advertise_refs=False):
			
 
				-        Handler.__init__(self, backend, read, write)
			
 
				+        Handler.__init__(self, backend, proto)
			
 
				         self.repo = backend.open_repository(args[0])
			
 
				         self.stateless_rpc = stateless_rpc
			
 
				         self.advertise_refs = advertise_refs
			
@@ -531,20 +640,14 @@ class ReceivePackHandler(Handler):
 
				     def capabilities(self):
			
 
				         return ("report-status", "delete-refs")
			
 
				 
			
 
				-    def _apply_pack(self, refs, read):
			
 
				+    def _apply_pack(self, refs):
			
 
				         f, commit = self.repo.object_store.add_thin_pack()
			
 
				         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError)
			
 
				         status = []
			
 
				         unpack_error = None
			
 
				         # TODO: more informative error messages than just the exception string
			
 
				         try:
			
 
				-            # TODO: decode the pack as we stream to avoid blocking reads beyond
			
 
				-            # the end of data (when using HTTP/1.1 chunked encoding)
			
 
				-            while True:
			
 
				-                data = read(10240)
			
 
				-                if not data:
			
 
				-                    break
			
 
				-                f.write(data)
			
 
				+            PackStreamVerifier(self.proto, f).verify()
			
 
				         except all_exceptions, e:
			
 
				             unpack_error = str(e).replace('\n', '')
			
 
				         try:
			
@@ -619,7 +722,7 @@ class ReceivePackHandler(Handler):
 
				             ref = self.proto.read_pkt_line()
			
 
				 
			
 
				         # backend can now deal with this refs and read a pack using self.read
			
 
				-        status = self.repo._apply_pack(client_refs, self.proto.read)
			
 
				+        status = self._apply_pack(client_refs)
			
 
				 
			
 
				         # when we have read all the pack from the client, send a status report
			
 
				         # if the client asked for it
			
@@ -637,7 +740,7 @@ class ReceivePackHandler(Handler):
 
				 class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
			
 
				 
			
 
				     def handle(self):
			
 
				-        proto = Protocol(self.rfile.read, self.wfile.write)
			
 
				+        proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
			
 
				         command, args = proto.read_cmd()
			
 
				 
			
 
				         # switch case to handle the specific git command
			
@@ -648,7 +751,7 @@ class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
 
				         else:
			
 
				             return
			
 
				 
			
 
				-        h = cls(self.server.backend, args, self.rfile.read, self.wfile.write)
			
 
				+        h = cls(self.server.backend, args, proto)
			
 
				         h.handle()
			
 
				 
			
 
				 
			
--- a/dulwich/tests/compat/test_server.py
+++ b/dulwich/tests/compat/test_server.py
@@ -78,4 +78,5 @@ class GitServerTestCase(ServerTests, CompatTestCase):
 
				         return port
			
 
				 
			
 
				     def test_push_to_dulwich(self):
			
 
				+        # TODO(dborowitz): enable after merging thin pack fixes.
			
 
				         raise TestSkipped('Skipping push test due to known deadlock bug.')
			
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py
@@ -24,6 +24,7 @@
 
				 from cStringIO import StringIO
			
 
				 import os
			
 
				 import unittest
			
 
				+import zlib
			
 
				 
			
 
				 from dulwich.objects import (
			
 
				     Tree,
			
@@ -277,11 +278,67 @@ class TestPackIndexWritingv2(unittest.TestCase, BaseTestPackIndexWriting):
 
				         self._expected_version = 2
			
 
				         self._write_fn = write_pack_index_v2
			
 
				 
			
 
				-TEST_COMP1 = """\x78\x9c\x9d\x8e\xc1\x0a\xc2\x30\x10\x44\xef\xf9\x8a\xbd\xa9\x08\x92\x86\xb4\x26\x20\xe2\xd9\x83\x78\xf2\xbe\x49\x37\xb5\xa5\x69\xca\x36\xf5\xfb\x4d\xfd\x04\x67\x6e\x33\xcc\xf0\x32\x13\x81\xc6\x16\x8d\xa9\xbd\xad\x6c\xe3\x8a\x03\x4a\x73\xd6\xda\xd5\xa6\x51\x2e\x58\x65\x6c\x13\xbc\x94\x4a\xcc\xc8\x34\x65\x78\xa4\x89\x04\xae\xf9\x9d\x18\xee\x34\x46\x62\x78\x11\x4f\x29\xf5\x03\x5c\x86\x5f\x70\x5b\x30\x3a\x3c\x25\xee\xae\x50\xa9\xf2\x60\xa4\xaa\x34\x1c\x65\x91\xf0\x29\xc6\x3e\x67\xfa\x6f\x2d\x9e\x9c\x3e\x7d\x4b\xc0\x34\x8f\xe8\x29\x6e\x48\xa1\xa0\xc4\x88\xf3\xfe\xb0\x5b\x20\x85\xb0\x50\x06\xe4\x6e\xdd\xca\xd3\x17\x26\xfa\x49\x23"""
			
 
				 
			
 
				+class ReadZlibTests(unittest.TestCase):
			
 
				 
			
 
				-class ZlibTests(unittest.TestCase):
			
 
				+    decomp = (
			
 
				+      'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
			
 
				+      'parent None\n'
			
 
				+      'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
			
 
				+      'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
			
 
				+      '\n'
			
 
				+      "Provide replacement for mmap()'s offset argument.")
			
 
				+    comp = zlib.compress(decomp)
			
 
				+    extra = 'nextobject'
			
 
				+
			
 
				+    def setUp(self):
			
 
				+        self.read = StringIO(self.comp + self.extra).read
			
 
				+
			
 
				+    def test_decompress_size(self):
			
 
				+        good_decomp_len = len(self.decomp)
			
 
				+        self.assertRaises(ValueError, read_zlib_chunks, self.read, -1)
			
 
				+        self.assertRaises(zlib.error, read_zlib_chunks, self.read,
			
 
				+                          good_decomp_len - 1)
			
 
				+        self.assertRaises(zlib.error, read_zlib_chunks, self.read,
			
 
				+                          good_decomp_len + 1)
			
 
				+
			
 
				+    def test_decompress_truncated(self):
			
 
				+        read = StringIO(self.comp[:10]).read
			
 
				+        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
			
 
				+
			
 
				+        read = StringIO(self.comp).read
			
 
				+        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
			
 
				+
			
 
				+    def test_decompress_empty(self):
			
 
				+        comp = zlib.compress('')
			
 
				+        read = StringIO(comp + self.extra).read
			
 
				+        decomp, comp_len, unused_data = read_zlib_chunks(read, 0)
			
 
				+        self.assertEqual('', ''.join(decomp))
			
 
				+        self.assertEqual(len(comp), comp_len)
			
 
				+        self.assertNotEquals('', unused_data)
			
 
				+        self.assertEquals(self.extra, unused_data + read())
			
 
				+
			
 
				+    def _do_decompress_test(self, buffer_size):
			
 
				+        decomp, comp_len, unused_data = read_zlib_chunks(
			
 
				+          self.read, len(self.decomp), buffer_size=buffer_size)
			
 
				+        self.assertEquals(self.decomp, ''.join(decomp))
			
 
				+        self.assertEquals(len(self.comp), comp_len)
			
 
				+        self.assertNotEquals('', unused_data)
			
 
				+        self.assertEquals(self.extra, unused_data + self.read())
			
 
				 
			
 
				     def test_simple_decompress(self):
			
 
				-        self.assertEquals((["tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument."], 158, 'Z'), 
			
 
				-        read_zlib_chunks(StringIO(TEST_COMP1).read, 229))
			
 
				+        self._do_decompress_test(4096)
			
 
				+
			
 
				+    # These buffer sizes are not intended to be realistic, but rather simulate
			
 
				+    # larger buffer sizes that may end at various places.
			
 
				+    def test_decompress_buffer_size_1(self):
			
 
				+        self._do_decompress_test(1)
			
 
				+
			
 
				+    def test_decompress_buffer_size_2(self):
			
 
				+        self._do_decompress_test(2)
			
 
				+
			
 
				+    def test_decompress_buffer_size_3(self):
			
 
				+        self._do_decompress_test(3)
			
 
				+
			
 
				+    def test_decompress_buffer_size_4(self):
			
 
				+        self._do_decompress_test(4)
			
--- a/dulwich/tests/test_protocol.py
+++ b/dulwich/tests/test_protocol.py
@@ -20,11 +20,12 @@
 
				 """Tests for the smart protocol utility functions."""
			
 
				 
			
 
				 
			
 
				-from cStringIO import StringIO
			
 
				+from StringIO import StringIO
			
 
				 from unittest import TestCase
			
 
				 
			
 
				 from dulwich.protocol import (
			
 
				     Protocol,
			
 
				+    ReceivableProtocol,
			
 
				     extract_capabilities,
			
 
				     extract_want_line_capabilities,
			
 
				     ack_type,
			
@@ -33,12 +34,7 @@ from dulwich.protocol import (
 
				     MULTI_ACK_DETAILED,
			
 
				     )
			
 
				 
			
 
				-class ProtocolTests(TestCase):
			
 
				-
			
 
				-    def setUp(self):
			
 
				-        self.rout = StringIO()
			
 
				-        self.rin = StringIO()
			
 
				-        self.proto = Protocol(self.rin.read, self.rout.write)
			
 
				+class BaseProtocolTests(object):
			
 
				 
			
 
				     def test_write_pkt_line_none(self):
			
 
				         self.proto.write_pkt_line(None)
			
@@ -82,6 +78,93 @@ class ProtocolTests(TestCase):
 
				         self.assertRaises(AssertionError, self.proto.read_cmd)
			
 
				 
			
 
				 
			
 
				+class ProtocolTests(BaseProtocolTests, TestCase):
			
 
				+
			
 
				+    def setUp(self):
			
 
				+        TestCase.setUp(self)
			
 
				+        self.rout = StringIO()
			
 
				+        self.rin = StringIO()
			
 
				+        self.proto = Protocol(self.rin.read, self.rout.write)
			
 
				+
			
 
				+
			
 
				+class ReceivableStringIO(StringIO):
			
 
				+    """StringIO with socket-like recv semantics for testing."""
			
 
				+
			
 
				+    def recv(self, size):
			
 
				+        # fail fast if no bytes are available; in a real socket, this would
			
 
				+        # block forever
			
 
				+        if self.tell() == len(self.getvalue()):
			
 
				+            raise AssertionError("Blocking read past end of socket")
			
 
				+        if size == 1:
			
 
				+            return self.read(1)
			
 
				+        # calls shouldn't return quite as much as asked for
			
 
				+        return self.read(size - 1)
			
 
				+
			
 
				+
			
 
				+class ReceivableProtocolTests(BaseProtocolTests, TestCase):
			
 
				+
			
 
				+    def setUp(self):
			
 
				+        TestCase.setUp(self)
			
 
				+        self.rout = StringIO()
			
 
				+        self.rin = ReceivableStringIO()
			
 
				+        self.proto = ReceivableProtocol(self.rin.recv, self.rout.write)
			
 
				+        self.proto._rbufsize = 8
			
 
				+
			
 
				+    def test_recv(self):
			
 
				+        all_data = "1234567" * 10  # not a multiple of bufsize
			
 
				+        self.rin.write(all_data)
			
 
				+        self.rin.seek(0)
			
 
				+        data = ""
			
 
				+        # We ask for 8 bytes each time and actually read 7, so it should take
			
 
				+        # exactly 10 iterations.
			
 
				+        for _ in xrange(10):
			
 
				+            data += self.proto.recv(10)
			
 
				+        # any more reads would block
			
 
				+        self.assertRaises(AssertionError, self.proto.recv, 10)
			
 
				+        self.assertEquals(all_data, data)
			
 
				+
			
 
				+    def test_recv_read(self):
			
 
				+        all_data = "1234567"  # recv exactly in one call
			
 
				+        self.rin.write(all_data)
			
 
				+        self.rin.seek(0)
			
 
				+        self.assertEquals("1234", self.proto.recv(4))
			
 
				+        self.assertEquals("567", self.proto.read(3))
			
 
				+        self.assertRaises(AssertionError, self.proto.recv, 10)
			
 
				+
			
 
				+    def test_read_recv(self):
			
 
				+        all_data = "12345678abcdefg"
			
 
				+        self.rin.write(all_data)
			
 
				+        self.rin.seek(0)
			
 
				+        self.assertEquals("1234", self.proto.read(4))
			
 
				+        self.assertEquals("5678abc", self.proto.recv(8))
			
 
				+        self.assertEquals("defg", self.proto.read(4))
			
 
				+        self.assertRaises(AssertionError, self.proto.recv, 10)
			
 
				+
			
 
				+    def test_mixed(self):
			
 
				+        # arbitrary non-repeating string
			
 
				+        all_data = ",".join(str(i) for i in xrange(100))
			
 
				+        self.rin.write(all_data)
			
 
				+        self.rin.seek(0)
			
 
				+        data = ""
			
 
				+
			
 
				+        for i in xrange(1, 100):
			
 
				+            data += self.proto.recv(i)
			
 
				+            # if we get to the end, do a non-blocking read instead of blocking
			
 
				+            if len(data) + i > len(all_data):
			
 
				+                data += self.proto.recv(i)
			
 
				+                # ReceivableStringIO leaves off the last byte unless we ask
			
 
				+                # nicely
			
 
				+                data += self.proto.recv(1)
			
 
				+                break
			
 
				+            else:
			
 
				+                data += self.proto.read(i)
			
 
				+        else:
			
 
				+            # didn't break, something must have gone wrong
			
 
				+            self.fail()
			
 
				+
			
 
				+        self.assertEquals(all_data, data)
			
 
				+
			
 
				+
			
 
				 class CapabilitiesTestCase(TestCase):
			
 
				 
			
 
				     def test_plain(self):
			
--- a/dulwich/tests/test_server.py
+++ b/dulwich/tests/test_server.py
@@ -79,7 +79,7 @@ class TestProto(object):
 
				 class HandlerTestCase(TestCase):
			
 
				 
			
 
				     def setUp(self):
			
 
				-        self._handler = Handler(Backend(), None, None)
			
 
				+        self._handler = Handler(Backend(), None)
			
 
				         self._handler.capabilities = lambda: ('cap1', 'cap2', 'cap3')
			
 
				         self._handler.required_capabilities = lambda: ('cap2',)
			
 
				 
			
--- a/dulwich/tests/test_web.py
+++ b/dulwich/tests/test_web.py
@@ -153,28 +153,16 @@ class DumbHandlersTestCase(WebTestCase):
 
				 
			
 
				 class SmartHandlersTestCase(WebTestCase):
			
 
				 
			
 
				-    class TestProtocol(object):
			
 
				-        def __init__(self, handler):
			
 
				-            self._handler = handler
			
 
				-
			
 
				-        def write_pkt_line(self, line):
			
 
				-            if line is None:
			
 
				-                self._handler.write('flush-pkt\n')
			
 
				-            else:
			
 
				-                self._handler.write('pkt-line: %s' % line)
			
 
				-
			
 
				     class _TestUploadPackHandler(object):
			
 
				-        def __init__(self, backend, args, read, write, stateless_rpc=False,
			
 
				+        def __init__(self, backend, args, proto, stateless_rpc=False,
			
 
				                      advertise_refs=False):
			
 
				             self.args = args
			
 
				-            self.read = read
			
 
				-            self.write = write
			
 
				-            self.proto = SmartHandlersTestCase.TestProtocol(self)
			
 
				+            self.proto = proto
			
 
				             self.stateless_rpc = stateless_rpc
			
 
				             self.advertise_refs = advertise_refs
			
 
				 
			
 
				         def handle(self):
			
 
				-            self.write('handled input: %s' % self.read())
			
 
				+            self.proto.write('handled input: %s' % self.proto.recv(1024))
			
 
				 
			
 
				     def _MakeHandler(self, *args, **kwargs):
			
 
				         self._handler = self._TestUploadPackHandler(*args, **kwargs)
			
@@ -222,8 +210,8 @@ class SmartHandlersTestCase(WebTestCase):
 
				         mat = re.search('.*', '/git-upload-pack')
			
 
				         output = ''.join(get_info_refs(self._req, 'backend', mat,
			
 
				                                        services=self.services()))
			
 
				-        self.assertEquals(('pkt-line: # service=git-upload-pack\n'
			
 
				-                           'flush-pkt\n'
			
 
				+        self.assertEquals(('001e# service=git-upload-pack\n'
			
 
				+                           '0000'
			
 
				                            # input is ignored by the handler
			
 
				                            'handled input: '), output)
			
 
				         self.assertTrue(self._handler.advertise_refs)
			
--- a/dulwich/web.py
+++ b/dulwich/web.py
@@ -26,6 +26,9 @@ try:
 
				     from urlparse import parse_qs
			
 
				 except ImportError:
			
 
				     from dulwich.misc import parse_qs
			
 
				+from dulwich.protocol import (
			
 
				+    ReceivableProtocol,
			
 
				+    )
			
 
				 from dulwich.server import (
			
 
				     ReceivePackHandler,
			
 
				     UploadPackHandler,
			
@@ -138,9 +141,8 @@ def get_info_refs(req, backend, mat, services=None):
 
				         req.nocache()
			
 
				         req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
			
 
				         output = StringIO()
			
 
				-        dummy_input = StringIO()  # GET request, handler doesn't need to read
			
 
				-        handler = handler_cls(backend, [url_prefix(mat)],
			
 
				-                              dummy_input.read, output.write,
			
 
				+        proto = ReceivableProtocol(StringIO().read, output.write)
			
 
				+        handler = handler_cls(backend, [url_prefix(mat)], proto,
			
 
				                               stateless_rpc=True, advertise_refs=True)
			
 
				         handler.proto.write_pkt_line('# service=%s\n' % service)
			
 
				         handler.proto.write_pkt_line(None)
			
@@ -216,8 +218,8 @@ def handle_service_request(req, backend, mat, services=None):
 
				     # content-length
			
 
				     if 'CONTENT_LENGTH' in req.environ:
			
 
				         input = _LengthLimitedFile(input, int(req.environ['CONTENT_LENGTH']))
			
 
				-    handler = handler_cls(backend, [url_prefix(mat)], input.read, output.write,
			
 
				-                          stateless_rpc=True)
			
 
				+    proto = ReceivableProtocol(input.read, output.write)
			
 
				+    handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
			
 
				     handler.handle()
			
 
				     yield output.getvalue()