
* New upstream snapshot.
+ Adds support for IPv6 to git:// client. Closes: #631490
+ Fixes use of ~username in git:// URLs. Closes: #631483

Jelmer Vernooij, 13 years ago
parent commit e6bae0027c

+ 76 - 0
NEWS

@@ -1,3 +1,79 @@
+0.8.0	UNRELEASED
+
+ FEATURES
+
+  * New DeltaChainIterator abstract class for quickly iterating all objects in
+    a pack, with implementations for pack indexing and inflation.
+    (Dave Borowitz)
+
+ BUG FIXES
+
+  * Avoid storing all objects in memory when writing pack.
+    (Jelmer Vernooij, #813268)
+
+  * Support IPv6 for git:// connections. (Jelmer Vernooij, #801543)
+
+  * Improve performance of Repo.revision_history(). (Timo Schmid, #535118)
+
+  * Fix use of SubprocessWrapper on Windows. (Paulo Madeira, #670035)
+
+  * Fix compilation on newer versions of Mac OS X (Lion and up). (Ryan McKern, #794543)
+
+  * Prevent raising ValueError for correct refs in RefContainer.__delitem__.
+
+  * Correctly return a tuple from MemoryObjectStore.get_raw. (Dave Borowitz)
+
+  * Fix a bug in reading the pack checksum when there are fewer than 20 bytes
+    left in the buffer. (Dave Borowitz)
+
+  * Support ~ in git:// URL paths. (Jelmer Vernooij, #813555)
+
+ API CHANGES
+
+  * write_pack no longer takes the num_objects argument and requires an object
+    to be passed in that is iterable (rather than an iterator) and that
+    provides __len__.  (Jelmer Vernooij)
+
+  * write_pack_data has been renamed to write_pack_objects and no longer takes a
+    num_objects argument. (Jelmer Vernooij)
+
+  * take_msb_bytes, read_zlib_chunks, unpack_objects, and
+    PackStreamReader.read_objects now take an additional argument indicating a
+    crc32 to compute. (Dave Borowitz)
+
+  * PackObjectIterator was removed; its functionality is still exposed by
+    PackData.iterobjects. (Dave Borowitz)
+
+  * Add a sha arg to write_pack_object to incrementally compute a SHA.
+    (Dave Borowitz)
+
+  * Include offset in PackStreamReader results. (Dave Borowitz)
+
+  * Move PackStreamReader from server to pack. (Dave Borowitz)
+
+  * Extract a check_length_and_checksum, compute_file_sha, and
+    pack_object_header pack helper functions. (Dave Borowitz)
+
+  * Extract a compute_file_sha function. (Dave Borowitz)
+
+  * Remove move_in_thin_pack as a separate method; add_thin_pack now completes
+    the thin pack and moves it in in one step. Remove ThinPackData as well.
+    (Dave Borowitz)
+
+  * Custom buffer size in read_zlib_chunks. (Dave Borowitz)
+
+  * New UnpackedObject data class that replaces ad-hoc tuples in the return
+    value of unpack_object and various DeltaChainIterator methods.
+    (Dave Borowitz)
+
+ TEST CHANGES
+
+  * If setuptools is installed, "python setup.py test" will now run the test suite.
+    (Jelmer Vernooij)
+
+  * Add a new build_pack test utility for building packs from a simple spec.
+    (Dave Borowitz)
+
 0.7.1	2011-04-12
 
  BUG FIXES
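
To make the write_pack change under API CHANGES concrete, here is a minimal sketch (hypothetical blob contents and pack base name; it mirrors the calls in the updated tests further down):

    from dulwich.objects import Blob
    from dulwich.pack import write_pack

    b1 = Blob.from_string('hello')
    b2 = Blob.from_string('world')
    # objects is now a sized sequence of (object, path) tuples; the old
    # num_objects argument is gone and a bare iterator is rejected.
    write_pack('/tmp/example', [(b1, None), (b2, None)])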

+ 8 - 0
debian/changelog

@@ -1,3 +1,11 @@
+dulwich (0.8.0~bzr873-1) UNRELEASED; urgency=low
+
+  * New upstream snapshot.
+   + Adds support for IPv6 to git:// client. Closes: #631490
+   + Fixes use of ~username in git:// URLs. Closes: #631483
+
+ -- Jelmer Vernooij <jelmer@debian.org>  Fri, 29 Jul 2011 12:10:15 +0200
+
 dulwich (0.7.1-2) unstable; urgency=low
 
   * Explicitly make debhelper use dh_python2. Closes: #626414

+ 1 - 1
dulwich/__init__.py

@@ -23,4 +23,4 @@
 
 from dulwich import (client, protocol, repo, server)
 
-__version__ = (0, 7, 1)
+__version__ = (0, 8, 0)

+ 115 - 89
dulwich/_compat.py

@@ -32,8 +32,9 @@ except ImportError:
     from cgi import parse_qs
 
 try:
-    from os import SEEK_END
+    from os import SEEK_CUR, SEEK_END
 except ImportError:
+    SEEK_CUR = 1
     SEEK_END = 2
 
 import struct
@@ -137,94 +138,119 @@ except ImportError:
 
 try:
     from collections import namedtuple
-
-    TreeEntryTuple = namedtuple('TreeEntryTuple', ['path', 'mode', 'sha'])
-    TreeChangeTuple = namedtuple('TreeChangeTuple', ['type', 'old', 'new'])
 except ImportError:
-    # Provide manual implementations of namedtuples for Python <2.5.
-    # If the class definitions change, be sure to keep these in sync by running
-    # namedtuple(..., verbose=True) in a recent Python and pasting the output.
-
-    # Necessary globals go here.
-    _tuple = tuple
-    _property = property
+    # Recipe for namedtuple from http://code.activestate.com/recipes/500261/
+    # Copyright (c) 2007 Python Software Foundation; All Rights Reserved
+    # Licensed under the Python Software Foundation License.
     from operator import itemgetter as _itemgetter
+    from keyword import iskeyword as _iskeyword
+    import sys as _sys
+
+    def namedtuple(typename, field_names, verbose=False, rename=False):
+        """Returns a new subclass of tuple with named fields.
+
+        >>> Point = namedtuple('Point', 'x y')
+        >>> Point.__doc__                   # docstring for the new class
+        'Point(x, y)'
+        >>> p = Point(11, y=22)             # instantiate with positional args or keywords
+        >>> p[0] + p[1]                     # indexable like a plain tuple
+        33
+        >>> x, y = p                        # unpack like a regular tuple
+        >>> x, y
+        (11, 22)
+        >>> p.x + p.y                       # fields also accessible by name
+        33
+        >>> d = p._asdict()                 # convert to a dictionary
+        >>> d['x']
+        11
+        >>> Point(**d)                      # convert from a dictionary
+        Point(x=11, y=22)
+        >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
+        Point(x=100, y=22)
+
+        """
+
+        # Parse and validate the field names.  Validation serves two purposes,
+        # generating informative error messages and preventing template injection attacks.
+        if isinstance(field_names, basestring):
+            field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
+        field_names = tuple(map(str, field_names))
+        if rename:
+            names = list(field_names)
+            seen = set()
+            for i, name in enumerate(names):
+                if (not min(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
+                    or not name or name[0].isdigit() or name.startswith('_')
+                    or name in seen):
+                        names[i] = '_%d' % i
+                seen.add(name)
+            field_names = tuple(names)
+        for name in (typename,) + field_names:
+            if not min(c.isalnum() or c=='_' for c in name):
+                raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
+            if _iskeyword(name):
+                raise ValueError('Type names and field names cannot be a keyword: %r' % name)
+            if name[0].isdigit():
+                raise ValueError('Type names and field names cannot start with a number: %r' % name)
+        seen_names = set()
+        for name in field_names:
+            if name.startswith('_') and not rename:
+                raise ValueError('Field names cannot start with an underscore: %r' % name)
+            if name in seen_names:
+                raise ValueError('Encountered duplicate field name: %r' % name)
+            seen_names.add(name)
+
+        # Create and fill-in the class template
+        numfields = len(field_names)
+        argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
+        reprtxt = ', '.join('%s=%%r' % name for name in field_names)
+        template = '''class %(typename)s(tuple):
+        '%(typename)s(%(argtxt)s)' \n
+        __slots__ = () \n
+        _fields = %(field_names)r \n
+        def __new__(_cls, %(argtxt)s):
+            return _tuple.__new__(_cls, (%(argtxt)s)) \n
+        @classmethod
+        def _make(cls, iterable, new=tuple.__new__, len=len):
+            'Make a new %(typename)s object from a sequence or iterable'
+            result = new(cls, iterable)
+            if len(result) != %(numfields)d:
+                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
+            return result \n
+        def __repr__(self):
+            return '%(typename)s(%(reprtxt)s)' %% self \n
+        def _asdict(self):
+            'Return a new dict which maps field names to their values'
+            return dict(zip(self._fields, self)) \n
+        def _replace(_self, **kwds):
+            'Return a new %(typename)s object replacing specified fields with new values'
+            result = _self._make(map(kwds.pop, %(field_names)r, _self))
+            if kwds:
+                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
+            return result \n
+        def __getnewargs__(self):
+            return tuple(self) \n\n''' % locals()
+        for i, name in enumerate(field_names):
+            template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
+        if verbose:
+            print template
+
+        # Execute the template string in a temporary namespace
+        namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
+                         _property=property, _tuple=tuple)
+        try:
+            exec template in namespace
+        except SyntaxError, e:
+            raise SyntaxError(e.message + ':\n' + template)
+        result = namespace[typename]
+
+        # For pickling to work, the __module__ variable needs to be set to the frame
+        # where the named tuple is created.  Bypass this step in environments where
+        # sys._getframe is not defined (Jython for example) or sys._getframe is not
+        # defined for arguments greater than 0 (IronPython).
+        try:
+            result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
+        except (AttributeError, ValueError):
+            pass
 
-    class TreeEntryTuple(tuple):
-            'TreeEntryTuple(path, mode, sha)'
-
-            __slots__ = ()
-
-            _fields = ('path', 'mode', 'sha')
-
-            def __new__(_cls, path, mode, sha):
-                return _tuple.__new__(_cls, (path, mode, sha))
-
-            @classmethod
-            def _make(cls, iterable, new=tuple.__new__, len=len):
-                'Make a new TreeEntryTuple object from a sequence or iterable'
-                result = new(cls, iterable)
-                if len(result) != 3:
-                    raise TypeError('Expected 3 arguments, got %d' % len(result))
-                return result
-
-            def __repr__(self):
-                return 'TreeEntryTuple(path=%r, mode=%r, sha=%r)' % self
-
-            def _asdict(t):
-                'Return a new dict which maps field names to their values'
-                return {'path': t[0], 'mode': t[1], 'sha': t[2]}
-
-            def _replace(_self, **kwds):
-                'Return a new TreeEntryTuple object replacing specified fields with new values'
-                result = _self._make(map(kwds.pop, ('path', 'mode', 'sha'), _self))
-                if kwds:
-                    raise ValueError('Got unexpected field names: %r' % kwds.keys())
-                return result
-
-            def __getnewargs__(self):
-                return tuple(self)
-
-            path = _property(_itemgetter(0))
-            mode = _property(_itemgetter(1))
-            sha = _property(_itemgetter(2))
-
-
-    class TreeChangeTuple(tuple):
-            'TreeChangeTuple(type, old, new)'
-
-            __slots__ = ()
-
-            _fields = ('type', 'old', 'new')
-
-            def __new__(_cls, type, old, new):
-                return _tuple.__new__(_cls, (type, old, new))
-
-            @classmethod
-            def _make(cls, iterable, new=tuple.__new__, len=len):
-                'Make a new TreeChangeTuple object from a sequence or iterable'
-                result = new(cls, iterable)
-                if len(result) != 3:
-                    raise TypeError('Expected 3 arguments, got %d' % len(result))
-                return result
-
-            def __repr__(self):
-                return 'TreeChangeTuple(type=%r, old=%r, new=%r)' % self
-
-            def _asdict(t):
-                'Return a new dict which maps field names to their values'
-                return {'type': t[0], 'old': t[1], 'new': t[2]}
-
-            def _replace(_self, **kwds):
-                'Return a new TreeChangeTuple object replacing specified fields with new values'
-                result = _self._make(map(kwds.pop, ('type', 'old', 'new'), _self))
-                if kwds:
-                    raise ValueError('Got unexpected field names: %r' % kwds.keys())
-                return result
-
-            def __getnewargs__(self):
-                return tuple(self)
-
-            type = _property(_itemgetter(0))
-            old = _property(_itemgetter(1))
-            new = _property(_itemgetter(2))
+        return result
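
The fallback above keeps dulwich's namedtuple-based classes working on Python 2.4/2.5. A small sketch of how it is consumed, mirroring what objects.py and diff_tree.py do below (field values are illustrative):

    from dulwich._compat import namedtuple

    # On Python >= 2.6 this is the stdlib namedtuple; otherwise it is
    # the ActiveState recipe inlined above.
    TreeEntryTuple = namedtuple('TreeEntryTuple', ['path', 'mode', 'sha'])
    entry = TreeEntryTuple('foo', 0100644, 'a' * 40)
    assert entry.path == entry[0] == 'foo'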

+ 1 - 1
dulwich/_objects.c

@@ -25,7 +25,7 @@
 typedef int Py_ssize_t;
 #endif
 
-#if defined(__MINGW32_VERSION) || defined(__APPLE__)
+#if defined(__MINGW32_VERSION) || (defined(__APPLE__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1070)
 size_t strnlen(char *text, size_t maxlen)
 {
 	const char *last = memchr(text, '\0', maxlen);

+ 38 - 12
dulwich/client.py

@@ -23,9 +23,11 @@ __docformat__ = 'restructuredText'
 
 import select
 import socket
+import subprocess
 import urlparse
 
 from dulwich.errors import (
+    GitProtocolError,
     SendPackError,
     UpdateRefsError,
     )
@@ -36,7 +38,7 @@ from dulwich.protocol import (
     extract_capabilities,
     )
 from dulwich.pack import (
-    write_pack_data,
+    write_pack_objects,
     )
 
 
@@ -89,12 +91,14 @@ class GitClient(object):
         """
         raise NotImplementedError()
 
-    def read_refs(self, proto):
+    def _read_refs(self, proto):
         server_capabilities = None
         refs = {}
         # Receive refs from server
         for pkt in proto.read_pkt_seq():
             (sha, ref) = pkt.rstrip('\n').split(' ', 1)
+            if sha == 'ERR':
+                raise GitProtocolError(ref)
             if server_capabilities is None:
                 (ref, server_capabilities) = extract_capabilities(ref)
             refs[ref] = sha
@@ -144,15 +148,15 @@ class GitClient(object):
         """Upload a pack to a remote repository.
 
         :param path: Repository path
-        :param generate_pack_contents: Function that can return the shas of the
-            objects to upload.
+        :param generate_pack_contents: Function that can return a sequence of the
+            shas of the objects to upload.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
                                  and rejects ref updates
         """
         proto, unused_can_read = self._connect('receive-pack', path)
-        old_refs, server_capabilities = self.read_refs(proto)
+        old_refs, server_capabilities = self._read_refs(proto)
         if 'report-status' not in server_capabilities:
             self._send_capabilities.remove('report-status')
         new_refs = determine_wants(old_refs)
@@ -180,8 +184,7 @@ class GitClient(object):
         if not want:
             return new_refs
         objects = generate_pack_contents(have, want)
-        entries, sha = write_pack_data(proto.write_file(), objects,
-                                       len(objects))
+        entries, sha = write_pack_objects(proto.write_file(), objects)
 
         if 'report-status' in self._send_capabilities:
             self._parse_status_report(proto)
@@ -220,7 +223,7 @@ class GitClient(object):
         :param progress: Callback for progress reports (strings)
         """
         proto, can_read = self._connect('upload-pack', path)
-        (refs, server_capabilities) = self.read_refs(proto)
+        (refs, server_capabilities) = self._read_refs(proto)
         wants = determine_wants(refs)
         if not wants:
             proto.write_pkt_line(None)
@@ -276,14 +279,30 @@ class TCPGitClient(GitClient):
         GitClient.__init__(self, *args, **kwargs)
 
     def _connect(self, cmd, path):
-        s = socket.socket(type=socket.SOCK_STREAM)
-        s.connect((self._host, self._port))
+        sockaddrs = socket.getaddrinfo(self._host, self._port,
+            socket.AF_UNSPEC, socket.SOCK_STREAM)
+        s = None
+        err = socket.error("no address found for %s" % self._host)
+        for (family, socktype, proto, canonname, sockaddr) in sockaddrs:
+            s = socket.socket(family, socktype, proto)
+            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+            try:
+                s.connect(sockaddr)
+                break
+            except socket.error, err:
+                if s is not None:
+                    s.close()
+                s = None
+        if s is None:
+            raise err
         # -1 means system default buffering
         rfile = s.makefile('rb', -1)
         # 0 means unbuffered
         wfile = s.makefile('wb', 0)
         proto = Protocol(rfile.read, wfile.write,
                          report_activity=self._report_activity)
+        if path.startswith("/~"):
+            path = path[1:]
         proto.send_cmd('git-%s' % cmd, path, 'host=%s' % self._host)
         return proto, lambda: _fileno_can_read(s)
 
@@ -297,7 +316,13 @@ class SubprocessWrapper(object):
         self.write = proc.stdin.write
 
     def can_read(self):
-        return _fileno_can_read(self.proc.stdout.fileno())
+        if subprocess.mswindows:
+            from msvcrt import get_osfhandle
+            from win32pipe import PeekNamedPipe
+            handle = get_osfhandle(self.proc.stdout.fileno())
+            return PeekNamedPipe(handle, 0)[2] != 0
+        else:
+            return _fileno_can_read(self.proc.stdout.fileno())
 
     def close(self):
         self.proc.stdin.close()
@@ -358,7 +383,8 @@ class SSHGitClient(GitClient):
         con = get_ssh_vendor().connect_ssh(
             self.host, ["%s '%s'" % (self._get_cmd_path(cmd), path)],
             port=self.port, username=self.username)
-        return Protocol(con.read, con.write), con.can_read
+        return (Protocol(con.read, con.write, report_activity=self._report_activity),
+                con.can_read)
 
 
 def get_transport_and_path(uri):
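
A short sketch of the client-side effect of the two fixes in this file (host address and repository URL are hypothetical):

    from dulwich.client import TCPGitClient, get_transport_and_path

    # _connect() now walks getaddrinfo() results with AF_UNSPEC, so
    # IPv6-only hosts work too:
    tcp_client = TCPGitClient('2001:db8::1')

    # A leading "/~" is stripped before sending the command, so the
    # server sees "~user/repo.git" rather than "/~user/repo.git":
    client, path = get_transport_and_path('git://example.com/~user/repo.git')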

+ 3 - 3
dulwich/diff_tree.py

@@ -24,7 +24,7 @@ import stat
 
 from dulwich._compat import (
     defaultdict,
-    TreeChangeTuple,
+    namedtuple,
     )
 from dulwich.objects import (
     S_ISGITLINK,
@@ -47,8 +47,8 @@ _MAX_FILES = 200
 _REWRITE_THRESHOLD = None
 
 
-class TreeChange(TreeChangeTuple):
-    """Class encapsulating a single change between two trees."""
+class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])):
+    """Named tuple a single change between two trees."""
 
     @classmethod
     def add(cls, new):
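
Because TreeChange is now a real namedtuple subclass, fields are reachable by name and by index alike. A minimal sketch (the entry values are illustrative):

    from dulwich.diff_tree import CHANGE_ADD, TreeChange
    from dulwich.objects import TreeEntry

    entry = TreeEntry('foo', 0100644, 'a' * 40)
    change = TreeChange(CHANGE_ADD, None, entry)
    assert change.type == change[0] == CHANGE_ADD
    assert change.new.path == 'foo'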

+ 80 - 42
dulwich/object_store.py

@@ -20,6 +20,7 @@
 """Git object store interfaces and implementation."""
 
 
+from cStringIO import StringIO
 import errno
 import itertools
 import os
@@ -27,6 +28,11 @@ import stat
 import tempfile
 import urllib2
 
+from dulwich._compat import (
+    make_sha,
+    SEEK_END,
+    SEEK_CUR,
+    )
 from dulwich.diff_tree import (
     tree_changes,
     walk_trees,
@@ -50,12 +56,17 @@ from dulwich.objects import (
 from dulwich.pack import (
     Pack,
     PackData,
-    ThinPackData,
+    obj_sha,
     iter_sha1,
     load_pack_index,
     write_pack,
-    write_pack_data,
+    write_pack_header,
     write_pack_index_v2,
+    write_pack_object,
+    write_pack_objects,
+    compute_file_sha,
+    PackIndexer,
+    PackStreamCopier,
     )
 
 INFODIR = 'info'
@@ -271,7 +282,7 @@ class PackBasedObjectStore(BaseObjectStore):
         objects = set()
         for sha in self._iter_loose_objects():
             objects.add((self._get_loose_object(sha), None))
-        self.add_objects(objects)
+        self.add_objects(list(objects))
         for obj, path in objects:
             self._remove_loose_object(obj.id)
         return len(objects)
@@ -321,7 +332,7 @@ class PackBasedObjectStore(BaseObjectStore):
             # Don't bother writing an empty pack file
             return
         f, commit = self.add_pack()
-        write_pack_data(f, objects, len(objects))
+        write_pack_objects(f, objects)
         return commit()
 
 
@@ -387,38 +398,82 @@ class DiskObjectStore(PackBasedObjectStore):
     def _remove_loose_object(self, sha):
         os.remove(self._get_shafile_path(sha))
 
-    def move_in_thin_pack(self, path):
+    def _complete_thin_pack(self, f, path, copier, indexer):
         """Move a specific file containing a pack into the pack directory.
 
         :note: The file should be on the same file system as the
             packs directory.
 
+        :param f: Open file object for the pack.
         :param path: Path to the pack file.
+        :param copier: A PackStreamCopier to use for writing pack data.
+        :param indexer: A PackIndexer for indexing the pack.
         """
-        data = ThinPackData(self.get_raw, path)
-
-        # Write index for the thin pack (do we really need this?)
-        temppath = os.path.join(self.pack_dir,
-            sha_to_hex(urllib2.randombytes(20))+".tempidx")
-        data.create_index_v2(temppath)
-        p = Pack.from_objects(data, load_pack_index(temppath))
-
+        entries = list(indexer)
+
+        # Update the header with the new number of objects.
+        f.seek(0)
+        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+        # Rescan the rest of the pack, computing the SHA with the new header.
+        new_sha = compute_file_sha(f, end_ofs=-20)
+
+        # Complete the pack.
+        for ext_sha in indexer.ext_refs():
+            type_num, data = self.get_raw(ext_sha)
+            offset = f.tell()
+            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+            entries.append((ext_sha, offset, crc32))
+        pack_sha = new_sha.digest()
+        f.write(pack_sha)
+        f.close()
+
+        # Move the pack in.
+        entries.sort()
+        pack_base_name = os.path.join(
+          self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
+        os.rename(path, pack_base_name + '.pack')
+
+        # Write the index.
+        index_file = GitFile(pack_base_name + '.idx', 'wb')
         try:
-            # Write a full pack version
-            temppath = os.path.join(self.pack_dir,
-                sha_to_hex(urllib2.randombytes(20))+".temppack")
-            write_pack(temppath, ((o, None) for o in p.iterobjects()), len(p))
+            write_pack_index_v2(index_file, entries, pack_sha)
+            index_file.close()
         finally:
-            p.close()
+            index_file.abort()
 
-        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
-        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
-        os.rename(temppath+".pack", newbasename+".pack")
-        os.rename(temppath+".idx", newbasename+".idx")
-        final_pack = Pack(newbasename)
+        # Add the pack to the store and return it.
+        final_pack = Pack(pack_base_name)
+        final_pack.check_length_and_checksum()
         self._add_known_pack(final_pack)
         return final_pack
 
+    def add_thin_pack(self, read_all, read_some):
+        """Add a new thin pack to this object store.
+
+        Thin packs are packs that contain deltas with parents that exist outside
+        the pack. They should never be placed in the object store directly, and
+        always indexed and completed as they are copied.
+
+        :param read_all: Read function that blocks until the number of requested
+            bytes are read.
+        :param read_some: Read function that returns at least one byte, but may
+            not return the number of bytes requested.
+        :return: A Pack object pointing at the now-completed thin pack in the
+            objects/pack directory.
+        """
+        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
+        f = os.fdopen(fd, 'w+b')
+
+        try:
+            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+            copier = PackStreamCopier(read_all, read_some, f,
+                                      delta_iter=indexer)
+            copier.verify()
+            return self._complete_thin_pack(f, path, copier, indexer)
+        finally:
+            f.close()
+
     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
 
@@ -442,24 +497,6 @@ class DiskObjectStore(PackBasedObjectStore):
         self._add_known_pack(final_pack)
         return final_pack
 
-    def add_thin_pack(self):
-        """Add a new thin pack to this object store.
-
-        Thin packs are packs that contain deltas with parents that exist
-        in a different pack.
-        """
-        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
-        f = os.fdopen(fd, 'wb')
-        def commit():
-            os.fsync(fd)
-            f.close()
-            if os.path.getsize(path) > 0:
-                return self.move_in_thin_pack(path)
-            else:
-                os.remove(path)
-                return None
-        return f, commit
-
     def add_pack(self):
         """Add a new pack to this object store.
 
@@ -540,7 +577,8 @@ class MemoryObjectStore(BaseObjectStore):
         :param name: sha for the object.
         :return: tuple with numeric type and object contents.
         """
-        return self[name].as_raw_string()
+        obj = self[name]
+        return obj.type_num, obj.as_raw_string()
 
     def __getitem__(self, name):
         return self._data[name]
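
The get_raw fix above is easiest to see directly; a small sketch mirroring the test_get_raw test added below:

    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob

    store = MemoryObjectStore()
    blob = Blob.from_string('yummy data')
    store.add_object(blob)
    # get_raw now returns (type_num, raw_bytes) instead of the object's
    # as_raw_string() alone:
    type_num, raw = store.get_raw(blob.id)
    assert (type_num, raw) == (Blob.type_num, 'yummy data')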

+ 3 - 3
dulwich/objects.py

@@ -41,7 +41,7 @@ from dulwich.errors import (
 from dulwich.file import GitFile
 from dulwich._compat import (
     make_sha,
-    TreeEntryTuple,
+    namedtuple,
     )
 
 ZERO_SHA = "0" * 40
@@ -688,8 +688,8 @@ class Tag(ShaFile):
     message = serializable_property("message", "The message attached to this tag")
 
 
-class TreeEntry(TreeEntryTuple):
-    """Namedtuple encapsulating a single tree entry."""
+class TreeEntry(namedtuple('TreeEntry', ['path', 'mode', 'sha'])):
+    """Named tuple encapsulating a single tree entry."""
 
     def in_path(self, path):
         """Return a copy of this entry with the given path prepended."""

File diff suppressed because it is too large
+ 506 - 212
dulwich/pack.py


+ 32 - 20
dulwich/repo.py

@@ -962,28 +962,39 @@ class BaseRepo(object):
 
         XXX: work out how to handle merges.
         """
-        # We build the list backwards, as parents are more likely to be older
-        # than children
-        pending_commits = [head]
-        history = []
+
+        try:
+            commit = self[head]
+        except KeyError:
+            raise MissingCommitError(head)
+
+        if type(commit) != Commit:
+            raise NotCommitError(commit)
+        pending_commits = [commit]
+
+        history = set()
+
         while pending_commits != []:
-            head = pending_commits.pop(0)
-            try:
-                commit = self[head]
-            except KeyError:
-                raise MissingCommitError(head)
-            if type(commit) != Commit:
-                raise NotCommitError(commit)
+            commit = pending_commits.pop(0)
+
             if commit in history:
                 continue
-            i = 0
-            for known_commit in history:
-                if known_commit.commit_time > commit.commit_time:
-                    break
-                i += 1
-            history.insert(i, commit)
-            pending_commits += commit.parents
-        history.reverse()
+
+            history.add(commit)
+
+            for parent in commit.parents:
+                try:
+                    commit = self[parent]
+                except KeyError:
+                    raise MissingCommitError(head)
+
+                if type(commit) != Commit:
+                    raise NotCommitError(commit)
+
+                pending_commits.append(commit)
+
+        history = list(history)
+        history.sort(key=lambda c:c.commit_time, reverse=True)
         return history
 
     def __getitem__(self, name):
@@ -1017,7 +1028,8 @@ class BaseRepo(object):
     def __delitem__(self, name):
         if name.startswith("refs") or name == "HEAD":
             del self.refs[name]
-        raise ValueError(name)
+        else:
+            raise ValueError(name)
 
     def do_commit(self, message, committer=None,
                   author=None, commit_timestamp=None,
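
Two behavioural notes on the repo changes above, as a sketch (repository path and ref name are hypothetical):

    from dulwich.repo import Repo

    repo = Repo('/path/to/repo')  # hypothetical path
    # revision_history now collects commits into a set and sorts once
    # by commit_time, newest first, instead of an insertion sort:
    history = repo.revision_history(repo.head())
    # Previously __delitem__ deleted the ref and then raised ValueError
    # anyway; now it only raises for names outside refs/ and HEAD:
    del repo['refs/heads/old-branch']  # hypothetical ref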

+ 4 - 36
dulwich/server.py

@@ -44,8 +44,7 @@ from dulwich.objects import (
     hex_to_sha,
     )
 from dulwich.pack import (
-    PackStreamReader,
-    write_pack_data,
+    write_pack_objects,
     )
 from dulwich.protocol import (
     BufferedPktLineWriter,
@@ -118,32 +117,6 @@ class BackendRepo(object):
         raise NotImplementedError
 
 
-class PackStreamCopier(PackStreamReader):
-    """Class to verify a pack stream as it is being read.
-
-    The pack is read from a ReceivableProtocol using read() or recv() as
-    appropriate and written out to the given file-like object.
-    """
-
-    def __init__(self, read_all, read_some, outfile):
-        super(PackStreamCopier, self).__init__(read_all, read_some)
-        self.outfile = outfile
-
-    def _read(self, read, size):
-        data = super(PackStreamCopier, self)._read(read, size)
-        self.outfile.write(data)
-        return data
-
-    def verify(self):
-        """Verify a pack stream and write it to the output file.
-
-        See PackStreamReader.iterobjects for a list of exceptions this may
-        throw.
-        """
-        for _, _, _ in self.read_objects():
-            pass
-
-
 class DictBackend(Backend):
     """Trivial backend that looks up Git repositories in a dictionary."""
 
@@ -280,8 +253,7 @@ class UploadPackHandler(Handler):
 
         self.progress("dul-daemon says what\n")
         self.progress("counting objects: %d, done.\n" % len(objects_iter))
-        write_pack_data(ProtocolFile(None, write), objects_iter, 
-                        len(objects_iter))
+        write_pack_objects(ProtocolFile(None, write), objects_iter)
         self.progress("how was that, then?\n")
         # we are done
         self.proto.write("0000")
@@ -615,18 +587,14 @@ class ReceivePackHandler(Handler):
         return ("report-status", "delete-refs", "side-band-64k")
 
     def _apply_pack(self, refs):
-        f, commit = self.repo.object_store.add_thin_pack()
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
                           AssertionError, socket.error, zlib.error,
                           ObjectFormatException)
         status = []
         # TODO: more informative error messages than just the exception string
         try:
-            PackStreamCopier(self.proto.read, self.proto.recv, f).verify()
-            p = commit()
-            if not p:
-                raise IOError('Failed to write pack')
-            p.check()
+            p = self.repo.object_store.add_thin_pack(self.proto.read,
+                                                     self.proto.recv)
             status.append(('unpack', 'ok'))
         except all_exceptions, e:
             status.append(('unpack', str(e).replace('\n', '')))

+ 1 - 2
dulwich/tests/compat/test_pack.py

@@ -54,8 +54,7 @@ class TestPack(PackTests):
         origpack = self.get_pack(pack1_sha)
         self.assertSucceeds(origpack.index.check)
         pack_path = os.path.join(self._tempdir, "Elch")
-        write_pack(pack_path, [(x, "") for x in origpack.iterobjects()],
-                   len(origpack))
+        write_pack(pack_path, origpack.pack_tuples())
         output = run_git_or_fail(['verify-pack', '-v', pack_path])
 
         pack_shas = set()

+ 2 - 1
dulwich/tests/compat/utils.py

@@ -133,7 +133,8 @@ def run_git_or_fail(args, git_path=_DEFAULT_GIT, input=None, **popen_kwargs):
     popen_kwargs['stderr'] = subprocess.STDOUT
     returncode, stdout = run_git(args, git_path=git_path, input=input,
                                  capture_stdout=True, **popen_kwargs)
-    assert returncode == 0
+    assert returncode == 0, "git with args %r failed with %d" % (
+        args, returncode)
     return stdout
 
 

+ 27 - 6
dulwich/tests/test_object_store.py

@@ -19,6 +19,7 @@
 """Tests for the object store interface."""
 
 
+from cStringIO import StringIO
 import os
 import shutil
 import tempfile
@@ -30,6 +31,7 @@ from dulwich.errors import (
     NotTreeError,
     )
 from dulwich.objects import (
+    sha_to_hex,
     object_class,
     Blob,
     Tag,
@@ -42,13 +44,15 @@ from dulwich.object_store import (
     tree_lookup_path,
     )
 from dulwich.pack import (
-    write_pack_data,
+    REF_DELTA,
+    write_pack_objects,
     )
 from dulwich.tests import (
     TestCase,
     )
 from dulwich.tests.utils import (
     make_object,
+    build_pack,
     )
 
 
@@ -178,6 +182,11 @@ class ObjectStoreTests(object):
         for obj in [testobject, tag1, tag2, tag3]:
             self.assertEqual(testobject, self.store.peel_sha(obj.id))
 
+    def test_get_raw(self):
+        self.store.add_object(testobject)
+        self.assertEqual((Blob.type_num, 'yummy data'),
+                         self.store.get_raw(testobject.id))
+
 
 class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
 
@@ -226,15 +235,27 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         o = DiskObjectStore(self.store_dir)
         f, commit = o.add_pack()
         b = make_object(Blob, data="more yummy data")
-        write_pack_data(f, [(b, None)], 1)
+        write_pack_objects(f, [(b, None)])
         commit()
 
     def test_add_thin_pack(self):
         o = DiskObjectStore(self.store_dir)
-        f, commit = o.add_thin_pack()
-        b = make_object(Blob, data="more yummy data")
-        write_pack_data(f, [(b, None)], 1)
-        commit()
+        blob = make_object(Blob, data='yummy data')
+        o.add_object(blob)
+
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (blob.id, 'more yummy data')),
+          ], store=o)
+        pack = o.add_thin_pack(f.read, None)
+
+        packed_blob_sha = sha_to_hex(entries[0][3])
+        pack.check_length_and_checksum()
+        self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
+        self.assertTrue(o.contains_packed(packed_blob_sha))
+        self.assertTrue(o.contains_packed(blob.id))
+        self.assertEqual((Blob.type_num, 'more yummy data'),
+                         o.get_raw(packed_blob_sha))
 
 
 class TreeLookupPathTests(TestCase):

+ 437 - 40
dulwich/tests/test_pack.py

@@ -26,34 +26,57 @@ import shutil
 import tempfile
 import zlib
 
+from dulwich._compat import (
+    make_sha,
+    )
 from dulwich.errors import (
     ChecksumMismatch,
     )
 from dulwich.file import (
     GitFile,
     )
+from dulwich.object_store import (
+    MemoryObjectStore,
+    )
 from dulwich.objects import (
+    Blob,
     hex_to_sha,
     sha_to_hex,
+    Commit,
     Tree,
     )
 from dulwich.pack import (
+    OFS_DELTA,
+    REF_DELTA,
+    DELTA_TYPES,
     MemoryPackIndex,
     Pack,
     PackData,
-    ThinPackData,
     apply_delta,
     create_delta,
+    deltify_pack_objects,
     load_pack_index,
+    UnpackedObject,
     read_zlib_chunks,
     write_pack_header,
     write_pack_index_v1,
     write_pack_index_v2,
+    SHA1Writer,
+    write_pack_object,
     write_pack,
+    unpack_object,
+    compute_file_sha,
+    PackStreamReader,
+    DeltaChainIterator,
     )
 from dulwich.tests import (
     TestCase,
     )
from dulwich.tests.utils import (
+    make_object,
+    build_pack,
+    )
 
 pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'
 
@@ -170,21 +193,6 @@ class TestPackData(PackTests):
         path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
         PackData.from_file(open(path), os.path.getsize(path))
 
-    # TODO: more ThinPackData tests.
-    def test_thin_from_file(self):
-        test_sha = '1' * 40
-
-        def resolve(sha):
-            self.assertEqual(test_sha, sha)
-            return 3, 'data'
-
-        path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
-        data = ThinPackData.from_file(resolve, open(path),
-                                      os.path.getsize(path))
-        idx = self.get_pack_index(pack1_sha)
-        Pack.from_objects(data, idx)
-        self.assertEqual((None, 3, 'data'), data.get_ref(test_sha))
-
     def test_pack_len(self):
         p = self.get_pack_data(pack1_sha)
         self.assertEquals(3, len(p))
@@ -238,6 +246,20 @@ class TestPackData(PackTests):
         idx2 = self.get_pack_index(pack1_sha)
         self.assertEquals(idx1, idx2)
 
+    def test_compute_file_sha(self):
+        f = StringIO('abcd1234wxyz')
+        self.assertEqual(make_sha('abcd1234wxyz').hexdigest(),
+                         compute_file_sha(f).hexdigest())
+        self.assertEqual(make_sha('abcd1234wxyz').hexdigest(),
+                         compute_file_sha(f, buffer_size=5).hexdigest())
+        self.assertEqual(make_sha('abcd1234').hexdigest(),
+                         compute_file_sha(f, end_ofs=-4).hexdigest())
+        self.assertEqual(make_sha('1234wxyz').hexdigest(),
+                         compute_file_sha(f, start_ofs=4).hexdigest())
+        self.assertEqual(
+          make_sha('1234').hexdigest(),
+          compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest())
+
 
 class TestPack(PackTests):
 
@@ -257,6 +279,19 @@ class TestPack(PackTests):
         p = self.get_pack(pack1_sha)
         self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
 
+    def test_iterobjects_simple(self):
+        p = self.get_pack(pack1_sha)
+        expected = set([p[s] for s in [commit_sha, tree_sha, a_sha]])
+        self.assertEquals(expected, set(list(p.iterobjects())))
+
+    def test_pack_tuples(self):
+        p = self.get_pack(pack1_sha)
+        tuples = p.pack_tuples()
+        expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
+        self.assertEquals(expected, set(list(tuples)))
+        self.assertEquals(expected, set(list(tuples)))
+        self.assertEquals(3, len(tuples))
+
     def test_get_object_at(self):
         """Tests random access for non-delta objects"""
         p = self.get_pack(pack1_sha)
@@ -276,8 +311,7 @@ class TestPack(PackTests):
         try:
             self.assertSucceeds(origpack.index.check)
             basename = os.path.join(self.tempdir, 'Elch')
-            write_pack(basename, [(x, '') for x in origpack.iterobjects()],
-                       len(origpack))
+            write_pack(basename, origpack.pack_tuples())
             newpack = Pack(basename)
 
             try:
@@ -306,8 +340,7 @@ class TestPack(PackTests):
 
     def _copy_pack(self, origpack):
         basename = os.path.join(self.tempdir, 'somepack')
-        write_pack(basename, [(x, '') for x in origpack.iterobjects()],
-                   len(origpack))
+        write_pack(basename, origpack.pack_tuples())
         return Pack(basename)
 
     def test_keep_no_message(self):
@@ -347,15 +380,81 @@ class TestPack(PackTests):
         p = self.get_pack(pack1_sha)
         self.assertEquals(pack1_sha, p.name())
 
+    def test_length_mismatch(self):
+        data = self.get_pack_data(pack1_sha)
+        index = self.get_pack_index(pack1_sha)
+        Pack.from_objects(data, index).check_length_and_checksum()
+
+        data._file.seek(12)
+        bad_file = StringIO()
+        write_pack_header(bad_file, 9999)
+        bad_file.write(data._file.read())
+        bad_file = StringIO(bad_file.getvalue())
+        bad_data = PackData('', file=bad_file)
+        bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
+        self.assertRaises(AssertionError, lambda: bad_pack.data)
+        self.assertRaises(AssertionError,
+                          lambda: bad_pack.check_length_and_checksum())
+
+    def test_checksum_mismatch(self):
+        data = self.get_pack_data(pack1_sha)
+        index = self.get_pack_index(pack1_sha)
+        Pack.from_objects(data, index).check_length_and_checksum()
+
+        data._file.seek(0)
+        bad_file = StringIO(data._file.read()[:-20] + ('\xff' * 20))
+        bad_data = PackData('', file=bad_file)
+        bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
+        self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
+        self.assertRaises(ChecksumMismatch, lambda:
+                          bad_pack.check_length_and_checksum())
+
+    def test_iterobjects(self):
+        p = self.get_pack(pack1_sha)
+        objs = dict((o.id, o) for o in p.iterobjects())
+        self.assertEquals(3, len(objs))
+        self.assertEquals(sorted(objs), sorted(p.index))
+        self.assertTrue(isinstance(objs[a_sha], Blob))
+        self.assertTrue(isinstance(objs[tree_sha], Tree))
+        self.assertTrue(isinstance(objs[commit_sha], Commit))
+
 
-class WritePackHeaderTests(TestCase):
+class WritePackTests(TestCase):
 
-    def test_simple(self):
+    def test_write_pack_header(self):
         f = StringIO()
         write_pack_header(f, 42)
         self.assertEquals('PACK\x00\x00\x00\x02\x00\x00\x00*',
                 f.getvalue())
 
+    def test_write_pack_object(self):
+        f = StringIO()
+        f.write('header')
+        offset = f.tell()
+        crc32 = write_pack_object(f, Blob.type_num, 'blob')
+        self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xffffffff)
+
+        f.write('x')  # unpack_object needs extra trailing data.
+        f.seek(offset)
+        comp_len = len(f.getvalue()) - offset - 1
+        unpacked, unused = unpack_object(f.read, compute_crc32=True)
+        self.assertEqual(Blob.type_num, unpacked.pack_type_num)
+        self.assertEqual(Blob.type_num, unpacked.obj_type_num)
+        self.assertEqual(['blob'], unpacked.decomp_chunks)
+        self.assertEqual(crc32, unpacked.crc32)
+        self.assertEqual('x', unused)
+
+    def test_write_pack_object_sha(self):
+        f = StringIO()
+        f.write('header')
+        offset = f.tell()
+        sha_a = make_sha('foo')
+        sha_b = sha_a.copy()
+        write_pack_object(f, Blob.type_num, 'blob', sha=sha_a)
+        self.assertNotEqual(sha_a.digest(), sha_b.digest())
+        sha_b.update(f.getvalue()[offset:])
+        self.assertEqual(sha_a.digest(), sha_b.digest())
+
 
 pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
 
@@ -476,41 +575,52 @@ class ReadZlibTests(TestCase):
     def setUp(self):
         super(ReadZlibTests, self).setUp()
         self.read = StringIO(self.comp + self.extra).read
+        self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0)
 
     def test_decompress_size(self):
         good_decomp_len = len(self.decomp)
-        self.assertRaises(ValueError, read_zlib_chunks, self.read, -1)
+        self.unpacked.decomp_len = -1
+        self.assertRaises(ValueError, read_zlib_chunks, self.read,
+                          self.unpacked)
+        self.unpacked.decomp_len = good_decomp_len - 1
         self.assertRaises(zlib.error, read_zlib_chunks, self.read,
-                          good_decomp_len - 1)
+                          self.unpacked)
+        self.unpacked.decomp_len = good_decomp_len + 1
         self.assertRaises(zlib.error, read_zlib_chunks, self.read,
-                          good_decomp_len + 1)
+                          self.unpacked)
 
     def test_decompress_truncated(self):
         read = StringIO(self.comp[:10]).read
-        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
+        self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
 
         read = StringIO(self.comp).read
-        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
+        self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
 
     def test_decompress_empty(self):
+        unpacked = UnpackedObject(Tree.type_num, None, 0, None)
         comp = zlib.compress('')
         read = StringIO(comp + self.extra).read
-        decomp, comp_len, unused_data = read_zlib_chunks(read, 0)
-        self.assertEqual('', ''.join(decomp))
-        self.assertEqual(len(comp), comp_len)
-        self.assertNotEquals('', unused_data)
-        self.assertEquals(self.extra, unused_data + read())
-
-    def _do_decompress_test(self, buffer_size):
-        decomp, comp_len, unused_data = read_zlib_chunks(
-          self.read, len(self.decomp), buffer_size=buffer_size)
-        self.assertEquals(self.decomp, ''.join(decomp))
-        self.assertEquals(len(self.comp), comp_len)
-        self.assertNotEquals('', unused_data)
-        self.assertEquals(self.extra, unused_data + self.read())
+        unused = read_zlib_chunks(read, unpacked)
+        self.assertEqual('', ''.join(unpacked.decomp_chunks))
+        self.assertNotEquals('', unused)
+        self.assertEquals(self.extra, unused + read())
+
+    def test_decompress_no_crc32(self):
+        self.unpacked.crc32 = None
+        read_zlib_chunks(self.read, self.unpacked)
+        self.assertEquals(None, self.unpacked.crc32)
+
+    def _do_decompress_test(self, buffer_size, **kwargs):
+        unused = read_zlib_chunks(self.read, self.unpacked,
+                                  buffer_size=buffer_size, **kwargs)
+        self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
+        self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
+        self.assertNotEquals('', unused)
+        self.assertEquals(self.extra, unused + self.read())
 
     def test_simple_decompress(self):
         self._do_decompress_test(4096)
+        self.assertEqual(None, self.unpacked.comp_chunks)
 
     # These buffer sizes are not intended to be realistic, but rather simulate
     # larger buffer sizes that may end at various places.
@@ -525,3 +635,290 @@ class ReadZlibTests(TestCase):
 
     def test_decompress_buffer_size_4(self):
         self._do_decompress_test(4)
+
+    def test_decompress_include_comp(self):
+        self._do_decompress_test(4096, include_comp=True)
+        self.assertEqual(self.comp, ''.join(self.unpacked.comp_chunks))
+
+
+class DeltifyTests(TestCase):
+
+    def test_empty(self):
+        self.assertEquals([], list(deltify_pack_objects([])))
+
+    def test_single(self):
+        b = Blob.from_string("foo")
+        self.assertEquals(
+            [(b.type_num, b.sha().digest(), None, b.as_raw_string())],
+            list(deltify_pack_objects([(b, "")])))
+
+    def test_simple_delta(self):
+        b1 = Blob.from_string("a" * 101)
+        b2 = Blob.from_string("a" * 100)
+        delta = create_delta(b1.as_raw_string(), b2.as_raw_string())
+        self.assertEquals([
+            (b1.type_num, b1.sha().digest(), None, b1.as_raw_string()),
+            (b2.type_num, b2.sha().digest(), b1.sha().digest(), delta)
+            ],
+            list(deltify_pack_objects([(b1, ""), (b2, "")])))
+
+
+class TestPackStreamReader(TestCase):
+
+    def test_read_objects_empty(self):
+        f = StringIO()
+        build_pack(f, [])
+        reader = PackStreamReader(f.read)
+        self.assertEqual(0, len(list(reader.read_objects())))
+
+    def test_read_objects(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (Blob.type_num, 'blob'),
+          (OFS_DELTA, (0, 'blob1')),
+          ])
+        reader = PackStreamReader(f.read)
+        objects = list(reader.read_objects(compute_crc32=True))
+        self.assertEqual(2, len(objects))
+
+        unpacked_blob, unpacked_delta = objects
+
+        self.assertEqual(entries[0][0], unpacked_blob.offset)
+        self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
+        self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
+        self.assertEqual(None, unpacked_blob.delta_base)
+        self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
+        self.assertEqual(entries[0][4], unpacked_blob.crc32)
+
+        self.assertEqual(entries[1][0], unpacked_delta.offset)
+        self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
+        self.assertEqual(None, unpacked_delta.obj_type_num)
+        self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
+                         unpacked_delta.delta_base)
+        delta = create_delta('blob', 'blob1')
+        self.assertEqual(delta, ''.join(unpacked_delta.decomp_chunks))
+        self.assertEqual(entries[1][4], unpacked_delta.crc32)
+
+    def test_read_objects_buffered(self):
+        f = StringIO()
+        build_pack(f, [
+          (Blob.type_num, 'blob'),
+          (OFS_DELTA, (0, 'blob1')),
+          ])
+        reader = PackStreamReader(f.read, zlib_bufsize=4)
+        self.assertEqual(2, len(list(reader.read_objects())))
+
+
+class TestPackIterator(DeltaChainIterator):
+
+    _compute_crc32 = True
+
+    def __init__(self, *args, **kwargs):
+        super(TestPackIterator, self).__init__(*args, **kwargs)
+        self._unpacked_offsets = set()
+
+    def _result(self, unpacked):
+        """Return entries in the same format as build_pack."""
+        return (unpacked.offset, unpacked.obj_type_num,
+                ''.join(unpacked.obj_chunks), unpacked.sha(), unpacked.crc32)
+
+    def _resolve_object(self, offset, pack_type_num, base_chunks):
+        assert offset not in self._unpacked_offsets, (
+                'Attempted to re-inflate offset %i' % offset)
+        self._unpacked_offsets.add(offset)
+        return super(TestPackIterator, self)._resolve_object(
+          offset, pack_type_num, base_chunks)
+
+
+class DeltaChainIteratorTests(TestCase):
+
+    def setUp(self):
+        self.store = MemoryObjectStore()
+        self.fetched = set()
+
+    def store_blobs(self, blobs_data):
+        blobs = []
+        for data in blobs_data:
+            blob = make_object(Blob, data=data)
+            blobs.append(blob)
+            self.store.add_object(blob)
+        return blobs
+
+    def get_raw_no_repeat(self, bin_sha):
+        """Wrapper around store.get_raw that doesn't allow repeat lookups."""
+        hex_sha = sha_to_hex(bin_sha)
+        self.assertFalse(hex_sha in self.fetched,
+                         'Attempted to re-fetch object %s' % hex_sha)
+        self.fetched.add(hex_sha)
+        return self.store.get_raw(hex_sha)
+
+    def make_pack_iter(self, f, thin=None):
+        if thin is None:
+            thin = bool(list(self.store))
+        resolve_ext_ref = thin and self.get_raw_no_repeat or None
+        data = PackData('test.pack', file=f)
+        return TestPackIterator.for_pack_data(
+          data, resolve_ext_ref=resolve_ext_ref)
+
+    def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
+        expected = [entries[i] for i in expected_indexes]
+        self.assertEqual(expected, list(pack_iter._walk_all_chains()))
+
+    def test_no_deltas(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (Commit.type_num, 'commit'),
+          (Blob.type_num, 'blob'),
+          (Tree.type_num, 'tree'),
+          ])
+        self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
+
+    def test_ofs_deltas(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (Blob.type_num, 'blob'),
+          (OFS_DELTA, (0, 'blob1')),
+          (OFS_DELTA, (0, 'blob2')),
+          ])
+        self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
+
+    def test_ofs_deltas_chain(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (Blob.type_num, 'blob'),
+          (OFS_DELTA, (0, 'blob1')),
+          (OFS_DELTA, (1, 'blob2')),
+          ])
+        self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
+
+    def test_ref_deltas(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (1, 'blob1')),
+          (Blob.type_num, ('blob')),
+          (REF_DELTA, (1, 'blob2')),
+          ])
+        self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))
+
+    def test_ref_deltas_chain(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (2, 'blob1')),
+          (Blob.type_num, ('blob')),
+          (REF_DELTA, (1, 'blob2')),
+          ])
+        self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
+
+    def test_ofs_and_ref_deltas(self):
+        # Deltas pending on this offset are popped before deltas depending on
+        # this ref.
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (1, 'blob1')),
+          (Blob.type_num, ('blob')),
+          (OFS_DELTA, (1, 'blob2')),
+          ])
+        self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
+
+    def test_mixed_chain(self):
+        f = StringIO()
+        entries = build_pack(f, [
+          (Blob.type_num, 'blob'),
+          (REF_DELTA, (2, 'blob2')),
+          (OFS_DELTA, (0, 'blob1')),
+          (OFS_DELTA, (1, 'blob3')),
+          (OFS_DELTA, (0, 'bob')),
+          ])
+        self.assertEntriesMatch([0, 2, 1, 3, 4], entries,
+                                self.make_pack_iter(f))
+
+    def test_long_chain(self):
+        n = 100
+        objects_spec = [(Blob.type_num, 'blob')]
+        for i in xrange(n):
+            objects_spec.append((OFS_DELTA, (i, 'blob%i' % i)))
+        f = StringIO()
+        entries = build_pack(f, objects_spec)
+        self.assertEntriesMatch(xrange(n + 1), entries, self.make_pack_iter(f))
+
+    def test_branchy_chain(self):
+        n = 100
+        objects_spec = [(Blob.type_num, 'blob')]
+        for i in xrange(n):
+            objects_spec.append((OFS_DELTA, (0, 'blob%i' % i)))
+        f = StringIO()
+        entries = build_pack(f, objects_spec)
+        self.assertEntriesMatch(xrange(n + 1), entries, self.make_pack_iter(f))
+
+    def test_ext_ref(self):
+        blob, = self.store_blobs(['blob'])
+        f = StringIO()
+        entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
+                             store=self.store)
+        pack_iter = self.make_pack_iter(f)
+        self.assertEntriesMatch([0], entries, pack_iter)
+        self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
+
+    def test_ext_ref_chain(self):
+        blob, = self.store_blobs(['blob'])
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (1, 'blob2')),
+          (REF_DELTA, (blob.id, 'blob1')),
+          ], store=self.store)
+        pack_iter = self.make_pack_iter(f)
+        self.assertEntriesMatch([1, 0], entries, pack_iter)
+        self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
+
+    def test_ext_ref_multiple_times(self):
+        blob, = self.store_blobs(['blob'])
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (blob.id, 'blob1')),
+          (REF_DELTA, (blob.id, 'blob2')),
+          ], store=self.store)
+        pack_iter = self.make_pack_iter(f)
+        self.assertEntriesMatch([0, 1], entries, pack_iter)
+        self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
+
+    def test_multiple_ext_refs(self):
+        b1, b2 = self.store_blobs(['foo', 'bar'])
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (b1.id, 'foo1')),
+          (REF_DELTA, (b2.id, 'bar2')),
+          ], store=self.store)
+        pack_iter = self.make_pack_iter(f)
+        self.assertEntriesMatch([0, 1], entries, pack_iter)
+        self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)],
+                         pack_iter.ext_refs())
+
+    def test_bad_ext_ref_non_thin_pack(self):
+        blob, = self.store_blobs(['blob'])
+        f = StringIO()
+        entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
+                             store=self.store)
+        pack_iter = self.make_pack_iter(f, thin=False)
+        try:
+            list(pack_iter._walk_all_chains())
+            self.fail()
+        except KeyError, e:
+            self.assertEqual(([blob.id],), e.args)
+
+    def test_bad_ext_ref_thin_pack(self):
+        b1, b2, b3 = self.store_blobs(['foo', 'bar', 'baz'])
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (1, 'foo99')),
+          (REF_DELTA, (b1.id, 'foo1')),
+          (REF_DELTA, (b2.id, 'bar2')),
+          (REF_DELTA, (b3.id, 'baz3')),
+          ], store=self.store)
+        del self.store[b2.id]
+        del self.store[b3.id]
+        pack_iter = self.make_pack_iter(f)
+        try:
+            list(pack_iter._walk_all_chains())
+            self.fail()
+        except KeyError, e:
+            self.assertEqual((sorted([b2.id, b3.id]),), e.args)

+ 11 - 0
dulwich/tests/test_repository.py

@@ -123,6 +123,17 @@ class RepositoryTests(TestCase):
         self.assertEquals('a90fa2d900a17e99b433217e988c4eb4a2e9a097',
                           r["refs/tags/foo"].id)
 
+    def test_delitem(self):
+        r = self._repo = open_repo('a.git')
+
+        del r['refs/heads/master']
+        self.assertRaises(KeyError, lambda: r['refs/heads/master'])
+
+        del r['HEAD']
+        self.assertRaises(KeyError, lambda: r['HEAD'])
+
+        self.assertRaises(ValueError, r.__delitem__, 'notrefs/foo')
+
     def test_get_refs(self):
         r = self._repo = open_repo('a.git')
         self.assertEqual({

+ 83 - 0
dulwich/tests/utils.py

@@ -20,6 +20,7 @@
 """Utility functions common to Dulwich tests."""
 
 
+from cStringIO import StringIO
 import datetime
 import os
 import shutil
@@ -31,6 +32,16 @@ from dulwich.objects import (
     FixedSha,
     Commit,
     )
+from dulwich.pack import (
+    OFS_DELTA,
+    REF_DELTA,
+    DELTA_TYPES,
+    obj_sha,
+    SHA1Writer,
+    write_pack_header,
+    write_pack_object,
+    create_delta,
+    )
 from dulwich.repo import Repo
 from dulwich.tests import (
     TestSkipped,
@@ -147,3 +158,75 @@ def ext_functest_builder(method, func):
         method(self, func)
 
     return do_test
+
+
+def build_pack(f, objects_spec, store=None):
+    """Write test pack data from a concise spec.
+
+    :param f: A file-like object to write the pack to.
+    :param objects_spec: A list of (type_num, obj). For non-delta types, obj
+        is the string of that object's data.
+        For delta types, obj is a tuple of (base, data), where:
+
+        * base can be either an index in objects_spec of the base for that
+          delta; or for a ref delta, a SHA, in which case the resulting pack
+          will be thin and the base will be an external ref.
+        * data is a string of the full, non-deltified data for that object.
+
+        Note that offsets/refs and deltas are computed within this function.
+    :param store: An optional ObjectStore for looking up external refs.
+    :return: A list of tuples in the order specified by objects_spec:
+        (offset, type num, data, sha, CRC32)
+    """
+    sf = SHA1Writer(f)
+    num_objects = len(objects_spec)
+    write_pack_header(sf, num_objects)
+
+    full_objects = {}
+    offsets = {}
+    crc32s = {}
+
+    while len(full_objects) < num_objects:
+        for i, (type_num, data) in enumerate(objects_spec):
+            if type_num not in DELTA_TYPES:
+                full_objects[i] = (type_num, data,
+                                   obj_sha(type_num, [data]))
+                continue
+            base, data = data
+            if isinstance(base, int):
+                if base not in full_objects:
+                    continue
+                base_type_num, _, _ = full_objects[base]
+            else:
+                base_type_num, _ = store.get_raw(base)
+            full_objects[i] = (base_type_num, data,
+                               obj_sha(base_type_num, [data]))
+
+    for i, (type_num, obj) in enumerate(objects_spec):
+        offset = f.tell()
+        if type_num == OFS_DELTA:
+            base_index, data = obj
+            base = offset - offsets[base_index]
+            _, base_data, _ = full_objects[base_index]
+            obj = (base, create_delta(base_data, data))
+        elif type_num == REF_DELTA:
+            base_ref, data = obj
+            if isinstance(base_ref, int):
+                _, base_data, base = full_objects[base_ref]
+            else:
+                base_type_num, base_data = store.get_raw(base_ref)
+                base = obj_sha(base_type_num, base_data)
+            obj = (base, create_delta(base_data, data))
+
+        crc32 = write_pack_object(sf, type_num, obj)
+        offsets[i] = offset
+        crc32s[i] = crc32
+
+    expected = []
+    for i in xrange(num_objects):
+        type_num, data, sha = full_objects[i]
+        expected.append((offsets[i], type_num, data, sha, crc32s[i]))
+
+    sf.write_sha()
+    f.seek(0)
+    return expected
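
A minimal usage sketch for build_pack, the same shape the new pack tests above use:

    from cStringIO import StringIO
    from dulwich.objects import Blob
    from dulwich.pack import OFS_DELTA
    from dulwich.tests.utils import build_pack

    f = StringIO()
    # Entry 1 is deltified against entry 0; offsets, deltas and the
    # trailing pack SHA are computed by build_pack itself.
    entries = build_pack(f, [
        (Blob.type_num, 'blob'),
        (OFS_DELTA, (0, 'blob1')),
        ])
    offset, type_num, data, sha, crc32 = entries[0]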

+ 10 - 2
setup.py

@@ -4,11 +4,13 @@
 
 try:
     from setuptools import setup, Extension
+    has_setuptools = True
 except ImportError:
     from distutils.core import setup, Extension
+    has_setuptools = False
 from distutils.core import Distribution
 
-dulwich_version_string = '0.7.1'
+dulwich_version_string = '0.8.0'
 
 include_dirs = []
 # Windows MSVC support
@@ -53,7 +55,12 @@ if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
     # Also parse only first digit, because 3.2.1 can't be parsed nicely
     if (version.startswith('Xcode') and
         int(version.split()[1].split('.')[0]) >= 4):
-        os.environ['ARCHFLAGS'] = '-arch i386 -arch x86_64'
+        os.environ['ARCHFLAGS'] = ''
+
+setup_kwargs = {}
+
+if has_setuptools:
+    setup_kwargs['test_suite'] = 'dulwich.tests'
 
 setup(name='dulwich',
       description='Python Git Library',
@@ -83,4 +90,5 @@ setup(name='dulwich',
               include_dirs=include_dirs),
           ],
       distclass=DulwichDistribution,
+      **setup_kwargs
       )

Some files were not shown because too many files changed in this diff