Browse Source

New upstream release.

Jelmer Vernooij 13 years ago
parent
commit
99161d0e7e

+ 43 - 1
NEWS

@@ -1,4 +1,4 @@
-0.8.0	UNRELEASED
+0.8.0	2011-08-07
 
  FEATURES
 
@@ -6,6 +6,14 @@
     a pack, with implementations for pack indexing and inflation.
     (Dave Borowitz)
 
+  * New walk module with a Walker class for customizable commit walking.
+    (Dave Borowitz)
+
+  * New tree_changes_for_merge function in diff_tree. (Dave Borowitz)
+
+  * Easy rename detection in RenameDetector even without find_copies_harder.
+    (Dave Borowitz)
+
  BUG FIXES
 
   * Avoid storing all objects in memory when writing pack.
@@ -28,6 +36,26 @@
 
   * Support ~ in git:// URL paths. (Jelmer Vernooij, #813555)
 
+  * Make ShaFile.__eq__ work when other is not a ShaFile. (Dave Borowitz)
+
+  * ObjectStore.get_graph_walker() now no longer yields the same
+    revision more than once. This has a significant improvement for
+    performance when wide revision graphs are involved.
+    (Jelmer Vernooij, #818168)
+
+  * Teach ReceivePackHandler how to read empty packs. (Dave Borowitz)
+
+  * Don't send a pack with duplicates of the same object. (Dave Borowitz)
+
+  * Teach the server how to serve a clone of an empty repo. (Dave Borowitz)
+
+  * Correctly advertise capabilities during receive-pack. (Dave Borowitz)
+
+  * Fix add/add and add/rename conflicts in tree_changes_for_merge.
+    (Dave Borowitz)
+
+  * Use correct MIME types in web server. (Dave Borowitz)
+
  API CHANGES
 
   * write_pack no longer takes the num_objects argument and requires an object
@@ -66,6 +94,17 @@
     value of unpack_object and various DeltaChainIterator methods.
     (Dave Borowitz)
 
+  * Add a lookup_path convenience method to Tree. (Dave Borowitz)
+
+  * Optionally create RenameDetectors without passing in tree SHAs.
+    (Dave Borowitz)
+
+  * Optionally include unchanged entries in RenameDetectors. (Dave Borowitz)
+
+  * Optionally pass a RenameDetector to tree_changes. (Dave Borowitz)
+
+  * Optionally pass a request object through to server handlers. (Dave Borowitz)
+
  TEST CHANGES
 
   * If setuptools is installed, "python setup.py test" will now run the testsuite.
@@ -74,6 +113,9 @@
   * Add a new build_pack test utility for building packs from a simple spec.
     (Dave Borowitz)
 
+  * Add a new build_commit_graph test utility for building commits from a
+    simple spec. (Dave Borowitz)
+
 0.7.1	2011-04-12
 
  BUG FIXES

+ 3 - 3
debian/changelog

@@ -1,10 +1,10 @@
-dulwich (0.8.0~bzr873-1) UNRELEASED; urgency=low
+dulwich (0.8.0-1) UNRELEASED; urgency=low
 
-  * New upstream snapshot.
+  * New upstream release.
    + Adds support for IPv6 to git:// client. Closes: #631490
    + Fixes use of ~username in git:// URLs. Closes: #631483
 
- -- Jelmer Vernooij <jelmer@debian.org>  Fri, 29 Jul 2011 12:10:15 +0200
+ -- Jelmer Vernooij <jelmer@debian.org>  Sun, 07 Aug 2011 15:03:19 +0200
 
 dulwich (0.7.1-2) unstable; urgency=low
 

+ 14 - 0
dulwich/_compat.py

@@ -136,6 +136,20 @@ except ImportError:
                 return
 
 
+try:
+    all = all
+except NameError:
+    # Implementation of all from Python 2.6 documentation:
+    # http://docs.python.org/2.6/library/functions.html#all
+    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
+    # Licensed under the Python Software Foundation License.
+    def all(iterable):
+        for element in iterable:
+            if not element:
+                return False
+        return True
+
+
 try:
     from collections import namedtuple
 except ImportError:

+ 5 - 1
dulwich/_objects.c

@@ -21,11 +21,15 @@
 #include <stdlib.h>
 #include <sys/stat.h>
 
+#if defined(__APPLE__)
+#include <Availability.h>
+#endif
+
 #if (PY_VERSION_HEX < 0x02050000)
 typedef int Py_ssize_t;
 #endif
 
-#if defined(__MINGW32_VERSION) || (defined(__APPLE__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1070)
+#if defined(__MINGW32_VERSION) || (defined(__APPLE__) && __MAC_OS_X_VERSION_MIN_REQUIRED < 1070)
 size_t strnlen(char *text, size_t maxlen)
 {
 	const char *last = memchr(text, '\0', maxlen);

+ 131 - 42
dulwich/diff_tree.py

@@ -18,12 +18,16 @@
 
 """Utilities for diffing files and trees."""
 
+try:
+    from collections import defaultdict
+except ImportError:
+    from dulwich._compat import defaultdict
+
 from cStringIO import StringIO
 import itertools
 import stat
 
 from dulwich._compat import (
-    defaultdict,
     namedtuple,
     )
 from dulwich.objects import (
@@ -39,12 +43,14 @@ CHANGE_RENAME = 'rename'
 CHANGE_COPY = 'copy'
 CHANGE_UNCHANGED = 'unchanged'
 
+RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)
+
 _NULL_ENTRY = TreeEntry(None, None, None)
 
 _MAX_SCORE = 100
-_RENAME_THRESHOLD = 60
-_MAX_FILES = 200
-_REWRITE_THRESHOLD = None
+RENAME_THRESHOLD = 60
+MAX_FILES = 200
+REWRITE_THRESHOLD = None
 
 
 class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])):
@@ -152,7 +158,8 @@ def _skip_tree(entry):
     return entry
 
 
-def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
+def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
+                 rename_detector=None):
     """Find the differences between the contents of two trees.
 
     :param store: An ObjectStore for looking up objects.
@@ -160,9 +167,17 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
     :param tree2_id: The SHA of the target tree.
     :param want_unchanged: If True, include TreeChanges for unmodified entries
         as well.
+    :param rename_detector: RenameDetector object for detecting renames.
     :return: Iterator over TreeChange instances for each change between the
         source and target tree.
     """
+    if (rename_detector is not None and tree1_id is not None and
+        tree2_id is not None):
+        for change in rename_detector.changes_with_renames(
+          tree1_id, tree2_id, want_unchanged=want_unchanged):
+            yield change
+        return
+
     entries = walk_trees(store, tree1_id, tree2_id,
                          prune_identical=(not want_unchanged))
     for entry1, entry2 in entries:
@@ -193,6 +208,69 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
         yield TreeChange(change_type, entry1, entry2)
 
 
+def _all_eq(seq, key, value):
+    for e in seq:
+        if key(e) != value:
+            return False
+    return True
+
+
+def _all_same(seq, key):
+    return _all_eq(seq[1:], key, key(seq[0]))
+
+
+def tree_changes_for_merge(store, parent_tree_ids, tree_id,
+                           rename_detector=None):
+    """Get the tree changes for a merge tree relative to all its parents.
+
+    :param store: An ObjectStore for looking up objects.
+    :param parent_tree_ids: An iterable of the SHAs of the parent trees.
+    :param tree_id: The SHA of the merge tree.
+    :param rename_detector: RenameDetector object for detecting renames.
+
+    :yield: Lists of TreeChange objects, one per conflicted path in the merge.
+
+        Each list contains one element per parent, with the TreeChange for that
+        path relative to that parent. An element may be None if it never existed
+        in one parent and was deleted in two others.
+
+        A path is only included in the output if it is a conflict, i.e. its SHA
+        in the merge tree is not found in any of the parents, or in the case of
+        deletes, if not all of the old SHAs match.
+    """
+    all_parent_changes = [tree_changes(store, t, tree_id,
+                                       rename_detector=rename_detector)
+                          for t in parent_tree_ids]
+    num_parents = len(parent_tree_ids)
+    changes_by_path = defaultdict(lambda: [None] * num_parents)
+
+    # Organize by path.
+    for i, parent_changes in enumerate(all_parent_changes):
+        for change in parent_changes:
+            if change.type == CHANGE_DELETE:
+                path = change.old.path
+            else:
+                path = change.new.path
+            changes_by_path[path][i] = change
+
+    old_sha = lambda c: c.old.sha
+    change_type = lambda c: c.type
+
+    # Yield only conflicting changes.
+    for _, changes in sorted(changes_by_path.iteritems()):
+        assert len(changes) == num_parents
+        have = [c for c in changes if c is not None]
+        if _all_eq(have, change_type, CHANGE_DELETE):
+            if not _all_same(have, old_sha):
+                yield changes
+        elif not _all_same(have, change_type):
+            yield changes
+        elif None not in changes:
+            # If no change was found relative to one parent, that means the SHA
+            # must have matched the SHA in that parent, so it is not a conflict.
+            yield changes
+
+
 _BLOCK_SIZE = 64
 
 
@@ -287,15 +365,13 @@ def _tree_change_key(entry):
 class RenameDetector(object):
     """Object for handling rename detection between two trees."""
 
-    def __init__(self, store, tree1_id, tree2_id,
-                 rename_threshold=_RENAME_THRESHOLD, max_files=_MAX_FILES,
-                 rewrite_threshold=_REWRITE_THRESHOLD,
+    def __init__(self, store, rename_threshold=RENAME_THRESHOLD,
+                 max_files=MAX_FILES,
+                 rewrite_threshold=REWRITE_THRESHOLD,
                  find_copies_harder=False):
         """Initialize the rename detector.
 
         :param store: An ObjectStore for looking up objects.
-        :param tree1_id: The SHA of the first Tree.
-        :param tree2_id: The SHA of the second Tree.
         :param rename_threshold: The threshold similarity score for considering
             an add/delete pair to be a rename/copy; see _similarity_score.
         :param max_files: The maximum number of adds and deletes to consider, or
@@ -309,14 +385,14 @@ class RenameDetector(object):
         :param find_copies_harder: If True, consider unmodified files when
             detecting copies.
         """
-        self._tree1_id = tree1_id
-        self._tree2_id = tree2_id
         self._store = store
         self._rename_threshold = rename_threshold
         self._rewrite_threshold = rewrite_threshold
         self._max_files = max_files
         self._find_copies_harder = find_copies_harder
+        self._want_unchanged = False
 
+    def _reset(self):
         self._adds = []
         self._deletes = []
         self._changes = []
@@ -329,9 +405,10 @@ class RenameDetector(object):
         new_obj = self._store[change.new.sha]
         return _similarity_score(old_obj, new_obj) < self._rewrite_threshold
 
-    def _collect_changes(self):
-        for change in tree_changes(self._store, self._tree1_id, self._tree2_id,
-                                   want_unchanged=self._find_copies_harder):
+    def _collect_changes(self, tree1_id, tree2_id):
+        want_unchanged = self._find_copies_harder or self._want_unchanged
+        for change in tree_changes(self._store, tree1_id, tree2_id,
+                                   want_unchanged=want_unchanged):
             if change.type == CHANGE_ADD:
                 self._adds.append(change)
             elif change.type == CHANGE_DELETE:
@@ -339,10 +416,12 @@ class RenameDetector(object):
             elif self._should_split(change):
                 self._deletes.append(TreeChange.delete(change.old))
                 self._adds.append(TreeChange.add(change.new))
-            elif (self._find_copies_harder and (
-              change.type == CHANGE_MODIFY or change.type == CHANGE_UNCHANGED)):
-                # Treat modified/unchanged as deleted rather than splitting it,
-                # to avoid spurious renames.
+            elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
+                  or change.type == CHANGE_MODIFY):
+                # Treat all modifies as potential deletes for rename detection,
+                # but don't split them (to avoid spurious renames). Setting
+                # find_copies_harder means we treat unchanged the same as
+                # modified.
                 self._deletes.append(change)
             else:
                 self._changes.append(change)
@@ -359,8 +438,7 @@ class RenameDetector(object):
         delete_map = defaultdict(list)
         for delete in self._deletes:
             # Keep track of whether the delete was actually marked as a delete.
-            # If not, it must have been added due to find_copies_harder, and
-            # needs to be marked as a copy.
+            # If not, it needs to be marked as a copy.
             is_delete = delete.type == CHANGE_DELETE
             delete_map[delete.old.sha].append((delete.old, is_delete))
 
@@ -371,7 +449,8 @@ class RenameDetector(object):
             for (old, is_delete), new in itertools.izip(sha_deletes, sha_adds):
                 if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                     continue
-                delete_paths.add(old.path)
+                if is_delete:
+                    delete_paths.add(old.path)
                 add_paths.add(new.path)
                 new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
                 self._changes.append(TreeChange(new_type, old, new))
@@ -385,18 +464,32 @@ class RenameDetector(object):
                     self._changes.append(TreeChange(CHANGE_COPY, old, new))
         self._prune(add_paths, delete_paths)
 
-    def _find_content_renames(self):
+    def _should_find_content_renames(self):
+        return len(self._adds) * len(self._deletes) <= self._max_files ** 2
+
+    def _rename_type(self, check_paths, delete, add):
+        if check_paths and delete.old.path == add.new.path:
+            # If the paths match, this must be a split modify, so make sure it
+            # comes out as a modify.
+            return CHANGE_MODIFY
+        elif delete.type != CHANGE_DELETE:
+            # If it's in deletes but not marked as a delete, it must have been
+            # added due to find_copies_harder, and needs to be marked as a copy.
+            return CHANGE_COPY
+        return CHANGE_RENAME
+
+    def _find_content_rename_candidates(self):
+        candidates = self._candidates = []
         # TODO: Optimizations:
         #  - Compare object sizes before counting blocks.
         #  - Skip if delete's S_IFMT differs from all adds.
         #  - Skip if adds or deletes is empty.
         # Match C git's behavior of not attempting to find content renames if
         # the matrix size exceeds the threshold.
-        if len(self._adds) * len(self._deletes) > self._max_files ** 2:
+        if not self._should_find_content_renames():
             return
 
         check_paths = self._rename_threshold is not None
-        candidates = []
         for delete in self._deletes:
             if S_ISGITLINK(delete.old.mode):
                 continue  # Git links don't exist in this repo.
@@ -410,26 +503,17 @@ class RenameDetector(object):
                 score = _similarity_score(old_obj, new_obj,
                                           block_cache={old_sha: old_blocks})
                 if score > self._rename_threshold:
-                    if check_paths and delete.old.path == add.new.path:
-                        # If the paths match, this must be a split modify, so
-                        # make sure it comes out as a modify.
-                        new_type = CHANGE_MODIFY
-                    elif delete.type != CHANGE_DELETE:
-                        # If it's in deletes but not marked as a delete, it must
-                        # have been added due to find_copies_harder, and needs
-                        # to be marked as a copy.
-                        new_type = CHANGE_COPY
-                    else:
-                        new_type = CHANGE_RENAME
+                    new_type = self._rename_type(check_paths, delete, add)
                     rename = TreeChange(new_type, delete.old, add.new)
                     candidates.append((-score, rename))
 
+    def _choose_content_renames(self):
         # Sort scores from highest to lowest, but keep names in ascending order.
-        candidates.sort()
+        self._candidates.sort()
 
         delete_paths = set()
         add_paths = set()
-        for _, change in candidates:
+        for _, change in self._candidates:
             new_path = change.new.path
             if new_path in add_paths:
                 continue
@@ -472,13 +556,18 @@ class RenameDetector(object):
         return result
 
     def _prune_unchanged(self):
+        if self._want_unchanged:
+            return
         self._deletes = [d for d in self._deletes if d.type != CHANGE_UNCHANGED]
 
-    def changes_with_renames(self):
-        """Iterate TreeChanges between the two trees, with rename detection."""
-        self._collect_changes()
+    def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
+        """Iterate TreeChanges between two tree SHAs, with rename detection."""
+        self._reset()
+        self._want_unchanged = want_unchanged
+        self._collect_changes(tree1_id, tree2_id)
         self._find_exact_renames()
-        self._find_content_renames()
+        self._find_content_rename_candidates()
+        self._choose_content_renames()
         self._join_modifies()
         self._prune_unchanged()
         return self._sorted_changes()

+ 1 - 0
dulwich/errors.py

@@ -83,6 +83,7 @@ class MissingCommitError(Exception):
     """Indicates that a commit was not found in the repository"""
 
     def __init__(self, sha, *args, **kwargs):
+        self.sha = sha
         Exception.__init__(self, "%s is not in the revision store" % sha)
 
 

+ 17 - 28
dulwich/object_store.py

@@ -20,19 +20,12 @@
 """Git object store interfaces and implementation."""
 
 
-from cStringIO import StringIO
 import errno
 import itertools
 import os
 import stat
 import tempfile
-import urllib2
 
-from dulwich._compat import (
-    make_sha,
-    SEEK_END,
-    SEEK_CUR,
-    )
 from dulwich.diff_tree import (
     tree_changes,
     walk_trees,
@@ -56,10 +49,7 @@ from dulwich.objects import (
 from dulwich.pack import (
     Pack,
     PackData,
-    obj_sha,
     iter_sha1,
-    load_pack_index,
-    write_pack,
     write_pack_header,
     write_pack_index_v2,
     write_pack_object,
@@ -685,23 +675,17 @@ class ObjectStoreIterator(ObjectIterator):
 
 
 def tree_lookup_path(lookup_obj, root_sha, path):
-    """Lookup an object in a Git tree.
+    """Look up an object in a Git tree.
 
     :param lookup_obj: Callback for retrieving object by SHA1
     :param root_sha: SHA1 of the root tree
     :param path: Path to lookup
+    :return: A tuple of (mode, SHA) of the resulting path.
     """
-    parts = path.split("/")
-    sha = root_sha
-    mode = None
-    for p in parts:
-        obj = lookup_obj(sha)
-        if not isinstance(obj, Tree):
-            raise NotTreeError(sha)
-        if p == '':
-            continue
-        mode, sha = obj[p]
-    return mode, sha
+    tree = lookup_obj(root_sha)
+    if not isinstance(tree, Tree):
+        raise NotTreeError(root_sha)
+    return tree.lookup_path(lookup_obj, path)
 
 
 class MissingObjectFinder(object):
@@ -747,9 +731,12 @@ class MissingObjectFinder(object):
         self.add_todo([(tag.object[1], None, False)])
 
     def next(self):
-        if not self.objects_to_send:
-            return None
-        (sha, name, leaf) = self.objects_to_send.pop()
+        while True:
+            if not self.objects_to_send:
+                return None
+            (sha, name, leaf) = self.objects_to_send.pop()
+            if sha not in self.sha_done:
+                break
         if not leaf:
             o = self.object_store[sha]
             if isinstance(o, Commit):
@@ -795,8 +782,10 @@ class ObjectStoreGraphWalker(object):
             # collect all ancestors
             new_ancestors = set()
             for a in ancestors:
-                if a in self.parents:
-                    new_ancestors.update(self.parents[a])
+                ps = self.parents.get(a)
+                if ps is not None:
+                    new_ancestors.update(ps)
+                self.parents[a] = None
 
             # no more ancestors; stop
             if not new_ancestors:
@@ -810,6 +799,6 @@ class ObjectStoreGraphWalker(object):
             ret = self.heads.pop()
             ps = self.get_parents(ret)
             self.parents[ret] = ps
-            self.heads.update(ps)
+            self.heads.update([p for p in ps if not p in self.parents])
             return ret
         return None

+ 24 - 5
dulwich/objects.py

@@ -469,15 +469,15 @@ class ShaFile(object):
         return "<%s %s>" % (self.__class__.__name__, self.id)
 
     def __ne__(self, other):
-        return self.id != other.id
+        return not isinstance(other, ShaFile) or self.id != other.id
 
     def __eq__(self, other):
-        """Return true if the sha of the two objects match.
+        """Return True if the SHAs of the two objects match.
 
-        The __le__ etc methods aren't overriden as they make no sense,
-        certainly at this level.
+        It doesn't make sense to talk about an order on ShaFiles, so we don't
+        override the rich comparison methods (__le__, etc.).
         """
-        return self.id == other.id
+        return isinstance(other, ShaFile) and self.id == other.id
 
 
 class Blob(ShaFile):
@@ -919,6 +919,25 @@ class Tree(ShaFile):
             text.append("%04o %s %s\t%s\n" % (mode, kind, hexsha, name))
         return "".join(text)
 
+    def lookup_path(self, lookup_obj, path):
+        """Look up an object in a Git tree.
+
+        :param lookup_obj: Callback for retrieving object by SHA1
+        :param path: Path to lookup
+        :return: A tuple of (mode, SHA) of the resulting path.
+        """
+        parts = path.split('/')
+        sha = self.id
+        mode = None
+        for p in parts:
+            if not p:
+                continue
+            obj = lookup_obj(sha)
+            if not isinstance(obj, Tree):
+                raise NotTreeError(sha)
+            mode, sha = obj[p]
+        return mode, sha
+
 
 def parse_timezone(text):
     """Parse a timezone text fragment (e.g. '+0100').

+ 12 - 2
dulwich/pack.py

@@ -121,6 +121,7 @@ class UnpackedObject(object):
 
     __slots__ = [
       'offset',         # Offset in its pack.
+      '_sha',           # Cached binary SHA.
       'obj_type_num',   # Type of this object.
       'obj_chunks',     # Decompressed and delta-resolved chunks.
       'pack_type_num',  # Type of this object in the pack (may be a delta).
@@ -135,6 +136,7 @@ class UnpackedObject(object):
     # methods of this object.
     def __init__(self, pack_type_num, delta_base, decomp_len, crc32):
         self.offset = None
+        self._sha = None
         self.pack_type_num = pack_type_num
         self.delta_base = delta_base
         self.comp_chunks = None
@@ -152,7 +154,9 @@ class UnpackedObject(object):
 
     def sha(self):
         """Return the binary SHA of this object."""
-        return obj_sha(self.obj_type_num, self.obj_chunks)
+        if self._sha is None:
+            self._sha = obj_sha(self.obj_type_num, self.obj_chunks)
+        return self._sha
 
     def sha_file(self):
         """Return a ShaFile from this object."""
@@ -631,9 +635,12 @@ def read_pack_header(read):
     """Read the header of a pack file.
 
     :param read: Read function
-    :return: Tuple with pack version and number of objects
+    :return: Tuple of (pack version, number of objects). If no data is available
+        to read, returns (None, None).
     """
     header = read(12)
+    if not header:
+        return None, None
     assert header[:4] == 'PACK'
     (version,) = unpack_from('>L', header, 4)
     assert version in (2, 3), 'Version was %d' % version
@@ -817,6 +824,9 @@ class PackStreamReader(object):
         :raise IOError: if an error occurred writing to the output file.
         """
         pack_version, self._num_objects = read_pack_header(self.read)
+        if pack_version is None:
+            return
+
         for i in xrange(self._num_objects):
             offset = self.offset
             unpacked, unused = unpack_object(

+ 11 - 41
dulwich/repo.py

@@ -53,6 +53,9 @@ from dulwich.objects import (
     Tree,
     hex_to_sha,
     )
+from dulwich.walk import (
+    Walker,
+    )
 import warnings
 
 
@@ -954,48 +957,15 @@ class BaseRepo(object):
     def revision_history(self, head):
         """Returns a list of the commits reachable from head.
 
-        Returns a list of commit objects. the first of which will be the commit
-        of head, then following that will be the parents.
-
-        Raises NotCommitError if any no commits are referenced, including if the
-        head parameter isn't the sha of a commit.
-
-        XXX: work out how to handle merges.
+        :param head: The SHA of the head to list revision history for.
+        :return: A list of commit objects reachable from head, starting with
+            head itself, in descending commit time order.
+        :raise MissingCommitError: if any missing commits are referenced,
+            including if the head parameter isn't the SHA of a commit.
         """
-
-        try:
-            commit = self[head]
-        except KeyError:
-            raise MissingCommitError(head)
-
-        if type(commit) != Commit:
-            raise NotCommitError(commit)
-        pending_commits = [commit]
-
-        history = set()
-
-        while pending_commits != []:
-            commit = pending_commits.pop(0)
-
-            if commit in history:
-                continue
-
-            history.add(commit)
-
-            for parent in commit.parents:
-                try:
-                    commit = self[parent]
-                except KeyError:
-                    raise MissingCommitError(head)
-
-                if type(commit) != Commit:
-                    raise NotCommitError(commit)
-
-                pending_commits.append(commit)
-
-        history = list(history)
-        history.sort(key=lambda c:c.commit_time, reverse=True)
-        return history
+        # TODO(dborowitz): Expose more of the Walker functionality here or in a
+        # separate Repo/BaseObjectStore method.
+        return [e.commit for e in Walker(self.object_store, [head])]
 
     def __getitem__(self, name):
         if len(name) in (20, 40):

+ 20 - 19
dulwich/server.py

@@ -140,9 +140,10 @@ class FileSystemBackend(Backend):
 class Handler(object):
     """Smart protocol command handler base class."""
 
-    def __init__(self, backend, proto):
+    def __init__(self, backend, proto, http_req=None):
         self.backend = backend
         self.proto = proto
+        self.http_req = http_req
         self._client_capabilities = None
 
     @classmethod
@@ -186,12 +187,11 @@ class Handler(object):
 class UploadPackHandler(Handler):
     """Protocol handler for uploading a pack to the server."""
 
-    def __init__(self, backend, args, proto,
-                 stateless_rpc=False, advertise_refs=False):
-        Handler.__init__(self, backend, proto)
+    def __init__(self, backend, args, proto, http_req=None,
+                 advertise_refs=False):
+        Handler.__init__(self, backend, proto, http_req=http_req)
         self.repo = backend.open_repository(args[0])
         self._graph_walker = None
-        self.stateless_rpc = stateless_rpc
         self.advertise_refs = advertise_refs
 
     @classmethod
@@ -312,7 +312,7 @@ class ProtocolGraphWalker(object):
         self.store = object_store
         self.get_peeled = get_peeled
         self.proto = handler.proto
-        self.stateless_rpc = handler.stateless_rpc
+        self.http_req = handler.http_req
         self.advertise_refs = handler.advertise_refs
         self._wants = []
         self._cached = False
@@ -332,10 +332,12 @@ class ProtocolGraphWalker(object):
         :return: a list of SHA1s requested by the client
         """
         if not heads:
-            raise GitProtocolError('No heads found')
+            # The repo is empty, so short-circuit the whole process.
+            self.proto.write_pkt_line(None)
+            return None
         values = set(heads.itervalues())
-        if self.advertise_refs or not self.stateless_rpc:
-            for i, (ref, sha) in enumerate(heads.iteritems()):
+        if self.advertise_refs or not self.http_req:
+            for i, (ref, sha) in enumerate(sorted(heads.iteritems())):
                 line = "%s %s" % (sha, ref)
                 if not i:
                     line = "%s\x00%s" % (line, self.handler.capability_line())
@@ -371,7 +373,7 @@ class ProtocolGraphWalker(object):
 
         self.set_wants(want_revs)
 
-        if self.stateless_rpc and self.proto.eof():
+        if self.http_req and self.proto.eof():
             # The client may close the socket at this point, expecting a
             # flush-pkt from the server. We might be ready to send a packfile at
             # this point, so we need to explicitly short-circuit in this case.
@@ -388,7 +390,7 @@ class ProtocolGraphWalker(object):
 
     def next(self):
         if not self._cached:
-            if not self._impl and self.stateless_rpc:
+            if not self._impl and self.http_req:
                 return None
             return self._impl.next()
         self._cache_index += 1
@@ -553,7 +555,7 @@ class MultiAckDetailedGraphWalkerImpl(object):
             command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
             if command is None:
                 self.walker.send_nak()
-                if self.walker.stateless_rpc:
+                if self.walker.http_req:
                     return None
                 continue
             elif command == 'done':
@@ -575,11 +577,10 @@ class MultiAckDetailedGraphWalkerImpl(object):
 class ReceivePackHandler(Handler):
     """Protocol handler for downloading a pack from the client."""
 
-    def __init__(self, backend, args, proto,
-                 stateless_rpc=False, advertise_refs=False):
-        Handler.__init__(self, backend, proto)
+    def __init__(self, backend, args, proto, http_req=None,
+                 advertise_refs=False):
+        Handler.__init__(self, backend, proto, http_req=http_req)
         self.repo = backend.open_repository(args[0])
-        self.stateless_rpc = stateless_rpc
         self.advertise_refs = advertise_refs
 
     @classmethod
@@ -648,9 +649,9 @@ class ReceivePackHandler(Handler):
         flush()
 
     def handle(self):
-        refs = self.repo.get_refs().items()
+        refs = sorted(self.repo.get_refs().iteritems())
 
-        if self.advertise_refs or not self.stateless_rpc:
+        if self.advertise_refs or not self.http_req:
             if refs:
                 self.proto.write_pkt_line(
                   "%s %s\x00%s\n" % (refs[0][1], refs[0][0],
@@ -659,7 +660,7 @@ class ReceivePackHandler(Handler):
                     ref = refs[i]
                     self.proto.write_pkt_line("%s %s\n" % (ref[1], ref[0]))
             else:
-                self.proto.write_pkt_line("%s capabilities^{} %s" % (
+                self.proto.write_pkt_line("%s capabilities^{}\0%s" % (
                   ZERO_SHA, self.capability_line()))
 
             self.proto.write("0000")

+ 1 - 0
dulwich/tests/__init__.py

@@ -95,6 +95,7 @@ def self_test_suite():
         'protocol',
         'repository',
         'server',
+        'walk',
         'web',
         ]
     module_names = ['dulwich.tests.test_' + name for name in names]

+ 22 - 0
dulwich/tests/compat/server_utils.py

@@ -20,10 +20,14 @@
 """Utilities for testing git server compatibility."""
 
 
+import os
 import select
+import shutil
 import socket
+import tempfile
 import threading
 
+from dulwich.repo import Repo
 from dulwich.server import (
     ReceivePackHandler,
     )
@@ -100,6 +104,24 @@ class ServerTests(object):
         self._old_repo.object_store._pack_cache = None
         self.assertReposEqual(self._old_repo, self._new_repo)
 
+    def test_clone_from_dulwich_empty(self):
+        old_repo_dir = os.path.join(tempfile.mkdtemp(), 'empty_old')
+        run_git_or_fail(['init', '--quiet', '--bare', old_repo_dir])
+        self._old_repo = Repo(old_repo_dir)
+        port = self._start_server(self._old_repo)
+
+        new_repo_base_dir = tempfile.mkdtemp()
+        try:
+            new_repo_dir = os.path.join(new_repo_base_dir, 'empty_new')
+            run_git_or_fail(['clone', self.url(port), new_repo_dir],
+                            cwd=new_repo_base_dir)
+            new_repo = Repo(new_repo_dir)
+            self.assertReposEqual(self._old_repo, new_repo)
+        finally:
+            # We don't create a Repo from new_repo_dir until after some errors
+            # may have occurred, so don't depend on tearDown to clean it up.
+            shutil.rmtree(new_repo_base_dir)
+
 
 class ShutdownServerMixIn:
     """Mixin that allows serve_forever to be shut down.

+ 14 - 6
dulwich/tests/test_client.py

@@ -35,6 +35,7 @@ from dulwich.protocol import (
 
 
 class DummyClient(GitClient):
+
     def __init__(self, can_read, read, write):
         self.can_read = can_read
         self.read = read
@@ -64,7 +65,9 @@ class GitClientTests(TestCase):
 
     def test_fetch_pack_none(self):
         self.rin.write(
-            '008855dcc6bf963f922e1ed5c4bbaaefcfacef57b1d7 HEAD.multi_ack thin-pack side-band side-band-64k ofs-delta shallow no-progress include-tag\n'
+            '008855dcc6bf963f922e1ed5c4bbaaefcfacef57b1d7 HEAD.multi_ack '
+            'thin-pack side-band side-band-64k ofs-delta shallow no-progress '
+            'include-tag\n'
             '0000')
         self.rin.seek(0)
         self.client.fetch_pack('bla', lambda heads: [], None, None, None)
@@ -91,7 +94,8 @@ class GitClientTests(TestCase):
         self.assertEquals(None, client.username)
         self.assertEqual('/bar/baz', path)
 
-        client, path = get_transport_and_path('git+ssh://foo.com:1234/bar/baz')
+        client, path = get_transport_and_path(
+            'git+ssh://foo.com:1234/bar/baz')
         self.assertTrue(isinstance(client, SSHGitClient))
         self.assertEquals('foo.com', client.host)
         self.assertEquals(1234, client.port)
@@ -127,7 +131,8 @@ class GitClientTests(TestCase):
     def test_get_transport_and_path_error(self):
         # Need to use a known urlparse.uses_netloc URL scheme to get the
         # expected parsing of the URL on Python versions less than 2.6.5
-        self.assertRaises(ValueError, get_transport_and_path, 'prospero://bar/baz')
+        self.assertRaises(ValueError, get_transport_and_path,
+        'prospero://bar/baz')
 
 
 class SSHGitClientTests(TestCase):
@@ -137,9 +142,12 @@ class SSHGitClientTests(TestCase):
         self.client = SSHGitClient('git.samba.org')
 
     def test_default_command(self):
-        self.assertEquals('git-upload-pack', self.client._get_cmd_path('upload-pack'))
+        self.assertEquals('git-upload-pack',
+                self.client._get_cmd_path('upload-pack'))
 
     def test_alternative_command_path(self):
-        self.client.alternative_paths['upload-pack'] = '/usr/lib/git/git-upload-pack'
-        self.assertEquals('/usr/lib/git/git-upload-pack', self.client._get_cmd_path('upload-pack'))
+        self.client.alternative_paths['upload-pack'] = (
+            '/usr/lib/git/git-upload-pack')
+        self.assertEquals('/usr/lib/git/git-upload-pack',
+            self.client._get_cmd_path('upload-pack'))
 

+ 234 - 21
dulwich/tests/test_diff_tree.py

@@ -27,6 +27,7 @@ from dulwich.diff_tree import (
     _merge_entries,
     _merge_entries_py,
     tree_changes,
+    tree_changes_for_merge,
     _count_blocks,
     _count_blocks_py,
     _similarity_score,
@@ -54,14 +55,12 @@ from dulwich.tests import (
     TestCase,
     )
 from dulwich.tests.utils import (
+    F,
     make_object,
     functest_builder,
     ext_functest_builder,
     )
 
-# Shorthand mode for Files.
-F = 0100644
-
 
 class DiffTestCase(TestCase):
 
@@ -89,6 +88,10 @@ class DiffTestCase(TestCase):
 
 class TreeChangesTest(DiffTestCase):
 
+    def setUp(self):
+        super(TreeChangesTest, self).setUp()
+        self.detector = RenameDetector(self.store)
+
     def assertMergeFails(self, merge_entries, name, mode, sha):
         t = Tree()
         t[name] = (mode, sha)
@@ -290,6 +293,179 @@ class TreeChangesTest(DiffTestCase):
                       ('a', F, blob_a2.id))],
           tree1, tree2)
 
+    def test_tree_changes_rename_detector(self):
+        blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
+        blob_a2 = make_object(Blob, data='a\nb\nc\ne\n')
+        blob_b = make_object(Blob, data='b')
+        tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
+        tree2 = self.commit_tree([('c', blob_a2), ('b', blob_b)])
+        detector = RenameDetector(self.store)
+
+        self.assertChangesEqual(
+          [TreeChange.delete(('a', F, blob_a1.id)),
+           TreeChange.add(('c', F, blob_a2.id))],
+          tree1, tree2)
+        self.assertChangesEqual(
+          [TreeChange.delete(('a', F, blob_a1.id)),
+           TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
+                      ('b', F, blob_b.id)),
+           TreeChange.add(('c', F, blob_a2.id))],
+          tree1, tree2, want_unchanged=True)
+        self.assertChangesEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
+                      ('c', F, blob_a2.id))],
+          tree1, tree2, rename_detector=detector)
+        self.assertChangesEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
+                      ('c', F, blob_a2.id)),
+           TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
+                      ('b', F, blob_b.id))],
+          tree1, tree2, rename_detector=detector, want_unchanged=True)
+
+    def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
+                                   **kwargs):
+        parent_tree_ids = [t.id for t in parent_trees]
+        actual = list(tree_changes_for_merge(
+          self.store, parent_tree_ids, merge_tree.id, **kwargs))
+        self.assertEqual(expected, actual)
+
+        parent_tree_ids.reverse()
+        expected = [list(reversed(cs)) for cs in expected]
+        actual = list(tree_changes_for_merge(
+          self.store, parent_tree_ids, merge_tree.id, **kwargs))
+        self.assertEqual(expected, actual)
+
+    def test_tree_changes_for_merge_add_no_conflict(self):
+        blob = make_object(Blob, data='blob')
+        parent1 = self.commit_tree([])
+        parent2 = merge = self.commit_tree([('a', blob)])
+        self.assertChangesForMergeEqual([], [parent1, parent2], merge)
+        self.assertChangesForMergeEqual([], [parent2, parent2], merge)
+
+    def test_tree_changes_for_merge_add_modify_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        parent1 = self.commit_tree([])
+        parent2 = self.commit_tree([('a', blob1)])
+        merge = self.commit_tree([('a', blob2)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange.add(('a', F, blob2.id)),
+            TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]],
+          [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_modify_modify_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        blob3 = make_object(Blob, data='3')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('a', blob2)])
+        merge = self.commit_tree([('a', blob3)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
+            TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]],
+          [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_modify_no_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = merge = self.commit_tree([('a', blob2)])
+        self.assertChangesForMergeEqual([], [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_delete_delete_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('a', blob2)])
+        merge = self.commit_tree([])
+        self.assertChangesForMergeEqual(
+          [[TreeChange.delete(('a', F, blob1.id)),
+            TreeChange.delete(('a', F, blob2.id))]],
+          [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_delete_no_conflict(self):
+        blob = make_object(Blob, data='blob')
+        has = self.commit_tree([('a', blob)])
+        doesnt_have = self.commit_tree([])
+        self.assertChangesForMergeEqual([], [has, has], doesnt_have)
+        self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
+
+    def test_tree_changes_for_merge_octopus_no_conflict(self):
+        r = range(5)
+        blobs = [make_object(Blob, data=str(i)) for i in r]
+        parents = [self.commit_tree([('a', blobs[i])]) for i in r]
+        for i in r:
+            # Take the SHA from each of the parents.
+            self.assertChangesForMergeEqual([], parents, parents[i])
+
+    def test_tree_changes_for_merge_octopus_modify_conflict(self):
+        # Because the octopus merge strategy is limited, I doubt it's possible
+        # to create this with the git command line. But the output is well-
+        # defined, so test it anyway.
+        r = range(5)
+        parent_blobs = [make_object(Blob, data=str(i)) for i in r]
+        merge_blob = make_object(Blob, data='merge')
+        parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r]
+        merge = self.commit_tree([('a', merge_blob)])
+        expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id),
+                                ('a', F, merge_blob.id)) for i in r]]
+        self.assertChangesForMergeEqual(expected, parents, merge)
+
+    def test_tree_changes_for_merge_octopus_delete(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='3')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('a', blob2)])
+        parent3 = merge = self.commit_tree([])
+        self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
+        self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
+        self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
+        self.assertChangesForMergeEqual(
+          [[TreeChange.delete(('a', F, blob1.id)),
+            TreeChange.delete(('a', F, blob2.id)),
+            None]],
+          [parent1, parent2, parent3], merge)
+
+    def test_tree_changes_for_merge_add_add_same_conflict(self):
+        blob = make_object(Blob, data='a\nb\nc\nd\n')
+        parent1 = self.commit_tree([('a', blob)])
+        parent2 = self.commit_tree([])
+        merge = self.commit_tree([('b', blob)])
+        add = TreeChange.add(('b', F, blob.id))
+        self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_add_exact_rename_conflict(self):
+        blob = make_object(Blob, data='a\nb\nc\nd\n')
+        parent1 = self.commit_tree([('a', blob)])
+        parent2 = self.commit_tree([])
+        merge = self.commit_tree([('b', blob)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
+            TreeChange.add(('b', F, blob.id))]],
+          [parent1, parent2], merge, rename_detector=self.detector)
+
+    def test_tree_changes_for_merge_add_content_rename_conflict(self):
+        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
+        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([])
+        merge = self.commit_tree([('b', blob2)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
+            TreeChange.add(('b', F, blob2.id))]],
+          [parent1, parent2], merge, rename_detector=self.detector)
+
+    def test_tree_changes_for_merge_modify_rename_conflict(self):
+        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
+        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('b', blob1)])
+        merge = self.commit_tree([('b', blob2)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
+            TreeChange(CHANGE_MODIFY, ('b', F, blob1.id), ('b', F, blob2.id))]],
+          [parent1, parent2], merge, rename_detector=self.detector)
+
 
 class RenameDetectionTest(DiffTestCase):
 
@@ -387,9 +563,10 @@ class RenameDetectionTest(DiffTestCase):
             self.assertEqual(expected_entries,
                              sorted(perm, key=_tree_change_key))
 
-    def detect_renames(self, tree1, tree2, **kwargs):
-        detector = RenameDetector(self.store, tree1.id, tree2.id, **kwargs)
-        return detector.changes_with_renames()
+    def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
+        detector = RenameDetector(self.store, **kwargs)
+        return detector.changes_with_renames(tree1.id, tree2.id,
+                                             want_unchanged=want_unchanged)
 
     def test_no_renames(self):
         blob1 = make_object(Blob, data='a\nb\nc\nd\n')
@@ -458,6 +635,26 @@ class RenameDetectionTest(DiffTestCase):
            TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
           self.detect_renames(tree1, tree2))
 
+    def test_exact_copy_modify(self):
+        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
+        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
+        tree1 = self.commit_tree([('a', blob1)])
+        tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
+        self.assertEqual(
+          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
+           TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_copy_change_mode(self):
+        blob = make_object(Blob, data='a\nb\nc\nd\n')
+        tree1 = self.commit_tree([('a', blob)])
+        tree2 = self.commit_tree([('a', blob, 0100755), ('b', blob)])
+        self.assertEqual(
+          [TreeChange(CHANGE_MODIFY, ('a', F, blob.id),
+                      ('a', 0100755, blob.id)),
+           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
+          self.detect_renames(tree1, tree2))
+
     def test_rename_threshold(self):
         blob1 = make_object(Blob, data='a\nb\nc\n')
         blob2 = make_object(Blob, data='a\nb\nd\n')
@@ -607,7 +804,7 @@ class RenameDetectionTest(DiffTestCase):
 
         no_renames = [
           TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
-          TreeChange.add(('b', F, blob2.id))]
+          TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))]
         self.assertEqual(
           no_renames, self.detect_renames(tree1, tree2))
         self.assertEqual(
@@ -638,20 +835,6 @@ class RenameDetectionTest(DiffTestCase):
           [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
           self.detect_renames(tree1, tree2, find_copies_harder=True))
 
-    def test_find_copies_harder_modify(self):
-        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
-        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
-        tree1 = self.commit_tree([('a', blob1)])
-        tree2 = self.commit_tree([('a', blob2), ('b', blob2)])
-        self.assertEqual(
-          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
-           TreeChange.add(('b', F, blob2.id))],
-          self.detect_renames(tree1, tree2))
-        self.assertEqual(
-          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
-           TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
-          self.detect_renames(tree1, tree2, find_copies_harder=True))
-
     def test_find_copies_harder_with_rewrites(self):
         blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
         blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
@@ -669,3 +852,33 @@ class RenameDetectionTest(DiffTestCase):
                       ('b', F, blob_b2.id))],
           self.detect_renames(tree1, tree2, rewrite_threshold=50,
                               find_copies_harder=True))
+
+    def test_reuse_detector(self):
+        blob = make_object(Blob, data='blob')
+        tree1 = self.commit_tree([('a', blob)])
+        tree2 = self.commit_tree([('b', blob)])
+        detector = RenameDetector(self.store)
+        changes = [TreeChange(CHANGE_RENAME, ('a', F, blob.id),
+                              ('b', F, blob.id))]
+        self.assertEqual(changes,
+                         detector.changes_with_renames(tree1.id, tree2.id))
+        self.assertEqual(changes,
+                         detector.changes_with_renames(tree1.id, tree2.id))
+
+    def test_want_unchanged(self):
+        blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
+        blob_b = make_object(Blob, data='b')
+        blob_c2 = make_object(Blob, data='a\nb\nc\ne\n')
+        tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
+        tree2 = self.commit_tree([('c', blob_c2), ('b', blob_b)])
+        detector = RenameDetector(self.store)
+        self.assertEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
+                      ('c', F, blob_c2.id))],
+          self.detect_renames(tree1, tree2))
+        self.assertEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
+                      ('c', F, blob_c2.id)),
+           TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
+                      ('b', F, blob_b.id))],
+          self.detect_renames(tree1, tree2, want_unchanged=True))

+ 56 - 0
dulwich/tests/test_object_store.py

@@ -41,6 +41,7 @@ from dulwich.objects import (
 from dulwich.object_store import (
     DiskObjectStore,
     MemoryObjectStore,
+    ObjectStoreGraphWalker,
     tree_lookup_path,
     )
 from dulwich.pack import (
@@ -300,3 +301,58 @@ class TreeLookupPathTests(TestCase):
         self.assertRaises(NotTreeError, tree_lookup_path, self.get_object, self.tree_id, 'ad/b/j')
 
 # TODO: MissingObjectFinderTests
+
+class ObjectStoreGraphWalkerTests(TestCase):
+
+    def get_walker(self, heads, parent_map):
+        return ObjectStoreGraphWalker(heads,
+            parent_map.__getitem__)
+
+    def test_empty(self):
+        gw = self.get_walker([], {})
+        self.assertIs(None, gw.next())
+        gw.ack("aa" * 20)
+        self.assertIs(None, gw.next())
+
+    def test_descends(self):
+        gw = self.get_walker(["a"], {"a": ["b"], "b": []})
+        self.assertEquals("a", gw.next())
+        self.assertEquals("b", gw.next())
+
+    def test_present(self):
+        gw = self.get_walker(["a"], {"a": ["b"], "b": []})
+        gw.ack("a")
+        self.assertIs(None, gw.next())
+
+    def test_parent_present(self):
+        gw = self.get_walker(["a"], {"a": ["b"], "b": []})
+        self.assertEquals("a", gw.next())
+        gw.ack("a")
+        self.assertIs(None, gw.next())
+
+    def test_child_ack_later(self):
+        gw = self.get_walker(["a"], {"a": ["b"], "b": ["c"], "c": []})
+        self.assertEquals("a", gw.next())
+        self.assertEquals("b", gw.next())
+        gw.ack("a")
+        self.assertIs(None, gw.next())
+
+    def test_only_once(self):
+        # a  b
+        # |  |
+        # c  d
+        # \ /
+        #  e
+        gw = self.get_walker(["a", "b"], {
+                "a": ["c"],
+                "b": ["d"],
+                "c": ["e"],
+                "d": ["e"],
+                "e": [],
+                })
+        self.assertEquals("a", gw.next())
+        self.assertEquals("c", gw.next())
+        gw.ack("a")
+        self.assertEquals("b", gw.next())
+        self.assertEquals("d", gw.next())
+        self.assertIs(None, gw.next())

+ 4 - 1
dulwich/tests/test_pack.py

@@ -330,7 +330,6 @@ class TestPack(PackTests):
         finally:
             origpack.close()
 
-
     def test_commit_obj(self):
         p = self.get_pack(pack1_sha)
         commit = p[commit_sha]
@@ -708,6 +707,10 @@ class TestPackStreamReader(TestCase):
         reader = PackStreamReader(f.read, zlib_bufsize=4)
         self.assertEqual(2, len(list(reader.read_objects())))
 
+    def test_read_objects_empty(self):
+        reader = PackStreamReader(StringIO().read)
+        self.assertEqual([], list(reader.read_objects()))
+
 
 class TestPackIterator(DeltaChainIterator):
 

+ 25 - 45
dulwich/tests/test_repository.py

@@ -76,21 +76,17 @@ class CreateRepositoryTests(TestCase):
 
     def test_create_disk_bare(self):
         tmp_dir = tempfile.mkdtemp()
-        try:
-            repo = Repo.init_bare(tmp_dir)
-            self.assertEquals(tmp_dir, repo._controldir)
-            self._check_repo_contents(repo, True)
-        finally:
-            shutil.rmtree(tmp_dir)
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init_bare(tmp_dir)
+        self.assertEquals(tmp_dir, repo._controldir)
+        self._check_repo_contents(repo, True)
 
     def test_create_disk_non_bare(self):
         tmp_dir = tempfile.mkdtemp()
-        try:
-            repo = Repo.init(tmp_dir)
-            self.assertEquals(os.path.join(tmp_dir, '.git'), repo._controldir)
-            self._check_repo_contents(repo, False)
-        finally:
-            shutil.rmtree(tmp_dir)
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+        self.assertEquals(os.path.join(tmp_dir, '.git'), repo._controldir)
+        self._check_repo_contents(repo, False)
 
     def test_create_memory(self):
         repo = MemoryRepo.init_bare([], {})
@@ -171,48 +167,38 @@ class RepositoryTests(TestCase):
     def test_commit(self):
         r = self._repo = open_repo('a.git')
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            obj = r.commit(r.head())
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        obj = r.commit(r.head())
         self.assertEqual(obj.type_name, 'commit')
 
     def test_commit_not_commit(self):
         r = self._repo = open_repo('a.git')
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            self.assertRaises(errors.NotCommitError,
-                r.commit, '4f2e6529203aa6d44b5af6e3292c837ceda003f9')
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        self.assertRaises(errors.NotCommitError,
+            r.commit, '4f2e6529203aa6d44b5af6e3292c837ceda003f9')
 
     def test_tree(self):
         r = self._repo = open_repo('a.git')
         commit = r[r.head()]
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            tree = r.tree(commit.tree)
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        tree = r.tree(commit.tree)
         self.assertEqual(tree.type_name, 'tree')
         self.assertEqual(tree.sha().hexdigest(), commit.tree)
 
     def test_tree_not_tree(self):
         r = self._repo = open_repo('a.git')
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            self.assertRaises(errors.NotTreeError, r.tree, r.head())
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        self.assertRaises(errors.NotTreeError, r.tree, r.head())
 
     def test_tag(self):
         r = self._repo = open_repo('a.git')
         tag_sha = '28237f4dc30d0d462658d6b937b08a0f0b6ef55a'
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            tag = r.tag(tag_sha)
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        tag = r.tag(tag_sha)
         self.assertEqual(tag.type_name, 'tag')
         self.assertEqual(tag.sha().hexdigest(), tag_sha)
         obj_class, obj_sha = tag.object
@@ -222,10 +208,8 @@ class RepositoryTests(TestCase):
     def test_tag_not_tag(self):
         r = self._repo = open_repo('a.git')
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            self.assertRaises(errors.NotTagError, r.tag, r.head())
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        self.assertRaises(errors.NotTagError, r.tag, r.head())
 
     def test_get_peeled(self):
         # unpacked ref
@@ -252,20 +236,16 @@ class RepositoryTests(TestCase):
         tree = r[commit.tree]
         blob_sha = tree.items()[0][2]
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            blob = r.get_blob(blob_sha)
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        blob = r.get_blob(blob_sha)
         self.assertEqual(blob.type_name, 'blob')
         self.assertEqual(blob.sha().hexdigest(), blob_sha)
 
     def test_get_blob_notblob(self):
         r = self._repo = open_repo('a.git')
         warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            self.assertRaises(errors.NotBlobError, r.get_blob, r.head())
-        finally:
-            warnings.resetwarnings()
+        self.addCleanup(warnings.resetwarnings)
+        self.assertRaises(errors.NotBlobError, r.get_blob, r.head())
 
     def test_linear_history(self):
         r = self._repo = open_repo('a.git')

+ 8 - 12
dulwich/tests/test_server.py

@@ -78,16 +78,11 @@ class TestProto(object):
         self._received[band].append(data)
 
     def write_pkt_line(self, data):
-        if data is None:
-            data = 'None'
         self._received[0].append(data)
 
     def get_received_line(self, band=0):
         lines = self._received[band]
-        if lines:
-            return lines.pop(0)
-        else:
-            return None
+        return lines.pop(0)
 
 
 class TestGenericHandler(Handler):
@@ -164,14 +159,14 @@ class UploadPackHandlerTestCase(TestCase):
                          self._handler.proto.get_received_line(2))
         self.assertEqual('second message',
                          self._handler.proto.get_received_line(2))
-        self.assertEqual(None, self._handler.proto.get_received_line(2))
+        self.assertRaises(IndexError, self._handler.proto.get_received_line, 2)
 
     def test_no_progress(self):
         caps = list(self._handler.required_capabilities()) + ['no-progress']
         self._handler.set_client_capabilities(caps)
         self._handler.progress('first message')
         self._handler.progress('second message')
-        self.assertEqual(None, self._handler.proto.get_received_line(2))
+        self.assertRaises(IndexError, self._handler.proto.get_received_line, 2)
 
     def test_get_tagged(self):
         refs = {
@@ -268,7 +263,8 @@ class ProtocolGraphWalkerTestCase(TestCase):
         self.assertEquals((None, None), _split_proto_line('', allowed))
 
     def test_determine_wants(self):
-        self.assertRaises(GitProtocolError, self._walker.determine_wants, {})
+        self.assertEqual(None, self._walker.determine_wants({}))
+        self.assertEqual(None, self._walker.proto.get_received_line())
 
         self._walker.proto.set_output([
           'want %s multi_ack' % ONE,
@@ -309,7 +305,7 @@ class ProtocolGraphWalkerTestCase(TestCase):
         lines = []
         while True:
             line = self._walker.proto.get_received_line()
-            if line == 'None':
+            if line is None:
                 break
             # strip capabilities list if present
             if '\x00' in line:
@@ -337,7 +333,7 @@ class TestProtocolGraphWalker(object):
         self.acks = []
         self.lines = []
         self.done = False
-        self.stateless_rpc = False
+        self.http_req = None
         self.advertise_refs = False
 
     def read_proto_line(self, allowed):
@@ -633,7 +629,7 @@ class MultiAckDetailedGraphWalkerImplTestCase(AckGraphWalkerImplTestCase):
     def test_multi_ack_stateless(self):
         # transmission ends with a flush-pkt
         self._walker.lines[-1] = (None, None)
-        self._walker.stateless_rpc = True
+        self._walker.http_req = True
 
         self.assertNextEquals(TWO)
         self.assertNoAck()

+ 84 - 0
dulwich/tests/test_utils.py

@@ -0,0 +1,84 @@
+# test_utils.py -- Tests for git test utilities.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+
+"""Tests for git test utilities."""
+
+from dulwich.object_store import (
+    MemoryObjectStore,
+    )
+from dulwich.objects import (
+    Blob,
+    )
+from dulwich.tests import (
+    TestCase,
+    )
+from utils import (
+    make_object,
+    build_commit_graph,
+    )
+
+
+class BuildCommitGraphTest(TestCase):
+
+    def setUp(self):
+        self.store = MemoryObjectStore()
+
+    def test_linear(self):
+        c1, c2 = build_commit_graph(self.store, [[1], [2, 1]])
+        for obj_id in [c1.id, c2.id, c1.tree, c2.tree]:
+            self.assertTrue(obj_id in self.store)
+        self.assertEqual([], c1.parents)
+        self.assertEqual([c1.id], c2.parents)
+        self.assertEqual(c1.tree, c2.tree)
+        self.assertEqual([], list(self.store[c1.tree].iteritems()))
+        self.assertTrue(c2.commit_time > c1.commit_time)
+
+    def test_merge(self):
+        c1, c2, c3, c4 = build_commit_graph(self.store,
+                                            [[1], [2, 1], [3, 1], [4, 2, 3]])
+        self.assertEqual([c2.id, c3.id], c4.parents)
+        self.assertTrue(c4.commit_time > c2.commit_time)
+        self.assertTrue(c4.commit_time > c3.commit_time)
+
+    def test_missing_parent(self):
+        self.assertRaises(ValueError, build_commit_graph, self.store,
+                          [[1], [3, 2], [2, 1]])
+
+    def test_trees(self):
+        a1 = make_object(Blob, data='aaa1')
+        a2 = make_object(Blob, data='aaa2')
+        c1, c2 = build_commit_graph(self.store, [[1], [2, 1]],
+                                    trees={1: [('a', a1)],
+                                           2: [('a', a2, 0100644)]})
+        self.assertEqual((0100644, a1.id), self.store[c1.tree]['a'])
+        self.assertEqual((0100644, a2.id), self.store[c2.tree]['a'])
+
+    def test_attrs(self):
+        c1, c2 = build_commit_graph(self.store, [[1], [2, 1]],
+                                    attrs={1: {'message': 'Hooray!'}})
+        self.assertEqual('Hooray!', c1.message)
+        self.assertEqual('Commit 2', c2.message)
+
+    def test_commit_time(self):
+        c1, c2, c3 = build_commit_graph(self.store, [[1], [2, 1], [3, 2]],
+                                        attrs={1: {'commit_time': 124},
+                                               2: {'commit_time': 123}})
+        self.assertEqual(124, c1.commit_time)
+        self.assertEqual(123, c2.commit_time)
+        self.assertTrue(c2.commit_time < c1.commit_time < c3.commit_time)

+ 411 - 0
dulwich/tests/test_walk.py

@@ -0,0 +1,411 @@
+# test_walk.py -- Tests for commit walking functionality.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Tests for commit walking functionality."""
+
+from dulwich._compat import (
+    permutations,
+    )
+from dulwich.diff_tree import (
+    CHANGE_ADD,
+    CHANGE_MODIFY,
+    CHANGE_RENAME,
+    CHANGE_COPY,
+    TreeChange,
+    RenameDetector,
+    )
+from dulwich.errors import (
+    MissingCommitError,
+    )
+from dulwich.object_store import (
+    MemoryObjectStore,
+    )
+from dulwich.objects import (
+    Commit,
+    Blob,
+    )
+from dulwich.walk import (
+    ORDER_TOPO,
+    WalkEntry,
+    Walker,
+    _topo_reorder
+    )
+from dulwich.tests import TestCase
+from utils import (
+    F,
+    make_object,
+    build_commit_graph,
+    )
+
+
+class TestWalkEntry(object):
+
+    def __init__(self, commit, changes):
+        self.commit = commit
+        self.changes = changes
+
+    def __repr__(self):
+        return '<TestWalkEntry commit=%s, changes=%r>' % (
+          self.commit.id, self.changes)
+
+    def __eq__(self, other):
+        if not isinstance(other, WalkEntry) or self.commit != other.commit:
+            return False
+        if self.changes is None:
+            return True
+        return self.changes == other.changes()
+
+
+class WalkerTest(TestCase):
+
+    def setUp(self):
+        self.store = MemoryObjectStore()
+
+    def make_commits(self, commit_spec, **kwargs):
+        times = kwargs.pop('times', [])
+        attrs = kwargs.pop('attrs', {})
+        for i, t in enumerate(times):
+            attrs.setdefault(i + 1, {})['commit_time'] = t
+        return build_commit_graph(self.store, commit_spec, attrs=attrs,
+                                  **kwargs)
+
+    def make_linear_commits(self, num_commits, **kwargs):
+        commit_spec = []
+        for i in xrange(1, num_commits + 1):
+            c = [i]
+            if i > 1:
+                c.append(i - 1)
+            commit_spec.append(c)
+        return self.make_commits(commit_spec, **kwargs)
+
+    def assertWalkYields(self, expected, *args, **kwargs):
+        walker = Walker(self.store, *args, **kwargs)
+        expected = list(expected)
+        for i, entry in enumerate(expected):
+            if isinstance(entry, Commit):
+                expected[i] = TestWalkEntry(entry, None)
+        actual = list(walker)
+        self.assertEqual(expected, actual)
+
+    def test_linear(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c1], [c1.id])
+        self.assertWalkYields([c2, c1], [c2.id])
+        self.assertWalkYields([c3, c2, c1], [c3.id])
+        self.assertWalkYields([c3, c2, c1], [c3.id, c1.id])
+        self.assertWalkYields([c3, c2], [c3.id], exclude=[c1.id])
+        self.assertWalkYields([c3, c2], [c3.id, c1.id], exclude=[c1.id])
+        self.assertWalkYields([c3], [c3.id, c1.id], exclude=[c2.id])
+
+    def test_missing(self):
+        cs = list(reversed(self.make_linear_commits(20)))
+        self.assertWalkYields(cs, [cs[0].id])
+
+        # Exactly how close we can get to a missing commit depends on our
+        # implementation (in particular the choice of _MAX_EXTRA_COMMITS), but
+        # we should at least be able to walk some history in a broken repo.
+        del self.store[cs[-1].id]
+        for i in xrange(1, 11):
+            self.assertWalkYields(cs[:i], [cs[0].id], max_entries=i)
+        self.assertRaises(MissingCommitError, Walker, self.store, cs[0].id)
+
+    def test_branch(self):
+        c1, x2, x3, y4 = self.make_commits([[1], [2, 1], [3, 2], [4, 1]])
+        self.assertWalkYields([x3, x2, c1], [x3.id])
+        self.assertWalkYields([y4, c1], [y4.id])
+        self.assertWalkYields([y4, x2, c1], [y4.id, x2.id])
+        self.assertWalkYields([y4, x2], [y4.id, x2.id], exclude=[c1.id])
+        self.assertWalkYields([y4, x3], [y4.id, x3.id], exclude=[x2.id])
+        self.assertWalkYields([y4], [y4.id], exclude=[x3.id])
+        self.assertWalkYields([x3, x2], [x3.id], exclude=[y4.id])
+
+    def test_merge(self):
+        c1, c2, c3, c4 = self.make_commits([[1], [2, 1], [3, 1], [4, 2, 3]])
+        self.assertWalkYields([c4, c3, c2, c1], [c4.id])
+        self.assertWalkYields([c3, c1], [c3.id])
+        self.assertWalkYields([c2, c1], [c2.id])
+        self.assertWalkYields([c4, c3], [c4.id], exclude=[c2.id])
+        self.assertWalkYields([c4, c2], [c4.id], exclude=[c3.id])
+
+    def test_reverse(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c1, c2, c3], [c3.id], reverse=True)
+
+    def test_max_entries(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c3, c2, c1], [c3.id], max_entries=3)
+        self.assertWalkYields([c3, c2], [c3.id], max_entries=2)
+        self.assertWalkYields([c3], [c3.id], max_entries=1)
+
+    def test_reverse_after_max_entries(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c1, c2, c3], [c3.id], max_entries=3,
+                              reverse=True)
+        self.assertWalkYields([c2, c3], [c3.id], max_entries=2, reverse=True)
+        self.assertWalkYields([c3], [c3.id], max_entries=1, reverse=True)
+
+    def test_changes_one_parent(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_a2 = make_object(Blob, data='a2')
+        blob_b2 = make_object(Blob, data='b2')
+        c1, c2 = self.make_linear_commits(
+          2, trees={1: [('a', blob_a1)],
+                    2: [('a', blob_a2), ('b', blob_b2)]})
+        e1 = TestWalkEntry(c1, [TreeChange.add(('a', F, blob_a1.id))])
+        e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
+                                           ('a', F, blob_a2.id)),
+                                TreeChange.add(('b', F, blob_b2.id))])
+        self.assertWalkYields([e2, e1], [c2.id])
+
+    def test_changes_multiple_parents(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_b2 = make_object(Blob, data='b2')
+        blob_a3 = make_object(Blob, data='a3')
+        c1, c2, c3 = self.make_commits(
+          [[1], [2], [3, 1, 2]],
+          trees={1: [('a', blob_a1)], 2: [('b', blob_b2)],
+                 3: [('a', blob_a3), ('b', blob_b2)]})
+        # a is a modify/add conflict and b is not conflicted.
+        changes = [[
+          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id), ('a', F, blob_a3.id)),
+          TreeChange.add(('a', F, blob_a3.id)),
+          ]]
+        self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
+                              exclude=[c1.id, c2.id])
+
+    def test_path_matches(self):
+        walker = Walker(None, [], paths=['foo', 'bar', 'baz/quux'])
+        self.assertTrue(walker._path_matches('foo'))
+        self.assertTrue(walker._path_matches('foo/a'))
+        self.assertTrue(walker._path_matches('foo/a/b'))
+        self.assertTrue(walker._path_matches('bar'))
+        self.assertTrue(walker._path_matches('baz/quux'))
+        self.assertTrue(walker._path_matches('baz/quux/a'))
+
+        self.assertFalse(walker._path_matches(None))
+        self.assertFalse(walker._path_matches('oops'))
+        self.assertFalse(walker._path_matches('fool'))
+        self.assertFalse(walker._path_matches('baz'))
+        self.assertFalse(walker._path_matches('baz/quu'))
+
+    def test_paths(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_b2 = make_object(Blob, data='b2')
+        blob_a3 = make_object(Blob, data='a3')
+        blob_b3 = make_object(Blob, data='b3')
+        c1, c2, c3 = self.make_linear_commits(
+          3, trees={1: [('a', blob_a1)],
+                    2: [('a', blob_a1), ('x/b', blob_b2)],
+                    3: [('a', blob_a3), ('x/b', blob_b3)]})
+
+        self.assertWalkYields([c3, c2, c1], [c3.id])
+        self.assertWalkYields([c3, c1], [c3.id], paths=['a'])
+        self.assertWalkYields([c3, c2], [c3.id], paths=['x/b'])
+
+        # All changes are included, not just for requested paths.
+        changes = [
+          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
+                     ('a', F, blob_a3.id)),
+          TreeChange(CHANGE_MODIFY, ('x/b', F, blob_b2.id),
+                     ('x/b', F, blob_b3.id)),
+          ]
+        self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
+                              max_entries=1, paths=['a'])
+
+    def test_paths_subtree(self):
+        blob_a = make_object(Blob, data='a')
+        blob_b = make_object(Blob, data='b')
+        c1, c2, c3 = self.make_linear_commits(
+          3, trees={1: [('x/a', blob_a)],
+                    2: [('b', blob_b), ('x/a', blob_a)],
+                    3: [('b', blob_b), ('x/a', blob_a), ('x/b', blob_b)]})
+        self.assertWalkYields([c2], [c3.id], paths=['b'])
+        self.assertWalkYields([c3, c1], [c3.id], paths=['x'])
+
+    def test_paths_max_entries(self):
+        blob_a = make_object(Blob, data='a')
+        blob_b = make_object(Blob, data='b')
+        c1, c2 = self.make_linear_commits(
+          2, trees={1: [('a', blob_a)],
+                    2: [('a', blob_a), ('b', blob_b)]})
+        self.assertWalkYields([c2], [c2.id], paths=['b'], max_entries=1)
+        self.assertWalkYields([c1], [c1.id], paths=['a'], max_entries=1)
+
+    def test_paths_merge(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_a2 = make_object(Blob, data='a2')
+        blob_a3 = make_object(Blob, data='a3')
+        x1, y2, m3, m4 = self.make_commits(
+          [[1], [2], [3, 1, 2], [4, 1, 2]],
+          trees={1: [('a', blob_a1)],
+                 2: [('a', blob_a2)],
+                 3: [('a', blob_a3)],
+                 4: [('a', blob_a1)]})  # Non-conflicting
+        self.assertWalkYields([m3, y2, x1], [m3.id], paths=['a'])
+        self.assertWalkYields([y2, x1], [m4.id], paths=['a'])
+
+    def test_changes_with_renames(self):
+        blob = make_object(Blob, data='blob')
+        c1, c2 = self.make_linear_commits(
+          2, trees={1: [('a', blob)], 2: [('b', blob)]})
+        entry_a = ('a', F, blob.id)
+        entry_b = ('b', F, blob.id)
+        changes_without_renames = [TreeChange.delete(entry_a),
+                                   TreeChange.add(entry_b)]
+        changes_with_renames = [TreeChange(CHANGE_RENAME, entry_a, entry_b)]
+        self.assertWalkYields(
+          [TestWalkEntry(c2, changes_without_renames)], [c2.id], max_entries=1)
+        detector = RenameDetector(self.store)
+        self.assertWalkYields(
+          [TestWalkEntry(c2, changes_with_renames)], [c2.id], max_entries=1,
+          rename_detector=detector)
+
+    def test_follow_rename(self):
+        blob = make_object(Blob, data='blob')
+        names = ['a', 'a', 'b', 'b', 'c', 'c']
+
+        trees = dict((i + 1, [(n, blob, F)]) for i, n in enumerate(names))
+        c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees)
+        self.assertWalkYields([c5], [c6.id], paths=['c'])
+
+        e = lambda n: (n, F, blob.id)
+        self.assertWalkYields(
+          [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e('b'), e('c'))]),
+           TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e('a'), e('b'))]),
+           TestWalkEntry(c1, [TreeChange.add(e('a'))])],
+          [c6.id], paths=['c'], follow=True)
+
+    def test_follow_rename_remove_path(self):
+        blob = make_object(Blob, data='blob')
+        _, _, _, c4, c5, c6 = self.make_linear_commits(
+          6, trees={1: [('a', blob), ('c', blob)],
+                    2: [],
+                    3: [],
+                    4: [('b', blob)],
+                    5: [('a', blob)],
+                    6: [('c', blob)]})
+
+        e = lambda n: (n, F, blob.id)
+        # Once the path changes to b, we aren't interested in a or c anymore.
+        self.assertWalkYields(
+          [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e('a'), e('c'))]),
+           TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e('b'), e('a'))]),
+           TestWalkEntry(c4, [TreeChange.add(e('b'))])],
+          [c6.id], paths=['c'], follow=True)
+
+    def test_since(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c3, c2, c1], [c3.id], since=-1)
+        self.assertWalkYields([c3, c2, c1], [c3.id], since=0)
+        self.assertWalkYields([c3, c2], [c3.id], since=1)
+        self.assertWalkYields([c3, c2], [c3.id], since=99)
+        self.assertWalkYields([c3, c2], [c3.id], since=100)
+        self.assertWalkYields([c3], [c3.id], since=101)
+        self.assertWalkYields([c3], [c3.id], since=199)
+        self.assertWalkYields([c3], [c3.id], since=200)
+        self.assertWalkYields([], [c3.id], since=201)
+        self.assertWalkYields([], [c3.id], since=300)
+
+    def test_until(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([], [c3.id], until=-1)
+        self.assertWalkYields([c1], [c3.id], until=0)
+        self.assertWalkYields([c1], [c3.id], until=1)
+        self.assertWalkYields([c1], [c3.id], until=99)
+        self.assertWalkYields([c2, c1], [c3.id], until=100)
+        self.assertWalkYields([c2, c1], [c3.id], until=101)
+        self.assertWalkYields([c2, c1], [c3.id], until=199)
+        self.assertWalkYields([c3, c2, c1], [c3.id], until=200)
+        self.assertWalkYields([c3, c2, c1], [c3.id], until=201)
+        self.assertWalkYields([c3, c2, c1], [c3.id], until=300)
+
+    def test_since_until(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([], [c3.id], since=100, until=99)
+        self.assertWalkYields([c3, c2, c1], [c3.id], since=-1, until=201)
+        self.assertWalkYields([c2], [c3.id], since=100, until=100)
+        self.assertWalkYields([c2], [c3.id], since=50, until=150)
+
+    def test_since_over_scan(self):
+        commits = self.make_linear_commits(
+          11, times=[9, 0, 1, 2, 3, 4, 5, 8, 6, 7, 9])
+        c8, _, c10, c11 = commits[-4:]
+        del self.store[commits[0].id]
+        # c9 is older than we want to walk, but is out of order with its parent,
+        # so we need to walk past it to get to c8.
+        # c1 would also match, but we've deleted it, and it should get pruned
+        # even with over-scanning.
+        self.assertWalkYields([c11, c10, c8], [c11.id], since=7)
+
+    def assertTopoOrderEqual(self, expected_commits, commits):
+        entries = [TestWalkEntry(c, None) for c in commits]
+        actual_ids = [e.commit.id for e in list(_topo_reorder(entries))]
+        self.assertEqual([c.id for c in expected_commits], actual_ids)
+
+    def test_topo_reorder_linear(self):
+        commits = self.make_linear_commits(5)
+        commits.reverse()
+        for perm in permutations(commits):
+            self.assertTopoOrderEqual(commits, perm)
+
+    def test_topo_reorder_multiple_parents(self):
+        c1, c2, c3 = self.make_commits([[1], [2], [3, 1, 2]])
+        # Already sorted, so totally FIFO.
+        self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1])
+        self.assertTopoOrderEqual([c3, c1, c2], [c3, c1, c2])
+
+        # c3 causes one parent to be yielded.
+        self.assertTopoOrderEqual([c3, c2, c1], [c2, c3, c1])
+        self.assertTopoOrderEqual([c3, c1, c2], [c1, c3, c2])
+
+        # c3 causes both parents to be yielded.
+        self.assertTopoOrderEqual([c3, c2, c1], [c1, c2, c3])
+        self.assertTopoOrderEqual([c3, c2, c1], [c2, c1, c3])
+
+    def test_topo_reorder_multiple_children(self):
+        c1, c2, c3 = self.make_commits([[1], [2, 1], [3, 1]])
+
+        # c2 and c3 are FIFO but c1 moves to the end.
+        self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1])
+        self.assertTopoOrderEqual([c3, c2, c1], [c3, c1, c2])
+        self.assertTopoOrderEqual([c3, c2, c1], [c1, c3, c2])
+
+        self.assertTopoOrderEqual([c2, c3, c1], [c2, c3, c1])
+        self.assertTopoOrderEqual([c2, c3, c1], [c2, c1, c3])
+        self.assertTopoOrderEqual([c2, c3, c1], [c1, c2, c3])
+
+    def test_out_of_order_children(self):
+        c1, c2, c3, c4, c5 = self.make_commits(
+          [[1], [2, 1], [3, 2], [4, 1], [5, 3, 4]],
+          times=[2, 1, 3, 4, 5])
+        self.assertWalkYields([c5, c4, c3, c1, c2], [c5.id])
+        self.assertWalkYields([c5, c4, c3, c2, c1], [c5.id], order=ORDER_TOPO)
+
+    def test_out_of_order_with_exclude(self):
+        # Create the following graph:
+        # c1-------x2---m6
+        #   \          /
+        #    \-y3--y4-/--y5
+        # Due to skew, y5 is the oldest commit.
+        c1, x2, y3, y4, y5, m6 = cs = self.make_commits(
+          [[1], [2, 1], [3, 1], [4, 3], [5, 4], [6, 2, 4]],
+          times=[2, 3, 4, 5, 1, 6])
+        self.assertWalkYields([m6, y4, y3, x2, c1], [m6.id])
+        # Ensure that c1..y4 get excluded even though they're popped from the
+        # priority queue long before y5.
+        self.assertWalkYields([m6, x2], [m6.id], exclude=[y5.id])

+ 5 - 5
dulwich/tests/test_web.py

@@ -282,11 +282,11 @@ class DumbHandlersTestCase(WebTestCase):
 class SmartHandlersTestCase(WebTestCase):
 
     class _TestUploadPackHandler(object):
-        def __init__(self, backend, args, proto, stateless_rpc=False,
+        def __init__(self, backend, args, proto, http_req=None,
                      advertise_refs=False):
             self.args = args
             self.proto = proto
-            self.stateless_rpc = stateless_rpc
+            self.http_req = http_req
             self.advertise_refs = advertise_refs
 
         def handle(self):
@@ -316,9 +316,9 @@ class SmartHandlersTestCase(WebTestCase):
         # Ensure all output was written via the write callback.
         self.assertEqual('', handler_output)
         self.assertEqual('handled input: foo', write_output)
-        self.assertContentTypeEquals('application/x-git-upload-pack-response')
+        self.assertContentTypeEquals('application/x-git-upload-pack-result')
         self.assertFalse(self._handler.advertise_refs)
-        self.assertTrue(self._handler.stateless_rpc)
+        self.assertTrue(self._handler.http_req)
         self.assertFalse(self._req.cached)
 
     def test_handle_service_request(self):
@@ -350,7 +350,7 @@ class SmartHandlersTestCase(WebTestCase):
         # Ensure all output was written via the write callback.
         self.assertEquals('', handler_output)
         self.assertTrue(self._handler.advertise_refs)
-        self.assertTrue(self._handler.stateless_rpc)
+        self.assertTrue(self._handler.http_req)
         self.assertFalse(self._req.cached)
 
 

+ 80 - 0
dulwich/tests/utils.py

@@ -28,6 +28,9 @@ import tempfile
 import time
 import types
 
+from dulwich.index import (
+    commit_tree,
+    )
 from dulwich.objects import (
     FixedSha,
     Commit,
@@ -47,6 +50,9 @@ from dulwich.tests import (
     TestSkipped,
     )
 
+# Plain files are very frequently used in tests, so let the mode be very short.
+F = 0100644  # Shorthand mode for Files.
+
 
 def open_repo(name):
     """Open a copy of a repo in a temporary directory.
@@ -230,3 +236,77 @@ def build_pack(f, objects_spec, store=None):
     sf.write_sha()
     f.seek(0)
     return expected
+
+
+def build_commit_graph(object_store, commit_spec, trees=None, attrs=None):
+    """Build a commit graph from a concise specification.
+
+    Sample usage:
+    >>> c1, c2, c3 = build_commit_graph(store, [[1], [2, 1], [3, 1, 2]])
+    >>> store[store[c3].parents[0]] == c1
+    True
+    >>> store[store[c3].parents[1]] == c2
+    True
+
+    If not otherwise specified, commits will refer to the empty tree and have
+    commit times increasing in the same order as the commit spec.
+
+    :param object_store: An ObjectStore to commit objects to.
+    :param commit_spec: An iterable of iterables of ints defining the commit
+        graph. Each entry defines one commit, and entries must be in topological
+        order. The first element of each entry is a commit number, and the
+        remaining elements are its parents. The commit numbers are only
+        meaningful for the call to build_commit_graph; since real commit objects
+        created, they will get created with real, opaque SHAs.
+    :param trees: An optional dict of commit number -> tree spec for building
+        trees for commits. The tree spec is an iterable of (path, blob, mode) or
+        (path, blob) entries; if mode is omitted, it defaults to the normal file
+        mode (0100644).
+    :param attrs: A dict of commit number -> (dict of attribute -> value) for
+        assigning additional values to the commits.
+    :return: The list of commit objects created.
+    :raise ValueError: If an undefined commit identifier is listed as a parent.
+    """
+    if trees is None:
+        trees = {}
+    if attrs is None:
+        attrs = {}
+    commit_time = 0
+    nums = {}
+    commits = []
+
+    for commit in commit_spec:
+        commit_num = commit[0]
+        try:
+            parent_ids = [nums[pn] for pn in commit[1:]]
+        except KeyError, e:
+            missing_parent, = e.args
+            raise ValueError('Unknown parent %i' % missing_parent)
+
+        blobs = []
+        for entry in trees.get(commit_num, []):
+            if len(entry) == 2:
+                path, blob = entry
+                entry = (path, blob, F)
+            path, blob, mode = entry
+            blobs.append((path, blob.id, mode))
+            object_store.add_object(blob)
+        tree_id = commit_tree(object_store, blobs)
+
+        commit_attrs = {
+            'message': 'Commit %i' % commit_num,
+            'parents': parent_ids,
+            'tree': tree_id,
+            'commit_time': commit_time,
+            }
+        commit_attrs.update(attrs.get(commit_num, {}))
+        commit_obj = make_commit(**commit_attrs)
+
+        # By default, increment the time by a lot. Out-of-order commits should
+        # be closer together than this because their main cause is clock skew.
+        commit_time = commit_attrs['commit_time'] + 100
+        nums[commit_num] = commit_obj.id
+        object_store.add_object(commit_obj)
+        commits.append(commit_obj)
+
+    return commits

+ 363 - 0
dulwich/walk.py

@@ -0,0 +1,363 @@
+# walk.py -- General implementation of walking commits and their contents.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""General implementation of walking commits and their contents."""
+
+
+try:
+    from collections import defaultdict
+except ImportError:
+    from _compat import defaultdict
+
+import collections
+import heapq
+import itertools
+
+from dulwich._compat import (
+    all,
+    )
+from dulwich.diff_tree import (
+    RENAME_CHANGE_TYPES,
+    tree_changes,
+    tree_changes_for_merge,
+    RenameDetector,
+    )
+from dulwich.errors import (
+    MissingCommitError,
+    )
+
+ORDER_DATE = 'date'
+ORDER_TOPO = 'topo'
+
+ALL_ORDERS = (ORDER_DATE, ORDER_TOPO)
+
+# Maximum number of commits to walk past a commit time boundary.
+_MAX_EXTRA_COMMITS = 5
+
+
+class WalkEntry(object):
+    """Object encapsulating a single result from a walk."""
+
+    def __init__(self, walker, commit):
+        self.commit = commit
+        self._store = walker.store
+        self._changes = None
+        self._rename_detector = walker.rename_detector
+
+    def changes(self):
+        """Get the tree changes for this entry.
+
+        :return: For commits with up to one parent, a list of TreeChange
+            objects; if the commit has no parents, these will be relative to the
+            empty tree. For merge commits, a list of lists of TreeChange
+            objects; see dulwich.diff_tree.tree_changes_for_merge.
+        """
+        if self._changes is None:
+            commit = self.commit
+            if not commit.parents:
+                changes_func = tree_changes
+                parent = None
+            elif len(commit.parents) == 1:
+                changes_func = tree_changes
+                parent = self._store[commit.parents[0]].tree
+            else:
+                changes_func = tree_changes_for_merge
+                parent = [self._store[p].tree for p in commit.parents]
+            self._changes = list(changes_func(
+              self._store, parent, commit.tree,
+              rename_detector=self._rename_detector))
+        return self._changes
+
+    def __repr__(self):
+        return '<WalkEntry commit=%s, changes=%r>' % (
+          self.commit.id, self.changes())
+
+
+class _CommitTimeQueue(object):
+    """Priority queue of WalkEntry objects by commit time."""
+
+    def __init__(self, walker):
+        self._walker = walker
+        self._store = walker.store
+        self._excluded = walker.excluded
+        self._pq = []
+        self._pq_set = set()
+        self._seen = set()
+        self._done = set()
+        self._min_time = walker.since
+        self._last = None
+        self._extra_commits_left = _MAX_EXTRA_COMMITS
+        self._is_finished = False
+
+        for commit_id in itertools.chain(walker.include, walker.excluded):
+            self._push(commit_id)
+
+    def _push(self, commit_id):
+        try:
+            commit = self._store[commit_id]
+        except KeyError:
+            raise MissingCommitError(commit_id)
+        if commit_id not in self._pq_set and commit_id not in self._done:
+            heapq.heappush(self._pq, (-commit.commit_time, commit))
+            self._pq_set.add(commit_id)
+            self._seen.add(commit_id)
+
+    def _exclude_parents(self, commit):
+        excluded = self._excluded
+        seen = self._seen
+        todo = [commit]
+        while todo:
+            commit = todo.pop()
+            for parent in commit.parents:
+                if parent not in excluded and parent in seen:
+                    # TODO: This is inefficient unless the object store does
+                    # some caching (which DiskObjectStore currently does not).
+                    # We could either add caching in this class or pass around
+                    # parsed queue entry objects instead of commits.
+                    todo.append(self._store[parent])
+                excluded.add(parent)
+
+    def next(self):
+        if self._is_finished:
+            return None
+        while self._pq:
+            _, commit = heapq.heappop(self._pq)
+            sha = commit.id
+            self._pq_set.remove(sha)
+            if sha in self._done:
+                continue
+            self._done.add(commit.id)
+
+            for parent_id in commit.parents:
+                self._push(parent_id)
+
+            reset_extra_commits = True
+            is_excluded = sha in self._excluded
+            if is_excluded:
+                self._exclude_parents(commit)
+                if self._pq and all(c.id in self._excluded
+                                    for _, c in self._pq):
+                    _, n = self._pq[0]
+                    if n.commit_time >= self._last.commit_time:
+                        # If the next commit is newer than the last one, we need
+                        # to keep walking in case its parents (which we may not
+                        # have seen yet) are excluded. This gives the excluded
+                        # set a chance to "catch up" while the commit is still
+                        # in the Walker's output queue.
+                        reset_extra_commits = True
+                    else:
+                        reset_extra_commits = False
+
+            if (self._min_time is not None and
+                commit.commit_time < self._min_time):
+                # We want to stop walking at min_time, but commits at the
+                # boundary may be out of order with respect to their parents. So
+                # we walk _MAX_EXTRA_COMMITS more commits once we hit this
+                # boundary.
+                reset_extra_commits = False
+
+            if reset_extra_commits:
+                # We're not at a boundary, so reset the counter.
+                self._extra_commits_left = _MAX_EXTRA_COMMITS
+            else:
+                self._extra_commits_left -= 1
+                if not self._extra_commits_left:
+                    break
+
+            if not is_excluded:
+                self._last = commit
+                return WalkEntry(self._walker, commit)
+        self._is_finished = True
+        return None
+
+
+class Walker(object):
+    """Object for performing a walk of commits in a store.
+
+    Walker objects are initialized with a store and other options and can then
+    be treated as iterators of Commit objects.
+    """
+
+    def __init__(self, store, include, exclude=None, order=ORDER_DATE,
+                 reverse=False, max_entries=None, paths=None,
+                 rename_detector=None, follow=False, since=None, until=None,
+                 queue_cls=_CommitTimeQueue):
+        """Constructor.
+
+        :param store: ObjectStore instance for looking up objects.
+        :param include: Iterable of SHAs of commits to include along with their
+            ancestors.
+        :param exclude: Iterable of SHAs of commits to exclude along with their
+            ancestors, overriding includes.
+        :param order: ORDER_* constant specifying the order of results. Anything
+            other than ORDER_DATE may result in O(n) memory usage.
+        :param reverse: If True, reverse the order of output, requiring O(n)
+            memory.
+        :param max_entries: The maximum number of entries to yield, or None for
+            no limit.
+        :param paths: Iterable of file or subtree paths to show entries for.
+        :param rename_detector: diff_tree.RenameDetector object for detecting
+            renames.
+        :param follow: If True, follow path across renames/copies. Forces a
+            default rename_detector.
+        :param since: Timestamp to list commits after.
+        :param until: Timestamp to list commits before.
+        :param queue_cls: A class to use for a queue of commits, supporting the
+            iterator protocol. The constructor takes a single argument, the
+            Walker.
+        """
+        if order not in ALL_ORDERS:
+            raise ValueError('Unknown walk order %s' % order)
+        self.store = store
+        self.include = include
+        self.excluded = set(exclude or [])
+        self.order = order
+        self.reverse = reverse
+        self.max_entries = max_entries
+        self.paths = paths and set(paths) or None
+        if follow and not rename_detector:
+            rename_detector = RenameDetector(store)
+        self.rename_detector = rename_detector
+        self.follow = follow
+        self.since = since
+        self.until = until
+
+        self._num_entries = 0
+        self._queue = queue_cls(self)
+        self._out_queue = collections.deque()
+
+    def _path_matches(self, changed_path):
+        """Check if a changed path equals, or lies under, a followed path.
+
+        :param changed_path: The path to check, or None (never matches).
+        :return: True if changed_path is one of self.paths or is inside
+            one of them when treated as a subtree, False otherwise.
+        """
+        if changed_path is None:
+            return False
+        for followed_path in self.paths:
+            if changed_path == followed_path:
+                return True
+            # Prefix match plus an explicit '/' separator, so that e.g.
+            # 'foo' matches 'foo/bar' but not 'foobar'.
+            if (changed_path.startswith(followed_path) and
+                changed_path[len(followed_path)] == '/'):
+                return True
+        return False
+
+    def _change_matches(self, change):
+        """Check if a TreeChange touches any of the followed paths.
+
+        Side effect: when follow is set and the change is a rename/copy,
+        self.paths is updated to track the file under its old name, so
+        earlier history continues to match.
+
+        :param change: The TreeChange to check.
+        :return: True if either side of the change matches a followed path.
+        """
+        old_path = change.old.path
+        new_path = change.new.path
+        if self._path_matches(new_path):
+            if self.follow and change.type in RENAME_CHANGE_TYPES:
+                # Walking backwards in history: from here on, follow the
+                # pre-rename name instead of the current one.
+                self.paths.add(old_path)
+                self.paths.remove(new_path)
+            return True
+        elif self._path_matches(old_path):
+            return True
+        return False
+
+    def _should_return(self, entry):
+        """Determine if a walk entry should be returned.
+
+        :param entry: The WalkEntry to consider.
+        :return: True if the WalkEntry should be returned by this walk, or
+            a false value (False or None) otherwise (e.g. if it doesn't
+            match any requested paths).
+        """
+        commit = entry.commit
+        # Timestamp window filters.
+        if self.since is not None and commit.commit_time < self.since:
+            return False
+        if self.until is not None and commit.commit_time > self.until:
+            return False
+        if commit.id in self.excluded:
+            return False
+
+        # No path limiting: every non-excluded commit in range qualifies.
+        if self.paths is None:
+            return True
+
+        if len(commit.parents) > 1:
+            for path_changes in entry.changes():
+                # For merge commits, only include changes with conflicts for
+                # this path. Since a rename conflict may include different
+                # old.paths, we have to check all of them.
+                for change in path_changes:
+                    if self._change_matches(change):
+                        return True
+        else:
+            for change in entry.changes():
+                if self._change_matches(change):
+                    return True
+        # Falls through to None (falsy) when no change matched.
+        return None
+
+    def _next(self):
+        """Return the next entry to yield, or None when the walk is done.
+
+        Pulls entries from the commit queue into a small output buffer and
+        applies the _should_return filter, counting yielded entries against
+        max_entries.
+        """
+        max_entries = self.max_entries
+        while max_entries is None or self._num_entries < max_entries:
+            entry = self._queue.next()
+            if entry is not None:
+                self._out_queue.append(entry)
+            # Only start draining once the source queue is exhausted or the
+            # buffer exceeds _MAX_EXTRA_COMMITS worth of lookahead.
+            if entry is None or len(self._out_queue) > _MAX_EXTRA_COMMITS:
+                if not self._out_queue:
+                    return None
+                entry = self._out_queue.popleft()
+                if self._should_return(entry):
+                    self._num_entries += 1
+                    return entry
+        return None
+
+    def _reorder(self, results):
+        """Possibly reorder a results iterator.
+
+        :param results: An iterator of WalkEntry objects, in the order returned
+            from the queue_cls.
+        :return: An iterator or list of WalkEntry objects, in the order required
+            by the Walker.
+        """
+        if self.order == ORDER_TOPO:
+            results = _topo_reorder(results)
+        if self.reverse:
+            # Materializes the full result list; this is the O(n) memory
+            # cost documented for reverse=True.
+            results = reversed(list(results))
+        return results
+
+    def __iter__(self):
+        """Iterate over WalkEntry objects in the requested order."""
+        # iter(self._next, None) calls _next repeatedly until it returns
+        # the None sentinel; _reorder then applies topo/reverse ordering.
+        return iter(self._reorder(iter(self._next, None)))
+
+
+def _topo_reorder(entries):
+    """Reorder an iterable of entries topologically.
+
+    This works best assuming the entries are already in almost-topological
+    order, e.g. in commit time order.
+
+    :param entries: An iterable of WalkEntry objects.
+    :yield: WalkEntry objects from entries in FIFO order, except where a parent
+        would be yielded before any of its children.
+    """
+    todo = collections.deque()
+    # Parents held back until all of their children have been yielded,
+    # keyed by commit SHA.
+    pending = {}
+    # Number of not-yet-yielded children per commit SHA; note this consumes
+    # the whole input up front.
+    num_children = defaultdict(int)
+    for entry in entries:
+        todo.append(entry)
+        for p in entry.commit.parents:
+            num_children[p] += 1
+
+    while todo:
+        entry = todo.popleft()
+        commit = entry.commit
+        commit_id = commit.id
+        if num_children[commit_id]:
+            # Some child has not been yielded yet; park this entry.
+            pending[commit_id] = entry
+            continue
+        for parent_id in commit.parents:
+            num_children[parent_id] -= 1
+            if not num_children[parent_id]:
+                # Last child yielded: release the parked parent next, at
+                # the front of the queue to keep it close to its children.
+                parent_entry = pending.pop(parent_id, None)
+                if parent_entry:
+                    todo.appendleft(parent_entry)
+        yield entry

+ 3 - 3
dulwich/web.py

@@ -169,7 +169,7 @@ def get_info_refs(req, backend, mat):
         write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
         proto = ReceivableProtocol(StringIO().read, write)
         handler = handler_cls(backend, [url_prefix(mat)], proto,
-                              stateless_rpc=True, advertise_refs=True)
+                              http_req=req, advertise_refs=True)
         handler.proto.write_pkt_line('# service=%s\n' % service)
         handler.proto.write_pkt_line(None)
         handler.handle()
@@ -235,7 +235,7 @@ def handle_service_request(req, backend, mat):
         yield req.forbidden('Unsupported service %s' % service)
         return
     req.nocache()
-    write = req.respond(HTTP_OK, 'application/x-%s-response' % service)
+    write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
 
     input = req.environ['wsgi.input']
     # This is not necessary if this app is run from a conforming WSGI server.
@@ -246,7 +246,7 @@ def handle_service_request(req, backend, mat):
     if content_length:
         input = _LengthLimitedFile(input, int(content_length))
     proto = ReceivableProtocol(input.read, write)
-    handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
+    handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
     handler.handle()