浏览代码

add path filtering to WalkEntry.changes()

Add a path_prefix argument to WalkEntry.changes() to let it compute only
the differences under a portion of the tree represented by the change.
This differs slightly from the paths argument to Repo.get_walker()
because the Walker still produces WalkEntry objects for commits that
have no changes under the path; the calling code discovers that as it
iterates over the results. This is useful in cases where
the caller needs to have access to all commits (for example, to see
where tags fall), but may only care about some actual file changes under
a portion of the repository.

Signed-off-by: Doug Hellmann <doug@doughellmann.com>
Doug Hellmann 8 年之前
父节点
当前提交
dd038d7637
共有 2 个文件被更改,包括 163 次插入和 6 次删除
  1. 126 0
      dulwich/tests/test_walk.py
  2. 37 6
      dulwich/walk.py

+ 126 - 0
dulwich/tests/test_walk.py

@@ -25,6 +25,7 @@ from itertools import (
     )
 
 from dulwich.diff_tree import (
+    CHANGE_ADD,
     CHANGE_MODIFY,
     CHANGE_RENAME,
     TreeChange,
@@ -415,3 +416,128 @@ class WalkerTest(TestCase):
     def test_empty_walk(self):
         c1, c2, c3 = self.make_linear_commits(3)
         self.assertWalkYields([], [c3.id], exclude=[c3.id])
+
+
+class WalkEntryTest(TestCase):
+
+    def setUp(self):
+        super(WalkEntryTest, self).setUp()
+        self.store = MemoryObjectStore()
+
+    def make_commits(self, commit_spec, **kwargs):
+        times = kwargs.pop('times', [])
+        attrs = kwargs.pop('attrs', {})
+        for i, t in enumerate(times):
+            attrs.setdefault(i + 1, {})['commit_time'] = t
+        return build_commit_graph(self.store, commit_spec, attrs=attrs,
+                                  **kwargs)
+
+    def make_linear_commits(self, num_commits, **kwargs):
+        commit_spec = []
+        for i in range(1, num_commits + 1):
+            c = [i]
+            if i > 1:
+                c.append(i - 1)
+            commit_spec.append(c)
+        return self.make_commits(commit_spec, **kwargs)
+
+    def test_all_changes(self):
+        # Construct a commit with 2 files in different subdirectories.
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
+        c1 = self.make_linear_commits(
+            1,
+            trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]},
+        )[0]
+
+        # Get the WalkEntry for the commit.
+        walker = Walker(self.store, c1.id)
+        walker_entry = list(walker)[0]
+        changes = walker_entry.changes()
+
+        # Compare the changes with the expected values.
+        entry_a = (b'x/a', F, blob_a.id)
+        entry_b = (b'y/b', F, blob_b.id)
+        self.assertEqual(
+            [TreeChange.add(entry_a),
+             TreeChange.add(entry_b)],
+            changes,
+        )
+
+    def test_all_with_merge(self):
+        blob_a = make_object(Blob, data=b'a')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_b = make_object(Blob, data=b'b')
+        blob_b2 = make_object(Blob, data=b'b2')
+        x1, y2, m3 = self.make_commits(
+            [[1], [2], [3, 1, 2]],
+            trees={1: [(b'x/a', blob_a)],
+                   2: [(b'y/b', blob_b)],
+                   3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]})
+
+        # Get the WalkEntry for the merge commit.
+        walker = Walker(self.store, m3.id)
+        entries = list(walker)
+        walker_entry = entries[0]
+        self.assertEqual(walker_entry.commit.id, m3.id)
+        changes = walker_entry.changes()
+        self.assertEqual(2, len(changes))
+
+        entry_a = (b'x/a', F, blob_a.id)
+        entry_a2 = (b'x/a', F, blob_a2.id)
+        entry_b = (b'y/b', F, blob_b.id)
+        entry_b2 = (b'y/b', F, blob_b2.id)
+        self.assertEqual(
+            [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2),
+             TreeChange.add(entry_a2)],
+            [TreeChange.add(entry_b2),
+             TreeChange(CHANGE_MODIFY, entry_b, entry_b2)]],
+            changes,
+        )
+
+    def test_filter_changes(self):
+        # Construct a commit with 2 files in different subdirectories.
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
+        c1 = self.make_linear_commits(
+            1,
+            trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]},
+        )[0]
+
+        # Get the WalkEntry for the commit.
+        walker = Walker(self.store, c1.id)
+        walker_entry = list(walker)[0]
+        changes = walker_entry.changes(path_prefix=b'x')
+
+        # Compare the changes with the expected values.
+        entry_a = (b'a', F, blob_a.id)
+        self.assertEqual(
+            [TreeChange.add(entry_a)],
+            changes,
+        )
+
+    def test_filter_with_merge(self):
+        blob_a = make_object(Blob, data=b'a')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_b = make_object(Blob, data=b'b')
+        blob_b2 = make_object(Blob, data=b'b2')
+        x1, y2, m3 = self.make_commits(
+            [[1], [2], [3, 1, 2]],
+            trees={1: [(b'x/a', blob_a)],
+                   2: [(b'y/b', blob_b)],
+                   3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]})
+
+        # Get the WalkEntry for the merge commit.
+        walker = Walker(self.store, m3.id)
+        entries = list(walker)
+        walker_entry = entries[0]
+        self.assertEqual(walker_entry.commit.id, m3.id)
+        changes = walker_entry.changes(b'x')
+        self.assertEqual(1, len(changes))
+
+        entry_a = (b'a', F, blob_a.id)
+        entry_a2 = (b'a', F, blob_a2.id)
+        self.assertEqual(
+            [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2)]],
+            changes,
+        )

+ 37 - 6
dulwich/walk.py

@@ -53,18 +53,22 @@ class WalkEntry(object):
         self.commit = commit
         self._store = walker.store
         self._get_parents = walker.get_parents
-        self._changes = None
+        self._changes = {}
         self._rename_detector = walker.rename_detector
 
-    def changes(self):
+    def changes(self, path_prefix=None):
         """Get the tree changes for this entry.
 
+        :param path_prefix: Portion of the path in the repository to
+            use to filter changes. Must be a directory name. Must be
+            a full, valid, path reference (no partial names or wildcards).
         :return: For commits with up to one parent, a list of TreeChange
             objects; if the commit has no parents, these will be relative to the
             empty tree. For merge commits, a list of lists of TreeChange
             objects; see dulwich.diff.tree_changes_for_merge.
         """
-        if self._changes is None:
+        cached = self._changes.get(path_prefix)
+        if cached is None:
             commit = self.commit
             if not self._get_parents(commit):
                 changes_func = tree_changes
@@ -72,13 +76,40 @@ class WalkEntry(object):
             elif len(self._get_parents(commit)) == 1:
                 changes_func = tree_changes
                 parent = self._store[self._get_parents(commit)[0]].tree
+                if path_prefix:
+                    mode, subtree_sha = parent.lookup_path(
+                        self._store.__getitem__,
+                        path_prefix,
+                    )
+                    parent = self._store[subtree_sha]
             else:
                 changes_func = tree_changes_for_merge
                 parent = [self._store[p].tree for p in self._get_parents(commit)]
-            self._changes = list(changes_func(
-              self._store, parent, commit.tree,
+                if path_prefix:
+                    parent_trees = [self._store[p] for p in parent]
+                    parent = []
+                    for p in parent_trees:
+                        try:
+                            mode, st = p.lookup_path(
+                                self._store.__getitem__,
+                                path_prefix,
+                            )
+                        except KeyError:
+                            pass
+                        else:
+                            parent.append(st)
+            commit_tree_sha = commit.tree
+            if path_prefix:
+                commit_tree = self._store[commit_tree_sha]
+                mode, commit_tree_sha = commit_tree.lookup_path(
+                    self._store.__getitem__,
+                    path_prefix,
+                )
+            cached = list(changes_func(
+              self._store, parent, commit_tree_sha,
               rename_detector=self._rename_detector))
-        return self._changes
+            self._changes[path_prefix] = cached
+        return self._changes[path_prefix]
 
     def __repr__(self):
         return '<WalkEntry commit=%s, changes=%r>' % (