Răsfoiți Sursa

add path filtering to WalkEntry.changes()

Add a path_prefix argument to WalkEntry.changes() to let it compute only
the differences under a portion of the tree represented by the change.
This differs slightly from the paths argument to Repo.get_walker()
because the Walker still produces WalkEntry objects for commits with
no changes under the path, and the calling code discovers that only as
it iterates over the results. This is useful in cases where
the caller needs to have access to all commits (for example, to see
where tags fall), but may only care about some actual file changes under
a portion of the repository.

Signed-off-by: Doug Hellmann <doug@doughellmann.com>
Doug Hellmann 8 ani în urmă
părinte
comite
dd038d7637
2 a modificat fișierele cu 163 adăugiri și 6 ștergeri
  1. 126 0
      dulwich/tests/test_walk.py
  2. 37 6
      dulwich/walk.py

+ 126 - 0
dulwich/tests/test_walk.py

@@ -25,6 +25,7 @@ from itertools import (
     )
 
 from dulwich.diff_tree import (
+    CHANGE_ADD,
     CHANGE_MODIFY,
     CHANGE_RENAME,
     TreeChange,
@@ -415,3 +416,128 @@ class WalkerTest(TestCase):
     def test_empty_walk(self):
         c1, c2, c3 = self.make_linear_commits(3)
         self.assertWalkYields([], [c3.id], exclude=[c3.id])
+
+
+class WalkEntryTest(TestCase):
+
+    def setUp(self):
+        super(WalkEntryTest, self).setUp()
+        self.store = MemoryObjectStore()
+
+    def make_commits(self, commit_spec, **kwargs):
+        times = kwargs.pop('times', [])
+        attrs = kwargs.pop('attrs', {})
+        for i, t in enumerate(times):
+            attrs.setdefault(i + 1, {})['commit_time'] = t
+        return build_commit_graph(self.store, commit_spec, attrs=attrs,
+                                  **kwargs)
+
+    def make_linear_commits(self, num_commits, **kwargs):
+        commit_spec = []
+        for i in range(1, num_commits + 1):
+            c = [i]
+            if i > 1:
+                c.append(i - 1)
+            commit_spec.append(c)
+        return self.make_commits(commit_spec, **kwargs)
+
+    def test_all_changes(self):
+        # Construct a commit with 2 files in different subdirectories.
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
+        c1 = self.make_linear_commits(
+            1,
+            trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]},
+        )[0]
+
+        # Get the WalkEntry for the commit.
+        walker = Walker(self.store, c1.id)
+        walker_entry = list(walker)[0]
+        changes = walker_entry.changes()
+
+        # Compare the changes with the expected values.
+        entry_a = (b'x/a', F, blob_a.id)
+        entry_b = (b'y/b', F, blob_b.id)
+        self.assertEqual(
+            [TreeChange.add(entry_a),
+             TreeChange.add(entry_b)],
+            changes,
+        )
+
+    def test_all_with_merge(self):
+        blob_a = make_object(Blob, data=b'a')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_b = make_object(Blob, data=b'b')
+        blob_b2 = make_object(Blob, data=b'b2')
+        x1, y2, m3 = self.make_commits(
+            [[1], [2], [3, 1, 2]],
+            trees={1: [(b'x/a', blob_a)],
+                   2: [(b'y/b', blob_b)],
+                   3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]})
+
+        # Get the WalkEntry for the merge commit.
+        walker = Walker(self.store, m3.id)
+        entries = list(walker)
+        walker_entry = entries[0]
+        self.assertEqual(walker_entry.commit.id, m3.id)
+        changes = walker_entry.changes()
+        self.assertEqual(2, len(changes))
+
+        entry_a = (b'x/a', F, blob_a.id)
+        entry_a2 = (b'x/a', F, blob_a2.id)
+        entry_b = (b'y/b', F, blob_b.id)
+        entry_b2 = (b'y/b', F, blob_b2.id)
+        self.assertEqual(
+            [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2),
+             TreeChange.add(entry_a2)],
+            [TreeChange.add(entry_b2),
+             TreeChange(CHANGE_MODIFY, entry_b, entry_b2)]],
+            changes,
+        )
+
+    def test_filter_changes(self):
+        # Construct a commit with 2 files in different subdirectories.
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
+        c1 = self.make_linear_commits(
+            1,
+            trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]},
+        )[0]
+
+        # Get the WalkEntry for the commit.
+        walker = Walker(self.store, c1.id)
+        walker_entry = list(walker)[0]
+        changes = walker_entry.changes(path_prefix=b'x')
+
+        # Compare the changes with the expected values.
+        entry_a = (b'a', F, blob_a.id)
+        self.assertEqual(
+            [TreeChange.add(entry_a)],
+            changes,
+        )
+
+    def test_filter_with_merge(self):
+        blob_a = make_object(Blob, data=b'a')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_b = make_object(Blob, data=b'b')
+        blob_b2 = make_object(Blob, data=b'b2')
+        x1, y2, m3 = self.make_commits(
+            [[1], [2], [3, 1, 2]],
+            trees={1: [(b'x/a', blob_a)],
+                   2: [(b'y/b', blob_b)],
+                   3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]})
+
+        # Get the WalkEntry for the merge commit.
+        walker = Walker(self.store, m3.id)
+        entries = list(walker)
+        walker_entry = entries[0]
+        self.assertEqual(walker_entry.commit.id, m3.id)
+        changes = walker_entry.changes(b'x')
+        self.assertEqual(1, len(changes))
+
+        entry_a = (b'a', F, blob_a.id)
+        entry_a2 = (b'a', F, blob_a2.id)
+        self.assertEqual(
+            [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2)]],
+            changes,
+        )

+ 37 - 6
dulwich/walk.py

@@ -53,18 +53,22 @@ class WalkEntry(object):
         self.commit = commit
         self._store = walker.store
         self._get_parents = walker.get_parents
-        self._changes = None
+        self._changes = {}
         self._rename_detector = walker.rename_detector
 
-    def changes(self):
+    def changes(self, path_prefix=None):
         """Get the tree changes for this entry.
 
+        :param path_prefix: Portion of the path in the repository to
+            use to filter changes. Must be a directory name. Must be
+            a full, valid, path reference (no partial names or wildcards).
         :return: For commits with up to one parent, a list of TreeChange
             objects; if the commit has no parents, these will be relative to the
             empty tree. For merge commits, a list of lists of TreeChange
             objects; see dulwich.diff.tree_changes_for_merge.
         """
-        if self._changes is None:
+        cached = self._changes.get(path_prefix)
+        if cached is None:
             commit = self.commit
             if not self._get_parents(commit):
                 changes_func = tree_changes
@@ -72,13 +76,40 @@ class WalkEntry(object):
             elif len(self._get_parents(commit)) == 1:
                 changes_func = tree_changes
                 parent = self._store[self._get_parents(commit)[0]].tree
+                if path_prefix:
+                    mode, subtree_sha = parent.lookup_path(
+                        self._store.__getitem__,
+                        path_prefix,
+                    )
+                    parent = self._store[subtree_sha]
             else:
                 changes_func = tree_changes_for_merge
                 parent = [self._store[p].tree for p in self._get_parents(commit)]
-            self._changes = list(changes_func(
-              self._store, parent, commit.tree,
+                if path_prefix:
+                    parent_trees = [self._store[p] for p in parent]
+                    parent = []
+                    for p in parent_trees:
+                        try:
+                            mode, st = p.lookup_path(
+                                self._store.__getitem__,
+                                path_prefix,
+                            )
+                        except KeyError:
+                            pass
+                        else:
+                            parent.append(st)
+            commit_tree_sha = commit.tree
+            if path_prefix:
+                commit_tree = self._store[commit_tree_sha]
+                mode, commit_tree_sha = commit_tree.lookup_path(
+                    self._store.__getitem__,
+                    path_prefix,
+                )
+            cached = list(changes_func(
+              self._store, parent, commit_tree_sha,
               rename_detector=self._rename_detector))
-        return self._changes
+            self._changes[path_prefix] = cached
+        return self._changes[path_prefix]
 
     def __repr__(self):
         return '<WalkEntry commit=%s, changes=%r>' % (