ソースを参照

Add `object_store.iter_commit_contents()` and docs (#1761)

Closes #1740
Jelmer Vernooij 5 ヶ月 前
コミット
51d5db20e8
2 ファイル変更200 行追加4 行削除
  1. 54 2
      dulwich/object_store.py
  2. 146 2
      dulwich/tests/test_object_store.py

+ 54 - 2
dulwich/object_store.py

@@ -32,6 +32,7 @@ import warnings
 from collections.abc import Iterable, Iterator, Sequence
 from contextlib import suppress
 from io import BytesIO
+from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Callable,
@@ -2653,8 +2654,8 @@ def iter_tree_contents(
       store: Object store to get trees from
       tree_id: SHA1 of the tree.
       include_trees: If True, include tree objects in the iteration.
-    Returns: Iterator over TreeEntry namedtuples for all the objects in a
-        tree.
+
+    Yields: TreeEntry namedtuples for all the objects in a tree.
     """
     if tree_id is None:
         return
@@ -2674,6 +2675,57 @@ def iter_tree_contents(
             yield entry
 
 
+def iter_commit_contents(
+    store: ObjectContainer,
+    commit: Union[Commit, bytes],
+    *,
+    include: Optional[Sequence[Union[str, bytes, Path]]] = None,
+):
+    """Iterate the contents of the repository at the specified commit.
+
+    This is a wrapper around iter_tree_contents() and
+    tree_lookup_path() to simplify the common task of getting the
+    contents of a repo at a particular commit. See also
+    dulwich.index.build_file_from_blob() for writing individual files
+    to disk.
+
+    Args:
+      store: Object store to get trees from
+      commit: Commit object, or SHA1 of a commit
+      include: if provided, only the entries whose paths are in the
+        list, or whose parent tree is in the list, will be
+        included. Note that duplicate or overlapping paths
+        (e.g. ["foo", "foo/bar"]) may result in duplicate entries
+
+    Yields: TreeEntry namedtuples for all matching files in a commit.
+    """
+    sha = commit.id if isinstance(commit, Commit) else commit
+    if not isinstance(obj := store[sha], Commit):
+        raise TypeError(
+            f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
+        )
+    commit = obj
+    encoding = commit.encoding or "utf-8"
+    include = (
+        [
+            path if isinstance(path, bytes) else str(path).encode(encoding)
+            for path in include
+        ]
+        if include is not None
+        else [b""]
+    )
+
+    for path in include:
+        mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
+        # Iterate all contained files if path points to a dir, otherwise just get that
+        # single file
+        if isinstance(store[obj_id], Tree):
+            for entry in iter_tree_contents(store, obj_id):
+                yield entry.in_path(path)
+        else:
+            yield TreeEntry(path, mode, obj_id)
+
+
 def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
     """Peel all tags from a SHA.
 

+ 146 - 2
dulwich/tests/test_object_store.py

@@ -30,6 +30,7 @@ from dulwich.object_store import (
     MemoryObjectStore,
     PackBasedObjectStore,
     find_shallow,
+    iter_commit_contents,
     iter_tree_contents,
     peel_sha,
 )
@@ -427,8 +428,8 @@ class PackBasedObjectStoreTests(ObjectStoreTests):
         self.assertNotIn(b1.id, self.store)
 
 
-class FindShallowTests(TestCase):
-    """Tests for finding shallow commits."""
+class CommitTestHelper:
+    """Helper for tests which iterate over commits."""
 
     def setUp(self):
         """Set up test fixture."""
@@ -441,6 +442,149 @@ class FindShallowTests(TestCase):
         self._store.add_object(commit)
         return commit
 
+
+class IterCommitContentsTests(CommitTestHelper, TestCase):
+    """Tests for iter_commit_contents."""
+
+    def make_commits_with_contents(self):
+        """Helper to prepare test commits."""
+        files = [
+            # (path, contents)
+            (b"foo", b"foo"),
+            (b"bar", b"bar"),
+            (b"dir/baz", b"baz"),
+            (b"dir/subdir/foo", b"subfoo"),
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+        ]
+        blobs = {contents: make_object(Blob, data=contents) for path, contents in files}
+        for blob in blobs.values():
+            self._store.add_object(blob)
+        commit = self.make_commit(
+            tree=commit_tree(
+                self._store,
+                [(path, blobs[contents].id, 0o100644) for path, contents in files],
+            )
+        )
+
+        return commit
+
+    def assertCommitEntries(self, results, expected):
+        """Assert that iter_commit_contents results are equal to expected."""
+        actual = [(entry.path, self._store[entry.sha].data) for entry in results]
+        self.assertEqual(actual, expected)
+
+    def test_iter_commit_contents_no_includes(self) -> None:
+        """Test iterating commit contents without includes."""
+        commit = self.make_commits_with_contents()
+
+        # this is the same list as used by make_commits_with_contents,
+        # but ordered to match the actual iter_tree_contents iteration
+        # order
+        all_files = [
+            (b"bar", b"bar"),
+            (b"dir/baz", b"baz"),
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+            (b"dir/subdir/foo", b"subfoo"),
+            (b"foo", b"foo"),
+        ]
+
+        # No includes
+        self.assertCommitEntries(iter_commit_contents(self._store, commit), all_files)
+
+        # Explicit include=None
+        self.assertCommitEntries(
+            iter_commit_contents(self._store, commit, include=None), all_files
+        )
+
+        # include=[] is not the same as None
+        self.assertCommitEntries(
+            iter_commit_contents(self._store, commit, include=[]), []
+        )
+
+    def test_iter_commit_contents_with_includes(self) -> None:
+        """Test iterating commit contents with includes."""
+        commit = self.make_commits_with_contents()
+
+        include1 = ["foo", "bar"]
+        expected1 = [
+            # Note: iter_tree_contents iterates in name order, but we
+            # listed two separate paths, so they'll keep their order
+            # as specified
+            (b"foo", b"foo"),
+            (b"bar", b"bar"),
+        ]
+
+        include2 = ["foo", "dir/subdir"]
+        expected2 = [
+            # foo
+            (b"foo", b"foo"),
+            # dir/subdir
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+            (b"dir/subdir/foo", b"subfoo"),
+        ]
+
+        include3 = ["dir"]
+        expected3 = [
+            (b"dir/baz", b"baz"),
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+            (b"dir/subdir/foo", b"subfoo"),
+        ]
+
+        for include, expected in [
+            (include1, expected1),
+            (include2, expected2),
+            (include3, expected3),
+        ]:
+            self.assertCommitEntries(
+                iter_commit_contents(self._store, commit, include=include), expected
+            )
+
+    def test_iter_commit_contents_overlapping_includes(self) -> None:
+        """Test iterating commit contents with overlaps in includes."""
+        commit = self.make_commits_with_contents()
+
+        include1 = ["dir", "dir/baz"]
+        expected1 = [
+            # dir
+            (b"dir/baz", b"baz"),
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+            (b"dir/subdir/foo", b"subfoo"),
+            # dir/baz
+            (b"dir/baz", b"baz"),
+        ]
+
+        include2 = ["dir", "dir/subdir", "dir/subdir/baz"]
+        expected2 = [
+            # dir
+            (b"dir/baz", b"baz"),
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+            (b"dir/subdir/foo", b"subfoo"),
+            # dir/subdir
+            (b"dir/subdir/bar", b"subbar"),
+            (b"dir/subdir/baz", b"subbaz"),
+            (b"dir/subdir/foo", b"subfoo"),
+            # dir/subdir/baz
+            (b"dir/subdir/baz", b"subbaz"),
+        ]
+
+        for include, expected in [
+            (include1, expected1),
+            (include2, expected2),
+        ]:
+            self.assertCommitEntries(
+                iter_commit_contents(self._store, commit, include=include), expected
+            )
+
+
+class FindShallowTests(CommitTestHelper, TestCase):
+    """Tests for finding shallow commits."""
+
     def make_linear_commits(self, n, message=b""):
         """Create a linear chain of commits."""
         commits = []