Переглянути джерело

Implement diffing two committed trees, and diffing between cached and working tree

Jelmer Vernooij 1 місяць тому
батько
коміт
4f1a1d52ae
9 змінених файлів з 1466 додано та 66 видалено
  1. 3 0
      NEWS
  2. 51 11
      dulwich/cli.py
  3. 506 0
      dulwich/diff.py
  4. 49 2
      dulwich/diff_tree.py
  5. 4 0
      dulwich/object_store.py
  6. 102 0
      dulwich/porcelain.py
  7. 185 0
      tests/test_cli.py
  8. 299 53
      tests/test_diff_tree.py
  9. 267 0
      tests/test_porcelain.py

+ 3 - 0
NEWS

@@ -1,5 +1,8 @@
 0.23.3	UNRELEASED
 
+ * ``dulwich.porcelain.diff``: Support diffing two commits
+   and diffing between the cached and working trees. (Jelmer Vernooij)
+
  * Add ``format-patch`` command in porcelain. (Jelmer Vernooij)
 
  * Add support for ``core.commitGraph`` configuration setting to control

+ 51 - 11
dulwich/cli.py

@@ -41,7 +41,7 @@ from .client import GitProtocolError, get_transport_and_path
 from .errors import ApplyDeltaError
 from .index import Index
 from .objects import valid_hexsha
-from .objectspec import parse_commit, parse_committish_range
+from .objectspec import parse_committish_range
 from .pack import Pack, sha_to_hex
 from .repo import Repo
 
@@ -299,19 +299,59 @@ class cmd_diff(Command):
     def run(self, args) -> None:
         parser = argparse.ArgumentParser()
         parser.add_argument(
-            "commit", nargs="?", default="HEAD", help="Commit to show diff for"
+            "committish", nargs="*", default=[], help="Commits or refs to compare"
         )
-        args = parser.parse_args(args)
-
-        r = Repo(".")
-        commit_id = (
-            args.commit.encode() if isinstance(args.commit, str) else args.commit
+        parser.add_argument("--staged", action="store_true", help="Show staged changes")
+        parser.add_argument(
+            "--cached",
+            action="store_true",
+            help="Show staged changes (same as --staged)",
         )
-        commit = parse_commit(r, commit_id)
-        parent_commit = r[commit.parents[0]]
-        porcelain.diff_tree(
-            r, parent_commit.tree, commit.tree, outstream=sys.stdout.buffer
+        parser.add_argument(
+            "--", dest="separator", action="store_true", help=argparse.SUPPRESS
         )
+        parser.add_argument("paths", nargs="*", default=[], help="Paths to limit diff")
+
+        # Handle the -- separator for paths
+        if "--" in args:
+            sep_index = args.index("--")
+            parsed_args = parser.parse_args(args[:sep_index])
+            parsed_args.paths = args[sep_index + 1 :]
+        else:
+            parsed_args = parser.parse_args(args)
+
+        args = parsed_args
+
+        if len(args.committish) == 0:
+            # Show diff for working tree or staged changes
+            porcelain.diff(
+                ".",
+                staged=(args.staged or args.cached),
+                paths=args.paths or None,
+                outstream=sys.stdout.buffer,
+            )
+        elif len(args.committish) == 1:
+            # Show diff between working tree and specified commit
+            if args.staged or args.cached:
+                parser.error("--staged/--cached cannot be used with commits")
+            porcelain.diff(
+                ".",
+                commit=args.committish[0],
+                staged=False,
+                paths=args.paths or None,
+                outstream=sys.stdout.buffer,
+            )
+        elif len(args.committish) == 2:
+            # Show diff between two commits
+            porcelain.diff(
+                ".",
+                commit=args.committish[0],
+                commit2=args.committish[1],
+                paths=args.paths or None,
+                outstream=sys.stdout.buffer,
+            )
+        else:
+            parser.error("Too many arguments - specify at most two commits")
 
 
 class cmd_dump_pack(Command):

+ 506 - 0
dulwich/diff.py

@@ -0,0 +1,506 @@
+# diff.py -- Diff functionality for Dulwich
+# Copyright (C) 2025 Dulwich contributors
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Diff functionality with separate codepaths.
+
+This module provides three main functions for different diff scenarios:
+
+1. diff_index_to_tree: Shows staged changes (index vs commit)
+   Used by: git diff --staged, git diff --cached
+
+2. diff_working_tree_to_tree: Shows all changes from a commit to working tree
+   Used by: git diff <commit>
+
+3. diff_working_tree_to_index: Shows unstaged changes (working tree vs index)
+   Used by: git diff (with no arguments)
+
+Example usage:
+    from dulwich.repo import Repo
+    from dulwich.diff import diff_index_to_tree
+    import sys
+
+    repo = Repo('.')
+    # Show staged changes
+    diff_index_to_tree(repo, sys.stdout.buffer)
+
+    # Show changes in specific paths only
+    diff_index_to_tree(repo, sys.stdout.buffer, paths=[b'src/', b'README.md'])
+"""
+
+import logging
+import os
+import stat
+from typing import BinaryIO, Optional, cast
+
+from .index import ConflictedIndexEntry, commit_index
+from .object_store import iter_tree_contents
+from .objects import S_ISGITLINK, Blob
+from .patch import write_blob_diff, write_object_diff
+from .repo import Repo
+
+logger = logging.getLogger(__name__)
+
+
+def should_include_path(path: bytes, paths: Optional[list[bytes]]) -> bool:
+    """Check if a path should be included based on path filters.
+
+    Args:
+        path: The path to check
+        paths: List of path filters, or None for no filtering
+
+    Returns:
+        True if the path should be included
+    """
+    if not paths:
+        return True
+    return any(path == p or path.startswith(p + b"/") for p in paths)
+
+
+def diff_index_to_tree(
+    repo: Repo,
+    outstream: BinaryIO,
+    commit_sha: Optional[bytes] = None,
+    paths: Optional[list[bytes]] = None,
+) -> None:
+    """Show staged changes (index vs commit).
+
+    Args:
+        repo: Repository object
+        outstream: Stream to write diff to
+        commit_sha: SHA of commit to compare against, or None for HEAD
+        paths: Optional list of paths to filter (as bytes)
+    """
+    if commit_sha is None:
+        try:
+            commit_sha = repo.refs[b"HEAD"]
+            old_tree = repo[commit_sha].tree
+        except KeyError:
+            # No HEAD means no commits yet
+            old_tree = None
+    else:
+        old_tree = repo[commit_sha].tree
+
+    # Get tree from index
+    index = repo.open_index()
+    new_tree = commit_index(repo.object_store, index)
+    changes = repo.object_store.tree_changes(old_tree, new_tree, paths=paths)
+
+    for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
+        write_object_diff(
+            outstream,
+            repo.object_store,
+            (oldpath, oldmode, oldsha),
+            (newpath, newmode, newsha),
+        )
+
+
+def diff_working_tree_to_tree(
+    repo: Repo,
+    outstream: BinaryIO,
+    commit_sha: bytes,
+    paths: Optional[list[bytes]] = None,
+) -> None:
+    """Compare working tree to a specific commit.
+
+    Args:
+        repo: Repository object
+        outstream: Stream to write diff to
+        commit_sha: SHA of commit to compare against
+        paths: Optional list of paths to filter (as bytes)
+    """
+    tree = repo[commit_sha].tree
+    normalizer = repo.get_blob_normalizer()
+    filter_callback = normalizer.checkin_normalize
+
+    # Get index for tracking new files
+    index = repo.open_index()
+    index_paths = set(index.paths())
+    processed_paths = set()
+
+    # Process files from the committed tree lazily
+    for entry in iter_tree_contents(repo.object_store, tree):
+        path = entry.path
+        if not should_include_path(path, paths):
+            continue
+
+        processed_paths.add(path)
+        full_path = os.path.join(repo.path, path.decode("utf-8"))
+
+        # Get the old file from tree
+        old_mode = entry.mode
+        old_sha = entry.sha
+        old_blob = repo.object_store[old_sha]
+        assert isinstance(old_blob, Blob)
+
+        try:
+            # Use lstat to handle symlinks properly
+            st = os.lstat(full_path)
+        except FileNotFoundError:
+            # File was deleted
+            if old_blob is not None:
+                write_blob_diff(
+                    outstream, (path, old_mode, old_blob), (None, None, None)
+                )
+        except PermissionError:
+            logger.warning("%s: Permission denied", path.decode())
+            # Show as deletion if it was in tree
+            if old_blob is not None:
+                write_blob_diff(
+                    outstream, (path, old_mode, old_blob), (None, None, None)
+                )
+        except OSError as e:
+            logger.warning("%s: %s", path.decode(), e)
+            # Show as deletion if it was in tree
+            if old_blob is not None:
+                write_blob_diff(
+                    outstream, (path, old_mode, old_blob), (None, None, None)
+                )
+        else:
+            # Handle different file types
+            if stat.S_ISDIR(st.st_mode):
+                if old_blob is not None:
+                    # Directory in working tree where file was expected
+                    if stat.S_ISLNK(old_mode):
+                        logger.warning("%s: symlink became a directory", path.decode())
+                    else:
+                        logger.warning("%s: file became a directory", path.decode())
+                    # Show as deletion
+                    write_blob_diff(
+                        outstream, (path, old_mode, old_blob), (None, None, None)
+                    )
+                # If old_blob is None, it's a new directory - skip it
+                continue
+
+            elif stat.S_ISLNK(st.st_mode):
+                # Symlink in working tree
+                target = os.readlink(full_path).encode("utf-8")
+                new_blob = Blob()
+                new_blob.data = target
+
+                if old_blob is None:
+                    # New symlink
+                    write_blob_diff(
+                        outstream,
+                        (None, None, None),
+                        (path, stat.S_IFLNK | 0o777, new_blob),
+                    )
+                elif not stat.S_ISLNK(old_mode):
+                    # Type change: file/submodule -> symlink
+                    write_blob_diff(
+                        outstream,
+                        (path, old_mode, old_blob),
+                        (path, stat.S_IFLNK | 0o777, new_blob),
+                    )
+                elif old_blob is not None and old_blob.data != target:
+                    # Symlink target changed
+                    write_blob_diff(
+                        outstream,
+                        (path, old_mode, old_blob),
+                        (path, old_mode, new_blob),
+                    )
+
+            elif stat.S_ISREG(st.st_mode):
+                # Regular file
+                with open(full_path, "rb") as f:
+                    new_content = f.read()
+
+                # Create a temporary blob for filtering and comparison
+                new_blob = Blob()
+                new_blob.data = new_content
+
+                # Apply filters if needed (only for regular files, not gitlinks)
+                if filter_callback is not None and (
+                    old_blob is None or not S_ISGITLINK(old_mode)
+                ):
+                    new_blob = filter_callback(new_blob, path)
+
+                # Determine the git mode for the new file
+                if st.st_mode & stat.S_IXUSR:
+                    new_git_mode = stat.S_IFREG | 0o755
+                else:
+                    new_git_mode = stat.S_IFREG | 0o644
+
+                if old_blob is None:
+                    # New file
+                    write_blob_diff(
+                        outstream, (None, None, None), (path, new_git_mode, new_blob)
+                    )
+                elif stat.S_ISLNK(old_mode):
+                    # Symlink -> file
+                    write_blob_diff(
+                        outstream,
+                        (path, old_mode, old_blob),
+                        (path, new_git_mode, new_blob),
+                    )
+                elif S_ISGITLINK(old_mode):
+                    # Submodule -> file
+                    write_blob_diff(
+                        outstream,
+                        (path, old_mode, old_blob),
+                        (path, new_git_mode, new_blob),
+                    )
+                else:
+                    # Regular file, check for content or mode changes
+                    old_git_mode = old_mode & (stat.S_IFREG | 0o777)
+                    if (
+                        old_blob is not None and old_blob.data != new_blob.data
+                    ) or old_git_mode != new_git_mode:
+                        write_blob_diff(
+                            outstream,
+                            (path, old_mode, old_blob),
+                            (path, new_git_mode, new_blob),
+                        )
+
+            elif stat.S_ISFIFO(st.st_mode):
+                logger.warning("%s: unsupported file type (fifo)", path.decode())
+                if old_blob is not None:
+                    write_blob_diff(
+                        outstream, (path, old_mode, old_blob), (None, None, None)
+                    )
+            elif stat.S_ISSOCK(st.st_mode):
+                logger.warning("%s: unsupported file type (socket)", path.decode())
+                if old_blob is not None:
+                    write_blob_diff(
+                        outstream, (path, old_mode, old_blob), (None, None, None)
+                    )
+            else:
+                logger.warning("%s: unsupported file type", path.decode())
+                if old_blob is not None:
+                    write_blob_diff(
+                        outstream, (path, old_mode, old_blob), (None, None, None)
+                    )
+
+    # Now process any new files from index that weren't in the tree
+    for path in sorted(index_paths - processed_paths):
+        if not should_include_path(path, paths):
+            continue
+
+        full_path = os.path.join(repo.path, path.decode("utf-8"))
+
+        try:
+            # Use lstat to handle symlinks properly
+            st = os.lstat(full_path)
+        except FileNotFoundError:
+            # New file already deleted, skip
+            continue
+        except PermissionError:
+            logger.warning("%s: Permission denied", path.decode())
+            continue
+        except OSError as e:
+            logger.warning("%s: %s", path.decode(), e)
+            continue
+
+        # Handle different file types for new files
+        if stat.S_ISDIR(st.st_mode):
+            # New directory - skip it
+            continue
+        elif stat.S_ISLNK(st.st_mode):
+            # New symlink
+            target = os.readlink(full_path).encode("utf-8")
+            new_blob = Blob()
+            new_blob.data = target
+            write_blob_diff(
+                outstream,
+                (None, None, None),
+                (path, stat.S_IFLNK | 0o777, new_blob),
+            )
+        elif stat.S_ISREG(st.st_mode):
+            # New regular file
+            with open(full_path, "rb") as f:
+                new_content = f.read()
+
+            new_blob = Blob()
+            new_blob.data = new_content
+
+            # Apply filters if needed
+            if filter_callback is not None:
+                new_blob = filter_callback(new_blob, path)
+
+            # Determine the git mode for the new file
+            if st.st_mode & stat.S_IXUSR:
+                new_git_mode = 0o100755
+            else:
+                new_git_mode = 0o100644
+
+            write_blob_diff(
+                outstream, (None, None, None), (path, new_git_mode, new_blob)
+            )
+        elif stat.S_ISFIFO(st.st_mode):
+            logger.warning("%s: unsupported file type (fifo)", path.decode())
+        elif stat.S_ISSOCK(st.st_mode):
+            logger.warning("%s: unsupported file type (socket)", path.decode())
+        else:
+            logger.warning("%s: unsupported file type", path.decode())
+
+
+def diff_working_tree_to_index(
+    repo: Repo, outstream: BinaryIO, paths: Optional[list[bytes]] = None
+) -> None:
+    """Compare working tree to index.
+
+    Args:
+        repo: Repository object
+        outstream: Stream to write diff to
+        paths: Optional list of paths to filter (as bytes)
+    """
+    index = repo.open_index()
+    normalizer = repo.get_blob_normalizer()
+    filter_callback = normalizer.checkin_normalize
+
+    # Process each file in the index
+    for tree_path, entry in index.iteritems():
+        if not should_include_path(tree_path, paths):
+            continue
+
+        # Handle conflicted entries by using stage 2 ("ours")
+        if isinstance(entry, ConflictedIndexEntry):
+            if entry.this is None:
+                continue  # No stage 2 entry, skip
+            old_mode = entry.this.mode
+            old_sha = entry.this.sha
+        else:
+            # Get file from regular index entry
+            old_mode = entry.mode
+            old_sha = entry.sha
+        old_obj = repo.object_store[old_sha]
+        # Type check and cast to Blob
+        if isinstance(old_obj, Blob):
+            old_blob = cast(Blob, old_obj)
+        else:
+            old_blob = None
+
+        full_path = os.path.join(repo.path, tree_path.decode("utf-8"))
+        try:
+            # Use lstat to handle symlinks properly
+            st = os.lstat(full_path)
+
+            # Handle different file types
+            if stat.S_ISDIR(st.st_mode):
+                # Directory in working tree where file was expected
+                if stat.S_ISLNK(old_mode):
+                    logger.warning("%s: symlink became a directory", tree_path.decode())
+                else:
+                    logger.warning("%s: file became a directory", tree_path.decode())
+                # Show as deletion
+                write_blob_diff(
+                    outstream, (tree_path, old_mode, old_blob), (None, None, None)
+                )
+
+            elif stat.S_ISLNK(st.st_mode):
+                # Symlink in working tree
+                target = os.readlink(full_path).encode("utf-8")
+                new_blob = Blob()
+                new_blob.data = target
+
+                # Check if type changed or content changed
+                if not stat.S_ISLNK(old_mode):
+                    # Type change: file/submodule -> symlink
+                    write_blob_diff(
+                        outstream,
+                        (tree_path, old_mode, old_blob),
+                        (tree_path, stat.S_IFLNK | 0o777, new_blob),
+                    )
+                elif old_blob is not None and old_blob.data != target:
+                    # Symlink target changed
+                    write_blob_diff(
+                        outstream,
+                        (tree_path, old_mode, old_blob),
+                        (tree_path, old_mode, new_blob),
+                    )
+
+            elif stat.S_ISREG(st.st_mode):
+                # Regular file
+                with open(full_path, "rb") as f:
+                    new_content = f.read()
+
+                # Create a temporary blob for filtering and comparison
+                new_blob = Blob()
+                new_blob.data = new_content
+
+                # Apply filters if needed (only for regular files)
+                if filter_callback is not None and not S_ISGITLINK(old_mode):
+                    new_blob = filter_callback(new_blob, tree_path)
+
+                # Determine the git mode for the new file
+                if st.st_mode & stat.S_IXUSR:
+                    new_git_mode = stat.S_IFREG | 0o755
+                else:
+                    new_git_mode = stat.S_IFREG | 0o644
+
+                # Check if this was a type change
+                if stat.S_ISLNK(old_mode):
+                    # Symlink -> file
+                    write_blob_diff(
+                        outstream,
+                        (tree_path, old_mode, old_blob),
+                        (tree_path, new_git_mode, new_blob),
+                    )
+                elif S_ISGITLINK(old_mode):
+                    # Submodule -> file
+                    write_blob_diff(
+                        outstream,
+                        (tree_path, old_mode, old_blob),
+                        (tree_path, new_git_mode, new_blob),
+                    )
+                else:
+                    # Regular file, check for content or mode changes
+                    old_git_mode = old_mode & (stat.S_IFREG | 0o777)
+                    if (
+                        old_blob is not None and old_blob.data != new_blob.data
+                    ) or old_git_mode != new_git_mode:
+                        write_blob_diff(
+                            outstream,
+                            (tree_path, old_mode, old_blob),
+                            (tree_path, new_git_mode, new_blob),
+                        )
+
+            elif stat.S_ISFIFO(st.st_mode):
+                logger.warning("%s: unsupported file type (fifo)", tree_path.decode())
+                write_blob_diff(
+                    outstream, (tree_path, old_mode, old_blob), (None, None, None)
+                )
+            elif stat.S_ISSOCK(st.st_mode):
+                logger.warning("%s: unsupported file type (socket)", tree_path.decode())
+                write_blob_diff(
+                    outstream, (tree_path, old_mode, old_blob), (None, None, None)
+                )
+            else:
+                logger.warning("%s: unsupported file type", tree_path.decode())
+                write_blob_diff(
+                    outstream, (tree_path, old_mode, old_blob), (None, None, None)
+                )
+
+        except FileNotFoundError:
+            # File was deleted - this is normal, not a warning
+            write_blob_diff(
+                outstream, (tree_path, old_mode, old_blob), (None, None, None)
+            )
+        except PermissionError:
+            logger.warning("%s: Permission denied", tree_path.decode())
+            # Show as deletion since we can't read it
+            write_blob_diff(
+                outstream, (tree_path, old_mode, old_blob), (None, None, None)
+            )
+        except OSError as e:
+            logger.warning("%s: %s", tree_path.decode(), e)
+            # Show as deletion since we can't read it
+            write_blob_diff(
+                outstream, (tree_path, old_mode, old_blob), (None, None, None)
+            )

+ 49 - 2
dulwich/diff_tree.py

@@ -125,6 +125,7 @@ def walk_trees(
     tree1_id: Optional[ObjectID],
     tree2_id: Optional[ObjectID],
     prune_identical: bool = False,
+    paths: Optional[list[bytes]] = None,
 ) -> Iterator[tuple[TreeEntry, TreeEntry]]:
     """Recursively walk all the entries of two trees.
 
@@ -135,6 +136,7 @@ def walk_trees(
       tree1_id: The SHA of the first Tree object to iterate, or None.
       tree2_id: The SHA of the second Tree object to iterate, or None.
       prune_identical: If True, identical subtrees will not be walked.
+      paths: Optional list of paths to filter to (as bytes).
 
     Returns:
       Iterator over Pairs of TreeEntry objects for each pair of entries
@@ -159,6 +161,31 @@ def walk_trees(
         tree2 = (is_tree2 and store[entry2.sha]) or None
         path = entry1.path or entry2.path
 
+        # If we have path filters, check if we should process this tree
+        if paths is not None and (is_tree1 or is_tree2):
+            # Special case for root tree
+            if path == b"":
+                should_recurse = True
+            else:
+                # Check if any of our filter paths could be under this tree
+                should_recurse = False
+                for filter_path in paths:
+                    if filter_path == path:
+                        # Exact match - we want this directory itself
+                        should_recurse = True
+                        break
+                    elif filter_path.startswith(path + b"/"):
+                        # Filter path is under this directory
+                        should_recurse = True
+                        break
+                    elif path.startswith(filter_path + b"/"):
+                        # This directory is under a filter path
+                        should_recurse = True
+                        break
+            if not should_recurse:
+                # Skip this tree entirely
+                continue
+
         # Ensure trees are Tree objects before merging
         if tree1 is not None and not isinstance(tree1, Tree):
             tree1 = None
@@ -172,7 +199,25 @@ def walk_trees(
             if tree2 is None:
                 tree2 = Tree()
             todo.extend(reversed(_merge_entries(path, tree1, tree2)))
-        yield entry1, entry2
+
+        # Only yield entries that match our path filters
+        if paths is None:
+            yield entry1, entry2
+        else:
+            # Check if this entry matches any of our filters
+            for filter_path in paths:
+                if path == filter_path:
+                    # Exact match
+                    yield entry1, entry2
+                    break
+                elif path.startswith(filter_path + b"/"):
+                    # This entry is under a filter directory
+                    yield entry1, entry2
+                    break
+                elif filter_path.startswith(path + b"/") and (is_tree1 or is_tree2):
+                    # This is a parent directory of a filter path
+                    yield entry1, entry2
+                    break
 
 
 def _skip_tree(entry: TreeEntry, include_trees: bool) -> TreeEntry:
@@ -189,6 +234,7 @@ def tree_changes(
     rename_detector: Optional["RenameDetector"] = None,
     include_trees: bool = False,
     change_type_same: bool = False,
+    paths: Optional[list[bytes]] = None,
 ) -> Iterator[TreeChange]:
     """Find the differences between the contents of two trees.
 
@@ -202,6 +248,7 @@ def tree_changes(
       rename_detector: RenameDetector object for detecting renames.
       change_type_same: Whether to report change types in the same
         entry or as delete+add.
+      paths: Optional list of paths to filter to (as bytes).
 
     Returns:
       Iterator over TreeChange instances for each change between the
@@ -217,7 +264,7 @@ def tree_changes(
         return
 
     entries = walk_trees(
-        store, tree1_id, tree2_id, prune_identical=(not want_unchanged)
+        store, tree1_id, tree2_id, prune_identical=(not want_unchanged), paths=paths
     )
     for entry1, entry2 in entries:
         if entry1 == entry2 and not want_unchanged:

+ 4 - 0
dulwich/object_store.py

@@ -271,6 +271,7 @@ class BaseObjectStore:
         include_trees=False,
         change_type_same=False,
         rename_detector=None,
+        paths=None,
     ):
         """Find the differences between the contents of two trees.
 
@@ -281,6 +282,8 @@ class BaseObjectStore:
           include_trees: Whether to include trees
           change_type_same: Whether to report files changing
             type in the same entry.
+          rename_detector: RenameDetector object for detecting renames.
+          paths: Optional list of paths to filter to (as bytes).
         Returns: Iterator over tuples with
             (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
         """
@@ -294,6 +297,7 @@ class BaseObjectStore:
             include_trees=include_trees,
             change_type_same=change_type_same,
             rename_detector=rename_detector,
+            paths=paths,
         ):
             yield (
                 (change.old.path, change.new.path),

+ 102 - 0
dulwich/porcelain.py

@@ -1230,6 +1230,108 @@ def diff_tree(repo, old_tree, new_tree, outstream=default_bytes_out_stream) -> N
         write_tree_diff(outstream, r.object_store, old_tree, new_tree)
 
 
+def diff(
+    repo=".",
+    commit=None,
+    commit2=None,
+    staged=False,
+    paths=None,
+    outstream=default_bytes_out_stream,
+) -> None:
+    """Show diff.
+
+    Args:
+      repo: Path to repository
+      commit: First commit to compare. If staged is True, compare
+              index to this commit. If staged is False, compare working tree
+              to this commit. If None, defaults to HEAD for staged and index
+              for unstaged.
+      commit2: Second commit to compare against first commit. If provided,
+               show diff between commit and commit2 (ignoring staged flag).
+      staged: If True, show staged changes (index vs commit).
+              If False, show unstaged changes (working tree vs commit/index).
+              Ignored if commit2 is provided.
+      paths: Optional list of paths to limit diff
+      outstream: Stream to write to
+    """
+    from . import diff as diff_module
+    from .objectspec import parse_commit
+
+    with open_repo_closing(repo) as r:
+        # Normalize paths to bytes
+        if paths is not None and paths:  # Check if paths is not empty
+            byte_paths = []
+            for p in paths:
+                if isinstance(p, str):
+                    byte_paths.append(p.encode("utf-8"))
+                else:
+                    byte_paths.append(p)
+            paths = byte_paths
+        elif paths == []:  # Convert empty list to None
+            paths = None
+
+        # Resolve commit refs to SHAs if provided
+        if commit is not None:
+            from .objects import Commit
+
+            if isinstance(commit, Commit):
+                # Already a Commit object
+                commit_sha = commit.id
+                commit_obj = commit
+            else:
+                # parse_commit handles both refs and SHAs, and always returns a Commit object
+                commit_obj = parse_commit(r, commit)
+                commit_sha = commit_obj.id
+        else:
+            commit_sha = None
+            commit_obj = None
+
+        if commit2 is not None:
+            # Compare two commits
+            from .objects import Commit
+            from .patch import write_object_diff
+
+            if isinstance(commit2, Commit):
+                commit2_obj = commit2
+            else:
+                commit2_obj = parse_commit(r, commit2)
+
+            # Get trees from commits
+            old_tree = commit_obj.tree if commit_obj else None
+            new_tree = commit2_obj.tree
+
+            # Use tree_changes to get the changes and apply path filtering
+            changes = r.object_store.tree_changes(old_tree, new_tree)
+            for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
+                # Skip if paths are specified and this change doesn't match
+                if paths:
+                    path_to_check = newpath or oldpath
+                    if not any(
+                        path_to_check == p or path_to_check.startswith(p + b"/")
+                        for p in paths
+                    ):
+                        continue
+
+                write_object_diff(
+                    outstream,
+                    r.object_store,
+                    (oldpath, oldmode, oldsha),
+                    (newpath, newmode, newsha),
+                )
+        elif staged:
+            # Show staged changes (index vs commit)
+            diff_module.diff_index_to_tree(r, outstream, commit_sha, paths)
+        elif commit is not None:
+            # Compare working tree to a specific commit
+            assert (
+                commit_sha is not None
+            )  # mypy: commit_sha is set when commit is not None
+            diff_module.diff_working_tree_to_tree(r, outstream, commit_sha, paths)
+        else:
+            # Compare working tree to index
+            diff_module.diff_working_tree_to_index(r, outstream, paths)
+
+
 def rev_list(repo, commits, outstream=sys.stdout) -> None:
     """Lists commit objects in reverse chronological order.
 

+ 185 - 0
tests/test_cli.py

@@ -295,6 +295,191 @@ class TagCommandTest(DulwichCliTestCase):
         self.assertIn(b"refs/tags/v1.0", self.repo.refs.keys())
 
 
+class DiffCommandTest(DulwichCliTestCase):
+    """Tests for diff command."""
+
+    def test_diff_working_tree(self):
+        # Commit a baseline file so later edits show up as differences
+        test_file = os.path.join(self.repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("initial content\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=Initial")
+
+        # Modify the file in the working tree only (not staged)
+        with open(test_file, "w") as f:
+            f.write("initial content\nmodified\n")
+
+        # Plain `diff` (no arguments) should report the unstaged addition
+        result, stdout, stderr = self._run_cli("diff")
+        self.assertIn("+modified", stdout)
+
+    def test_diff_staged(self):
+        # Create initial commit to diff the staged change against
+        test_file = os.path.join(self.repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("initial content\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=Initial")
+
+        # Append a line and stage it ("new file" is just the appended text)
+        with open(test_file, "w") as f:
+            f.write("initial content\nnew file\n")
+        self._run_cli("add", "test.txt")
+
+        # --staged should show the staged addition
+        result, stdout, stderr = self._run_cli("diff", "--staged")
+        self.assertIn("+new file", stdout)
+
+    def test_diff_cached(self):
+        # Create initial commit to diff the staged change against
+        test_file = os.path.join(self.repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("initial content\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=Initial")
+
+        # Append a line and stage it ("new file" is just the appended text)
+        with open(test_file, "w") as f:
+            f.write("initial content\nnew file\n")
+        self._run_cli("add", "test.txt")
+
+        # --cached is an alias for --staged and should give the same output
+        result, stdout, stderr = self._run_cli("diff", "--cached")
+        self.assertIn("+new file", stdout)
+
+    def test_diff_commit(self):
+        # Create two commits so HEAD has history behind it
+        test_file = os.path.join(self.repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("first version\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=First")
+
+        with open(test_file, "w") as f:
+            f.write("first version\nsecond line\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=Second")
+
+        # Append an uncommitted, unstaged line to the working tree
+        with open(test_file, "a") as f:
+            f.write("working tree change\n")
+
+        # A single committish means "working tree vs that commit" (here HEAD)
+        result, stdout, stderr = self._run_cli("diff", "HEAD")
+        self.assertIn("+working tree change", stdout)
+
+    def test_diff_two_commits(self):
+        # Create the first of two commits
+        test_file = os.path.join(self.repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("first version\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=First")
+
+        # Record the first commit's SHA (HEAD at this point) as a str
+        first_commit = self.repo.refs[b"HEAD"].decode()
+
+        with open(test_file, "w") as f:
+            f.write("first version\nsecond line\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=Second")
+
+        # Record the second commit's SHA (HEAD has moved) as a str
+        second_commit = self.repo.refs[b"HEAD"].decode()
+
+        # Diff first -> second should show the line the second commit added
+        result, stdout, stderr = self._run_cli("diff", first_commit, second_commit)
+        self.assertIn("+second line", stdout)
+
+    def test_diff_commit_vs_working_tree(self):
+        # Test that diff <commit> shows working tree vs commit (not commit vs parent)
+        test_file = os.path.join(self.repo_path, "test.txt")
+        with open(test_file, "w") as f:
+            f.write("first version\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=First")
+
+        first_commit = self.repo.refs[b"HEAD"].decode()
+
+        with open(test_file, "w") as f:
+            f.write("first version\nsecond line\n")
+        self._run_cli("add", "test.txt")
+        self._run_cli("commit", "--message=Second")
+
+        # Overwrite the file in the working tree (unstaged, uncommitted)
+        with open(test_file, "w") as f:
+            f.write("completely different\n")
+
+        # diff <first_commit> should show working tree vs first commit
+        result, stdout, stderr = self._run_cli("diff", first_commit)
+        self.assertIn("-first version", stdout)
+        self.assertIn("+completely different", stdout)
+
+    def test_diff_with_paths(self):
+        # Test path filtering (arguments after "--") in each diff mode
+        # Create two top-level files and one file inside a subdirectory
+        file1 = os.path.join(self.repo_path, "file1.txt")
+        file2 = os.path.join(self.repo_path, "file2.txt")
+        subdir = os.path.join(self.repo_path, "subdir")
+        os.makedirs(subdir)
+        file3 = os.path.join(subdir, "file3.txt")
+
+        with open(file1, "w") as f:
+            f.write("content1\n")
+        with open(file2, "w") as f:
+            f.write("content2\n")
+        with open(file3, "w") as f:
+            f.write("content3\n")
+
+        self._run_cli("add", ".")
+        self._run_cli("commit", "--message=Initial")
+
+        # Modify all three files in the working tree (nothing staged yet)
+        with open(file1, "w") as f:
+            f.write("modified1\n")
+        with open(file2, "w") as f:
+            f.write("modified2\n")
+        with open(file3, "w") as f:
+            f.write("modified3\n")
+
+        # Restrict the unstaged diff to a single file
+        result, stdout, stderr = self._run_cli("diff", "--", "file1.txt")
+        self.assertIn("file1.txt", stdout)
+        self.assertNotIn("file2.txt", stdout)
+        self.assertNotIn("file3.txt", stdout)
+
+        # A directory path should select the files beneath it
+        result, stdout, stderr = self._run_cli("diff", "--", "subdir")
+        self.assertNotIn("file1.txt", stdout)
+        self.assertNotIn("file2.txt", stdout)
+        self.assertIn("file3.txt", stdout)
+
+        # Stage file1 only, then check the staged diff restricted to it
+        self._run_cli("add", "file1.txt")
+        result, stdout, stderr = self._run_cli("diff", "--staged", "--", "file1.txt")
+        self.assertIn("file1.txt", stdout)
+        self.assertIn("+modified1", stdout)
+
+        # Test diff with multiple paths (file2 and file3 are still unstaged)
+        result, stdout, stderr = self._run_cli(
+            "diff", "--", "file2.txt", "subdir/file3.txt"
+        )
+        self.assertIn("file2.txt", stdout)
+        self.assertIn("file3.txt", stdout)
+        self.assertNotIn("file1.txt", stdout)
+
+        # Diff working tree against the (only) commit, restricted to file1.txt
+        first_commit = self.repo.refs[b"HEAD"].decode()
+        with open(file1, "w") as f:
+            f.write("newer1\n")
+        result, stdout, stderr = self._run_cli("diff", first_commit, "--", "file1.txt")
+        self.assertIn("file1.txt", stdout)
+        self.assertIn("-content1", stdout)
+        self.assertIn("+newer1", stdout)
+        self.assertNotIn("file2.txt", stdout)
+
+
 class FilterBranchCommandTest(DulwichCliTestCase):
     """Tests for filter-branch command."""
 

+ 299 - 53
tests/test_diff_tree.py

@@ -187,16 +187,16 @@ class TreeChangesTest(DiffTestCase):
         tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
         self.assertChangesEqual(
             [
-                TreeChange.add((b"a", 0o100644, blob_a.id)),
-                TreeChange.add((b"x/b", 0o100755, blob_b.id)),
+                TreeChange.add(TreeEntry(b"a", 0o100644, blob_a.id)),
+                TreeChange.add(TreeEntry(b"x/b", 0o100755, blob_b.id)),
             ],
             self.empty_tree,
             tree,
         )
         self.assertChangesEqual(
             [
-                TreeChange.delete((b"a", 0o100644, blob_a.id)),
-                TreeChange.delete((b"x/b", 0o100755, blob_b.id)),
+                TreeChange.delete(TreeEntry(b"a", 0o100644, blob_a.id)),
+                TreeChange.delete(TreeEntry(b"x/b", 0o100755, blob_b.id)),
             ],
             tree,
             self.empty_tree,
@@ -236,8 +236,8 @@ class TreeChangesTest(DiffTestCase):
         tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
         self.assertChangesEqual(
             [
-                TreeChange.delete((b"a", 0o100644, blob_a1.id)),
-                TreeChange.add((b"a", 0o120000, blob_a2.id)),
+                TreeChange.delete(TreeEntry(b"a", 0o100644, blob_a1.id)),
+                TreeChange.add(TreeEntry(b"a", 0o120000, blob_a2.id)),
             ],
             tree1,
             tree2,
@@ -268,8 +268,8 @@ class TreeChangesTest(DiffTestCase):
         tree2 = self.commit_tree([(b"a/x", blob_x)])
         self.assertChangesEqual(
             [
-                TreeChange.delete((b"a", F, blob_a.id)),
-                TreeChange.add((b"a/x", F, blob_x.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob_a.id)),
+                TreeChange.add(TreeEntry(b"a/x", F, blob_x.id)),
             ],
             tree1,
             tree2,
@@ -311,11 +311,11 @@ class TreeChangesTest(DiffTestCase):
                     (b"a", F, blob_a_1.id),
                     (b"a", F, blob_a_2.id),
                 ),
-                TreeChange.delete((b"b/x/2", F, blob_bx2_1.id)),
-                TreeChange.add((b"b/y", F, blob_by_2.id)),
-                TreeChange.delete((b"b/y/1", F, blob_by1_1.id)),
-                TreeChange.delete((b"b/y/2", F, blob_by2_1.id)),
-                TreeChange.add((b"c", F, blob_c_2.id)),
+                TreeChange.delete(TreeEntry(b"b/x/2", F, blob_bx2_1.id)),
+                TreeChange.add(TreeEntry(b"b/y", F, blob_by_2.id)),
+                TreeChange.delete(TreeEntry(b"b/y/1", F, blob_by1_1.id)),
+                TreeChange.delete(TreeEntry(b"b/y/2", F, blob_by2_1.id)),
+                TreeChange.add(TreeEntry(b"c", F, blob_c_2.id)),
             ],
             tree1,
             tree2,
@@ -330,10 +330,10 @@ class TreeChangesTest(DiffTestCase):
 
         self.assertChangesEqual(
             [
-                TreeChange.delete((b"a", F, blob.id)),
-                TreeChange.add((b"a/x", F, blob.id)),
-                TreeChange.delete((b"a.", F, blob.id)),
-                TreeChange.add((b"a./x", F, blob.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob.id)),
+                TreeChange.add(TreeEntry(b"a/x", F, blob.id)),
+                TreeChange.delete(TreeEntry(b"a.", F, blob.id)),
+                TreeChange.add(TreeEntry(b"a./x", F, blob.id)),
             ],
             tree1,
             tree2,
@@ -367,21 +367,21 @@ class TreeChangesTest(DiffTestCase):
 
         self.assertChangesEqual(
             [
-                TreeChange.delete((b"a", F, blob_a1.id)),
-                TreeChange.add((b"c", F, blob_a2.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob_a1.id)),
+                TreeChange.add(TreeEntry(b"c", F, blob_a2.id)),
             ],
             tree1,
             tree2,
         )
         self.assertChangesEqual(
             [
-                TreeChange.delete((b"a", F, blob_a1.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob_a1.id)),
                 TreeChange(
                     CHANGE_UNCHANGED,
                     (b"b", F, blob_b.id),
                     (b"b", F, blob_b.id),
                 ),
-                TreeChange.add((b"c", F, blob_a2.id)),
+                TreeChange.add(TreeEntry(b"c", F, blob_a2.id)),
             ],
             tree1,
             tree2,
@@ -440,7 +440,7 @@ class TreeChangesTest(DiffTestCase):
         self.assertChangesForMergeEqual(
             [
                 [
-                    TreeChange.add((b"a", F, blob2.id)),
+                    TreeChange.add(TreeEntry(b"a", F, blob2.id)),
                     TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
                 ]
             ],
@@ -482,8 +482,8 @@ class TreeChangesTest(DiffTestCase):
         self.assertChangesForMergeEqual(
             [
                 [
-                    TreeChange.delete((b"a", F, blob1.id)),
-                    TreeChange.delete((b"a", F, blob2.id)),
+                    TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
+                    TreeChange.delete(TreeEntry(b"a", F, blob2.id)),
                 ]
             ],
             [parent1, parent2],
@@ -538,8 +538,8 @@ class TreeChangesTest(DiffTestCase):
         self.assertChangesForMergeEqual(
             [
                 [
-                    TreeChange.delete((b"a", F, blob1.id)),
-                    TreeChange.delete((b"a", F, blob2.id)),
+                    TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
+                    TreeChange.delete(TreeEntry(b"a", F, blob2.id)),
                     None,
                 ]
             ],
@@ -552,7 +552,7 @@ class TreeChangesTest(DiffTestCase):
         parent1 = self.commit_tree([(b"a", blob)])
         parent2 = self.commit_tree([])
         merge = self.commit_tree([(b"b", blob)])
-        add = TreeChange.add((b"b", F, blob.id))
+        add = TreeChange.add(TreeEntry(b"b", F, blob.id))
         self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
 
     def test_tree_changes_for_merge_add_exact_rename_conflict(self) -> None:
@@ -564,7 +564,7 @@ class TreeChangesTest(DiffTestCase):
             [
                 [
                     TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
-                    TreeChange.add((b"b", F, blob.id)),
+                    TreeChange.add(TreeEntry(b"b", F, blob.id)),
                 ]
             ],
             [parent1, parent2],
@@ -582,7 +582,7 @@ class TreeChangesTest(DiffTestCase):
             [
                 [
                     TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
-                    TreeChange.add((b"b", F, blob2.id)),
+                    TreeChange.add(TreeEntry(b"b", F, blob2.id)),
                 ]
             ],
             [parent1, parent2],
@@ -608,6 +608,252 @@ class TreeChangesTest(DiffTestCase):
             rename_detector=self.detector,
         )
 
+    def test_tree_changes_with_paths(self) -> None:
+        # Test restricting tree_changes() output via paths= on a flat tree
+        blob_a = make_object(Blob, data=b"a")
+        blob_b = make_object(Blob, data=b"b")
+        blob_c = make_object(Blob, data=b"c")
+        tree1 = self.commit_tree(
+            [
+                (b"file_a", blob_a),
+                (b"file_b", blob_b),
+            ]
+        )
+        tree2 = self.commit_tree(
+            [
+                (b"file_a", blob_b),  # modified (blob_a -> blob_b)
+                (b"file_b", blob_b),  # unchanged
+                (b"file_c", blob_c),  # added
+            ]
+        )
+
+        # No filtering - should see all changes (unchanged file_b is omitted)
+        self.assertChangesEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
+                ),
+                TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
+            ],
+            tree1,
+            tree2,
+        )
+
+        # Filter to file_a only: just the modification is reported
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_a"]))
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
+                ),
+            ],
+            changes,
+        )
+
+        # Filter to file_c only: just the addition is reported
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_c"]))
+        self.assertEqual(
+            [
+                TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
+            ],
+            changes,
+        )
+
+        # Filter to multiple files: changes for every listed path are reported
+        changes = list(
+            tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_a", b"file_c"])
+        )
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
+                ),
+                TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
+            ],
+            changes,
+        )
+
+        # Filter to a path present in neither tree: nothing is reported
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_d"]))
+        self.assertEqual([], changes)
+
+    def test_tree_changes_with_paths_dirs(self) -> None:
+        # Test that a directory path selects changes to the files beneath it
+        blob_a = make_object(Blob, data=b"a")
+        blob_b = make_object(Blob, data=b"b")
+        blob_c = make_object(Blob, data=b"c")
+        tree1 = self.commit_tree(
+            [
+                (b"dir1/file_a", blob_a),
+                (b"dir2/file_b", blob_b),
+            ]
+        )
+        tree2 = self.commit_tree(
+            [
+                (b"dir1/file_a", blob_b),  # modified (blob_a -> blob_b)
+                (b"dir1/file_c", blob_c),  # added
+                (b"dir2/file_b", blob_b),  # unchanged
+            ]
+        )
+
+        # Filter to dir1 only: both changes inside dir1 are reported
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir1"]))
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY,
+                    (b"dir1/file_a", F, blob_a.id),
+                    (b"dir1/file_a", F, blob_b.id),
+                ),
+                TreeChange.add(TreeEntry(b"dir1/file_c", F, blob_c.id)),
+            ],
+            changes,
+        )
+
+        # Filter to one file in the directory: its sibling file_c is excluded
+        changes = list(
+            tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir1/file_a"])
+        )
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY,
+                    (b"dir1/file_a", F, blob_a.id),
+                    (b"dir1/file_a", F, blob_b.id),
+                ),
+            ],
+            changes,
+        )
+
+        # Filter to dir2, whose only file is unchanged: nothing is reported
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir2"]))
+        self.assertEqual([], changes)
+
+    def test_tree_changes_with_paths_deep(self) -> None:
+        # Test path filtering several directory levels below the tree root
+        blob_a = make_object(Blob, data=b"a")
+        blob_b = make_object(Blob, data=b"b")
+        tree1 = self.commit_tree(
+            [
+                (b"a/b/c/file1", blob_a),
+                (b"a/b/d/file2", blob_a),
+                (b"x/y/z/file3", blob_a),
+            ]
+        )
+        tree2 = self.commit_tree(
+            [
+                (b"a/b/c/file1", blob_b),  # modified (blob_a -> blob_b)
+                (b"a/b/d/file2", blob_a),  # unchanged
+                (b"x/y/z/file3", blob_b),  # modified (blob_a -> blob_b)
+            ]
+        )
+
+        # Filter to a/b/c: only the change under that subdirectory is reported
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"a/b/c"]))
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY,
+                    (b"a/b/c/file1", F, blob_a.id),
+                    (b"a/b/c/file1", F, blob_b.id),
+                ),
+            ],
+            changes,
+        )
+
+        # Filter to a/b: covers subdirs c and d, but only c/file1 actually changed
+        changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"a/b"]))
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY,
+                    (b"a/b/c/file1", F, blob_a.id),
+                    (b"a/b/c/file1", F, blob_b.id),
+                ),
+            ],
+            changes,
+        )
+
+        # Filter to multiple deep paths that live under different top-level dirs
+        changes = list(
+            tree_changes(
+                self.store, tree1.id, tree2.id, paths=[b"a/b/c/file1", b"x/y/z/file3"]
+            )
+        )
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY,
+                    (b"a/b/c/file1", F, blob_a.id),
+                    (b"a/b/c/file1", F, blob_b.id),
+                ),
+                TreeChange(
+                    CHANGE_MODIFY,
+                    (b"x/y/z/file3", F, blob_a.id),
+                    (b"x/y/z/file3", F, blob_b.id),
+                ),
+            ],
+            changes,
+        )
+
+    def test_tree_changes_with_paths_include_unchanged(self) -> None:
+        # Test path filtering combined with want_unchanged=True
+        blob_a = make_object(Blob, data=b"a")
+        blob_b = make_object(Blob, data=b"b")
+        tree1 = self.commit_tree(
+            [
+                (b"file_a", blob_a),
+                (b"file_b", blob_b),
+            ]
+        )
+        tree2 = self.commit_tree(
+            [
+                (b"file_a", blob_b),  # modified (blob_a -> blob_b)
+                (b"file_b", blob_b),  # unchanged
+            ]
+        )
+
+        # Filter to file_b: the unchanged entry is reported as CHANGE_UNCHANGED
+        changes = list(
+            tree_changes(
+                self.store, tree1.id, tree2.id, paths=[b"file_b"], want_unchanged=True
+            )
+        )
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_UNCHANGED,
+                    (b"file_b", F, blob_b.id),
+                    (b"file_b", F, blob_b.id),
+                ),
+            ],
+            changes,
+        )
+
+        # Filter to both files: the modified and the unchanged entry both appear
+        changes = list(
+            tree_changes(
+                self.store,
+                tree1.id,
+                tree2.id,
+                paths=[b"file_a", b"file_b"],
+                want_unchanged=True,
+            )
+        )
+        self.assertEqual(
+            [
+                TreeChange(
+                    CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
+                ),
+                TreeChange(
+                    CHANGE_UNCHANGED,
+                    (b"file_b", F, blob_b.id),
+                    (b"file_b", F, blob_b.id),
+                ),
+            ],
+            changes,
+        )
+
 
 class RenameDetectionTest(DiffTestCase):
     def _do_test_count_blocks(self, count_blocks) -> None:
@@ -686,7 +932,7 @@ class RenameDetectionTest(DiffTestCase):
         blob1 = make_object(Blob, data=b"ab\ncd\n")
         blob2 = make_object(Blob, data=b"ab\n")
 
-        block_cache = {}
+        block_cache: dict[bytes, dict[int, int]] = {}
         self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
         self.assertEqual({blob1.id, blob2.id}, set(block_cache))
 
@@ -759,8 +1005,8 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b"a", blob, 0o120000)])
         self.assertEqual(
             [
-                TreeChange.add((b"a", 0o120000, blob.id)),
-                TreeChange.delete((b"a", 0o100644, blob.id)),
+                TreeChange.add(TreeEntry(b"a", 0o120000, blob.id)),
+                TreeChange.delete(TreeEntry(b"a", 0o100644, blob.id)),
             ],
             self.detect_renames(tree1, tree2),
         )
@@ -772,7 +1018,7 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
         self.assertEqual(
             [
-                TreeChange.add((b"a", 0o120000, blob2.id)),
+                TreeChange.add(TreeEntry(b"a", 0o120000, blob2.id)),
                 TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
             ],
             self.detect_renames(tree1, tree2),
@@ -797,7 +1043,7 @@ class RenameDetectionTest(DiffTestCase):
         self.assertEqual(
             [
                 TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
-                TreeChange.delete((b"b", F, blob.id)),
+                TreeChange.delete(TreeEntry(b"b", F, blob.id)),
             ],
             self.detect_renames(tree1, tree2),
         )
@@ -855,8 +1101,8 @@ class RenameDetectionTest(DiffTestCase):
         )
         self.assertEqual(
             [
-                TreeChange.delete((b"a", F, blob1.id)),
-                TreeChange.add((b"b", F, blob2.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
+                TreeChange.add(TreeEntry(b"b", F, blob2.id)),
             ],
             self.detect_renames(tree1, tree2, rename_threshold=75),
         )
@@ -877,10 +1123,10 @@ class RenameDetectionTest(DiffTestCase):
         )
         self.assertEqual(
             [
-                TreeChange.delete((b"a", F, blob1.id)),
-                TreeChange.delete((b"b", F, blob2.id)),
-                TreeChange.add((b"c", F, blob3.id)),
-                TreeChange.add((b"d", F, blob4.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
+                TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
+                TreeChange.add(TreeEntry(b"c", F, blob3.id)),
+                TreeChange.add(TreeEntry(b"d", F, blob4.id)),
             ],
             self.detect_renames(tree1, tree2, max_files=1),
         )
@@ -909,7 +1155,7 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b"c", blob3)])
         self.assertEqual(
             [
-                TreeChange.delete((b"a", F, blob1.id)),
+                TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
                 TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
             ],
             self.detect_renames(tree1, tree2),
@@ -920,7 +1166,7 @@ class RenameDetectionTest(DiffTestCase):
         self.assertEqual(
             [
                 TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
-                TreeChange.delete((b"b", F, blob1.id)),
+                TreeChange.delete(TreeEntry(b"b", F, blob1.id)),
             ],
             self.detect_renames(tree3, tree4),
         )
@@ -948,7 +1194,7 @@ class RenameDetectionTest(DiffTestCase):
         self.assertEqual(
             [
                 TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
-                TreeChange.delete((b"b", F, blob2.id)),
+                TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
             ],
             self.detect_renames(tree1, tree2),
         )
@@ -966,7 +1212,7 @@ class RenameDetectionTest(DiffTestCase):
             [
                 TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
                 TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
-                TreeChange.delete((b"b", F, blob2.id)),
+                TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
             ],
             self.detect_renames(tree1, tree2),
         )
@@ -983,7 +1229,7 @@ class RenameDetectionTest(DiffTestCase):
                 TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
                 TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
                 TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
-                TreeChange.delete((b"d", F, blob1.id)),
+                TreeChange.delete(TreeEntry(b"d", F, blob1.id)),
             ],
             self.detect_renames(tree1, tree2),
         )
@@ -997,10 +1243,10 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
         self.assertEqual(
             [
-                TreeChange.delete((b"a", 0o100644, blob1.id)),
-                TreeChange.delete((b"b", 0o160000, link1)),
-                TreeChange.add((b"c", 0o100644, blob2.id)),
-                TreeChange.add((b"d", 0o160000, link2)),
+                TreeChange.delete(TreeEntry(b"a", 0o100644, blob1.id)),
+                TreeChange.delete(TreeEntry(b"b", 0o160000, link1)),
+                TreeChange.add(TreeEntry(b"c", 0o100644, blob2.id)),
+                TreeChange.add(TreeEntry(b"d", 0o160000, link2)),
             ],
             self.detect_renames(tree1, tree2),
         )
@@ -1058,7 +1304,7 @@ class RenameDetectionTest(DiffTestCase):
         )
         self.assertEqual(
             [
-                TreeChange.add((b"a", F, blob3.id)),
+                TreeChange.add(TreeEntry(b"a", F, blob3.id)),
                 TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
             ],
             self.detect_renames(tree1, tree2, rewrite_threshold=80),
@@ -1069,7 +1315,7 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b"a", blob)])
         tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
         self.assertEqual(
-            [TreeChange.add((b"b", F, blob.id))],
+            [TreeChange.add(TreeEntry(b"b", F, blob.id))],
             self.detect_renames(tree1, tree2),
         )
         self.assertEqual(
@@ -1083,7 +1329,7 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b"a", blob1)])
         tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
         self.assertEqual(
-            [TreeChange.add((b"b", F, blob2.id))],
+            [TreeChange.add(TreeEntry(b"b", F, blob2.id))],
             self.detect_renames(tree1, tree2),
         )
         self.assertEqual(
@@ -1106,7 +1352,7 @@ class RenameDetectionTest(DiffTestCase):
         )
         self.assertEqual(
             [
-                TreeChange.add((b"a", F, blob_a2.id)),
+                TreeChange.add(TreeEntry(b"a", F, blob_a2.id)),
                 TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
             ],
             self.detect_renames(

+ 267 - 0
tests/test_porcelain.py

@@ -2568,6 +2568,273 @@ class DiffTreeTests(PorcelainTestCase):
         self.assertEqual(outstream.getvalue(), b"")
 
 
+class DiffTests(PorcelainTestCase):
+    def test_diff_uncommitted_stage(self) -> None:
+        # Staged diff in a repository with no commits yet: the added file is new
+        fullpath = os.path.join(self.repo.path, "test.txt")
+        with open(fullpath, "w") as f:
+            f.write("Hello, world!\n")
+        porcelain.add(self.repo.path, paths=["test.txt"])
+
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, staged=True, outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/test.txt b/test.txt", diff_output)
+        self.assertIn(b"new file mode", diff_output)
+        self.assertIn(b"+Hello, world!", diff_output)
+
+    def test_diff_uncommitted_working_tree(self) -> None:
+        # Unstaged diff with no commits yet: an edit made after staging shows up
+        fullpath = os.path.join(self.repo.path, "test.txt")
+        with open(fullpath, "w") as f:
+            f.write("Hello, world!\n")
+        porcelain.add(self.repo.path, paths=["test.txt"])
+
+        # Rewrite the file on disk without re-adding it, so the edit is unstaged
+        with open(fullpath, "w") as f:
+            f.write("Hello, world!\nNew line\n")
+
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, staged=False, outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/test.txt b/test.txt", diff_output)
+        self.assertIn(b"+New line", diff_output)
+
+    def test_diff_with_commits_staged(self) -> None:
+        # Staged diff after an initial commit: the re-added edit must show up
+        fullpath = os.path.join(self.repo.path, "test.txt")
+        with open(fullpath, "w") as f:
+            f.write("Initial content\n")
+        porcelain.add(self.repo.path, paths=["test.txt"])
+        porcelain.commit(self.repo.path, message=b"Initial commit")
+
+        # Change the committed file and stage the new content
+        with open(fullpath, "w") as f:
+            f.write("Initial content\nModified\n")
+        porcelain.add(self.repo.path, paths=["test.txt"])
+
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, staged=True, outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/test.txt b/test.txt", diff_output)
+        self.assertIn(b"+Modified", diff_output)
+
+    def test_diff_with_commits_unstaged(self) -> None:
+        # Unstaged diff after an initial commit: the on-disk edit must show up
+        fullpath = os.path.join(self.repo.path, "test.txt")
+        with open(fullpath, "w") as f:
+            f.write("Initial content\n")
+        porcelain.add(self.repo.path, paths=["test.txt"])
+        porcelain.commit(self.repo.path, message=b"Initial commit")
+
+        # Change the committed file on disk but do not stage it
+        with open(fullpath, "w") as f:
+            f.write("Initial content\nModified\n")
+
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, staged=False, outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/test.txt b/test.txt", diff_output)
+        self.assertIn(b"+Modified", diff_output)
+
+    def test_diff_file_deletion(self) -> None:
+        # A committed file removed from disk is reported as a deleted file
+        fullpath = os.path.join(self.repo.path, "test.txt")
+        with open(fullpath, "w") as f:
+            f.write("Content to delete\n")
+        porcelain.add(self.repo.path, paths=["test.txt"])
+        porcelain.commit(self.repo.path, message=b"Add file")
+
+        # Remove the file from disk only; the deletion itself is not staged
+        os.unlink(fullpath)
+
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, staged=False, outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/test.txt b/test.txt", diff_output)
+        self.assertIn(b"deleted file mode", diff_output)
+        self.assertIn(b"-Content to delete", diff_output)
+
+    def test_diff_with_paths(self) -> None:
+        # A single entry in paths limits the diff output to that file
+        # Create and commit two files so that both have a committed baseline
+        fullpath1 = os.path.join(self.repo.path, "file1.txt")
+        fullpath2 = os.path.join(self.repo.path, "file2.txt")
+        with open(fullpath1, "w") as f:
+            f.write("File 1 content\n")
+        with open(fullpath2, "w") as f:
+            f.write("File 2 content\n")
+        porcelain.add(self.repo.path, paths=["file1.txt", "file2.txt"])
+        porcelain.commit(self.repo.path, message=b"Add two files")
+
+        # Modify both files on disk without staging them
+        with open(fullpath1, "w") as f:
+            f.write("File 1 modified\n")
+        with open(fullpath2, "w") as f:
+            f.write("File 2 modified\n")
+
+        # Request the diff for file1.txt only
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, paths=["file1.txt"], outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/file1.txt b/file1.txt", diff_output)
+        self.assertIn(b"-File 1 content", diff_output)
+        self.assertIn(b"+File 1 modified", diff_output)
+        # file2.txt was modified as well, but must be absent from the output
+        self.assertNotIn(b"file2.txt", diff_output)
+
+    def test_diff_with_paths_multiple(self) -> None:
+        # Several entries in paths: every listed file is diffed, others are not
+        # Create and commit three files
+        fullpath1 = os.path.join(self.repo.path, "file1.txt")
+        fullpath2 = os.path.join(self.repo.path, "file2.txt")
+        fullpath3 = os.path.join(self.repo.path, "file3.txt")
+        with open(fullpath1, "w") as f:
+            f.write("File 1 content\n")
+        with open(fullpath2, "w") as f:
+            f.write("File 2 content\n")
+        with open(fullpath3, "w") as f:
+            f.write("File 3 content\n")
+        porcelain.add(self.repo.path, paths=["file1.txt", "file2.txt", "file3.txt"])
+        porcelain.commit(self.repo.path, message=b"Add three files")
+
+        # Modify all three files on disk without staging them
+        with open(fullpath1, "w") as f:
+            f.write("File 1 modified\n")
+        with open(fullpath2, "w") as f:
+            f.write("File 2 modified\n")
+        with open(fullpath3, "w") as f:
+            f.write("File 3 modified\n")
+
+        # Request the diff for file1.txt and file3.txt, leaving out file2.txt
+        outstream = BytesIO()
+        porcelain.diff(
+            self.repo.path, paths=["file1.txt", "file3.txt"], outstream=outstream
+        )
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/file1.txt b/file1.txt", diff_output)
+        self.assertIn(b"diff --git a/file3.txt b/file3.txt", diff_output)
+        # file2.txt was modified as well, but must be absent from the output
+        self.assertNotIn(b"file2.txt", diff_output)
+
+    def test_diff_with_paths_directory(self) -> None:
+        # A directory in paths selects the files below it and nothing else
+        # Create two files in subdir/ plus one file at the repository root
+        os.mkdir(os.path.join(self.repo.path, "subdir"))
+        fullpath1 = os.path.join(self.repo.path, "subdir", "file1.txt")
+        fullpath2 = os.path.join(self.repo.path, "subdir", "file2.txt")
+        fullpath3 = os.path.join(self.repo.path, "root.txt")
+        with open(fullpath1, "w") as f:
+            f.write("Subdir file 1\n")
+        with open(fullpath2, "w") as f:
+            f.write("Subdir file 2\n")
+        with open(fullpath3, "w") as f:
+            f.write("Root file\n")
+        porcelain.add(
+            self.repo.path, paths=["subdir/file1.txt", "subdir/file2.txt", "root.txt"]
+        )
+        porcelain.commit(self.repo.path, message=b"Add files in subdir")
+
+        # Modify all three files on disk without staging them
+        with open(fullpath1, "w") as f:
+            f.write("Subdir file 1 modified\n")
+        with open(fullpath2, "w") as f:
+            f.write("Subdir file 2 modified\n")
+        with open(fullpath3, "w") as f:
+            f.write("Root file modified\n")
+
+        # Request the diff for the "subdir" directory only
+        outstream = BytesIO()
+        porcelain.diff(self.repo.path, paths=["subdir"], outstream=outstream)
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"subdir/file1.txt", diff_output)
+        self.assertIn(b"subdir/file2.txt", diff_output)
+        # root.txt was modified as well, but lies outside subdir/
+        self.assertNotIn(b"root.txt", diff_output)
+
+    def test_diff_staged_with_paths(self) -> None:
+        # staged=True combined with paths: only the listed staged file is shown
+        # Create and commit two files so that both have a committed baseline
+        fullpath1 = os.path.join(self.repo.path, "file1.txt")
+        fullpath2 = os.path.join(self.repo.path, "file2.txt")
+        with open(fullpath1, "w") as f:
+            f.write("File 1 content\n")
+        with open(fullpath2, "w") as f:
+            f.write("File 2 content\n")
+        porcelain.add(self.repo.path, paths=["file1.txt", "file2.txt"])
+        porcelain.commit(self.repo.path, message=b"Add two files")
+
+        # Rewrite both files and stage the new content of each
+        with open(fullpath1, "w") as f:
+            f.write("File 1 staged\n")
+        with open(fullpath2, "w") as f:
+            f.write("File 2 staged\n")
+        porcelain.add(self.repo.path, paths=["file1.txt", "file2.txt"])
+
+        # Request the staged diff for file1.txt only
+        outstream = BytesIO()
+        porcelain.diff(
+            self.repo.path, staged=True, paths=["file1.txt"], outstream=outstream
+        )
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/file1.txt b/file1.txt", diff_output)
+        self.assertIn(b"-File 1 content", diff_output)
+        self.assertIn(b"+File 1 staged", diff_output)
+        # file2.txt was staged as well, but must be absent from the output
+        self.assertNotIn(b"file2.txt", diff_output)
+
+    def test_diff_with_commit_and_paths(self) -> None:
+        # commit=... combined with paths: diff one file against an older commit
+        # Create two files and record them in the first commit
+        fullpath1 = os.path.join(self.repo.path, "file1.txt")
+        fullpath2 = os.path.join(self.repo.path, "file2.txt")
+        with open(fullpath1, "w") as f:
+            f.write("Initial content 1\n")
+        with open(fullpath2, "w") as f:
+            f.write("Initial content 2\n")
+        porcelain.add(self.repo.path, paths=["file1.txt", "file2.txt"])
+        first_commit = porcelain.commit(self.repo.path, message=b"Initial commit")
+
+        # Rewrite both files and record them in a second commit
+        with open(fullpath1, "w") as f:
+            f.write("Second content 1\n")
+        with open(fullpath2, "w") as f:
+            f.write("Second content 2\n")
+        porcelain.add(self.repo.path, paths=["file1.txt", "file2.txt"])
+        porcelain.commit(self.repo.path, message=b"Second commit")
+
+        # Rewrite both files again on disk, this time without staging
+        with open(fullpath1, "w") as f:
+            f.write("Working content 1\n")
+        with open(fullpath2, "w") as f:
+            f.write("Working content 2\n")
+
+        # Diff against the first commit, restricted to file1.txt
+        outstream = BytesIO()
+        porcelain.diff(
+            self.repo.path,
+            commit=first_commit,
+            paths=["file1.txt"],
+            outstream=outstream,
+        )
+
+        diff_output = outstream.getvalue()
+        self.assertIn(b"diff --git a/file1.txt b/file1.txt", diff_output)
+        self.assertIn(b"-Initial content 1", diff_output)
+        self.assertIn(b"+Working content 1", diff_output)
+        # file2.txt was modified as well, but must be absent from the output
+        self.assertNotIn(b"file2.txt", diff_output)
+
+
 class CommitTreeTests(PorcelainTestCase):
     def test_simple(self) -> None:
         c1, c2, c3 = build_commit_graph(