Просмотр исходного кода

Fix TreeEntry typing to not allow None values

Fix test failures after TreeEntry typing changes

* Update test_is_tree to test _is_tree(None) instead of TreeEntry(None, None, None)
* Update test_merge_entries to expect TreeEntry objects and None instead of (None, None, None) tuples
* All diff_tree tests now pass (70/70)
Jelmer Vernooij 4 месяцев назад
Родитель
Сommit
281b1de41b

+ 8 - 9
crates/diff-tree/src/lib.rs

@@ -84,6 +84,10 @@ fn _count_blocks(py: Python, obj: &Bound<PyAny>) -> PyResult<PyObject> {
 
 
 #[pyfunction]
 #[pyfunction]
 fn _is_tree(_py: Python, entry: &Bound<PyAny>) -> PyResult<bool> {
 fn _is_tree(_py: Python, entry: &Bound<PyAny>) -> PyResult<bool> {
+    if entry.is_none() {
+        return Ok(false);
+    }
+
     let mode = entry.getattr("mode")?;
     let mode = entry.getattr("mode")?;
 
 
     if mode.is_none() {
     if mode.is_none() {
@@ -142,9 +146,6 @@ fn _merge_entries(
     let entries1 = tree_entries(path, tree1, py)?;
     let entries1 = tree_entries(path, tree1, py)?;
     let entries2 = tree_entries(path, tree2, py)?;
     let entries2 = tree_entries(path, tree2, py)?;
 
 
-    let pym = py.import("dulwich.diff_tree")?;
-    let null_entry = pym.getattr("_NULL_ENTRY")?.unbind();
-
     let mut result = Vec::new();
     let mut result = Vec::new();
 
 
     let mut i1 = 0;
     let mut i1 = 0;
@@ -153,8 +154,8 @@ fn _merge_entries(
         let cmp = entry_path_cmp(entries1[i1].bind(py), entries2[i2].bind(py))?;
         let cmp = entry_path_cmp(entries1[i1].bind(py), entries2[i2].bind(py))?;
         let (e1, e2) = match cmp {
         let (e1, e2) = match cmp {
             Ordering::Equal => (entries1[i1].clone_ref(py), entries2[i2].clone_ref(py)),
             Ordering::Equal => (entries1[i1].clone_ref(py), entries2[i2].clone_ref(py)),
-            Ordering::Less => (entries1[i1].clone_ref(py), null_entry.clone_ref(py)),
-            Ordering::Greater => (null_entry.clone_ref(py), entries2[i2].clone_ref(py)),
+            Ordering::Less => (entries1[i1].clone_ref(py), py.None()),
+            Ordering::Greater => (py.None(), entries2[i2].clone_ref(py)),
         };
         };
         let pair = PyTuple::new(py, &[e1, e2]).unwrap();
         let pair = PyTuple::new(py, &[e1, e2]).unwrap();
         result.push(pair);
         result.push(pair);
@@ -173,15 +174,13 @@ fn _merge_entries(
     }
     }
 
 
     while i1 < entries1.len() {
     while i1 < entries1.len() {
-        let pair =
-            PyTuple::new(py, &[entries1[i1].clone_ref(py), null_entry.clone_ref(py)]).unwrap();
+        let pair = PyTuple::new(py, &[entries1[i1].clone_ref(py), py.None()]).unwrap();
         result.push(pair);
         result.push(pair);
         i1 += 1;
         i1 += 1;
     }
     }
 
 
     while i2 < entries2.len() {
     while i2 < entries2.len() {
-        let pair =
-            PyTuple::new(py, &[null_entry.clone_ref(py), entries2[i2].clone_ref(py)]).unwrap();
+        let pair = PyTuple::new(py, &[py.None(), entries2[i2].clone_ref(py)]).unwrap();
         result.push(pair);
         result.push(pair);
         i2 += 1;
         i2 += 1;
     }
     }

+ 1 - 1
crates/objects/src/lib.rs

@@ -28,7 +28,7 @@ use pyo3::types::{PyBytes, PyDict};
 import_exception!(dulwich.errors, ObjectFormatException);
 import_exception!(dulwich.errors, ObjectFormatException);
 
 
 const S_IFDIR: u32 = 0o40000;
 const S_IFDIR: u32 = 0o40000;
-const S_IFMT: u32 = 0o170000;  // File type mask
+const S_IFMT: u32 = 0o170000; // File type mask
 
 
 #[inline]
 #[inline]
 fn bytehex(byte: u8) -> u8 {
 fn bytehex(byte: u8) -> u8 {

+ 3 - 2
dulwich/annotate.py

@@ -102,8 +102,9 @@ def annotate_lines(
             else:
             else:
                 changes = [tree_change]
                 changes = [tree_change]
             for change in changes:
             for change in changes:
-                if change.new.path == path:
-                    path = change.old.path
+                if change.new is not None and change.new.path == path:
+                    if change.old is not None:
+                        path = change.old.path
                     revs.append((log_entry.commit, change.new))
                     revs.append((log_entry.commit, change.new))
                     break
                     break
 
 

+ 87 - 53
dulwich/diff_tree.py

@@ -22,11 +22,11 @@
 """Utilities for diffing files and trees."""
 """Utilities for diffing files and trees."""
 
 
 import stat
 import stat
-from collections import defaultdict, namedtuple
+from collections import defaultdict
 from collections.abc import Iterator
 from collections.abc import Iterator
 from io import BytesIO
 from io import BytesIO
 from itertools import chain
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, NamedTuple, Optional, TypeVar
 
 
 from .object_store import BaseObjectStore
 from .object_store import BaseObjectStore
 from .objects import S_ISGITLINK, ObjectID, ShaFile, Tree, TreeEntry
 from .objects import S_ISGITLINK, ObjectID, ShaFile, Tree, TreeEntry
@@ -41,7 +41,7 @@ CHANGE_UNCHANGED = "unchanged"
 
 
 RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)
 RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)
 
 
-_NULL_ENTRY = TreeEntry(None, None, None)
+# _NULL_ENTRY removed - using None instead
 
 
 _MAX_SCORE = 100
 _MAX_SCORE = 100
 RENAME_THRESHOLD = 60
 RENAME_THRESHOLD = 60
@@ -49,9 +49,13 @@ MAX_FILES = 200
 REWRITE_THRESHOLD: Optional[int] = None
 REWRITE_THRESHOLD: Optional[int] = None
 
 
 
 
-class TreeChange(namedtuple("TreeChange", ["type", "old", "new"])):
+class TreeChange(NamedTuple):
     """Named tuple a single change between two trees."""
     """Named tuple a single change between two trees."""
 
 
+    type: str
+    old: Optional[TreeEntry]
+    new: Optional[TreeEntry]
+
     @classmethod
     @classmethod
     def add(cls, new: TreeEntry) -> "TreeChange":
     def add(cls, new: TreeEntry) -> "TreeChange":
         """Create a TreeChange for an added entry.
         """Create a TreeChange for an added entry.
@@ -62,7 +66,7 @@ class TreeChange(namedtuple("TreeChange", ["type", "old", "new"])):
         Returns:
         Returns:
           TreeChange instance
           TreeChange instance
         """
         """
-        return cls(CHANGE_ADD, _NULL_ENTRY, new)
+        return cls(CHANGE_ADD, None, new)
 
 
     @classmethod
     @classmethod
     def delete(cls, old: TreeEntry) -> "TreeChange":
     def delete(cls, old: TreeEntry) -> "TreeChange":
@@ -74,7 +78,7 @@ class TreeChange(namedtuple("TreeChange", ["type", "old", "new"])):
         Returns:
         Returns:
           TreeChange instance
           TreeChange instance
         """
         """
-        return cls(CHANGE_DELETE, old, _NULL_ENTRY)
+        return cls(CHANGE_DELETE, old, None)
 
 
 
 
 def _tree_entries(path: bytes, tree: Tree) -> list[TreeEntry]:
 def _tree_entries(path: bytes, tree: Tree) -> list[TreeEntry]:
@@ -88,7 +92,7 @@ def _tree_entries(path: bytes, tree: Tree) -> list[TreeEntry]:
 
 
 def _merge_entries(
 def _merge_entries(
     path: bytes, tree1: Tree, tree2: Tree
     path: bytes, tree1: Tree, tree2: Tree
-) -> list[tuple[TreeEntry, TreeEntry]]:
+) -> list[tuple[Optional[TreeEntry], Optional[TreeEntry]]]:
     """Merge the entries of two trees.
     """Merge the entries of two trees.
 
 
     Args:
     Args:
@@ -99,8 +103,7 @@ def _merge_entries(
     Returns:
     Returns:
       A list of pairs of TreeEntry objects for each pair of entries in
       A list of pairs of TreeEntry objects for each pair of entries in
         the trees. If an entry exists in one tree but not the other, the other
         the trees. If an entry exists in one tree but not the other, the other
-        entry will have all attributes set to None. If neither entry's path is
-        None, they are guaranteed to match.
+        entry will be None. If both entries exist, they are guaranteed to match.
     """
     """
     entries1 = _tree_entries(path, tree1)
     entries1 = _tree_entries(path, tree1)
     entries2 = _tree_entries(path, tree2)
     entries2 = _tree_entries(path, tree2)
@@ -108,32 +111,31 @@ def _merge_entries(
     len1 = len(entries1)
     len1 = len(entries1)
     len2 = len(entries2)
     len2 = len(entries2)
 
 
-    result = []
+    result: list[tuple[Optional[TreeEntry], Optional[TreeEntry]]] = []
     while i1 < len1 and i2 < len2:
     while i1 < len1 and i2 < len2:
         entry1 = entries1[i1]
         entry1 = entries1[i1]
         entry2 = entries2[i2]
         entry2 = entries2[i2]
         if entry1.path < entry2.path:
         if entry1.path < entry2.path:
-            result.append((entry1, _NULL_ENTRY))
+            result.append((entry1, None))
             i1 += 1
             i1 += 1
         elif entry1.path > entry2.path:
         elif entry1.path > entry2.path:
-            result.append((_NULL_ENTRY, entry2))
+            result.append((None, entry2))
             i2 += 1
             i2 += 1
         else:
         else:
             result.append((entry1, entry2))
             result.append((entry1, entry2))
             i1 += 1
             i1 += 1
             i2 += 1
             i2 += 1
     for i in range(i1, len1):
     for i in range(i1, len1):
-        result.append((entries1[i], _NULL_ENTRY))
+        result.append((entries1[i], None))
     for i in range(i2, len2):
     for i in range(i2, len2):
-        result.append((_NULL_ENTRY, entries2[i]))
+        result.append((None, entries2[i]))
     return result
     return result
 
 
 
 
-def _is_tree(entry: TreeEntry) -> bool:
-    mode = entry.mode
-    if mode is None:
+def _is_tree(entry: Optional[TreeEntry]) -> bool:
+    if entry is None:
         return False
         return False
-    return stat.S_ISDIR(mode)
+    return stat.S_ISDIR(entry.mode)
 
 
 
 
 def walk_trees(
 def walk_trees(
@@ -142,7 +144,7 @@ def walk_trees(
     tree2_id: Optional[ObjectID],
     tree2_id: Optional[ObjectID],
     prune_identical: bool = False,
     prune_identical: bool = False,
     paths: Optional[list[bytes]] = None,
     paths: Optional[list[bytes]] = None,
-) -> Iterator[tuple[TreeEntry, TreeEntry]]:
+) -> Iterator[tuple[Optional[TreeEntry], Optional[TreeEntry]]]:
     """Recursively walk all the entries of two trees.
     """Recursively walk all the entries of two trees.
 
 
     Iteration is depth-first pre-order, as in e.g. os.walk.
     Iteration is depth-first pre-order, as in e.g. os.walk.
@@ -157,15 +159,14 @@ def walk_trees(
     Returns:
     Returns:
       Iterator over Pairs of TreeEntry objects for each pair of entries
       Iterator over Pairs of TreeEntry objects for each pair of entries
         in the trees and their subtrees recursively. If an entry exists in one
         in the trees and their subtrees recursively. If an entry exists in one
-        tree but not the other, the other entry will have all attributes set
-        to None. If neither entry's path is None, they are guaranteed to
-        match.
+        tree but not the other, the other entry will be None. If both entries
+        exist, they are guaranteed to match.
     """
     """
     # This could be fairly easily generalized to >2 trees if we find a use
     # This could be fairly easily generalized to >2 trees if we find a use
     # case.
     # case.
-    mode1 = (tree1_id and stat.S_IFDIR) or None
-    mode2 = (tree2_id and stat.S_IFDIR) or None
-    todo = [(TreeEntry(b"", mode1, tree1_id), TreeEntry(b"", mode2, tree2_id))]
+    entry1 = TreeEntry(b"", stat.S_IFDIR, tree1_id) if tree1_id else None
+    entry2 = TreeEntry(b"", stat.S_IFDIR, tree2_id) if tree2_id else None
+    todo: list[tuple[Optional[TreeEntry], Optional[TreeEntry]]] = [(entry1, entry2)]
     while todo:
     while todo:
         entry1, entry2 = todo.pop()
         entry1, entry2 = todo.pop()
         is_tree1 = _is_tree(entry1)
         is_tree1 = _is_tree(entry1)
@@ -173,9 +174,13 @@ def walk_trees(
         if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
         if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
             continue
             continue
 
 
-        tree1 = (is_tree1 and store[entry1.sha]) or None
-        tree2 = (is_tree2 and store[entry2.sha]) or None
-        path = entry1.path or entry2.path
+        tree1 = (is_tree1 and entry1 and store[entry1.sha]) or None
+        tree2 = (is_tree2 and entry2 and store[entry2.sha]) or None
+        path = (
+            (entry1.path if entry1 else None)
+            or (entry2.path if entry2 else None)
+            or b""
+        )
 
 
         # If we have path filters, check if we should process this tree
         # If we have path filters, check if we should process this tree
         if paths is not None and (is_tree1 or is_tree2):
         if paths is not None and (is_tree1 or is_tree2):
@@ -236,9 +241,9 @@ def walk_trees(
                     break
                     break
 
 
 
 
-def _skip_tree(entry: TreeEntry, include_trees: bool) -> TreeEntry:
-    if entry.mode is None or (not include_trees and stat.S_ISDIR(entry.mode)):
-        return _NULL_ENTRY
+def _skip_tree(entry: Optional[TreeEntry], include_trees: bool) -> Optional[TreeEntry]:
+    if entry is None or (not include_trees and stat.S_ISDIR(entry.mode)):
+        return None
     return entry
     return entry
 
 
 
 
@@ -290,22 +295,22 @@ def tree_changes(
         entry1 = _skip_tree(entry1, include_trees)
         entry1 = _skip_tree(entry1, include_trees)
         entry2 = _skip_tree(entry2, include_trees)
         entry2 = _skip_tree(entry2, include_trees)
 
 
-        if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
+        if entry1 is not None and entry2 is not None:
             if (
             if (
                 stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode)
                 stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode)
                 and not change_type_same
                 and not change_type_same
             ):
             ):
                 # File type changed: report as delete/add.
                 # File type changed: report as delete/add.
                 yield TreeChange.delete(entry1)
                 yield TreeChange.delete(entry1)
-                entry1 = _NULL_ENTRY
+                entry1 = None
                 change_type = CHANGE_ADD
                 change_type = CHANGE_ADD
             elif entry1 == entry2:
             elif entry1 == entry2:
                 change_type = CHANGE_UNCHANGED
                 change_type = CHANGE_UNCHANGED
             else:
             else:
                 change_type = CHANGE_MODIFY
                 change_type = CHANGE_MODIFY
-        elif entry1 != _NULL_ENTRY:
+        elif entry1 is not None:
             change_type = CHANGE_DELETE
             change_type = CHANGE_DELETE
-        elif entry2 != _NULL_ENTRY:
+        elif entry2 is not None:
             change_type = CHANGE_ADD
             change_type = CHANGE_ADD
         else:
         else:
             # Both were None because at least one was a tree.
             # Both were None because at least one was a tree.
@@ -359,7 +364,7 @@ def tree_changes_for_merge(
         for t in parent_tree_ids
         for t in parent_tree_ids
     ]
     ]
     num_parents = len(parent_tree_ids)
     num_parents = len(parent_tree_ids)
-    changes_by_path: dict[str, list[Optional[TreeChange]]] = defaultdict(
+    changes_by_path: dict[bytes, list[Optional[TreeChange]]] = defaultdict(
         lambda: [None] * num_parents
         lambda: [None] * num_parents
     )
     )
 
 
@@ -367,13 +372,15 @@ def tree_changes_for_merge(
     for i, parent_changes in enumerate(all_parent_changes):
     for i, parent_changes in enumerate(all_parent_changes):
         for change in parent_changes:
         for change in parent_changes:
             if change.type == CHANGE_DELETE:
             if change.type == CHANGE_DELETE:
+                assert change.old is not None
                 path = change.old.path
                 path = change.old.path
             else:
             else:
+                assert change.new is not None
                 path = change.new.path
                 path = change.new.path
             changes_by_path[path][i] = change
             changes_by_path[path][i] = change
 
 
     def old_sha(c: TreeChange) -> Optional[ObjectID]:
     def old_sha(c: TreeChange) -> Optional[ObjectID]:
-        return c.old.sha
+        return c.old.sha if c.old is not None else None
 
 
     def change_type(c: TreeChange) -> str:
     def change_type(c: TreeChange) -> str:
         return c.type
         return c.type
@@ -490,12 +497,13 @@ def _similarity_score(
 
 
 def _tree_change_key(entry: TreeChange) -> tuple[bytes, bytes]:
 def _tree_change_key(entry: TreeChange) -> tuple[bytes, bytes]:
     # Sort by old path then new path. If only one exists, use it for both keys.
     # Sort by old path then new path. If only one exists, use it for both keys.
-    path1 = entry.old.path
-    path2 = entry.new.path
+    path1 = entry.old.path if entry.old is not None else None
+    path2 = entry.new.path if entry.new is not None else None
     if path1 is None:
     if path1 is None:
         path1 = path2
         path1 = path2
     if path2 is None:
     if path2 is None:
         path2 = path1
         path2 = path1
+    assert path1 is not None and path2 is not None
     return (path1, path2)
     return (path1, path2)
 
 
 
 
@@ -545,11 +553,10 @@ class RenameDetector:
         self._changes = []
         self._changes = []
 
 
     def _should_split(self, change: TreeChange) -> bool:
     def _should_split(self, change: TreeChange) -> bool:
-        if (
-            self._rewrite_threshold is None
-            or change.type != CHANGE_MODIFY
-            or change.old.sha == change.new.sha
-        ):
+        if self._rewrite_threshold is None or change.type != CHANGE_MODIFY:
+            return False
+        assert change.old is not None and change.new is not None
+        if change.old.sha == change.new.sha:
             return False
             return False
         old_obj = self._store[change.old.sha]
         old_obj = self._store[change.old.sha]
         new_obj = self._store[change.new.sha]
         new_obj = self._store[change.new.sha]
@@ -561,6 +568,7 @@ class RenameDetector:
         elif change.type == CHANGE_DELETE:
         elif change.type == CHANGE_DELETE:
             self._deletes.append(change)
             self._deletes.append(change)
         elif self._should_split(change):
         elif self._should_split(change):
+            assert change.old is not None and change.new is not None
             self._deletes.append(TreeChange.delete(change.old))
             self._deletes.append(TreeChange.delete(change.old))
             self._adds.append(TreeChange.add(change.new))
             self._adds.append(TreeChange.add(change.new))
         elif (
         elif (
@@ -588,18 +596,28 @@ class RenameDetector:
             self._add_change(change)
             self._add_change(change)
 
 
     def _prune(self, add_paths: set[bytes], delete_paths: set[bytes]) -> None:
     def _prune(self, add_paths: set[bytes], delete_paths: set[bytes]) -> None:
-        self._adds = [a for a in self._adds if a.new.path not in add_paths]
-        self._deletes = [d for d in self._deletes if d.old.path not in delete_paths]
+        def check_add(a: TreeChange) -> bool:
+            assert a.new is not None
+            return a.new.path not in add_paths
+
+        def check_delete(d: TreeChange) -> bool:
+            assert d.old is not None
+            return d.old.path not in delete_paths
+
+        self._adds = [a for a in self._adds if check_add(a)]
+        self._deletes = [d for d in self._deletes if check_delete(d)]
 
 
     def _find_exact_renames(self) -> None:
     def _find_exact_renames(self) -> None:
         add_map = defaultdict(list)
         add_map = defaultdict(list)
         for add in self._adds:
         for add in self._adds:
+            assert add.new is not None
             add_map[add.new.sha].append(add.new)
             add_map[add.new.sha].append(add.new)
         delete_map = defaultdict(list)
         delete_map = defaultdict(list)
         for delete in self._deletes:
         for delete in self._deletes:
             # Keep track of whether the delete was actually marked as a delete.
             # Keep track of whether the delete was actually marked as a delete.
             # If not, it needs to be marked as a copy.
             # If not, it needs to be marked as a copy.
             is_delete = delete.type == CHANGE_DELETE
             is_delete = delete.type == CHANGE_DELETE
+            assert delete.old is not None
             delete_map[delete.old.sha].append((delete.old, is_delete))
             delete_map[delete.old.sha].append((delete.old, is_delete))
 
 
         add_paths = set()
         add_paths = set()
@@ -632,6 +650,7 @@ class RenameDetector:
     def _rename_type(
     def _rename_type(
         self, check_paths: bool, delete: TreeChange, add: TreeChange
         self, check_paths: bool, delete: TreeChange, add: TreeChange
     ) -> str:
     ) -> str:
+        assert delete.old is not None and add.new is not None
         if check_paths and delete.old.path == add.new.path:
         if check_paths and delete.old.path == add.new.path:
             # If the paths match, this must be a split modify, so make sure it
             # If the paths match, this must be a split modify, so make sure it
             # comes out as a modify.
             # comes out as a modify.
@@ -657,12 +676,14 @@ class RenameDetector:
         block_cache = {}
         block_cache = {}
         check_paths = self._rename_threshold is not None
         check_paths = self._rename_threshold is not None
         for delete in self._deletes:
         for delete in self._deletes:
+            assert delete.old is not None
             if S_ISGITLINK(delete.old.mode):
             if S_ISGITLINK(delete.old.mode):
                 continue  # Git links don't exist in this repo.
                 continue  # Git links don't exist in this repo.
             old_sha = delete.old.sha
             old_sha = delete.old.sha
             old_obj = self._store[old_sha]
             old_obj = self._store[old_sha]
             block_cache[old_sha] = _count_blocks(old_obj)
             block_cache[old_sha] = _count_blocks(old_obj)
             for add in self._adds:
             for add in self._adds:
+                assert add.new is not None
                 if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                 if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                     continue
                     continue
                 new_obj = self._store[add.new.sha]
                 new_obj = self._store[add.new.sha]
@@ -680,6 +701,7 @@ class RenameDetector:
         delete_paths = set()
         delete_paths = set()
         add_paths = set()
         add_paths = set()
         for _, change in self._candidates:
         for _, change in self._candidates:
+            assert change.old is not None and change.new is not None
             new_path = change.new.path
             new_path = change.new.path
             if new_path in add_paths:
             if new_path in add_paths:
                 continue
                 continue
@@ -701,17 +723,29 @@ class RenameDetector:
             return
             return
 
 
         modifies = {}
         modifies = {}
-        delete_map = {d.old.path: d for d in self._deletes}
+        delete_map = {}
+        for d in self._deletes:
+            assert d.old is not None
+            delete_map[d.old.path] = d
         for add in self._adds:
         for add in self._adds:
+            assert add.new is not None
             path = add.new.path
             path = add.new.path
             delete = delete_map.get(path)
             delete = delete_map.get(path)
-            if delete is not None and stat.S_IFMT(delete.old.mode) == stat.S_IFMT(
-                add.new.mode
-            ):
-                modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)
+            if delete is not None:
+                assert delete.old is not None
+                if stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode):
+                    modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)
+
+        def check_add_mod(a: TreeChange) -> bool:
+            assert a.new is not None
+            return a.new.path not in modifies
+
+        def check_delete_mod(d: TreeChange) -> bool:
+            assert d.old is not None
+            return d.old.path not in modifies
 
 
-        self._adds = [a for a in self._adds if a.new.path not in modifies]
-        self._deletes = [a for a in self._deletes if a.new.path not in modifies]
+        self._adds = [a for a in self._adds if check_add_mod(a)]
+        self._deletes = [d for d in self._deletes if check_delete_mod(d)]
         self._changes += modifies.values()
         self._changes += modifies.values()
 
 
     def _sorted_changes(self) -> list[TreeChange]:
     def _sorted_changes(self) -> list[TreeChange]:

+ 7 - 2
dulwich/index.py

@@ -57,6 +57,7 @@ from .objects import (
     Blob,
     Blob,
     ObjectID,
     ObjectID,
     Tree,
     Tree,
+    TreeEntry,
     hex_to_sha,
     hex_to_sha,
     sha_to_hex,
     sha_to_hex,
 )
 )
@@ -1929,7 +1930,7 @@ def _transition_to_submodule(
     path: bytes,
     path: bytes,
     full_path: bytes,
     full_path: bytes,
     current_stat: Optional[os.stat_result],
     current_stat: Optional[os.stat_result],
-    entry: IndexEntry,
+    entry: Union[IndexEntry, TreeEntry],
     index: Index,
     index: Index,
 ) -> None:
 ) -> None:
     """Transition any type to submodule."""
     """Transition any type to submodule."""
@@ -1953,7 +1954,7 @@ def _transition_to_file(
     path: bytes,
     path: bytes,
     full_path: bytes,
     full_path: bytes,
     current_stat: Optional[os.stat_result],
     current_stat: Optional[os.stat_result],
-    entry: IndexEntry,
+    entry: Union[IndexEntry, TreeEntry],
     index: Index,
     index: Index,
     honor_filemode: bool,
     honor_filemode: bool,
     symlink_fn: Optional[
     symlink_fn: Optional[
@@ -2275,6 +2276,7 @@ def update_working_tree(
     paths_becoming_dirs = set()
     paths_becoming_dirs = set()
     for change in changes:
     for change in changes:
         if change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY):
         if change.type in (CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY):
+            assert change.new is not None
             path = change.new.path
             path = change.new.path
             if b"/" in path:  # This is a file inside a directory
             if b"/" in path:  # This is a file inside a directory
                 # Check if any parent path exists as a file in the old tree or changes
                 # Check if any parent path exists as a file in the old tree or changes
@@ -2316,6 +2318,7 @@ def update_working_tree(
 
 
             if old_change:
             if old_change:
                 # Check if file has been modified
                 # Check if file has been modified
+                assert old_change.old is not None
                 file_matches = _check_file_matches(
                 file_matches = _check_file_matches(
                     repo.object_store,
                     repo.object_store,
                     full_path,
                     full_path,
@@ -2377,6 +2380,7 @@ def update_working_tree(
     for change in changes:
     for change in changes:
         if change.type in (CHANGE_DELETE, CHANGE_RENAME):
         if change.type in (CHANGE_DELETE, CHANGE_RENAME):
             # Remove file/directory
             # Remove file/directory
+            assert change.old is not None
             path = change.old.path
             path = change.old.path
             if path.startswith(b".git") or not validate_path(
             if path.startswith(b".git") or not validate_path(
                 path, validate_path_element
                 path, validate_path_element
@@ -2403,6 +2407,7 @@ def update_working_tree(
             CHANGE_RENAME,
             CHANGE_RENAME,
         ):
         ):
             # Add or modify file
             # Add or modify file
+            assert change.new is not None
             path = change.new.path
             path = change.new.path
             if path.startswith(b".git") or not validate_path(
             if path.startswith(b".git") or not validate_path(
                 path, validate_path_element
                 path, validate_path_element

+ 11 - 5
dulwich/object_store.py

@@ -338,10 +338,16 @@ class BaseObjectStore:
             rename_detector=rename_detector,
             rename_detector=rename_detector,
             paths=paths,
             paths=paths,
         ):
         ):
+            old_path = change.old.path if change.old is not None else None
+            new_path = change.new.path if change.new is not None else None
+            old_mode = change.old.mode if change.old is not None else None
+            new_mode = change.new.mode if change.new is not None else None
+            old_sha = change.old.sha if change.old is not None else None
+            new_sha = change.new.sha if change.new is not None else None
             yield (
             yield (
-                (change.old.path, change.new.path),
-                (change.old.mode, change.new.mode),
-                (change.old.sha, change.new.sha),
+                (old_path, new_path),
+                (old_mode, new_mode),
+                (old_sha, new_sha),
             )
             )
 
 
     def iter_tree_contents(
     def iter_tree_contents(
@@ -2715,7 +2721,7 @@ def iter_commit_contents(
         )
         )
     commit = obj
     commit = obj
     encoding = commit.encoding or "utf-8"
     encoding = commit.encoding or "utf-8"
-    include = (
+    include_bytes: list[bytes] = (
         [
         [
             path if isinstance(path, bytes) else str(path).encode(encoding)
             path if isinstance(path, bytes) else str(path).encode(encoding)
             for path in include
             for path in include
@@ -2724,7 +2730,7 @@ def iter_commit_contents(
         else [b""]
         else [b""]
     )
     )
 
 
-    for path in include:
+    for path in include_bytes:
         mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
         mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
         # Iterate all contained files if path points to a dir, otherwise just get that
         # Iterate all contained files if path points to a dir, otherwise just get that
         # single file
         # single file

+ 6 - 2
dulwich/objects.py

@@ -28,13 +28,13 @@ import posixpath
 import stat
 import stat
 import sys
 import sys
 import zlib
 import zlib
-from collections import namedtuple
 from collections.abc import Callable, Iterable, Iterator
 from collections.abc import Callable, Iterable, Iterator
 from hashlib import sha1
 from hashlib import sha1
 from io import BufferedIOBase, BytesIO
 from io import BufferedIOBase, BytesIO
 from typing import (
 from typing import (
     IO,
     IO,
     TYPE_CHECKING,
     TYPE_CHECKING,
+    NamedTuple,
     Optional,
     Optional,
     Union,
     Union,
 )
 )
@@ -1109,9 +1109,13 @@ class Tag(ShaFile):
                 raise gpg.errors.MissingSignatures(result, keys, results=(data, result))
                 raise gpg.errors.MissingSignatures(result, keys, results=(data, result))
 
 
 
 
-class TreeEntry(namedtuple("TreeEntry", ["path", "mode", "sha"])):
+class TreeEntry(NamedTuple):
     """Named tuple encapsulating a single tree entry."""
     """Named tuple encapsulating a single tree entry."""
 
 
+    path: bytes
+    mode: int
+    sha: bytes
+
     def in_path(self, path: bytes) -> "TreeEntry":
     def in_path(self, path: bytes) -> "TreeEntry":
         """Return a copy of this entry with the given path prepended."""
         """Return a copy of this entry with the given path prepended."""
         if not isinstance(self.path, bytes):
         if not isinstance(self.path, bytes):

+ 21 - 6
dulwich/porcelain.py

@@ -1421,25 +1421,39 @@ def print_name_status(changes: Iterator[TreeChange]) -> Iterator[str]:
         if isinstance(change, list):
         if isinstance(change, list):
             change = change[0]
             change = change[0]
         if change.type == CHANGE_ADD:
         if change.type == CHANGE_ADD:
+            assert change.new is not None
             path1 = change.new.path
             path1 = change.new.path
-            path2 = ""
+            path2 = b""
             kind = "A"
             kind = "A"
         elif change.type == CHANGE_DELETE:
         elif change.type == CHANGE_DELETE:
+            assert change.old is not None
             path1 = change.old.path
             path1 = change.old.path
-            path2 = ""
+            path2 = b""
             kind = "D"
             kind = "D"
         elif change.type == CHANGE_MODIFY:
         elif change.type == CHANGE_MODIFY:
+            assert change.new is not None
             path1 = change.new.path
             path1 = change.new.path
-            path2 = ""
+            path2 = b""
             kind = "M"
             kind = "M"
         elif change.type in RENAME_CHANGE_TYPES:
         elif change.type in RENAME_CHANGE_TYPES:
+            assert change.old is not None and change.new is not None
             path1 = change.old.path
             path1 = change.old.path
             path2 = change.new.path
             path2 = change.new.path
             if change.type == CHANGE_RENAME:
             if change.type == CHANGE_RENAME:
                 kind = "R"
                 kind = "R"
             elif change.type == CHANGE_COPY:
             elif change.type == CHANGE_COPY:
                 kind = "C"
                 kind = "C"
-        yield "%-8s%-20s%-20s" % (kind, path1, path2)  # noqa: UP031
+        path1_str = (
+            path1.decode("utf-8", errors="replace")
+            if isinstance(path1, bytes)
+            else path1
+        )
+        path2_str = (
+            path2.decode("utf-8", errors="replace")
+            if isinstance(path2, bytes)
+            else path2
+        )
+        yield f"{kind:<8}{path1_str:<20}{path2_str:<20}"
 
 
 
 
 def log(
 def log(
@@ -3519,9 +3533,10 @@ def ls_tree(
             if base:
             if base:
                 name = posixpath.join(base, name)
                 name = posixpath.join(base, name)
             if name_only:
             if name_only:
-                outstream.write(name + b"\n")
+                outstream.write(name.decode("utf-8", errors="replace") + "\n")
             else:
             else:
-                outstream.write(pretty_format_tree_entry(name, mode, sha))
+                formatted = pretty_format_tree_entry(name, mode, sha)
+                outstream.write(formatted)
             if stat.S_ISDIR(mode) and recursive:
             if stat.S_ISDIR(mode) and recursive:
                 list_tree(store, sha, name)
                 list_tree(store, sha, name)
 
 

+ 23 - 11
dulwich/walk.py

@@ -360,10 +360,11 @@ class Walker:
         if not change:
         if not change:
             return False
             return False
 
 
-        old_path = change.old.path
-        new_path = change.new.path
+        old_path = change.old.path if change.old is not None else None
+        new_path = change.new.path if change.new is not None else None
         if self._path_matches(new_path):
         if self._path_matches(new_path):
             if self.follow and change.type in RENAME_CHANGE_TYPES:
             if self.follow and change.type in RENAME_CHANGE_TYPES:
+                assert old_path is not None and new_path is not None
                 self.paths.add(old_path)
                 self.paths.add(old_path)
                 self.paths.remove(new_path)
                 self.paths.remove(new_path)
             return True
             return True
@@ -391,12 +392,19 @@ class Walker:
             return True
             return True
 
 
         if len(self.get_parents(commit)) > 1:
         if len(self.get_parents(commit)) > 1:
-            for path_changes in entry.changes():
+            changes_result = entry.changes()
+            # For merge commits, changes() returns list[list[TreeChange]]
+            assert isinstance(changes_result, list)
+            for path_changes in changes_result:
                 # For merge commits, only include changes with conflicts for
                 # For merge commits, only include changes with conflicts for
                 # this path. Since a rename conflict may include different
                 # this path. Since a rename conflict may include different
                 # old.paths, we have to check all of them.
                 # old.paths, we have to check all of them.
-                for change in path_changes:
-                    if self._change_matches(change):
+                if isinstance(path_changes, list):
+                    for change in path_changes:
+                        if change is not None and self._change_matches(change):
+                            return True
+                elif path_changes is not None:
+                    if self._change_matches(path_changes):
                         return True
                         return True
         else:
         else:
             changes = entry.changes()
             changes = entry.changes()
@@ -404,16 +412,20 @@ class Walker:
             if changes and isinstance(changes[0], list):
             if changes and isinstance(changes[0], list):
                 # It's list[list[TreeChange]], flatten it
                 # It's list[list[TreeChange]], flatten it
                 for change_list in changes:
                 for change_list in changes:
-                    for change in change_list:
-                        if self._change_matches(change):
-                            return True
+                    if isinstance(change_list, list):
+                        for item in change_list:
+                            if item is not None and self._change_matches(item):
+                                return True
             else:
             else:
                 # It's list[TreeChange]
                 # It's list[TreeChange]
                 from .diff_tree import TreeChange
                 from .diff_tree import TreeChange
 
 
-                for change in changes:
-                    if isinstance(change, TreeChange) and self._change_matches(change):
-                        return True
+                if isinstance(changes, list):
+                    for entry_item in changes:
+                        if isinstance(entry_item, TreeChange) and self._change_matches(
+                            entry_item
+                        ):
+                            return True
         return None
         return None
 
 
     def _next(self) -> Optional[WalkEntry]:
     def _next(self) -> Optional[WalkEntry]:

+ 19 - 13
tests/test_diff_tree.py

@@ -93,41 +93,47 @@ class TreeChangesTest(DiffTestCase):
         self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
         self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
         self.assertEqual(
         self.assertEqual(
             [
             [
-                ((None, None, None), (b"a", 0o100644, blob_a1.id)),
-                ((None, None, None), (b"b", 0o100755, blob_b1.id)),
+                (None, TreeEntry(b"a", 0o100644, blob_a1.id)),
+                (None, TreeEntry(b"b", 0o100755, blob_b1.id)),
             ],
             ],
             merge_entries(b"", self.empty_tree, tree1),
             merge_entries(b"", self.empty_tree, tree1),
         )
         )
         self.assertEqual(
         self.assertEqual(
             [
             [
-                ((None, None, None), (b"x/a", 0o100644, blob_a1.id)),
-                ((None, None, None), (b"x/b", 0o100755, blob_b1.id)),
+                (None, TreeEntry(b"x/a", 0o100644, blob_a1.id)),
+                (None, TreeEntry(b"x/b", 0o100755, blob_b1.id)),
             ],
             ],
             merge_entries(b"x", self.empty_tree, tree1),
             merge_entries(b"x", self.empty_tree, tree1),
         )
         )
 
 
         self.assertEqual(
         self.assertEqual(
             [
             [
-                ((b"a", 0o100644, blob_a2.id), (None, None, None)),
-                ((b"c", 0o100755, blob_c2.id), (None, None, None)),
+                (TreeEntry(b"a", 0o100644, blob_a2.id), None),
+                (TreeEntry(b"c", 0o100755, blob_c2.id), None),
             ],
             ],
             merge_entries(b"", tree2, self.empty_tree),
             merge_entries(b"", tree2, self.empty_tree),
         )
         )
 
 
         self.assertEqual(
         self.assertEqual(
             [
             [
-                ((b"a", 0o100644, blob_a1.id), (b"a", 0o100644, blob_a2.id)),
-                ((b"b", 0o100755, blob_b1.id), (None, None, None)),
-                ((None, None, None), (b"c", 0o100755, blob_c2.id)),
+                (
+                    TreeEntry(b"a", 0o100644, blob_a1.id),
+                    TreeEntry(b"a", 0o100644, blob_a2.id),
+                ),
+                (TreeEntry(b"b", 0o100755, blob_b1.id), None),
+                (None, TreeEntry(b"c", 0o100755, blob_c2.id)),
             ],
             ],
             merge_entries(b"", tree1, tree2),
             merge_entries(b"", tree1, tree2),
         )
         )
 
 
         self.assertEqual(
         self.assertEqual(
             [
             [
-                ((b"a", 0o100644, blob_a2.id), (b"a", 0o100644, blob_a1.id)),
-                ((None, None, None), (b"b", 0o100755, blob_b1.id)),
-                ((b"c", 0o100755, blob_c2.id), (None, None, None)),
+                (
+                    TreeEntry(b"a", 0o100644, blob_a2.id),
+                    TreeEntry(b"a", 0o100644, blob_a1.id),
+                ),
+                (None, TreeEntry(b"b", 0o100755, blob_b1.id)),
+                (TreeEntry(b"c", 0o100755, blob_c2.id), None),
             ],
             ],
             merge_entries(b"", tree2, tree1),
             merge_entries(b"", tree2, tree1),
         )
         )
@@ -142,7 +148,7 @@ class TreeChangesTest(DiffTestCase):
     )
     )
 
 
     def _do_test_is_tree(self, is_tree) -> None:
     def _do_test_is_tree(self, is_tree) -> None:
-        self.assertFalse(is_tree(TreeEntry(None, None, None)))
+        self.assertFalse(is_tree(None))
         self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
         self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
         self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
         self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
         self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
         self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))