|
@@ -47,6 +47,30 @@ from .objects import S_ISGITLINK, Blob, Commit
|
|
|
|
|
|
|
|
FIRST_FEW_BYTES = 8000
|
|
FIRST_FEW_BYTES = 8000
|
|
|
|
|
|
|
|
|
|
+DEFAULT_DIFF_ALGORITHM = "myers"
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+class DiffAlgorithmNotAvailable(Exception):
|
|
|
|
|
+ """Raised when a requested diff algorithm is not available."""
|
|
|
|
|
+
|
|
|
|
|
+ def __init__(self, algorithm: str, install_hint: str = "") -> None:
|
|
|
|
|
+ """Initialize exception.
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ algorithm: Name of the unavailable algorithm
|
|
|
|
|
+ install_hint: Optional installation hint
|
|
|
|
|
+ """
|
|
|
|
|
+ self.algorithm = algorithm
|
|
|
|
|
+ self.install_hint = install_hint
|
|
|
|
|
+ if install_hint:
|
|
|
|
|
+ super().__init__(
|
|
|
|
|
+ f"Diff algorithm '{algorithm}' requested but not available. {install_hint}"
|
|
|
|
|
+ )
|
|
|
|
|
+ else:
|
|
|
|
|
+ super().__init__(
|
|
|
|
|
+ f"Diff algorithm '{algorithm}' requested but not available."
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
|
|
|
def write_commit_patch(
|
|
def write_commit_patch(
|
|
|
f: IO[bytes],
|
|
f: IO[bytes],
|
|
@@ -191,6 +215,107 @@ def unified_diff(
|
|
|
yield b"+" + line
|
|
yield b"+" + line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+def _get_sequence_matcher(algorithm: str, a: list[bytes], b: list[bytes]):
|
|
|
|
|
+ """Get appropriate sequence matcher for the given algorithm.
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ algorithm: Diff algorithm ("myers" or "patience")
|
|
|
|
|
+ a: First sequence
|
|
|
|
|
+ b: Second sequence
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ Configured sequence matcher instance
|
|
|
|
|
+
|
|
|
|
|
+ Raises:
|
|
|
|
|
+ DiffAlgorithmNotAvailable: If patience requested but not available
|
|
|
|
|
+ """
|
|
|
|
|
+ if algorithm == "patience":
|
|
|
|
|
+ try:
|
|
|
|
|
+ from patiencediff import PatienceSequenceMatcher
|
|
|
|
|
+
|
|
|
|
|
+ return PatienceSequenceMatcher(None, a, b)
|
|
|
|
|
+ except ImportError:
|
|
|
|
|
+ raise DiffAlgorithmNotAvailable(
|
|
|
|
|
+ "patience", "Install with: pip install 'dulwich[patiencediff]'"
|
|
|
|
|
+ )
|
|
|
|
|
+ else:
|
|
|
|
|
+ return SequenceMatcher(a=a, b=b)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def unified_diff_with_algorithm(
|
|
|
|
|
+ a: list[bytes],
|
|
|
|
|
+ b: list[bytes],
|
|
|
|
|
+ fromfile: bytes = b"",
|
|
|
|
|
+ tofile: bytes = b"",
|
|
|
|
|
+ fromfiledate: str = "",
|
|
|
|
|
+ tofiledate: str = "",
|
|
|
|
|
+ n: int = 3,
|
|
|
|
|
+ lineterm: str = "\n",
|
|
|
|
|
+ tree_encoding: str = "utf-8",
|
|
|
|
|
+ output_encoding: str = "utf-8",
|
|
|
|
|
+ algorithm: Optional[str] = None,
|
|
|
|
|
+) -> Generator[bytes, None, None]:
|
|
|
|
|
+ """Generate unified diff with specified algorithm.
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ a: First sequence of lines
|
|
|
|
|
+ b: Second sequence of lines
|
|
|
|
|
+ fromfile: Name of first file
|
|
|
|
|
+ tofile: Name of second file
|
|
|
|
|
+ fromfiledate: Date of first file
|
|
|
|
|
+ tofiledate: Date of second file
|
|
|
|
|
+ n: Number of context lines
|
|
|
|
|
+ lineterm: Line terminator
|
|
|
|
|
+ tree_encoding: Encoding for tree paths
|
|
|
|
|
+ output_encoding: Encoding for output
|
|
|
|
|
+ algorithm: Diff algorithm to use ("myers" or "patience")
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ Generator yielding diff lines
|
|
|
|
|
+
|
|
|
|
|
+ Raises:
|
|
|
|
|
+ DiffAlgorithmNotAvailable: If patience algorithm requested but patiencediff not available
|
|
|
|
|
+ """
|
|
|
|
|
+ if algorithm is None:
|
|
|
|
|
+ algorithm = DEFAULT_DIFF_ALGORITHM
|
|
|
|
|
+
|
|
|
|
|
+ matcher = _get_sequence_matcher(algorithm, a, b)
|
|
|
|
|
+
|
|
|
|
|
+ started = False
|
|
|
|
|
+ for group in matcher.get_grouped_opcodes(n):
|
|
|
|
|
+ if not started:
|
|
|
|
|
+ started = True
|
|
|
|
|
+ fromdate = f"\t{fromfiledate}" if fromfiledate else ""
|
|
|
|
|
+ todate = f"\t{tofiledate}" if tofiledate else ""
|
|
|
|
|
+ yield f"--- {fromfile.decode(tree_encoding)}{fromdate}{lineterm}".encode(
|
|
|
|
|
+ output_encoding
|
|
|
|
|
+ )
|
|
|
|
|
+ yield f"+++ {tofile.decode(tree_encoding)}{todate}{lineterm}".encode(
|
|
|
|
|
+ output_encoding
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ first, last = group[0], group[-1]
|
|
|
|
|
+ file1_range = _format_range_unified(first[1], last[2])
|
|
|
|
|
+ file2_range = _format_range_unified(first[3], last[4])
|
|
|
|
|
+ yield f"@@ -{file1_range} +{file2_range} @@{lineterm}".encode(output_encoding)
|
|
|
|
|
+
|
|
|
|
|
+ for tag, i1, i2, j1, j2 in group:
|
|
|
|
|
+ if tag == "equal":
|
|
|
|
|
+ for line in a[i1:i2]:
|
|
|
|
|
+ yield b" " + line
|
|
|
|
|
+ continue
|
|
|
|
|
+ if tag in ("replace", "delete"):
|
|
|
|
|
+ for line in a[i1:i2]:
|
|
|
|
|
+ if not line[-1:] == b"\n":
|
|
|
|
|
+ line += b"\n\\ No newline at end of file\n"
|
|
|
|
|
+ yield b"-" + line
|
|
|
|
|
+ if tag in ("replace", "insert"):
|
|
|
|
|
+ for line in b[j1:j2]:
|
|
|
|
|
+ if not line[-1:] == b"\n":
|
|
|
|
|
+ line += b"\n\\ No newline at end of file\n"
|
|
|
|
|
+ yield b"+" + line
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def is_binary(content: bytes) -> bool:
|
|
def is_binary(content: bytes) -> bool:
|
|
|
"""See if the first few bytes contain any null characters.
|
|
"""See if the first few bytes contain any null characters.
|
|
|
|
|
|
|
@@ -237,6 +362,7 @@ def write_object_diff(
|
|
|
old_file: tuple[Optional[bytes], Optional[int], Optional[bytes]],
|
|
old_file: tuple[Optional[bytes], Optional[int], Optional[bytes]],
|
|
|
new_file: tuple[Optional[bytes], Optional[int], Optional[bytes]],
|
|
new_file: tuple[Optional[bytes], Optional[int], Optional[bytes]],
|
|
|
diff_binary: bool = False,
|
|
diff_binary: bool = False,
|
|
|
|
|
+ diff_algorithm: Optional[str] = None,
|
|
|
) -> None:
|
|
) -> None:
|
|
|
"""Write the diff for an object.
|
|
"""Write the diff for an object.
|
|
|
|
|
|
|
@@ -247,6 +373,7 @@ def write_object_diff(
|
|
|
new_file: (path, mode, hexsha) tuple
|
|
new_file: (path, mode, hexsha) tuple
|
|
|
diff_binary: Whether to diff files even if they
|
|
diff_binary: Whether to diff files even if they
|
|
|
are considered binary files by is_binary().
|
|
are considered binary files by is_binary().
|
|
|
|
|
+ diff_algorithm: Algorithm to use for diffing ("myers" or "patience")
|
|
|
|
|
|
|
|
Note: the tuple elements should be None for nonexistent files
|
|
Note: the tuple elements should be None for nonexistent files
|
|
|
"""
|
|
"""
|
|
@@ -307,11 +434,12 @@ def write_object_diff(
|
|
|
f.write(binary_diff)
|
|
f.write(binary_diff)
|
|
|
else:
|
|
else:
|
|
|
f.writelines(
|
|
f.writelines(
|
|
|
- unified_diff(
|
|
|
|
|
|
|
+ unified_diff_with_algorithm(
|
|
|
lines(old_content),
|
|
lines(old_content),
|
|
|
lines(new_content),
|
|
lines(new_content),
|
|
|
patched_old_path,
|
|
patched_old_path,
|
|
|
patched_new_path,
|
|
patched_new_path,
|
|
|
|
|
+ algorithm=diff_algorithm,
|
|
|
)
|
|
)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -358,6 +486,7 @@ def write_blob_diff(
|
|
|
f: IO[bytes],
|
|
f: IO[bytes],
|
|
|
old_file: tuple[Optional[bytes], Optional[int], Optional["Blob"]],
|
|
old_file: tuple[Optional[bytes], Optional[int], Optional["Blob"]],
|
|
|
new_file: tuple[Optional[bytes], Optional[int], Optional["Blob"]],
|
|
new_file: tuple[Optional[bytes], Optional[int], Optional["Blob"]],
|
|
|
|
|
+ diff_algorithm: Optional[str] = None,
|
|
|
) -> None:
|
|
) -> None:
|
|
|
"""Write blob diff.
|
|
"""Write blob diff.
|
|
|
|
|
|
|
@@ -365,6 +494,7 @@ def write_blob_diff(
|
|
|
f: File-like object to write to
|
|
f: File-like object to write to
|
|
|
old_file: (path, mode, hexsha) tuple (None if nonexisting)
|
|
old_file: (path, mode, hexsha) tuple (None if nonexisting)
|
|
|
new_file: (path, mode, hexsha) tuple (None if nonexisting)
|
|
new_file: (path, mode, hexsha) tuple (None if nonexisting)
|
|
|
|
|
+ diff_algorithm: Algorithm to use for diffing ("myers" or "patience")
|
|
|
|
|
|
|
|
Note: The use of write_object_diff is recommended over this function.
|
|
Note: The use of write_object_diff is recommended over this function.
|
|
|
"""
|
|
"""
|
|
@@ -397,7 +527,13 @@ def write_blob_diff(
|
|
|
old_contents = lines(old_blob)
|
|
old_contents = lines(old_blob)
|
|
|
new_contents = lines(new_blob)
|
|
new_contents = lines(new_blob)
|
|
|
f.writelines(
|
|
f.writelines(
|
|
|
- unified_diff(old_contents, new_contents, patched_old_path, patched_new_path)
|
|
|
|
|
|
|
+ unified_diff_with_algorithm(
|
|
|
|
|
+ old_contents,
|
|
|
|
|
+ new_contents,
|
|
|
|
|
+ patched_old_path,
|
|
|
|
|
+ patched_new_path,
|
|
|
|
|
+ algorithm=diff_algorithm,
|
|
|
|
|
+ )
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
@@ -407,6 +543,7 @@ def write_tree_diff(
|
|
|
old_tree: Optional[bytes],
|
|
old_tree: Optional[bytes],
|
|
|
new_tree: Optional[bytes],
|
|
new_tree: Optional[bytes],
|
|
|
diff_binary: bool = False,
|
|
diff_binary: bool = False,
|
|
|
|
|
+ diff_algorithm: Optional[str] = None,
|
|
|
) -> None:
|
|
) -> None:
|
|
|
"""Write tree diff.
|
|
"""Write tree diff.
|
|
|
|
|
|
|
@@ -417,6 +554,7 @@ def write_tree_diff(
|
|
|
new_tree: New tree id
|
|
new_tree: New tree id
|
|
|
diff_binary: Whether to diff files even if they
|
|
diff_binary: Whether to diff files even if they
|
|
|
are considered binary files by is_binary().
|
|
are considered binary files by is_binary().
|
|
|
|
|
+ diff_algorithm: Algorithm to use for diffing ("myers" or "patience")
|
|
|
"""
|
|
"""
|
|
|
changes = store.tree_changes(old_tree, new_tree)
|
|
changes = store.tree_changes(old_tree, new_tree)
|
|
|
for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
|
|
for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
|
|
@@ -426,6 +564,7 @@ def write_tree_diff(
|
|
|
(oldpath, oldmode, oldsha),
|
|
(oldpath, oldmode, oldsha),
|
|
|
(newpath, newmode, newsha),
|
|
(newpath, newmode, newsha),
|
|
|
diff_binary=diff_binary,
|
|
diff_binary=diff_binary,
|
|
|
|
|
+ diff_algorithm=diff_algorithm,
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
|