# test_diffstat.py -- Tests for diffstat
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Test Contributor
# All rights reserved.

"""Tests for dulwich.contrib.diffstat."""

import contextlib
import io
import os
import sys
import tempfile
import unittest
from unittest import mock

from dulwich.contrib.diffstat import (
    _parse_patch,
    diffstat,
    main,
)


class ParsePatchTests(unittest.TestCase):
    """Exercise the ``_parse_patch`` helper on hand-written git diffs."""

    def test_empty_input(self):
        """An empty list of lines yields three empty result lists."""
        file_names, binary_flags, line_counts = _parse_patch([])
        self.assertEqual(file_names, [])
        self.assertEqual(binary_flags, [])
        self.assertEqual(line_counts, [])

    def test_basic_git_diff(self):
        """A single-file diff reports its name and add/delete totals."""
        patch_lines = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,5 +1,7 @@",
            b" unchanged line",
            b"-deleted line",
            b"-another deleted line",
            b"+added line",
            b"+another added line",
            b"+third added line",
            b" unchanged line",
        ]
        file_names, binary_flags, line_counts = _parse_patch(patch_lines)
        self.assertEqual(file_names, [b"file.txt"])
        # The file is textual, so its binary flag is False.
        self.assertEqual(binary_flags, [False])
        # Three "+" lines and two "-" lines inside the hunk.
        self.assertEqual(line_counts, [(3, 2)])

    def test_chunk_ending_with_nonstandard_line(self):
        """A hunk followed by a line with no diff indicator is still parsed.

        The first file's hunk is followed by a line that starts with none of
        the unchanged (" "), added ("+") or deleted ("-") markers, and then by
        a second file's diff. Both files must be reported with their own
        counts. This is meant to cover the parser branch that leaves the
        current chunk when it meets such a line.
        """
        patch_lines = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,5 +1,7 @@",
            b" unchanged line",
            b"-deleted line",
            b"+added line",
            # Neither context, addition nor deletion.
            b"No leading space or indicator",
            # Start of the second file's diff.
            b"diff --git a/file2.txt b/file2.txt",
            b"index 2345678..bcdefgh 100644",
            b"--- a/file2.txt",
            b"+++ b/file2.txt",
            b"@@ -1,3 +1,4 @@",
            b" unchanged in file2",
            b"+added in file2",
            b" another unchanged in file2",
        ]
        file_names, binary_flags, line_counts = _parse_patch(patch_lines)
        self.assertEqual(file_names, [b"file.txt", b"file2.txt"])
        self.assertEqual(binary_flags, [False, False])
        # file.txt: one addition, one deletion; file2.txt: one addition only.
        self.assertEqual(line_counts, [(1, 1), (1, 0)])

    def test_binary_files(self):
        """A "Binary files ... differ" entry is flagged as binary."""
        patch_lines = [
            b"diff --git a/image.png b/image.png",
            b"index 1234567..abcdefg 100644",
            b"Binary files a/image.png and b/image.png differ",
        ]
        file_names, binary_flags, line_counts = _parse_patch(patch_lines)
        self.assertEqual(file_names, [b"image.png"])
        # Flag is True for a binary file.
        self.assertEqual(binary_flags, [True])
        # Binary entries carry no line counts.
        self.assertEqual(line_counts, [(0, 0)])

    def test_renamed_file(self):
        """A rename is reported as ``old => new`` with the hunk's counts."""
        patch_lines = [
            b"diff --git a/oldname.txt b/newname.txt",
            b"similarity index 80%",
            b"rename from oldname.txt",
            b"rename to newname.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/oldname.txt",
            b"+++ b/newname.txt",
            b"@@ -1,3 +1,4 @@",
            b" unchanged line",
            b" another unchanged line",
            b"+added line",
            b" third unchanged line",
        ]
        file_names, binary_flags, line_counts = _parse_patch(patch_lines)
        # Both the old and the new name appear in the reported entry.
        self.assertEqual(file_names, [b"oldname.txt => newname.txt"])
        self.assertEqual(binary_flags, [False])
        # One addition, no deletions.
        self.assertEqual(line_counts, [(1, 0)])

    def test_multiple_files(self):
        """Two consecutive file diffs are reported in order, counted apart."""
        first_file = [
            b"diff --git a/file1.txt b/file1.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file1.txt",
            b"+++ b/file1.txt",
            b"@@ -1,3 +1,4 @@",
            b" unchanged",
            b"+added",
            b" unchanged",
            b" unchanged",
        ]
        second_file = [
            b"diff --git a/file2.txt b/file2.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file2.txt",
            b"+++ b/file2.txt",
            b"@@ -1,3 +1,2 @@",
            b" unchanged",
            b"-deleted",
            b" unchanged",
        ]
        file_names, binary_flags, line_counts = _parse_patch(
            first_file + second_file
        )
        self.assertEqual(file_names, [b"file1.txt", b"file2.txt"])
        self.assertEqual(binary_flags, [False, False])
        # file1.txt gains one line; file2.txt loses one line.
        self.assertEqual(line_counts, [(1, 0), (0, 1)])


class DiffstatTests(unittest.TestCase):
    """Tests for diffstat function."""

    @staticmethod
    def _stat_line(result, filename):
        """Return the first line of ``result`` that mentions ``filename``.

        Args:
          result: Bytes output returned by ``diffstat``.
          filename: File name (bytes) to search for.

        Returns:
          The matching line, without its trailing newline.

        Raises:
          StopIteration: If no line of ``result`` contains ``filename``.
        """
        return next(line for line in result.split(b"\n") if filename in line)

    def test_empty_diff(self):
        """Test generating diffstat for an empty diff."""
        result = diffstat([])
        self.assertEqual(result, b" 0 files changed, 0 insertions(+), 0 deletions(-)")

    def test_basic_diffstat(self):
        """Test generating a basic diffstat."""
        diff = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,2 +1,3 @@",
            b" unchanged line",
            b"+added line",
            b" unchanged line",
        ]
        result = diffstat(diff)
        # Check that the output contains key elements
        self.assertIn(b"file.txt", result)
        self.assertIn(b"1 files changed", result)
        self.assertIn(b"1 insertions(+)", result)
        self.assertIn(b"0 deletions(-)", result)

    def test_binary_file_diffstat(self):
        """Test generating diffstat with binary files."""
        diff = [
            b"diff --git a/image.png b/image.png",
            b"index 1234567..abcdefg 100644",
            b"Binary files a/image.png and b/image.png differ",
        ]
        result = diffstat(diff)
        self.assertIn(b"image.png", result)
        self.assertIn(b"Bin", result)  # Binary file indicator
        self.assertIn(b"1 files changed", result)
        self.assertIn(b"0 insertions(+)", result)
        self.assertIn(b"0 deletions(-)", result)

    def test_multiple_files_diffstat(self):
        """Test generating diffstat with multiple files."""
        diff = [
            # First file
            b"diff --git a/file1.txt b/file1.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file1.txt",
            b"+++ b/file1.txt",
            b"@@ -1,3 +1,5 @@",
            b" unchanged",
            b"+added1",
            b"+added2",
            b" unchanged",
            b" unchanged",
            # Second file
            b"diff --git a/file2.txt b/file2.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file2.txt",
            b"+++ b/file2.txt",
            b"@@ -1,3 +1,2 @@",
            b" unchanged",
            b"-deleted",
            b" unchanged",
        ]
        result = diffstat(diff)
        self.assertIn(b"file1.txt", result)
        self.assertIn(b"file2.txt", result)
        self.assertIn(b"2 files changed", result)
        self.assertIn(b"2 insertions(+)", result)
        self.assertIn(b"1 deletions(-)", result)

    def test_custom_width(self):
        """Test diffstat with custom width parameter."""
        diff = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,2 +1,5 @@",
            b" unchanged line",
            b"+added line 1",
            b"+added line 2",
            b"+added line 3",
            b" unchanged line",
        ]
        # Render the same diff at a very narrow and at a wide width.
        narrow_result = diffstat(diff, max_width=30)
        wide_result = diffstat(diff, max_width=120)
        # Both must carry the same file info; only the histogram may differ.
        for result in (narrow_result, wide_result):
            self.assertIn(b"file.txt", result)
            self.assertIn(b"1 files changed", result)
            self.assertIn(b"3 insertions(+)", result)

    def test_histwidth_scaling(self):
        """Test histogram width scaling for various change sizes."""
        diff_lines = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,50 +1,50 @@",
        ]
        # Enough additions and deletions to trigger histogram scaling.
        diff_lines.extend(b"+added line %d" % i for i in range(30))
        diff_lines.extend(b"-deleted line %d" % i for i in range(20))
        # Try with a narrow width to force scaling
        result = diffstat(diff_lines, max_width=40)
        self.assertIn(b"file.txt", result)
        # Inspect only the per-file line: the summary line always contains
        # "insertions(+)" and "deletions(-)", so searching the whole output
        # for "+" or "-" could never fail.
        file_line = self._stat_line(result, b"file.txt")
        self.assertIn(b"50", file_line)  # 30 additions + 20 deletions
        self.assertIn(b"+", file_line)
        self.assertIn(b"-", file_line)

    def test_small_nonzero_changes(self):
        """Test with very small positive changes that would round to zero."""
        # One file with a single addition ...
        normal_diff = [
            b"diff --git a/bigfile.txt b/bigfile.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/bigfile.txt",
            b"+++ b/bigfile.txt",
            b"@@ -1,1000 +1,1001 @@",
            b"+new line",  # Just one addition
        ]
        # ... next to one with 1000 additions, so the first file's share of
        # the histogram is tiny.
        lot_of_changes_diff = [
            b"diff --git a/hugefile.txt b/hugefile.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/hugefile.txt",
            b"+++ b/hugefile.txt",
            b"@@ -1,1000 +1,2000 @@",
        ]
        lot_of_changes_diff.extend(b"+added line %d" % i for i in range(1000))
        combined_diff = normal_diff + lot_of_changes_diff
        # Use a very large width to make the contrast obvious
        result = diffstat(combined_diff, max_width=200)
        self.assertIn(b"bigfile.txt", result)
        self.assertIn(b"hugefile.txt", result)
        self.assertIn(b"2 files changed", result)
        self.assertIn(b"1001 insertions(+)", result)
        # The small change must still show at least one "+" in its histogram.
        bigfile_line = self._stat_line(result, b"bigfile.txt")
        self.assertIn(b"+", bigfile_line)

    def test_big_diff_histogram(self):
        """Test histogram creation with very large diffs."""
        diff_lines = [
            b"diff --git a/bigfile.txt b/bigfile.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/bigfile.txt",
            b"+++ b/bigfile.txt",
            b"@@ -1,1000 +1,2000 @@",
        ]
        # Add 1000 additions and 500 deletions
        diff_lines.extend(b"+added line %d" % i for i in range(1000))
        diff_lines.extend(b"-deleted line %d" % i for i in range(500))
        # Test with different widths
        narrow_result = diffstat(diff_lines, max_width=40)
        wide_result = diffstat(diff_lines, max_width=120)
        # Both should show the right number of changes
        for result in [narrow_result, wide_result]:
            self.assertIn(b"1 files changed", result)
            self.assertIn(b"1000 insertions(+)", result)
            self.assertIn(b"500 deletions(-)", result)

    def test_small_deletions_only(self):
        """Test histogram creation with only a few deletions."""
        # First file: deletions only.
        diff1 = [
            b"diff --git a/file1.txt b/file1.txt",
            b"@@ -1,1000 +1,900 @@",
        ]
        diff1.extend(b"-deleted line %d" % i for i in range(100))
        # Second file: many more changes, to raise the largest per-file count.
        diff2 = [
            b"diff --git a/file2.txt b/file2.txt",
            b"@@ -1,1000 +1,5000 @@",
        ]
        diff2.extend(b"+added line %d" % i for i in range(4000))
        diff = diff1 + diff2
        # Generate diffstat with a very wide display
        result = diffstat(diff, max_width=200)
        # Make sure both files are reported
        self.assertIn(b"file1.txt", result)
        self.assertIn(b"file2.txt", result)
        # The deletions-only file should show some "-" characters.
        file1_line = self._stat_line(result, b"file1.txt")
        self.assertIn(b"-", file1_line)

    def test_very_small_deletions_ratio(self):
        """Test histogram with tiny deletion ratio that would round to zero.

        A single deletion in one file is paired with 10000 additions in
        another. The deletion must still be drawn with at least one "-"
        character in its histogram.
        """
        diff = [
            b"diff --git a/file1.txt b/file1.txt",
            b"@@ -1,2 +1,1 @@",
            b"-single deleted line",  # Just one deletion
            b" unchanged line",
            b"diff --git a/file2.txt b/file2.txt",
            b"@@ -1,1 +1,10001 @@",
            b" unchanged line",
        ]
        # Add 10000 additions to file2 so file1's share becomes tiny.
        diff.extend(b"+added line %d" % i for i in range(10000))
        # Generate diffstat with a moderate display width
        result = diffstat(diff, max_width=80)
        # Make sure both files are reported
        self.assertIn(b"file1.txt", result)
        self.assertIn(b"file2.txt", result)
        # One deletion against roughly ten thousand additions must still
        # produce at least one "-" on file1's line.
        file1_line = self._stat_line(result, b"file1.txt")
        self.assertIn(b"-", file1_line)
        # Confirm the summary stats are correct
        self.assertIn(b"2 files changed", result)
        self.assertIn(b"10000 insertions(+)", result)
        self.assertIn(b"1 deletions(-)", result)


class MainFunctionTests(unittest.TestCase):
    """Tests for the main() function."""

    @staticmethod
    def _remove_file(path):
        """Delete ``path``, ignoring the case where it no longer exists."""
        with contextlib.suppress(FileNotFoundError):
            os.unlink(path)

    def test_main_with_diff_file(self):
        """Test the main function with a diff file argument."""
        # Explicit newlines and leading spaces keep the diff's significant
        # whitespace visible (context lines start with a single space).
        diff_content = (
            b"diff --git a/file.txt b/file.txt\n"
            b"index 1234567..abcdefg 100644\n"
            b"--- a/file.txt\n"
            b"+++ b/file.txt\n"
            b"@@ -1,3 +1,4 @@\n"
            b" unchanged line\n"
            b"+added line\n"
            b" another unchanged line\n"
            b" third unchanged line\n"
        )
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            # Register cleanup before writing so the file is removed even if
            # the write or a later assertion fails.
            self.addCleanup(self._remove_file, tmp.name)
            tmp.write(diff_content)
            tmp_path = tmp.name

        # patch.object restores sys.argv on exit, including on failure.
        with mock.patch.object(sys, "argv", ["diffstat.py", tmp_path]):
            self.assertEqual(main(), 0)

        # With no path argument main() runs its built-in self-test.
        with mock.patch.object(sys, "argv", ["diffstat.py"]):
            self.assertEqual(main(), 0)

    def test_main_self_test_failure(self):
        """Test the main function when the self-test fails."""
        # Imported before anything is patched, so a failing import cannot
        # leave global state half-modified.
        from dulwich.contrib import diffstat as diffstat_module

        captured_output = io.StringIO()
        # Each context manager restores what it replaced on exit:
        # sys.argv, the module-level diffstat function, and sys.stdout.
        with mock.patch.object(sys, "argv", ["diffstat.py"]):
            with mock.patch.object(
                diffstat_module, "diffstat", return_value=b"WRONG OUTPUT"
            ):
                with contextlib.redirect_stdout(captured_output):
                    # A wrong diffstat result forces the self-test to fail.
                    return_code = main()

        # The main function should return -1 for self-test failure
        self.assertEqual(return_code, -1)
        # The failure report must include both received and expected output.
        captured = captured_output.getvalue()
        self.assertIn("self test failed", captured)
        self.assertIn("Received:", captured)
        self.assertIn("WRONG OUTPUT", captured)
        self.assertIn("Expected:", captured)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()