test_diffstat.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. # test_diffstat.py -- Tests for diffstat
  2. # SPDX-License-Identifier: MIT
  3. # Copyright (c) 2025 Test Contributor
  4. # All rights reserved.
  5. """Tests for dulwich.contrib.diffstat."""
  6. import os
  7. import tempfile
  8. import unittest
  9. from dulwich.contrib.diffstat import (
  10. _parse_patch,
  11. diffstat,
  12. main,
  13. )
  14. class ParsePatchTests(unittest.TestCase):
  15. """Tests for _parse_patch function."""
  16. def test_empty_input(self):
  17. """Test parsing an empty list of lines."""
  18. names, nametypes, counts = _parse_patch([])
  19. self.assertEqual(names, [])
  20. self.assertEqual(nametypes, [])
  21. self.assertEqual(counts, [])
  22. def test_basic_git_diff(self):
  23. """Test parsing a basic git diff with additions and deletions."""
  24. diff = [
  25. b"diff --git a/file.txt b/file.txt",
  26. b"index 1234567..abcdefg 100644",
  27. b"--- a/file.txt",
  28. b"+++ b/file.txt",
  29. b"@@ -1,5 +1,7 @@",
  30. b" unchanged line",
  31. b"-deleted line",
  32. b"-another deleted line",
  33. b"+added line",
  34. b"+another added line",
  35. b"+third added line",
  36. b" unchanged line",
  37. ]
  38. names, nametypes, counts = _parse_patch(diff)
  39. self.assertEqual(names, [b"file.txt"])
  40. self.assertEqual(nametypes, [False]) # Not a binary file
  41. self.assertEqual(counts, [(3, 2)]) # 3 additions, 2 deletions
  42. def test_chunk_ending_with_nonstandard_line(self):
  43. """Test parsing a git diff where a chunk ends with a non-standard line.
  44. This tests the code path in line 103 of diffstat.py where the in_patch_chunk
  45. flag is set to False when encountering a line that doesn't start with
  46. the unchanged, added, or deleted indicators.
  47. """
  48. diff = [
  49. b"diff --git a/file.txt b/file.txt",
  50. b"index 1234567..abcdefg 100644",
  51. b"--- a/file.txt",
  52. b"+++ b/file.txt",
  53. b"@@ -1,5 +1,7 @@",
  54. b" unchanged line",
  55. b"-deleted line",
  56. b"+added line",
  57. b"No leading space or indicator", # Non-standard line
  58. b"diff --git a/file2.txt b/file2.txt", # Next file's diff
  59. b"index 2345678..bcdefgh 100644",
  60. b"--- a/file2.txt",
  61. b"+++ b/file2.txt",
  62. b"@@ -1,3 +1,4 @@",
  63. b" unchanged in file2",
  64. b"+added in file2",
  65. b" another unchanged in file2",
  66. ]
  67. names, nametypes, counts = _parse_patch(diff)
  68. self.assertEqual(names, [b"file.txt", b"file2.txt"])
  69. self.assertEqual(nametypes, [False, False])
  70. self.assertEqual(
  71. counts, [(1, 1), (1, 0)]
  72. ) # file1: 1 add, 1 delete; file2: 1 add, 0 delete
  73. def test_binary_files(self):
  74. """Test parsing a git diff with binary files."""
  75. diff = [
  76. b"diff --git a/image.png b/image.png",
  77. b"index 1234567..abcdefg 100644",
  78. b"Binary files a/image.png and b/image.png differ",
  79. ]
  80. names, nametypes, counts = _parse_patch(diff)
  81. self.assertEqual(names, [b"image.png"])
  82. self.assertEqual(nametypes, [True]) # Is a binary file
  83. self.assertEqual(counts, [(0, 0)]) # No additions/deletions counted
  84. def test_renamed_file(self):
  85. """Test parsing a git diff with a renamed file."""
  86. diff = [
  87. b"diff --git a/oldname.txt b/newname.txt",
  88. b"similarity index 80%",
  89. b"rename from oldname.txt",
  90. b"rename to newname.txt",
  91. b"index 1234567..abcdefg 100644",
  92. b"--- a/oldname.txt",
  93. b"+++ b/newname.txt",
  94. b"@@ -1,3 +1,4 @@",
  95. b" unchanged line",
  96. b" another unchanged line",
  97. b"+added line",
  98. b" third unchanged line",
  99. ]
  100. names, nametypes, counts = _parse_patch(diff)
  101. # The name should include both old and new names
  102. self.assertEqual(names, [b"oldname.txt => newname.txt"])
  103. self.assertEqual(nametypes, [False]) # Not a binary file
  104. self.assertEqual(counts, [(1, 0)]) # 1 addition, 0 deletions
  105. def test_multiple_files(self):
  106. """Test parsing a git diff with multiple files."""
  107. diff = [
  108. # First file
  109. b"diff --git a/file1.txt b/file1.txt",
  110. b"index 1234567..abcdefg 100644",
  111. b"--- a/file1.txt",
  112. b"+++ b/file1.txt",
  113. b"@@ -1,3 +1,4 @@",
  114. b" unchanged",
  115. b"+added",
  116. b" unchanged",
  117. b" unchanged",
  118. # Second file
  119. b"diff --git a/file2.txt b/file2.txt",
  120. b"index 1234567..abcdefg 100644",
  121. b"--- a/file2.txt",
  122. b"+++ b/file2.txt",
  123. b"@@ -1,3 +1,2 @@",
  124. b" unchanged",
  125. b"-deleted",
  126. b" unchanged",
  127. ]
  128. names, nametypes, counts = _parse_patch(diff)
  129. self.assertEqual(names, [b"file1.txt", b"file2.txt"])
  130. self.assertEqual(nametypes, [False, False])
  131. self.assertEqual(
  132. counts, [(1, 0), (0, 1)]
  133. ) # 1 addition, 0 deletions for file1; 0 additions, 1 deletion for file2
  134. class DiffstatTests(unittest.TestCase):
  135. """Tests for diffstat function."""
  136. def test_empty_diff(self):
  137. """Test generating diffstat for an empty diff."""
  138. result = diffstat([])
  139. self.assertEqual(result, b" 0 files changed, 0 insertions(+), 0 deletions(-)")
  140. def test_basic_diffstat(self):
  141. """Test generating a basic diffstat."""
  142. diff = [
  143. b"diff --git a/file.txt b/file.txt",
  144. b"index 1234567..abcdefg 100644",
  145. b"--- a/file.txt",
  146. b"+++ b/file.txt",
  147. b"@@ -1,2 +1,3 @@",
  148. b" unchanged line",
  149. b"+added line",
  150. b" unchanged line",
  151. ]
  152. result = diffstat(diff)
  153. # Check that the output contains key elements
  154. self.assertIn(b"file.txt", result)
  155. self.assertIn(b"1 files changed", result)
  156. self.assertIn(b"1 insertions(+)", result)
  157. self.assertIn(b"0 deletions(-)", result)
  158. def test_binary_file_diffstat(self):
  159. """Test generating diffstat with binary files."""
  160. diff = [
  161. b"diff --git a/image.png b/image.png",
  162. b"index 1234567..abcdefg 100644",
  163. b"Binary files a/image.png and b/image.png differ",
  164. ]
  165. result = diffstat(diff)
  166. self.assertIn(b"image.png", result)
  167. self.assertIn(b"Bin", result) # Binary file indicator
  168. self.assertIn(b"1 files changed", result)
  169. self.assertIn(b"0 insertions(+)", result)
  170. self.assertIn(b"0 deletions(-)", result)
  171. def test_multiple_files_diffstat(self):
  172. """Test generating diffstat with multiple files."""
  173. diff = [
  174. # First file
  175. b"diff --git a/file1.txt b/file1.txt",
  176. b"index 1234567..abcdefg 100644",
  177. b"--- a/file1.txt",
  178. b"+++ b/file1.txt",
  179. b"@@ -1,3 +1,5 @@",
  180. b" unchanged",
  181. b"+added1",
  182. b"+added2",
  183. b" unchanged",
  184. b" unchanged",
  185. # Second file
  186. b"diff --git a/file2.txt b/file2.txt",
  187. b"index 1234567..abcdefg 100644",
  188. b"--- a/file2.txt",
  189. b"+++ b/file2.txt",
  190. b"@@ -1,3 +1,2 @@",
  191. b" unchanged",
  192. b"-deleted",
  193. b" unchanged",
  194. ]
  195. result = diffstat(diff)
  196. self.assertIn(b"file1.txt", result)
  197. self.assertIn(b"file2.txt", result)
  198. self.assertIn(b"2 files changed", result)
  199. self.assertIn(b"2 insertions(+)", result)
  200. self.assertIn(b"1 deletions(-)", result)
  201. def test_custom_width(self):
  202. """Test diffstat with custom width parameter."""
  203. diff = [
  204. b"diff --git a/file.txt b/file.txt",
  205. b"index 1234567..abcdefg 100644",
  206. b"--- a/file.txt",
  207. b"+++ b/file.txt",
  208. b"@@ -1,2 +1,5 @@",
  209. b" unchanged line",
  210. b"+added line 1",
  211. b"+added line 2",
  212. b"+added line 3",
  213. b" unchanged line",
  214. ]
  215. # Test with a very narrow width
  216. narrow_result = diffstat(diff, max_width=30)
  217. # Test with a wide width
  218. wide_result = diffstat(diff, max_width=120)
  219. # Both should contain the same file info but potentially different histogram widths
  220. self.assertIn(b"file.txt", narrow_result)
  221. self.assertIn(b"file.txt", wide_result)
  222. self.assertIn(b"1 files changed", narrow_result)
  223. self.assertIn(b"1 files changed", wide_result)
  224. self.assertIn(b"3 insertions(+)", narrow_result)
  225. self.assertIn(b"3 insertions(+)", wide_result)
  226. def test_histwidth_scaling(self):
  227. """Test histogram width scaling for various change sizes."""
  228. # Create a diff with a large number of changes to trigger the histogram scaling
  229. diff_lines = [
  230. b"diff --git a/file.txt b/file.txt",
  231. b"index 1234567..abcdefg 100644",
  232. b"--- a/file.txt",
  233. b"+++ b/file.txt",
  234. b"@@ -1,50 +1,50 @@",
  235. ]
  236. # Add a lot of added and deleted lines
  237. for i in range(30):
  238. diff_lines.append(b"+added line %d" % i)
  239. for i in range(20):
  240. diff_lines.append(b"-deleted line %d" % i)
  241. # Try with a narrow width to force scaling
  242. result = diffstat(diff_lines, max_width=40)
  243. self.assertIn(b"file.txt", result)
  244. self.assertIn(b"50", result) # Should show 50 changes (30+20)
  245. # Make sure it has some + and - characters for the histogram
  246. plus_count = result.count(b"+")
  247. minus_count = result.count(b"-")
  248. self.assertGreater(plus_count, 0)
  249. self.assertGreater(minus_count, 0)
  250. def test_small_nonzero_changes(self):
  251. """Test with very small positive changes that would round to zero."""
  252. # Create a diff with a tiny number of changes and a large max_diff to trigger
  253. # the small ratio calculation
  254. normal_diff = [
  255. b"diff --git a/bigfile.txt b/bigfile.txt",
  256. b"index 1234567..abcdefg 100644",
  257. b"--- a/bigfile.txt",
  258. b"+++ b/bigfile.txt",
  259. b"@@ -1,1000 +1,1001 @@",
  260. b"+new line", # Just one addition
  261. ]
  262. lot_of_changes_diff = [
  263. b"diff --git a/hugefile.txt b/hugefile.txt",
  264. b"index 1234567..abcdefg 100644",
  265. b"--- a/hugefile.txt",
  266. b"+++ b/hugefile.txt",
  267. b"@@ -1,1000 +1,2000 @@",
  268. ]
  269. # Add 1000 added lines to this one
  270. for i in range(1000):
  271. lot_of_changes_diff.append(b"+added line %d" % i)
  272. # Combine these diffs
  273. combined_diff = normal_diff + lot_of_changes_diff
  274. # Use a very large width to make the contrast obvious
  275. result = diffstat(combined_diff, max_width=200)
  276. # The small change should still have at least one '+' in the histogram
  277. self.assertIn(b"bigfile.txt", result)
  278. self.assertIn(b"hugefile.txt", result)
  279. self.assertIn(b"2 files changed", result)
  280. self.assertIn(b"1001 insertions(+)", result)
  281. # Get the line for bigfile.txt (should be the first file line)
  282. lines = result.split(b"\n")
  283. bigfile_line = next(line for line in lines if b"bigfile.txt" in line)
  284. # Make sure it has at least one + even though the ratio would be tiny
  285. self.assertIn(b"+", bigfile_line)
  286. def test_big_diff_histogram(self):
  287. """Test histogram creation with very large diffs."""
  288. # Create a large diff with many additions and deletions to test histogram width scaling
  289. diff_lines = [
  290. b"diff --git a/bigfile.txt b/bigfile.txt",
  291. b"index 1234567..abcdefg 100644",
  292. b"--- a/bigfile.txt",
  293. b"+++ b/bigfile.txt",
  294. b"@@ -1,1000 +1,2000 @@",
  295. ]
  296. # Add 1000 additions and 500 deletions
  297. for i in range(1000):
  298. diff_lines.append(b"+added line %d" % i)
  299. for i in range(500):
  300. diff_lines.append(b"-deleted line %d" % i)
  301. # Test with different widths
  302. narrow_result = diffstat(diff_lines, max_width=40)
  303. wide_result = diffstat(diff_lines, max_width=120)
  304. # Both should show the right number of changes
  305. for result in [narrow_result, wide_result]:
  306. self.assertIn(b"1 files changed", result)
  307. self.assertIn(b"1000 insertions(+)", result)
  308. self.assertIn(b"500 deletions(-)", result)
  309. def test_small_deletions_only(self):
  310. """Test histogram creation with only a few deletions."""
  311. # Create a diff with a huge maxdiff to force scaling, but only a few deletions
  312. diff1 = [
  313. b"diff --git a/file1.txt b/file1.txt",
  314. b"@@ -1,1000 +1,900 @@",
  315. ]
  316. for i in range(100):
  317. diff1.append(b"-deleted line %d" % i)
  318. # Create a second diff with many more changes to increase maxdiff
  319. diff2 = [
  320. b"diff --git a/file2.txt b/file2.txt",
  321. b"@@ -1,1000 +1,5000 @@",
  322. ]
  323. for i in range(4000):
  324. diff2.append(b"+added line %d" % i)
  325. # Combine the diffs
  326. diff = diff1 + diff2
  327. # Generate diffstat with a very wide display
  328. result = diffstat(diff, max_width=200)
  329. # Make sure both files are reported
  330. self.assertIn(b"file1.txt", result)
  331. self.assertIn(b"file2.txt", result)
  332. # Get the line for file1.txt
  333. lines = result.split(b"\n")
  334. file1_line = next(line for line in lines if b"file1.txt" in line)
  335. # Should show some - characters for the deletions
  336. self.assertIn(b"-", file1_line)
  337. def test_very_small_deletions_ratio(self):
  338. """Test histogram with tiny deletion ratio that would round to zero.
  339. This tests line 174 in diffstat.py where a small ratio between 0 and 1
  340. is forced to be at least 1 character wide in the histogram.
  341. """
  342. # Create a diff with a single deletion and a massive number of additions
  343. # to make the deletion ratio tiny
  344. diff = [
  345. b"diff --git a/file1.txt b/file1.txt",
  346. b"@@ -1,2 +1,1 @@",
  347. b"-single deleted line", # Just one deletion
  348. b" unchanged line",
  349. b"diff --git a/file2.txt b/file2.txt",
  350. b"@@ -1,1 +1,10001 @@",
  351. b" unchanged line",
  352. ]
  353. # Add 10000 additions to file2 to create a huge maxdiff
  354. for i in range(10000):
  355. diff.append(b"+added line %d" % i)
  356. # Generate diffstat with a moderate display width
  357. result = diffstat(diff, max_width=80)
  358. # Make sure both files are reported
  359. self.assertIn(b"file1.txt", result)
  360. self.assertIn(b"file2.txt", result)
  361. # Get the line for file1.txt
  362. lines = result.split(b"\n")
  363. file1_line = next(line for line in lines if b"file1.txt" in line)
  364. # Should show at least one - character for the deletion
  365. # even though the ratio would be tiny (1/10001 ≈ 0.0001)
  366. self.assertIn(b"-", file1_line)
  367. # Confirm the summary stats are correct
  368. self.assertIn(b"2 files changed", result)
  369. self.assertIn(b"10000 insertions(+)", result)
  370. self.assertIn(b"1 deletions(-)", result)
  371. class MainFunctionTests(unittest.TestCase):
  372. """Tests for the main() function."""
  373. def test_main_with_diff_file(self):
  374. """Test the main function with a diff file argument."""
  375. # Create a temporary diff file
  376. with tempfile.NamedTemporaryFile(delete=False) as tmp:
  377. diff_content = b"""diff --git a/file.txt b/file.txt
  378. index 1234567..abcdefg 100644
  379. --- a/file.txt
  380. +++ b/file.txt
  381. @@ -1,3 +1,4 @@
  382. unchanged line
  383. +added line
  384. another unchanged line
  385. third unchanged line
  386. """
  387. tmp.write(diff_content)
  388. tmp_path = tmp.name
  389. try:
  390. # Save the original sys.argv
  391. import sys
  392. orig_argv = sys.argv
  393. # Test with a file path argument
  394. sys.argv = ["diffstat.py", tmp_path]
  395. return_code = main()
  396. self.assertEqual(return_code, 0)
  397. # Test with no args to trigger the self-test
  398. sys.argv = ["diffstat.py"]
  399. return_code = main()
  400. self.assertEqual(return_code, 0)
  401. finally:
  402. # Restore original sys.argv
  403. sys.argv = orig_argv
  404. # Clean up the temporary file
  405. if os.path.exists(tmp_path):
  406. os.unlink(tmp_path)
  407. def test_main_self_test_failure(self):
  408. """Test the main function when the self-test fails."""
  409. import io
  410. import sys
  411. from dulwich.contrib.diffstat import diffstat as real_diffstat
  412. # Save original sys.argv, diffstat function, and stdout
  413. orig_argv = sys.argv
  414. orig_diffstat = real_diffstat
  415. orig_stdout = sys.stdout
  416. try:
  417. # Set up for testing self-test failure
  418. sys.argv = ["diffstat.py"]
  419. # Replace stdout with a StringIO object to capture output
  420. captured_output = io.StringIO()
  421. sys.stdout = captured_output
  422. # Mock the diffstat function to return a wrong result
  423. # This will trigger the self-test failure path
  424. from dulwich.contrib import diffstat as diffstat_module
  425. diffstat_module.diffstat = lambda lines, max_width=80: b"WRONG OUTPUT"
  426. # The main function should return -1 for self-test failure
  427. return_code = main()
  428. self.assertEqual(return_code, -1)
  429. # Check if the expected output is captured
  430. captured = captured_output.getvalue()
  431. self.assertIn("self test failed", captured)
  432. self.assertIn("Received:", captured)
  433. self.assertIn("WRONG OUTPUT", captured)
  434. self.assertIn("Expected:", captured)
  435. finally:
  436. # Restore original sys.argv, diffstat function, and stdout
  437. sys.argv = orig_argv
  438. diffstat_module.diffstat = orig_diffstat
  439. sys.stdout = orig_stdout
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()