test_mbox.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. # test_mbox.py -- tests for mbox.py
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for mbox.py."""
  22. import mailbox
  23. import os
  24. import tempfile
  25. from io import BytesIO
  26. from dulwich.mbox import split_maildir, split_mbox
  27. from . import TestCase
  28. class SplitMboxTests(TestCase):
  29. """Tests for split_mbox function."""
  30. def test_split_simple_mbox(self) -> None:
  31. """Test splitting a simple mbox with two messages."""
  32. mbox_content = b"""\
  33. From alice@example.com Mon Jan 01 00:00:00 2025
  34. From: Alice <alice@example.com>
  35. To: Bob <bob@example.com>
  36. Subject: First message
  37. This is the first message.
  38. From bob@example.com Mon Jan 01 00:01:00 2025
  39. From: Bob <bob@example.com>
  40. To: Alice <alice@example.com>
  41. Subject: Second message
  42. This is the second message.
  43. """
  44. with tempfile.TemporaryDirectory() as tmpdir:
  45. # Create temporary mbox file
  46. mbox_path = os.path.join(tmpdir, "test.mbox")
  47. with open(mbox_path, "wb") as f:
  48. f.write(mbox_content)
  49. output_dir = os.path.join(tmpdir, "output")
  50. os.makedirs(output_dir)
  51. # Split the mbox
  52. output_files = split_mbox(mbox_path, output_dir)
  53. # Verify output
  54. self.assertEqual(len(output_files), 2)
  55. self.assertEqual(output_files[0], os.path.join(output_dir, "0001"))
  56. self.assertEqual(output_files[1], os.path.join(output_dir, "0002"))
  57. # Check first message
  58. with open(output_files[0], "rb") as f:
  59. content = f.read()
  60. expected = b"""\
  61. From: Alice <alice@example.com>
  62. To: Bob <bob@example.com>
  63. Subject: First message
  64. This is the first message.
  65. """
  66. self.assertEqual(content, expected)
  67. # Check second message
  68. with open(output_files[1], "rb") as f:
  69. content = f.read()
  70. expected = b"""\
  71. From: Bob <bob@example.com>
  72. To: Alice <alice@example.com>
  73. Subject: Second message
  74. This is the second message.
  75. """
  76. self.assertEqual(content, expected)
  77. def test_split_mbox_with_precision(self) -> None:
  78. """Test splitting mbox with custom precision."""
  79. mbox_content = b"""\
  80. From test@example.com Mon Jan 01 00:00:00 2025
  81. From: Test <test@example.com>
  82. Subject: Test
  83. Test message.
  84. """
  85. with tempfile.TemporaryDirectory() as tmpdir:
  86. mbox_path = os.path.join(tmpdir, "test.mbox")
  87. with open(mbox_path, "wb") as f:
  88. f.write(mbox_content)
  89. output_dir = os.path.join(tmpdir, "output")
  90. os.makedirs(output_dir)
  91. # Split with precision=2
  92. output_files = split_mbox(mbox_path, output_dir, precision=2)
  93. self.assertEqual(len(output_files), 1)
  94. self.assertEqual(output_files[0], os.path.join(output_dir, "01"))
  95. def test_split_mbox_with_start_number(self) -> None:
  96. """Test splitting mbox with custom start number."""
  97. mbox_content = b"""\
  98. From test@example.com Mon Jan 01 00:00:00 2025
  99. From: Test <test@example.com>
  100. Subject: Test
  101. Test message.
  102. """
  103. with tempfile.TemporaryDirectory() as tmpdir:
  104. mbox_path = os.path.join(tmpdir, "test.mbox")
  105. with open(mbox_path, "wb") as f:
  106. f.write(mbox_content)
  107. output_dir = os.path.join(tmpdir, "output")
  108. os.makedirs(output_dir)
  109. # Split starting at message 10
  110. output_files = split_mbox(mbox_path, output_dir, start_number=10)
  111. self.assertEqual(len(output_files), 1)
  112. self.assertEqual(output_files[0], os.path.join(output_dir, "0010"))
  113. def test_split_mbox_keep_cr(self) -> None:
  114. """Test splitting mbox with keep_cr option."""
  115. # Note: Python's mailbox module normalizes line endings, so this test
  116. # verifies that keep_cr=False removes CR while keep_cr=True preserves
  117. # whatever the mailbox module outputs
  118. mbox_content = b"""\
  119. From test@example.com Mon Jan 01 00:00:00 2025
  120. From: Test <test@example.com>
  121. Subject: Test
  122. Test message.
  123. """
  124. with tempfile.TemporaryDirectory() as tmpdir:
  125. mbox_path = os.path.join(tmpdir, "test.mbox")
  126. with open(mbox_path, "wb") as f:
  127. f.write(mbox_content)
  128. output_dir = os.path.join(tmpdir, "output")
  129. os.makedirs(output_dir)
  130. # Split without keep_cr (default removes \r\n)
  131. output_files_no_cr = split_mbox(mbox_path, output_dir, keep_cr=False)
  132. with open(output_files_no_cr[0], "rb") as f:
  133. content_no_cr = f.read()
  134. # Verify the output
  135. self.assertEqual(len(output_files_no_cr), 1)
  136. expected = b"""\
  137. From: Test <test@example.com>
  138. Subject: Test
  139. Test message.
  140. """
  141. self.assertEqual(content_no_cr, expected)
  142. def test_split_mbox_from_file_object(self) -> None:
  143. """Test splitting mbox from a file-like object."""
  144. mbox_content = b"""\
  145. From test@example.com Mon Jan 01 00:00:00 2025
  146. From: Test <test@example.com>
  147. Subject: Test
  148. Test message.
  149. """
  150. with tempfile.TemporaryDirectory() as tmpdir:
  151. output_dir = os.path.join(tmpdir, "output")
  152. os.makedirs(output_dir)
  153. # Split from BytesIO
  154. output_files = split_mbox(BytesIO(mbox_content), output_dir)
  155. self.assertEqual(len(output_files), 1)
  156. self.assertTrue(os.path.exists(output_files[0]))
  157. def test_split_mbox_output_dir_not_exists(self) -> None:
  158. """Test that split_mbox raises ValueError if output_dir doesn't exist."""
  159. mbox_content = b"From test@example.com Mon Jan 01 00:00:00 2025\n"
  160. with tempfile.TemporaryDirectory() as tmpdir:
  161. mbox_path = os.path.join(tmpdir, "test.mbox")
  162. with open(mbox_path, "wb") as f:
  163. f.write(mbox_content)
  164. nonexistent_dir = os.path.join(tmpdir, "nonexistent")
  165. with self.assertRaises(ValueError) as cm:
  166. split_mbox(mbox_path, nonexistent_dir)
  167. self.assertIn("does not exist", str(cm.exception))
  168. def test_split_mboxrd(self) -> None:
  169. """Test splitting mboxrd format with >From escaping.
  170. In mboxrd format, lines starting with ">From " have one leading ">" removed.
  171. So ">From " becomes "From " and ">>From " becomes ">From ".
  172. """
  173. mbox_content = b"""\
  174. From test@example.com Mon Jan 01 00:00:00 2025
  175. From: Test <test@example.com>
  176. Subject: Test
  177. >From the beginning...
  178. >>From the middle...
  179. """
  180. with tempfile.TemporaryDirectory() as tmpdir:
  181. mbox_path = os.path.join(tmpdir, "test.mbox")
  182. with open(mbox_path, "wb") as f:
  183. f.write(mbox_content)
  184. output_dir = os.path.join(tmpdir, "output")
  185. os.makedirs(output_dir)
  186. # Split with mboxrd=True
  187. output_files = split_mbox(mbox_path, output_dir, mboxrd=True)
  188. self.assertEqual(len(output_files), 1)
  189. # Check that >From escaping was reversed (one ">" removed per line)
  190. with open(output_files[0], "rb") as f:
  191. content = f.read()
  192. expected = b"""\
  193. From: Test <test@example.com>
  194. Subject: Test
  195. From the beginning...
  196. >From the middle...
  197. """
  198. self.assertEqual(content, expected)
  199. class SplitMaildirTests(TestCase):
  200. """Tests for split_maildir function."""
  201. def test_split_maildir(self) -> None:
  202. """Test splitting a Maildir."""
  203. with tempfile.TemporaryDirectory() as tmpdir:
  204. # Create a Maildir
  205. maildir_path = os.path.join(tmpdir, "maildir")
  206. md = mailbox.Maildir(maildir_path)
  207. # Add two messages
  208. msg1 = mailbox.MaildirMessage()
  209. msg1.set_payload(b"First message")
  210. msg1["From"] = "alice@example.com"
  211. msg1["Subject"] = "First"
  212. md.add(msg1)
  213. msg2 = mailbox.MaildirMessage()
  214. msg2.set_payload(b"Second message")
  215. msg2["From"] = "bob@example.com"
  216. msg2["Subject"] = "Second"
  217. md.add(msg2)
  218. output_dir = os.path.join(tmpdir, "output")
  219. os.makedirs(output_dir)
  220. # Split the Maildir
  221. output_files = split_maildir(maildir_path, output_dir)
  222. # Verify output
  223. self.assertEqual(len(output_files), 2)
  224. self.assertTrue(all(os.path.exists(f) for f in output_files))
  225. # Check that files are numbered correctly
  226. self.assertEqual(output_files[0], os.path.join(output_dir, "0001"))
  227. self.assertEqual(output_files[1], os.path.join(output_dir, "0002"))
  228. def test_split_maildir_not_exists(self) -> None:
  229. """Test that split_maildir raises ValueError if Maildir doesn't exist."""
  230. with tempfile.TemporaryDirectory() as tmpdir:
  231. nonexistent_dir = os.path.join(tmpdir, "nonexistent")
  232. output_dir = os.path.join(tmpdir, "output")
  233. os.makedirs(output_dir)
  234. with self.assertRaises(ValueError) as cm:
  235. split_maildir(nonexistent_dir, output_dir)
  236. self.assertIn("does not exist", str(cm.exception))
  237. def test_split_maildir_with_precision(self) -> None:
  238. """Test splitting Maildir with custom precision."""
  239. with tempfile.TemporaryDirectory() as tmpdir:
  240. maildir_path = os.path.join(tmpdir, "maildir")
  241. md = mailbox.Maildir(maildir_path)
  242. msg = mailbox.MaildirMessage()
  243. msg.set_payload(b"Test message")
  244. msg["From"] = "test@example.com"
  245. md.add(msg)
  246. output_dir = os.path.join(tmpdir, "output")
  247. os.makedirs(output_dir)
  248. # Split with precision=2
  249. output_files = split_maildir(maildir_path, output_dir, precision=2)
  250. self.assertEqual(len(output_files), 1)
  251. self.assertEqual(output_files[0], os.path.join(output_dir, "01"))
  252. class MailinfoTests(TestCase):
  253. """Tests for mbox.mailinfo function."""
  254. def test_mailinfo_from_file_path(self) -> None:
  255. """Test mailinfo with file path."""
  256. from dulwich.mbox import mailinfo
  257. email_content = b"""From: Test User <test@example.com>
  258. Subject: [PATCH] Test patch
  259. Date: Mon, 1 Jan 2024 12:00:00 +0000
  260. This is the commit message.
  261. ---
  262. diff --git a/test.txt b/test.txt
  263. """
  264. with tempfile.TemporaryDirectory() as tmpdir:
  265. email_path = os.path.join(tmpdir, "email.txt")
  266. with open(email_path, "wb") as f:
  267. f.write(email_content)
  268. # Test with file path
  269. result = mailinfo(email_path)
  270. self.assertEqual("Test User", result.author_name)
  271. self.assertEqual("test@example.com", result.author_email)
  272. self.assertEqual("Test patch", result.subject)
  273. self.assertIn("This is the commit message.", result.message)
  274. self.assertIn("diff --git", result.patch)
  275. def test_mailinfo_from_file_object(self) -> None:
  276. """Test mailinfo with file-like object."""
  277. from dulwich.mbox import mailinfo
  278. email_content = b"""From: Test User <test@example.com>
  279. Subject: Test subject
  280. Body text
  281. """
  282. result = mailinfo(BytesIO(email_content))
  283. self.assertEqual("Test User", result.author_name)
  284. self.assertEqual("test@example.com", result.author_email)
  285. self.assertEqual("Test subject", result.subject)
  286. def test_mailinfo_with_options(self) -> None:
  287. """Test mailinfo with various options."""
  288. from dulwich.mbox import mailinfo
  289. email_content = b"""From: Test <test@example.com>
  290. Subject: [PATCH] Feature
  291. Message-ID: <test123@example.com>
  292. Ignore this
  293. -- >8 --
  294. Keep this
  295. """
  296. # Test with scissors and message_id
  297. result = mailinfo(
  298. BytesIO(email_content), scissors=True, message_id=True, keep_subject=False
  299. )
  300. self.assertEqual("Feature", result.subject)
  301. self.assertIn("Keep this", result.message)
  302. self.assertNotIn("Ignore this", result.message)
  303. self.assertIn("Message-ID:", result.message)