浏览代码

Add mailsplit command

Fixes #1840
Jelmer Vernooij 3 月之前
父节点
当前提交
8321f46335
共有 6 个文件被更改,包括 853 次插入0 次删除
  1. 4 0
      NEWS
  2. 87 0
      dulwich/cli.py
  3. 235 0
      dulwich/mbox.py
  4. 78 0
      dulwich/porcelain.py
  5. 1 0
      tests/__init__.py
  6. 448 0
      tests/test_mbox.py

+ 4 - 0
NEWS

@@ -33,6 +33,10 @@
 
  * Add ``dulwich merge-base`` command. (Jelmer Vernooij, #1831)
 
+ * Add support for ``git mailsplit`` command to split mbox files and Maildir
+   into individual message files. Supports mboxrd format, custom precision,
+   and all standard git mailsplit options. (Jelmer Vernooij, #1840)
+
  * Add support for ``git var`` command to display Git's logical variables
    (GIT_AUTHOR_IDENT, GIT_COMMITTER_IDENT, GIT_EDITOR, GIT_SEQUENCE_EDITOR,
    GIT_PAGER, GIT_DEFAULT_BRANCH). Available as ``porcelain.var()`` and

+ 87 - 0
dulwich/cli.py

@@ -4319,6 +4319,92 @@ class cmd_format_patch(Command):
                 logger.info(filename)
 
 
+class cmd_mailsplit(Command):
+    """Split an mbox file or a Maildir into one file per message."""
+
+    def run(self, args: Sequence[str]) -> None:
+        """Parse the command line and hand the work to ``porcelain.mailsplit``.
+
+        Args:
+            args: Command line arguments
+        """
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "mbox",
+            nargs="?",
+            help="Path to mbox file or Maildir. If not specified, reads from stdin.",
+        )
+        parser.add_argument(
+            "-o",
+            "--output-directory",
+            dest="output_dir",
+            required=True,
+            help="Directory in which to place the individual messages",
+        )
+        parser.add_argument(
+            "-b",
+            action="store_true",
+            dest="single_mail",
+            help="If any file doesn't begin with a From line, assume it is a single mail message",
+        )
+        parser.add_argument(
+            "-d",
+            dest="precision",
+            type=int,
+            default=4,
+            help="Number of digits for generated filenames (default: 4)",
+        )
+        parser.add_argument(
+            "-f",
+            dest="start_number",
+            type=int,
+            default=1,
+            help="Skip the first <nn> numbers (default: 1)",
+        )
+        parser.add_argument(
+            "--keep-cr",
+            action="store_true",
+            help="Do not remove \\r from lines ending with \\r\\n",
+        )
+        parser.add_argument(
+            "--mboxrd",
+            action="store_true",
+            help='Input is of the "mboxrd" format and "^>+From " line escaping is reversed',
+        )
+        options = parser.parse_args(args)
+
+        # NOTE(review): ``options.single_mail`` (-b) is accepted above but is
+        # never read below, so the flag currently has no effect.
+
+        # No positional argument means the porcelain layer reads from stdin.
+        input_path = Path(options.mbox) if options.mbox else None
+
+        # The input only counts as a Maildir when it is a directory that holds
+        # all three of the cur, tmp and new subdirectories.
+        is_maildir = (
+            input_path is not None
+            and input_path.is_dir()
+            and all((input_path / sub).exists() for sub in ("cur", "tmp", "new"))
+        )
+
+        written = porcelain.mailsplit(
+            input_path=input_path,
+            output_dir=options.output_dir,
+            start_number=options.start_number,
+            precision=options.precision,
+            keep_cr=options.keep_cr,
+            mboxrd=options.mboxrd,
+            is_maildir=is_maildir,
+        )
+
+        # Report how many messages were produced and where they went.
+        logger.info("Split %d messages into %s", len(written), options.output_dir)
+
+
+
+
 class cmd_bundle(Command):
     """Create, unpack, and manipulate bundle files."""
 
@@ -4911,6 +4997,7 @@ commands = {
     "ls-files": cmd_ls_files,
     "ls-remote": cmd_ls_remote,
     "ls-tree": cmd_ls_tree,
+    "mailsplit": cmd_mailsplit,
     "merge": cmd_merge,
     "merge-base": cmd_merge_base,
     "merge-tree": cmd_merge_tree,

+ 235 - 0
dulwich/mbox.py

@@ -0,0 +1,235 @@
+# mbox.py -- For dealing with mbox files
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Classes for dealing with mbox files and Maildir.
+
+This module provides functionality to split mbox files and Maildir
+into individual message files, similar to git mailsplit.
+"""
+
+import mailbox
+import os
+from collections.abc import Iterable, Iterator
+from pathlib import Path
+from typing import BinaryIO, Union
+
+
+def split_mbox(
+    input_file: Union[str, bytes, BinaryIO],
+    output_dir: Union[str, bytes, Path],
+    start_number: int = 1,
+    precision: int = 4,
+    keep_cr: bool = False,
+    mboxrd: bool = False,
+) -> list[str]:
+    r"""Split an mbox file into individual message files.
+
+    Each message is written to ``output_dir`` under a zero-padded sequential
+    number, starting at ``start_number``.
+
+    Args:
+        input_file: Path to an mbox file (str, or bytes decoded as UTF-8) or a
+            binary file-like object containing mbox data
+        output_dir: Directory where individual messages will be written
+        start_number: Starting number for output files (default: 1)
+        precision: Number of digits for output filenames (default: 4)
+        keep_cr: If True, preserve \r in lines ending with \r\n (default: False)
+        mboxrd: If True, treat input as mboxrd format and reverse escaping (default: False)
+
+    Returns:
+        List of output file paths that were created
+
+    Raises:
+        ValueError: If output_dir doesn't exist or isn't a directory
+        FileNotFoundError: If input_file is a path that does not exist
+        OSError: If there are issues reading/writing files
+    """
+    # Convert output_dir to Path for easier manipulation
+    if isinstance(output_dir, bytes):
+        output_dir = output_dir.decode("utf-8")
+    output_path = Path(output_dir)
+
+    if not output_path.exists():
+        raise ValueError(f"Output directory does not exist: {output_dir}")
+    if not output_path.is_dir():
+        raise ValueError(f"Output path is not a directory: {output_dir}")
+
+    # Open the mbox file
+    mbox_iter: Iterable[mailbox.mboxMessage]
+    if isinstance(input_file, (str, bytes)):
+        if isinstance(input_file, bytes):
+            input_file = input_file.decode("utf-8")
+        try:
+            # create=False: by default mailbox.mbox silently creates an empty
+            # file when the path is missing, which would hide a mistyped path.
+            mbox_iter = mailbox.mbox(input_file, create=False)
+        except mailbox.NoSuchMailboxError as err:
+            raise FileNotFoundError(
+                f"mbox file does not exist: {input_file}"
+            ) from err
+    else:
+        # For file-like objects, we need to read and parse manually
+        mbox_iter = _parse_mbox_from_file(input_file)
+
+    output_files = []
+    try:
+        for msg_number, message in enumerate(mbox_iter, start=start_number):
+            # Build the final content before touching the output file so a
+            # failed conversion does not leave an empty file behind.
+            message_bytes = bytes(message)
+
+            # Handle mboxrd format - reverse the escaping
+            if mboxrd:
+                message_bytes = _reverse_mboxrd_escaping(message_bytes)
+
+            # Handle CR/LF if needed
+            if not keep_cr:
+                message_bytes = message_bytes.replace(b"\r\n", b"\n")
+
+            # Strip trailing newlines (mailbox module adds separator newlines)
+            message_bytes = message_bytes.rstrip(b"\n")
+            if message_bytes:
+                message_bytes += b"\n"
+
+            # Format the output filename with the specified precision
+            output_file_path = output_path / f"{msg_number:0{precision}d}"
+            with open(output_file_path, "wb") as f:
+                f.write(message_bytes)
+
+            output_files.append(str(output_file_path))
+    finally:
+        # Release the underlying file handle. Both mailbox.mbox and the
+        # generator used for file-like input expose close(); closing the
+        # generator also triggers its own cleanup when iteration stopped early.
+        close = getattr(mbox_iter, "close", None)
+        if close is not None:
+            close()
+
+    return output_files
+
+
+def split_maildir(
+    maildir_path: Union[str, bytes, Path],
+    output_dir: Union[str, bytes, Path],
+    start_number: int = 1,
+    precision: int = 4,
+    keep_cr: bool = False,
+) -> list[str]:
+    r"""Write every message of a Maildir to its own numbered file.
+
+    Messages are emitted in the sorted order of their Maildir keys, so the
+    output order relies on the Maildir filenames sorting correctly.
+
+    Args:
+        maildir_path: Path to the Maildir directory (should contain cur, tmp, new subdirectories)
+        output_dir: Directory where individual messages will be written
+        start_number: Starting number for output files (default: 1)
+        precision: Number of digits for output filenames (default: 4)
+        keep_cr: If True, preserve \r in lines ending with \r\n (default: False)
+
+    Returns:
+        List of output file paths that were created
+
+    Raises:
+        ValueError: If maildir_path or output_dir don't exist or aren't valid
+        OSError: If there are issues reading/writing files
+    """
+    # Bytes paths are decoded as UTF-8; the decoded form is what shows up in
+    # error messages.
+    source = (
+        maildir_path.decode("utf-8") if isinstance(maildir_path, bytes) else maildir_path
+    )
+    destination = (
+        output_dir.decode("utf-8") if isinstance(output_dir, bytes) else output_dir
+    )
+
+    source_dir = Path(source)
+    dest_dir = Path(destination)
+
+    # Validate the input first, then the output.
+    if not source_dir.exists():
+        raise ValueError(f"Maildir does not exist: {source}")
+    if not source_dir.is_dir():
+        raise ValueError(f"Maildir path is not a directory: {source}")
+    if not dest_dir.exists():
+        raise ValueError(f"Output directory does not exist: {destination}")
+    if not dest_dir.is_dir():
+        raise ValueError(f"Output path is not a directory: {destination}")
+
+    box = mailbox.Maildir(str(source_dir), factory=None)
+
+    written = []
+    # Sorted keys give a stable numbering from one run to the next.
+    for number, key in enumerate(sorted(box.keys()), start=start_number):
+        message = box[key]
+        target = dest_dir / f"{number:0{precision}d}"
+
+        with open(target, "wb") as out:
+            payload = bytes(message)
+            if not keep_cr:
+                # Collapse CRLF line endings to bare LF.
+                payload = payload.replace(b"\r\n", b"\n")
+            out.write(payload)
+
+        written.append(str(target))
+
+    return written
+
+
+def _parse_mbox_from_file(file_obj: BinaryIO) -> Iterator[mailbox.mboxMessage]:
+    """Parse mbox format from a file-like object.
+
+    The data is spooled to a temporary file because ``mailbox.mbox`` works on
+    a path. The temporary file is removed once iteration finishes, the
+    generator is closed, or an error occurs.
+
+    Args:
+        file_obj: Binary file-like object containing mbox data
+
+    Yields:
+        Individual mboxMessage objects
+    """
+    import shutil
+    import tempfile
+
+    tmp_path = None
+    try:
+        # Stream the input in chunks rather than reading it all into memory.
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp:
+            tmp_path = tmp.name
+            shutil.copyfileobj(file_obj, tmp)
+
+        mbox = mailbox.mbox(tmp_path, create=False)
+        try:
+            yield from mbox
+        finally:
+            mbox.close()
+    finally:
+        # Runs even when copying or opening fails, so the temporary file
+        # never outlives this generator.
+        if tmp_path is not None:
+            os.unlink(tmp_path)
+
+
+def _reverse_mboxrd_escaping(message_bytes: bytes) -> bytes:
+    """Undo mboxrd quoting of ``From `` lines.
+
+    Every line made of one or more ">" characters immediately followed by
+    "From " loses exactly one leading ">". All other lines are left untouched.
+
+    Args:
+        message_bytes: Message content with mboxrd escaping
+
+    Returns:
+        Message content with escaping reversed
+    """
+
+    def unquote(line: bytes) -> bytes:
+        # A line is escaped when stripping the leading ">" run removed at
+        # least one character and what remains begins with "From ".
+        remainder = line.lstrip(b">")
+        escaped = len(remainder) < len(line) and remainder.startswith(b"From ")
+        return line[1:] if escaped else line
+
+    return b"\n".join(map(unquote, message_bytes.split(b"\n")))

+ 78 - 0
dulwich/porcelain.py

@@ -43,6 +43,7 @@ Currently implemented:
  * ls_files
  * ls_remote
  * ls_tree
+ * mailsplit
  * merge
  * merge_tree
  * mv/move
@@ -7667,3 +7668,80 @@ def independent_commits(
 
         # Filter to independent commits
         return independent(r, commit_ids)
+
+
+def mailsplit(
+    input_path: Optional[Union[str, os.PathLike[str], IO[bytes]]] = None,
+    output_dir: Union[str, os.PathLike[str]] = ".",
+    start_number: int = 1,
+    precision: int = 4,
+    keep_cr: bool = False,
+    mboxrd: bool = False,
+    is_maildir: bool = False,
+) -> list[str]:
+    r"""Split an mbox file or Maildir into individual message files.
+
+    This is similar to git mailsplit. Maildir input is dispatched to
+    ``split_maildir`` and everything else to ``split_mbox``.
+
+    Args:
+        input_path: Path to mbox file, Maildir, or file-like object. If None, reads from stdin.
+        output_dir: Directory where individual messages will be written
+        start_number: Starting number for output files (default: 1)
+        precision: Number of digits for output filenames (default: 4)
+        keep_cr: If True, preserve \r in lines ending with \r\n (default: False)
+        mboxrd: If True, treat input as mboxrd format and reverse escaping (default: False)
+        is_maildir: If True, treat input_path as a Maildir (default: False)
+
+    Returns:
+        List of output file paths that were created
+
+    Raises:
+        ValueError: If output_dir doesn't exist or input is invalid
+        OSError: If there are issues reading/writing files
+    """
+    from typing import Any, BinaryIO, cast
+
+    from .mbox import split_maildir, split_mbox
+
+    def plain(value: Any) -> Any:
+        # PathLike objects become their filesystem representation; anything
+        # else (str, bytes, file objects) passes through untouched.
+        return os.fspath(value) if isinstance(value, os.PathLike) else value
+
+    if is_maildir:
+        # A Maildir is a directory tree, so it can only be given as a path.
+        if input_path is None:
+            raise ValueError("input_path is required for Maildir splitting")
+        if not isinstance(input_path, (str, bytes, os.PathLike)):
+            raise ValueError("Maildir splitting requires a path, not a file object")
+        return split_maildir(
+            plain(input_path),
+            plain(output_dir),
+            start_number=start_number,
+            precision=precision,
+            keep_cr=keep_cr,
+        )
+
+    # mbox input: fall back to stdin when no path or file object was supplied.
+    source = cast(
+        Union[str, bytes, BinaryIO],
+        sys.stdin.buffer if input_path is None else plain(input_path),
+    )
+    return split_mbox(
+        source,
+        plain(output_dir),
+        start_number=start_number,
+        precision=precision,
+        keep_cr=keep_cr,
+        mboxrd=mboxrd,
+    )

+ 1 - 0
tests/__init__.py

@@ -157,6 +157,7 @@ def self_test_suite() -> unittest.TestSuite:
         "log_utils",
         "lru_cache",
         "mailmap",
+        "mbox",
         "merge",
         "merge_drivers",
         "missing_obj_finder",

+ 448 - 0
tests/test_mbox.py

@@ -0,0 +1,448 @@
+# test_mbox.py -- tests for mbox.py
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for mbox.py."""
+
+import mailbox
+import os
+import tempfile
+from io import BytesIO
+
+from dulwich import porcelain
+from dulwich.mbox import split_maildir, split_mbox
+
+from . import TestCase
+
+
+class SplitMboxTests(TestCase):
+    """Tests for split_mbox function."""
+
+    # Each test writes a small mbox fixture into a temporary directory, splits
+    # it into a fresh "output" subdirectory and inspects the resulting files.
+
+    def test_split_simple_mbox(self) -> None:
+        """Test splitting a simple mbox with two messages."""
+        mbox_content = b"""\
+From alice@example.com Mon Jan 01 00:00:00 2025
+From: Alice <alice@example.com>
+To: Bob <bob@example.com>
+Subject: First message
+
+This is the first message.
+
+From bob@example.com Mon Jan 01 00:01:00 2025
+From: Bob <bob@example.com>
+To: Alice <alice@example.com>
+Subject: Second message
+
+This is the second message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create temporary mbox file
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split the mbox
+            output_files = split_mbox(mbox_path, output_dir)
+
+            # Verify output
+            self.assertEqual(len(output_files), 2)
+            self.assertEqual(output_files[0], os.path.join(output_dir, "0001"))
+            self.assertEqual(output_files[1], os.path.join(output_dir, "0002"))
+
+            # Check first message (the "From " separator line is not expected
+            # in the output, and the blank separator line is stripped)
+            with open(output_files[0], "rb") as f:
+                content = f.read()
+                expected = b"""\
+From: Alice <alice@example.com>
+To: Bob <bob@example.com>
+Subject: First message
+
+This is the first message.
+"""
+                self.assertEqual(content, expected)
+
+            # Check second message
+            with open(output_files[1], "rb") as f:
+                content = f.read()
+                expected = b"""\
+From: Bob <bob@example.com>
+To: Alice <alice@example.com>
+Subject: Second message
+
+This is the second message.
+"""
+                self.assertEqual(content, expected)
+
+    def test_split_mbox_with_precision(self) -> None:
+        """Test splitting mbox with custom precision."""
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+Test message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split with precision=2
+            output_files = split_mbox(mbox_path, output_dir, precision=2)
+
+            self.assertEqual(len(output_files), 1)
+            self.assertEqual(output_files[0], os.path.join(output_dir, "01"))
+
+    def test_split_mbox_with_start_number(self) -> None:
+        """Test splitting mbox with custom start number."""
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+Test message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split starting at message 10
+            output_files = split_mbox(mbox_path, output_dir, start_number=10)
+
+            self.assertEqual(len(output_files), 1)
+            self.assertEqual(output_files[0], os.path.join(output_dir, "0010"))
+
+    def test_split_mbox_keep_cr(self) -> None:
+        """Test splitting mbox with keep_cr option."""
+        # Note: Python's mailbox module normalizes line endings, so this test
+        # verifies that keep_cr=False removes CR while keep_cr=True preserves
+        # whatever the mailbox module outputs
+        # NOTE(review): only keep_cr=False is actually called below, and the
+        # fixture contains no CR characters; keep_cr=True is not exercised.
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+Test message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split without keep_cr (default removes \r\n)
+            output_files_no_cr = split_mbox(mbox_path, output_dir, keep_cr=False)
+            with open(output_files_no_cr[0], "rb") as f:
+                content_no_cr = f.read()
+
+            # Verify the output
+            self.assertEqual(len(output_files_no_cr), 1)
+            expected = b"""\
+From: Test <test@example.com>
+Subject: Test
+
+Test message.
+"""
+            self.assertEqual(content_no_cr, expected)
+
+    def test_split_mbox_from_file_object(self) -> None:
+        """Test splitting mbox from a file-like object."""
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+Test message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split from BytesIO
+            output_files = split_mbox(BytesIO(mbox_content), output_dir)
+
+            # Only the count and existence of the output are checked here
+            self.assertEqual(len(output_files), 1)
+            self.assertTrue(os.path.exists(output_files[0]))
+
+    def test_split_mbox_output_dir_not_exists(self) -> None:
+        """Test that split_mbox raises ValueError if output_dir doesn't exist."""
+        mbox_content = b"From test@example.com Mon Jan 01 00:00:00 2025\n"
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            nonexistent_dir = os.path.join(tmpdir, "nonexistent")
+
+            with self.assertRaises(ValueError) as cm:
+                split_mbox(mbox_path, nonexistent_dir)
+
+            self.assertIn("does not exist", str(cm.exception))
+
+    def test_split_mboxrd(self) -> None:
+        """Test splitting mboxrd format with >From escaping.
+
+        In mboxrd format, lines starting with ">From " have one leading ">" removed.
+        So ">From " becomes "From " and ">>From " becomes ">From ".
+        """
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+>From the beginning...
+>>From the middle...
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split with mboxrd=True
+            output_files = split_mbox(mbox_path, output_dir, mboxrd=True)
+
+            self.assertEqual(len(output_files), 1)
+
+            # Check that >From escaping was reversed (one ">" removed per line)
+            with open(output_files[0], "rb") as f:
+                content = f.read()
+                expected = b"""\
+From: Test <test@example.com>
+Subject: Test
+
+From the beginning...
+>From the middle...
+"""
+                self.assertEqual(content, expected)
+
+
+class SplitMaildirTests(TestCase):
+    """Tests for split_maildir function."""
+
+    # Fixtures are real Maildirs built with the stdlib mailbox module inside a
+    # temporary directory.
+
+    def test_split_maildir(self) -> None:
+        """Test splitting a Maildir."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create a Maildir
+            maildir_path = os.path.join(tmpdir, "maildir")
+            md = mailbox.Maildir(maildir_path)
+
+            # Add two messages
+            msg1 = mailbox.MaildirMessage()
+            msg1.set_payload(b"First message")
+            msg1["From"] = "alice@example.com"
+            msg1["Subject"] = "First"
+            md.add(msg1)
+
+            msg2 = mailbox.MaildirMessage()
+            msg2.set_payload(b"Second message")
+            msg2["From"] = "bob@example.com"
+            msg2["Subject"] = "Second"
+            md.add(msg2)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split the Maildir
+            output_files = split_maildir(maildir_path, output_dir)
+
+            # Verify output (names and existence only; which message ended up
+            # in which file is not asserted)
+            self.assertEqual(len(output_files), 2)
+            self.assertTrue(all(os.path.exists(f) for f in output_files))
+
+            # Check that files are numbered correctly
+            self.assertEqual(output_files[0], os.path.join(output_dir, "0001"))
+            self.assertEqual(output_files[1], os.path.join(output_dir, "0002"))
+
+    def test_split_maildir_not_exists(self) -> None:
+        """Test that split_maildir raises ValueError if Maildir doesn't exist."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            nonexistent_dir = os.path.join(tmpdir, "nonexistent")
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            with self.assertRaises(ValueError) as cm:
+                split_maildir(nonexistent_dir, output_dir)
+
+            self.assertIn("does not exist", str(cm.exception))
+
+    def test_split_maildir_with_precision(self) -> None:
+        """Test splitting Maildir with custom precision."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            maildir_path = os.path.join(tmpdir, "maildir")
+            md = mailbox.Maildir(maildir_path)
+
+            msg = mailbox.MaildirMessage()
+            msg.set_payload(b"Test message")
+            msg["From"] = "test@example.com"
+            md.add(msg)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split with precision=2
+            output_files = split_maildir(maildir_path, output_dir, precision=2)
+
+            self.assertEqual(len(output_files), 1)
+            self.assertEqual(output_files[0], os.path.join(output_dir, "01"))
+
+
+class PorcelainMailsplitTests(TestCase):
+    """Tests for porcelain.mailsplit function."""
+
+    # These tests go through the porcelain entry point rather than calling
+    # split_mbox / split_maildir directly.
+
+    def test_mailsplit_mbox(self) -> None:
+        """Test porcelain mailsplit with mbox file."""
+        mbox_content = b"""\
+From alice@example.com Mon Jan 01 00:00:00 2025
+From: Alice <alice@example.com>
+Subject: Test
+
+Test message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split using porcelain function
+            output_files = porcelain.mailsplit(
+                input_path=mbox_path, output_dir=output_dir
+            )
+
+            self.assertEqual(len(output_files), 1)
+            self.assertEqual(output_files[0], os.path.join(output_dir, "0001"))
+
+    def test_mailsplit_maildir(self) -> None:
+        """Test porcelain mailsplit with Maildir."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create a Maildir
+            maildir_path = os.path.join(tmpdir, "maildir")
+            md = mailbox.Maildir(maildir_path)
+
+            msg = mailbox.MaildirMessage()
+            msg.set_payload(b"Test message")
+            msg["From"] = "test@example.com"
+            md.add(msg)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split using porcelain function with is_maildir=True
+            output_files = porcelain.mailsplit(
+                input_path=maildir_path, output_dir=output_dir, is_maildir=True
+            )
+
+            self.assertEqual(len(output_files), 1)
+            self.assertTrue(os.path.exists(output_files[0]))
+
+    def test_mailsplit_with_options(self) -> None:
+        """Test porcelain mailsplit with various options."""
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+Test message.
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split with custom options
+            output_files = porcelain.mailsplit(
+                input_path=mbox_path,
+                output_dir=output_dir,
+                start_number=5,
+                precision=3,
+                keep_cr=True,
+            )
+
+            # Only the generated file name is asserted; the effect of
+            # keep_cr=True on the file content is not checked here.
+            self.assertEqual(len(output_files), 1)
+            self.assertEqual(output_files[0], os.path.join(output_dir, "005"))
+
+    def test_mailsplit_mboxrd(self) -> None:
+        """Test porcelain mailsplit with mboxrd format."""
+        mbox_content = b"""\
+From test@example.com Mon Jan 01 00:00:00 2025
+From: Test <test@example.com>
+Subject: Test
+
+>From quoted text
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mbox_path = os.path.join(tmpdir, "test.mbox")
+            with open(mbox_path, "wb") as f:
+                f.write(mbox_content)
+
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            # Split with mboxrd=True
+            output_files = porcelain.mailsplit(
+                input_path=mbox_path, output_dir=output_dir, mboxrd=True
+            )
+
+            self.assertEqual(len(output_files), 1)
+
+            # Verify >From escaping was reversed
+            with open(output_files[0], "rb") as f:
+                content = f.read()
+                expected = b"""\
+From: Test <test@example.com>
+Subject: Test
+
+From quoted text
+"""
+                self.assertEqual(content, expected)
+
+    def test_mailsplit_maildir_requires_path(self) -> None:
+        """Test that mailsplit raises ValueError when is_maildir=True but no input_path."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            output_dir = os.path.join(tmpdir, "output")
+            os.makedirs(output_dir)
+
+            with self.assertRaises(ValueError) as cm:
+                porcelain.mailsplit(
+                    input_path=None, output_dir=output_dir, is_maildir=True
+                )
+
+            # The match is case-insensitive on the word "required"
+            self.assertIn("required", str(cm.exception).lower())