Selaa lähdekoodia

Add mailinfo command

Implement mailinfo command to extract patch information from
email messages.

Fixes #1839
Jelmer Vernooij 2 kuukautta sitten
vanhempi
commit
19ea2e9e95
10 muutettua tiedostoa jossa 1019 lisäystä ja 6 poistoa
  1. 1 1
      .codespellrc
  2. 6 0
      NEWS
  3. 75 0
      dulwich/cli.py
  4. 60 2
      dulwich/mbox.py
  5. 247 3
      dulwich/patch.py
  6. 82 0
      dulwich/porcelain.py
  7. 165 0
      tests/test_cli.py
  8. 67 0
      tests/test_mbox.py
  9. 213 0
      tests/test_patch.py
  10. 103 0
      tests/test_porcelain.py

+ 1 - 1
.codespellrc

@@ -1,3 +1,3 @@
 [codespell]
 skip = .git,.mypy_cache,build,testdata
-ignore-words-list = fpr,claus,feld,nd,bu,ue,te,fo,afile,manuel,checkin,assertIn,rin
+ignore-words-list = fpr,claus,feld,nd,bu,ue,te,fo,afile,manuel,checkin,assertIn,rin,caf

+ 6 - 0
NEWS

@@ -2,6 +2,12 @@
 
  * Drop support for Python 3.9. (Jelmer Vernooij)
 
+ * Add support for ``git mailinfo`` command to extract patch information from
+   email messages. Implements ``dulwich mailinfo`` CLI command,
+   ``porcelain.mailinfo()``, and ``patch.mailinfo()`` with support for subject
+   munging, -k/-b flags, --scissors, --encoding, and --message-id options.
+   (Jelmer Vernooij, #1839)
+
 0.24.8	2025-10-29
 
  * Add Rust implementation of pack delta creation (create_delta). The

+ 75 - 0
dulwich/cli.py

@@ -5493,6 +5493,80 @@ class cmd_mailsplit(Command):
         )
 
 
+class cmd_mailinfo(Command):
+    """Extract patch information from an email message.
+
+    The commit message and the patch are written to the two files named on
+    the command line; the author, email, subject and (when present) date are
+    printed to stdout.
+    """
+
+    def run(self, args: Sequence[str]) -> None:
+        """Execute the mailinfo command.
+
+        Args:
+            args: Command line arguments
+        """
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "msg",
+            help="File to write commit message",
+        )
+        parser.add_argument(
+            "patch",
+            help="File to write patch content",
+        )
+        parser.add_argument(
+            "mail",
+            nargs="?",
+            help="Path to email file. If not specified, reads from stdin.",
+        )
+        # The -k/-b flags are handled natively by porcelain.mailinfo(); no
+        # external git process is involved, so the help text describes the
+        # effect on the subject rather than a pass-through.
+        parser.add_argument(
+            "-k",
+            action="store_true",
+            dest="keep_subject",
+            help=(
+                "Keep the subject intact (do not strip Re: prefixes or "
+                "bracketed tags such as [PATCH])"
+            ),
+        )
+        parser.add_argument(
+            "-b",
+            action="store_true",
+            dest="keep_non_patch",
+            help=(
+                "Only strip bracketed tags containing PATCH from the subject; "
+                "keep other bracketed tags"
+            ),
+        )
+        parser.add_argument(
+            "--encoding",
+            dest="encoding",
+            help="Character encoding to use (default: detect from message)",
+        )
+        parser.add_argument(
+            "--scissors",
+            action="store_true",
+            help="Remove everything before scissors line",
+        )
+        parser.add_argument(
+            "-m",
+            "--message-id",
+            action="store_true",
+            dest="message_id",
+            help="Copy Message-ID to the end of the commit message",
+        )
+        parsed_args = parser.parse_args(args)
+
+        # parsed_args.mail is None when the positional is omitted, which
+        # porcelain.mailinfo() treats as "read from stdin".
+        result = porcelain.mailinfo(
+            input_path=parsed_args.mail,
+            msg_file=parsed_args.msg,
+            patch_file=parsed_args.patch,
+            keep_subject=parsed_args.keep_subject,
+            keep_non_patch=parsed_args.keep_non_patch,
+            encoding=parsed_args.encoding,
+            scissors=parsed_args.scissors,
+            message_id=parsed_args.message_id,
+        )
+
+        # Print author info to stdout (as git mailinfo does)
+        print(f"Author: {result.author_name}")
+        print(f"Email: {result.author_email}")
+        print(f"Subject: {result.subject}")
+        if result.author_date:
+            print(f"Date: {result.author_date}")
+
+
 class cmd_bundle(Command):
     """Create, unpack, and manipulate bundle files."""
 
@@ -6090,6 +6164,7 @@ commands = {
     "ls-remote": cmd_ls_remote,
     "ls-tree": cmd_ls_tree,
     "maintenance": cmd_maintenance,
+    "mailinfo": cmd_mailinfo,
     "mailsplit": cmd_mailsplit,
     "merge": cmd_merge,
     "merge-base": cmd_merge_base,

+ 60 - 2
dulwich/mbox.py

@@ -22,14 +22,18 @@
 """Classes for dealing with mbox files and Maildir.
 
 This module provides functionality to split mbox files and Maildir
-into individual message files, similar to git mailsplit.
+into individual message files, similar to git mailsplit, and to extract
+patch information from email messages, similar to git mailinfo.
 """
 
 import mailbox
 import os
 from collections.abc import Iterable, Iterator
 from pathlib import Path
-from typing import BinaryIO
+from typing import TYPE_CHECKING, BinaryIO, TextIO
+
+if TYPE_CHECKING:
+    from .patch import MailinfoResult
 
 
 def split_mbox(
@@ -233,3 +237,57 @@ def _reverse_mboxrd_escaping(message_bytes: bytes) -> bytes:
             result_lines.append(line)
 
     return b"\n".join(result_lines)
+
+
+def mailinfo(
+    input_file: str | bytes | BinaryIO | TextIO,
+    keep_subject: bool = False,
+    keep_non_patch: bool = False,
+    encoding: str | None = None,
+    scissors: bool = False,
+    message_id: bool = False,
+) -> "MailinfoResult":
+    """Extract patch information from an email message.
+
+    High-level wrapper around patch.mailinfo() that handles file I/O.
+
+    Args:
+        input_file: Path to email file (str or bytes) or file-like object
+            (binary or text)
+        keep_subject: If True, keep subject intact without munging (-k)
+        keep_non_patch: If True, only strip [PATCH] from brackets (-b)
+        encoding: Character encoding to use (default: detect from message)
+        scissors: If True, remove everything before scissors line
+        message_id: If True, include Message-ID in commit message (-m)
+
+    Returns:
+        MailinfoResult with parsed information (from patch.mailinfo)
+
+    Raises:
+        ValueError: If message is malformed or missing required fields
+        OSError: If there are issues reading the file
+    """
+    from .patch import mailinfo as patch_mailinfo
+
+    if isinstance(input_file, (str, bytes)):
+        # open() takes str and bytes paths alike, so a bytes path is passed
+        # through untouched instead of being decoded as UTF-8 (which fails
+        # for paths that are not valid UTF-8).
+        with open(input_file, "rb") as f:
+            # Re-enter with the open handle so the options are spelled out
+            # in exactly one place below.
+            return mailinfo(
+                f,
+                keep_subject=keep_subject,
+                keep_non_patch=keep_non_patch,
+                encoding=encoding,
+                scissors=scissors,
+                message_id=message_id,
+            )
+
+    # File-like object: hand it straight to the parser.
+    return patch_mailinfo(
+        input_file,
+        keep_subject=keep_subject,
+        keep_non_patch=keep_non_patch,
+        encoding=encoding,
+        scissors=scissors,
+        message_id=message_id,
+    )

+ 247 - 3
dulwich/patch.py

@@ -25,9 +25,13 @@ These patches are basically unified diffs with some extra metadata tacked
 on.
 """
 
+import email.message
 import email.parser
+import email.utils
+import re
 import time
 from collections.abc import Generator, Sequence
+from dataclasses import dataclass
 from difflib import SequenceMatcher
 from typing import (
     IO,
@@ -38,8 +42,6 @@ from typing import (
 )
 
 if TYPE_CHECKING:
-    import email.message
-
     from .object_store import BaseObjectStore
 
 from .objects import S_ISGITLINK, Blob, Commit
@@ -592,7 +594,7 @@ def git_am_patch_split(
 
 
 def parse_patch_message(
-    msg: "email.message.Message", encoding: str | None = None
+    msg: email.message.Message, encoding: str | None = None
 ) -> tuple["Commit", bytes, bytes | None]:
     """Extract a Commit object and patch from an e-mail message.
 
@@ -759,3 +761,245 @@ def commit_patch_id(store: "BaseObjectStore", commit_id: bytes) -> bytes:
     write_tree_diff(diff_output, store, parent_tree, commit.tree)
 
     return patch_id(diff_output.getvalue())
+
+
+@dataclass
+class MailinfoResult:
+    """Result of mailinfo parsing.
+
+    Plain value object returned by ``mailinfo()``; all text fields are
+    already decoded to ``str``.
+
+    Attributes:
+        author_name: Author's name, taken from the ``From`` header
+        author_email: Author's email address, taken from the ``From`` header
+        author_date: Raw ``Date`` header value, or None if the email has none
+        subject: Processed subject line
+        message: Commit message body (whitespace-stripped; ends with a
+            ``Message-ID:`` line when that was requested and available)
+        patch: Patch content, starting at the ``---`` separator line; empty
+            when the body contains no separator
+        message_id: Message-ID header (if -m/--message-id was used)
+    """
+
+    author_name: str
+    author_email: str
+    author_date: str | None
+    subject: str
+    message: str
+    patch: str
+    message_id: str | None = None
+
+
+def _munge_subject(subject: str, keep_subject: bool, keep_non_patch: bool) -> str:
+    """Munge email subject line for commit message.
+
+    Reply prefixes ("Re:") and bracketed tags are stripped repeatedly until
+    the subject stops changing, so they are removed in whatever order they
+    appear (e.g. both "Re: [PATCH] x" and "[PATCH] Re: x" become "x").
+
+    Args:
+        subject: Original subject line
+        keep_subject: If True, keep subject intact (-k option)
+        keep_non_patch: If True, only strip [PATCH] (-b option)
+
+    Returns:
+        Processed subject line
+    """
+    if keep_subject:
+        return subject
+
+    result = subject
+
+    while True:
+        previous = result
+
+        # Drop one leading "Re:" (any case); further ones go on later passes.
+        result = re.sub(r"^\s*re:\s*", "", result, flags=re.IGNORECASE)
+
+        if keep_non_patch:
+            # Only remove brackets containing "PATCH", wherever they appear.
+            # The whitespace following the bracket is captured and put back
+            # so that "[RFC][PATCH] x" keeps the space in "[RFC] x".
+            result = re.sub(
+                r"\[[^\]]*?PATCH[^\]]*?\](\s+)?", r"\1", result, flags=re.IGNORECASE
+            )
+        else:
+            # Remove one leading bracketed string of any kind.
+            result = re.sub(r"^\s*\[.*?\]\s*", "", result)
+
+        # Every substitution strictly shortens the string, so this terminates.
+        if result == previous:
+            break
+
+    # Remove leading/trailing whitespace
+    result = result.strip()
+
+    # Normalize multiple whitespace to single space
+    result = re.sub(r"\s+", " ", result)
+
+    return result
+
+
+def _find_scissors_line(lines: list[bytes]) -> int | None:
+    """Find the scissors line in message body.
+
+    A scissors line is a dashed line that carries a scissors mark (``8<`` or
+    ``>8``), e.g. ``-- >8 --``, or a dashed ``cut here`` / ``scissors`` line.
+    A line made of dashes alone (such as the ``---`` patch separator or a
+    ``--`` signature marker) is not a scissors line.
+
+    Args:
+        lines: List of lines in the message body
+
+    Returns:
+        Index of scissors line, or None if not found
+    """
+    # The scissors mark in the first alternative is mandatory: were it
+    # optional, a bare "---" would match and the whole commit message above
+    # the patch separator would be cut away.
+    scissors_pattern = re.compile(
+        rb"^(?:>?\s*-+\s*)?(?:8<|>8)\s*-+\s*$"
+        rb"|^(?:>?\s*-+\s*)(?:cut here|scissors)(?:\s*-+)?$",
+        re.IGNORECASE,
+    )
+
+    for i, line in enumerate(lines):
+        # Surrounding whitespace and the line terminator are ignored.
+        if scissors_pattern.match(line.strip()):
+            return i
+
+    return None
+
+
+def _decode_mime_header(value: str) -> str:
+    """Decode RFC 2047 encoded-words in a header value.
+
+    Args:
+        value: Raw header text, possibly containing ``=?charset?q?...?=`` words
+
+    Returns:
+        The decoded text, or ``value`` unchanged if it cannot be decoded
+    """
+    import email.errors
+    import email.header
+
+    try:
+        return str(email.header.make_header(email.header.decode_header(value)))
+    except (LookupError, UnicodeError, email.errors.HeaderParseError):
+        # Unknown charset or malformed encoded-word: keep the raw text rather
+        # than failing the whole parse over one undecodable header.
+        return value
+
+
+def mailinfo(
+    msg: email.message.Message | BinaryIO | TextIO,
+    keep_subject: bool = False,
+    keep_non_patch: bool = False,
+    encoding: str | None = None,
+    scissors: bool = False,
+    message_id: bool = False,
+) -> MailinfoResult:
+    """Extract patch information from an email message.
+
+    This function parses an email message and extracts commit metadata
+    (author, email, subject) and separates the commit message from the
+    patch content, similar to git mailinfo.
+
+    Args:
+        msg: Email message (email.message.Message object) or file handle to read from
+        keep_subject: If True, keep subject intact without munging (-k)
+        keep_non_patch: If True, only strip [PATCH] from brackets (-b)
+        encoding: Character encoding to use (default: detect from message)
+        scissors: If True, remove everything before scissors line
+        message_id: If True, include Message-ID in commit message (-m)
+
+    Returns:
+        MailinfoResult with parsed information
+
+    Raises:
+        ValueError: If message is malformed or missing required fields
+    """
+    import codecs
+
+    # Parse message if given a file handle
+    parsed_msg: email.message.Message
+    if not isinstance(msg, email.message.Message):
+        if hasattr(msg, "read"):
+            content = msg.read()
+            if isinstance(content, bytes):
+                bparser = email.parser.BytesParser()
+                parsed_msg = bparser.parsebytes(content)
+            else:
+                sparser = email.parser.Parser()
+                parsed_msg = sparser.parsestr(content)
+        else:
+            raise ValueError("msg must be an email.message.Message or file-like object")
+    else:
+        parsed_msg = msg
+
+    # Detect encoding from message if not specified
+    if encoding is None:
+        encoding = parsed_msg.get_content_charset() or "utf-8"
+        try:
+            codecs.lookup(encoding)
+        except LookupError:
+            # The charset announced by the mail is not one Python knows;
+            # fall back to UTF-8 instead of failing later at decode time.
+            # An explicitly requested encoding is never overridden.
+            encoding = "utf-8"
+
+    # Extract author information
+    from_header = parsed_msg.get("From", "")
+    if not from_header:
+        raise ValueError("Email message missing 'From' header")
+
+    # Parse "Name <email>" format
+    author_name, author_email = email.utils.parseaddr(str(from_header))
+    if not author_email:
+        raise ValueError(
+            f"Could not parse email address from 'From' header: {from_header}"
+        )
+    # Display names are frequently RFC 2047 encoded (non-ASCII names)
+    author_name = _decode_mime_header(author_name)
+
+    # Extract date
+    date_header = parsed_msg.get("Date")
+    author_date = date_header if date_header else None
+
+    # Extract and process subject
+    subject = parsed_msg.get("Subject", "")
+    if not subject:
+        subject = "(no subject)"
+
+    # Convert Header object to string if needed, then decode any RFC 2047
+    # encoded-words; the parser hands them back undecoded.
+    subject = _decode_mime_header(str(subject))
+
+    # Remove newlines from subject
+    subject = subject.replace("\n", " ").replace("\r", " ")
+    subject = _munge_subject(subject, keep_subject, keep_non_patch)
+
+    # Extract Message-ID if requested
+    msg_id = None
+    if message_id:
+        msg_id = parsed_msg.get("Message-ID")
+
+    # Get message body
+    # NOTE: get_payload(decode=True) returns None for multipart messages, so
+    # a multipart mail currently yields an empty message and patch.
+    body = parsed_msg.get_payload(decode=True)
+    if body is None:
+        body = b""
+    elif isinstance(body, str):
+        body = body.encode(encoding)
+    elif not isinstance(body, bytes):
+        # Handle multipart or other types
+        body = str(body).encode(encoding)
+
+    # Split into lines
+    lines = body.splitlines(keepends=True)
+
+    # Handle scissors
+    if scissors:
+        scissors_idx = _find_scissors_line(lines)
+        if scissors_idx is not None:
+            # Remove everything up to and including scissors line
+            lines = lines[scissors_idx + 1 :]
+
+    # Separate commit message from patch
+    # Look for the "---" separator that indicates start of diffstat/patch
+    message_lines: list[bytes] = []
+    patch_lines: list[bytes] = []
+    in_patch = False
+
+    for line in lines:
+        # Compare without the terminator so CRLF mail and a final line with
+        # no newline are recognised as well.
+        bare = line.rstrip(b"\r\n")
+        if not in_patch and bare == b"---":
+            in_patch = True
+            patch_lines.append(line)
+        elif in_patch:
+            # Stop at signature marker "-- "
+            if bare == b"-- ":
+                break
+            patch_lines.append(line)
+        else:
+            message_lines.append(line)
+
+    # Build commit message
+    commit_message = b"".join(message_lines).decode(encoding, errors="replace")
+
+    # Clean up commit message
+    commit_message = commit_message.strip()
+
+    # Append Message-ID if requested
+    if message_id and msg_id:
+        if commit_message:
+            commit_message += "\n\n"
+        commit_message += f"Message-ID: {msg_id}"
+
+    # Build patch content
+    patch_content = b"".join(patch_lines).decode(encoding, errors="replace")
+
+    return MailinfoResult(
+        author_name=author_name,
+        author_email=author_email,
+        author_date=author_date,
+        subject=subject,
+        message=commit_message,
+        patch=patch_content,
+        message_id=msg_id,
+    )

+ 82 - 0
dulwich/porcelain.py

@@ -184,6 +184,7 @@ from .objectspec import (
 )
 from .pack import UnpackedObject, write_pack_from_container, write_pack_index
 from .patch import (
+    MailinfoResult,
     get_summary,
     write_commit_patch,
     write_object_diff,
@@ -8743,3 +8744,84 @@ def mailsplit(
             keep_cr=keep_cr,
             mboxrd=mboxrd,
         )
+
+
+def mailinfo(
+    input_path: str | os.PathLike[str] | IO[bytes] | IO[str] | None = None,
+    msg_file: str | os.PathLike[str] | None = None,
+    patch_file: str | os.PathLike[str] | None = None,
+    keep_subject: bool = False,
+    keep_non_patch: bool = False,
+    encoding: str | None = None,
+    scissors: bool = False,
+    message_id: bool = False,
+) -> MailinfoResult:
+    """Extract patch information from an email message.
+
+    This is similar to git mailinfo.
+
+    Args:
+        input_path: Path to email file or file-like object. If None, reads from stdin.
+        msg_file: Path to write commit message. If None, message not written to file.
+        patch_file: Path to write patch content. If None, patch not written to file.
+        keep_subject: If True, keep subject intact without munging (-k)
+        keep_non_patch: If True, only strip [PATCH] from brackets (-b)
+        encoding: Character encoding to use (default: detect from message).
+            Also used when writing msg_file/patch_file (UTF-8 if None).
+        scissors: If True, remove everything before scissors line
+        message_id: If True, include Message-ID in commit message (-m)
+
+    Returns:
+        MailinfoResult with parsed information
+
+    Raises:
+        ValueError: If message is malformed or missing required fields
+        OSError: If there are issues reading/writing files
+
+    Example:
+        >>> result = mailinfo("patch.eml", "msg", "patch")
+        >>> print(f"Author: {result.author_name} <{result.author_email}>")
+        >>> print(f"Subject: {result.subject}")
+    """
+    from typing import BinaryIO, TextIO, cast
+
+    from .mbox import mailinfo as mbox_mailinfo
+
+    input_file: str | bytes | BinaryIO | TextIO
+    if input_path is None:
+        # Read from stdin
+        input_file = sys.stdin.buffer
+    elif isinstance(input_path, os.PathLike):
+        # Convert PathLike to str
+        input_file = os.fspath(input_path)
+    else:
+        # input_path is either str or IO[bytes] or IO[str] here
+        input_file = cast(str | BinaryIO | TextIO, input_path)
+
+    result = mbox_mailinfo(
+        input_file,
+        keep_subject=keep_subject,
+        keep_non_patch=keep_non_patch,
+        encoding=encoding,
+        scissors=scissors,
+        message_id=message_id,
+    )
+
+    output_encoding = encoding or "utf-8"
+
+    # newline="" disables newline translation on write, so the line endings
+    # of the message and the patch reach the files exactly as parsed (text
+    # mode would otherwise rewrite "\n" to the platform separator, which
+    # alters the patch on Windows). open() accepts PathLike objects directly.
+
+    # Write message to file if requested
+    if msg_file is not None:
+        with open(msg_file, "w", encoding=output_encoding, newline="") as f:
+            f.write(result.message)
+            # The parsed message is stripped; make the file end with a newline.
+            if not result.message.endswith("\n"):
+                f.write("\n")
+
+    # Write patch to file if requested
+    if patch_file is not None:
+        with open(patch_file, "w", encoding=output_encoding, newline="") as f:
+            f.write(result.patch)
+
+    return result

+ 165 - 0
tests/test_cli.py

@@ -4027,5 +4027,170 @@ class StripspaceCommandTest(DulwichCliTestCase):
         self.assertEqual(stdout, "hello\n\nworld\n")
 
 
+class MailinfoCommandTests(DulwichCliTestCase):
+    """Tests for the mailinfo command."""
+
+    # Every test writes a small email fixture into self.test_dir and passes
+    # it as the optional "mail" argument; reading the email from stdin is
+    # not exercised here.
+
+    def test_mailinfo_basic(self):
+        """Test basic mailinfo command."""
+        email_content = b"""From: Test User <test@example.com>
+Subject: [PATCH] Add feature
+Date: Mon, 1 Jan 2024 12:00:00 +0000
+
+This is the commit message.
+
+---
+diff --git a/file.txt b/file.txt
+"""
+        email_file = os.path.join(self.test_dir, "email.txt")
+        with open(email_file, "wb") as f:
+            f.write(email_content)
+
+        msg_file = os.path.join(self.test_dir, "msg")
+        patch_file = os.path.join(self.test_dir, "patch")
+
+        result, stdout, _stderr = self._run_cli(
+            "mailinfo", msg_file, patch_file, email_file
+        )
+        self.assertIsNone(result)
+
+        # Check stdout contains author info
+        self.assertIn("Author: Test User", stdout)
+        self.assertIn("Email: test@example.com", stdout)
+        self.assertIn("Subject: Add feature", stdout)
+
+        # Check files were written
+        self.assertTrue(os.path.exists(msg_file))
+        self.assertTrue(os.path.exists(patch_file))
+
+        # Check file contents
+        with open(msg_file) as f:
+            msg_content = f.read()
+            self.assertIn("This is the commit message.", msg_content)
+
+        with open(patch_file) as f:
+            patch_content = f.read()
+            self.assertIn("diff --git", patch_content)
+
+    def test_mailinfo_keep_subject(self):
+        """Test mailinfo with -k flag."""
+        email_content = b"""From: Test <test@example.com>
+Subject: [PATCH 1/2] Feature
+
+Body
+"""
+        email_file = os.path.join(self.test_dir, "email.txt")
+        with open(email_file, "wb") as f:
+            f.write(email_content)
+
+        msg_file = os.path.join(self.test_dir, "msg")
+        patch_file = os.path.join(self.test_dir, "patch")
+
+        result, stdout, _stderr = self._run_cli(
+            "mailinfo", "-k", msg_file, patch_file, email_file
+        )
+        self.assertIsNone(result)
+        # With -k the bracketed tag must survive in the printed subject
+        self.assertIn("Subject: [PATCH 1/2] Feature", stdout)
+
+    def test_mailinfo_keep_non_patch(self):
+        """Test mailinfo with -b flag."""
+        email_content = b"""From: Test <test@example.com>
+Subject: [RFC][PATCH] Feature
+
+Body
+"""
+        email_file = os.path.join(self.test_dir, "email.txt")
+        with open(email_file, "wb") as f:
+            f.write(email_content)
+
+        msg_file = os.path.join(self.test_dir, "msg")
+        patch_file = os.path.join(self.test_dir, "patch")
+
+        result, stdout, _stderr = self._run_cli(
+            "mailinfo", "-b", msg_file, patch_file, email_file
+        )
+        self.assertIsNone(result)
+        # With -b only the [PATCH] tag is dropped; [RFC] stays
+        self.assertIn("Subject: [RFC] Feature", stdout)
+
+    def test_mailinfo_scissors(self):
+        """Test mailinfo with --scissors flag."""
+        email_content = b"""From: Test <test@example.com>
+Subject: Test
+
+Ignore this part
+
+-- >8 --
+
+Keep this part
+"""
+        email_file = os.path.join(self.test_dir, "email.txt")
+        with open(email_file, "wb") as f:
+            f.write(email_content)
+
+        msg_file = os.path.join(self.test_dir, "msg")
+        patch_file = os.path.join(self.test_dir, "patch")
+
+        result, _stdout, _stderr = self._run_cli(
+            "mailinfo", "--scissors", msg_file, patch_file, email_file
+        )
+        self.assertIsNone(result)
+
+        # Check message file
+        with open(msg_file) as f:
+            msg_content = f.read()
+            self.assertIn("Keep this part", msg_content)
+            self.assertNotIn("Ignore this part", msg_content)
+
+    def test_mailinfo_message_id(self):
+        """Test mailinfo with -m flag."""
+        email_content = b"""From: Test <test@example.com>
+Subject: Test
+Message-ID: <test123@example.com>
+
+Body
+"""
+        email_file = os.path.join(self.test_dir, "email.txt")
+        with open(email_file, "wb") as f:
+            f.write(email_content)
+
+        msg_file = os.path.join(self.test_dir, "msg")
+        patch_file = os.path.join(self.test_dir, "patch")
+
+        result, _stdout, _stderr = self._run_cli(
+            "mailinfo", "-m", msg_file, patch_file, email_file
+        )
+        self.assertIsNone(result)
+
+        # Check message file contains Message-ID
+        with open(msg_file) as f:
+            msg_content = f.read()
+            self.assertIn("Message-ID:", msg_content)
+
+    def test_mailinfo_encoding(self):
+        """Test mailinfo with --encoding flag."""
+        email_content = (
+            b"From: Test <test@example.com>\n"
+            b"Subject: Test\n"
+            b"Content-Type: text/plain; charset=utf-8\n"
+            b"\n"
+            b"Body with UTF-8: " + "naïve".encode() + b"\n"
+        )
+        email_file = os.path.join(self.test_dir, "email.txt")
+        with open(email_file, "wb") as f:
+            f.write(email_content)
+
+        msg_file = os.path.join(self.test_dir, "msg")
+        patch_file = os.path.join(self.test_dir, "patch")
+
+        result, _stdout, _stderr = self._run_cli(
+            "mailinfo", "--encoding", "utf-8", msg_file, patch_file, email_file
+        )
+        self.assertIsNone(result)
+
+        # Just verify the command runs successfully
+        # (only the ASCII word "Body" is checked, not the non-ASCII text)
+        with open(msg_file) as f:
+            msg_content = f.read()
+            self.assertIn("Body", msg_content)
+
+
 if __name__ == "__main__":
     unittest.main()

+ 67 - 0
tests/test_mbox.py

@@ -446,3 +446,70 @@ From quoted text
                 )
 
             self.assertIn("required", str(cm.exception).lower())
+
+
+class MailinfoTests(TestCase):
+    """Tests for mbox.mailinfo function."""
+
+    # Two input forms are covered: a str filesystem path and a binary
+    # file-like object (BytesIO). Bytes paths and text-mode file objects are
+    # not exercised by these tests.
+
+    def test_mailinfo_from_file_path(self) -> None:
+        """Test mailinfo with file path."""
+        from dulwich.mbox import mailinfo
+
+        email_content = b"""From: Test User <test@example.com>
+Subject: [PATCH] Test patch
+Date: Mon, 1 Jan 2024 12:00:00 +0000
+
+This is the commit message.
+
+---
+diff --git a/test.txt b/test.txt
+"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            email_path = os.path.join(tmpdir, "email.txt")
+            with open(email_path, "wb") as f:
+                f.write(email_content)
+
+            # Test with file path
+            result = mailinfo(email_path)
+            self.assertEqual("Test User", result.author_name)
+            self.assertEqual("test@example.com", result.author_email)
+            self.assertEqual("Test patch", result.subject)
+            self.assertIn("This is the commit message.", result.message)
+            self.assertIn("diff --git", result.patch)
+
+    def test_mailinfo_from_file_object(self) -> None:
+        """Test mailinfo with file-like object."""
+        from dulwich.mbox import mailinfo
+
+        email_content = b"""From: Test User <test@example.com>
+Subject: Test subject
+
+Body text
+"""
+        result = mailinfo(BytesIO(email_content))
+        self.assertEqual("Test User", result.author_name)
+        self.assertEqual("test@example.com", result.author_email)
+        self.assertEqual("Test subject", result.subject)
+
+    def test_mailinfo_with_options(self) -> None:
+        """Test mailinfo with various options."""
+        from dulwich.mbox import mailinfo
+
+        email_content = b"""From: Test <test@example.com>
+Subject: [PATCH] Feature
+Message-ID: <test123@example.com>
+
+Ignore this
+
+-- >8 --
+
+Keep this
+"""
+        # Test with scissors and message_id
+        result = mailinfo(
+            BytesIO(email_content), scissors=True, message_id=True, keep_subject=False
+        )
+        self.assertEqual("Feature", result.subject)
+        self.assertIn("Keep this", result.message)
+        self.assertNotIn("Ignore this", result.message)
+        self.assertIn("Message-ID:", result.message)

+ 213 - 0
tests/test_patch.py

@@ -902,3 +902,216 @@ index 3b0f961..a116b51 644
         pid = commit_patch_id(store, commit.id)
         self.assertEqual(40, len(pid))
         self.assertTrue(all(c in b"0123456789abcdef" for c in pid))
+
+
+class MailinfoTests(TestCase):
+    """Tests for mailinfo functionality."""
+
+    # All tests feed dulwich.patch.mailinfo() a BytesIO holding a small
+    # hand-written email and inspect the returned result object.
+
+    def test_basic_parsing(self):
+        """Test basic email parsing."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email_content = b"""From: John Doe <john@example.com>
+Date: Mon, 1 Jan 2024 12:00:00 +0000
+Subject: [PATCH] Add new feature
+Message-ID: <test@example.com>
+
+This is the commit message.
+
+More details here.
+
+---
+ file.txt | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/file.txt b/file.txt
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1,2 @@
+ line1
++line2
+--
+2.39.0
+"""
+        result = mailinfo(BytesIO(email_content))
+
+        self.assertEqual("John Doe", result.author_name)
+        self.assertEqual("john@example.com", result.author_email)
+        self.assertEqual("Add new feature", result.subject)
+        self.assertIn("This is the commit message.", result.message)
+        self.assertIn("More details here.", result.message)
+        self.assertIn("diff --git a/file.txt b/file.txt", result.patch)
+
+    def test_subject_munging(self):
+        """Test subject line munging."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        # Test with [PATCH] tag
+        email = b"""From: Test <test@example.com>
+Subject: [PATCH 1/2] Fix bug
+
+Body
+"""
+        result = mailinfo(BytesIO(email))
+        self.assertEqual("Fix bug", result.subject)
+
+        # Test with Re: prefix
+        email = b"""From: Test <test@example.com>
+Subject: Re: [PATCH] Fix bug
+
+Body
+"""
+        result = mailinfo(BytesIO(email))
+        self.assertEqual("Fix bug", result.subject)
+
+        # Test with multiple brackets
+        email = b"""From: Test <test@example.com>
+Subject: [RFC][PATCH] New feature
+
+Body
+"""
+        result = mailinfo(BytesIO(email))
+        self.assertEqual("New feature", result.subject)
+
+    def test_keep_subject(self):
+        """Test -k flag (keep subject intact)."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""From: Test <test@example.com>
+Subject: [PATCH 1/2] Fix bug
+
+Body
+"""
+        result = mailinfo(BytesIO(email), keep_subject=True)
+        self.assertEqual("[PATCH 1/2] Fix bug", result.subject)
+
+    def test_keep_non_patch(self):
+        """Test -b flag (only strip [PATCH])."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""From: Test <test@example.com>
+Subject: [RFC][PATCH] New feature
+
+Body
+"""
+        result = mailinfo(BytesIO(email), keep_non_patch=True)
+        self.assertEqual("[RFC] New feature", result.subject)
+
+    def test_scissors(self):
+        """Test scissors line handling."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""From: Test <test@example.com>
+Subject: Test
+
+Ignore this part
+
+-- >8 --
+
+Keep this part
+
+---
+diff --git a/file.txt b/file.txt
+"""
+        result = mailinfo(BytesIO(email), scissors=True)
+        self.assertIn("Keep this part", result.message)
+        self.assertNotIn("Ignore this part", result.message)
+
+    def test_message_id(self):
+        """Test -m flag (include Message-ID)."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""From: Test <test@example.com>
+Subject: Test
+Message-ID: <12345@example.com>
+
+Body text
+"""
+        result = mailinfo(BytesIO(email), message_id=True)
+        self.assertIn("Message-ID: <12345@example.com>", result.message)
+        self.assertEqual("<12345@example.com>", result.message_id)
+
+    def test_encoding(self):
+        """Test encoding handling."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        # Use explicit UTF-8 bytes with MIME encoded subject
+        email = (
+            b"From: Test <test@example.com>\n"
+            b"Subject: =?utf-8?q?Test_with_UTF-8=3A_caf=C3=A9?=\n"
+            b"Content-Type: text/plain; charset=utf-8\n"
+            b"Content-Transfer-Encoding: 8bit\n"
+            b"\n"
+            b"Body with UTF-8: " + "naïve".encode() + b"\n"
+        )
+
+        result = mailinfo(BytesIO(email), encoding="utf-8")
+        # The subject should be decoded from MIME encoding
+        # NOTE(review): "caf" also occurs in the raw encoded-word
+        # ("caf=C3=A9"), so this assertion passes even if the subject is
+        # left undecoded; likewise "na" is pure ASCII.
+        self.assertIn("caf", result.subject)
+        self.assertIn("na", result.message)
+
+    def test_patch_separation(self):
+        """Test separation of message from patch."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""From: Test <test@example.com>
+Subject: Test
+
+Commit message line 1
+Commit message line 2
+
+---
+ file.txt | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/file.txt b/file.txt
+"""
+        result = mailinfo(BytesIO(email))
+        self.assertIn("Commit message line 1", result.message)
+        self.assertIn("Commit message line 2", result.message)
+        self.assertIn("---", result.patch)
+        self.assertIn("diff --git", result.patch)
+        self.assertNotIn("---", result.message)
+
+    def test_no_subject(self):
+        """Test handling of missing subject."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""From: Test <test@example.com>
+
+Body text
+"""
+        result = mailinfo(BytesIO(email))
+        self.assertEqual("(no subject)", result.subject)
+
+    def test_missing_from_header(self):
+        """Test error on missing From header."""
+        from io import BytesIO
+
+        from dulwich.patch import mailinfo
+
+        email = b"""Subject: Test
+
+Body text
+"""
+        with self.assertRaises(ValueError) as cm:
+            mailinfo(BytesIO(email))
+        self.assertIn("From", str(cm.exception))

+ 103 - 0
tests/test_porcelain.py

@@ -10905,3 +10905,106 @@ class GitReflogActionTests(PorcelainTestCase):
         entries = list(porcelain.reflog(self.repo_path, b"refs/heads/test-branch"))
         self.assertEqual(1, len(entries))
         self.assertTrue(entries[0].message.startswith(b"branch: Created from"))
+
+
+class PorcelainMailinfoTests(TestCase):
+    """Exercise ``porcelain.mailinfo`` with in-memory and on-disk input."""
+
+    def test_mailinfo_basic(self) -> None:
+        """Check author, subject, message and patch for a simple patch mail."""
+        from io import BytesIO
+
+        raw = (
+            b"From: Test User <test@example.com>\n"
+            b"Subject: [PATCH] Add feature\n"
+            b"Date: Mon, 1 Jan 2024 12:00:00 +0000\n"
+            b"\n"
+            b"Commit message here.\n"
+            b"\n"
+            b"---\n"
+            b"diff --git a/file.txt b/file.txt\n"
+        )
+        info = porcelain.mailinfo(BytesIO(raw))
+
+        # Header-derived fields; the expected subject has no "[PATCH]" prefix.
+        self.assertEqual("Test User", info.author_name)
+        self.assertEqual("test@example.com", info.author_email)
+        self.assertEqual("Add feature", info.subject)
+
+        # Body-derived fields.
+        self.assertIn("Commit message here.", info.message)
+        self.assertIn("diff --git", info.patch)
+
+    def test_mailinfo_write_to_files(self) -> None:
+        """Check that ``msg_file`` and ``patch_file`` are created and filled."""
+        from io import BytesIO
+
+        raw = (
+            b"From: Test <test@example.com>\n"
+            b"Subject: Test\n"
+            b"\n"
+            b"Message content\n"
+            b"\n"
+            b"---\n"
+            b"Patch content\n"
+        )
+        with tempfile.TemporaryDirectory() as workdir:
+            msg_path = os.path.join(workdir, "msg")
+            patch_path = os.path.join(workdir, "patch")
+
+            info = porcelain.mailinfo(
+                BytesIO(raw), msg_file=msg_path, patch_file=patch_path
+            )
+
+            # Both output files must exist after the call.
+            for written in (msg_path, patch_path):
+                self.assertTrue(os.path.exists(written))
+
+            # Each output file must hold its share of the mail body.
+            expectations = (
+                (msg_path, "Message content"),
+                (patch_path, "Patch content"),
+            )
+            for written, expected in expectations:
+                with open(written) as fh:
+                    text = fh.read()
+                    self.assertIn(expected, text)
+
+            # The parsed result is still returned when writing to files.
+            self.assertEqual("Test", info.subject)
+
+    def test_mailinfo_with_options(self) -> None:
+        """Check ``keep_non_patch``, ``scissors`` and ``message_id`` together."""
+        from io import BytesIO
+
+        raw = (
+            b"From: Test <test@example.com>\n"
+            b"Subject: [RFC][PATCH] Feature\n"
+            b"Message-ID: <id@example.com>\n"
+            b"\n"
+            b"Text before scissors\n"
+            b"\n"
+            b"-- >8 --\n"
+            b"\n"
+            b"Text after scissors\n"
+        )
+        options = {
+            "keep_non_patch": True,
+            "scissors": True,
+            "message_id": True,
+        }
+        info = porcelain.mailinfo(BytesIO(raw), **options)
+
+        # Expected subject keeps "[RFC]" but drops "[PATCH]".
+        self.assertEqual("[RFC] Feature", info.subject)
+        # Only the text after the "-- >8 --" line is expected in the message.
+        self.assertIn("Text after scissors", info.message)
+        self.assertNotIn("Text before scissors", info.message)
+        # A Message-ID line is expected inside the message.
+        self.assertIn("Message-ID:", info.message)
+
+    def test_mailinfo_from_file_path(self) -> None:
+        """Check that ``input_path`` makes mailinfo read the mail from disk."""
+        raw = b"From: Test <test@example.com>\nSubject: Test\n\nBody\n"
+        with tempfile.TemporaryDirectory() as workdir:
+            email_path = os.path.join(workdir, "email.txt")
+            with open(email_path, "wb") as out:
+                out.write(raw)
+
+            info = porcelain.mailinfo(input_path=email_path)
+            self.assertEqual("Test", info.subject)
+            self.assertEqual("test@example.com", info.author_email)