فهرست منبع

Add support for dulwich stripspace (#1967)

Implements functionality to clean up whitespace in text and commit
messages, commonly used for normalizing commit message formatting before
commits.

Fixes #1838
Jelmer Vernooij 2 ماه پیش
والد
کامیت
87033b1500
7 فایلهای تغییر یافته به همراه 516 افزوده شده و 0 حذف شده
  1. 3 0
      NEWS
  2. 54 0
      dulwich/cli.py
  3. 61 0
      dulwich/porcelain.py
  4. 127 0
      dulwich/stripspace.py
  5. 1 0
      tests/__init__.py
  6. 91 0
      tests/test_cli.py
  7. 179 0
      tests/test_stripspace.py

+ 3 - 0
NEWS

@@ -36,6 +36,9 @@
    reflog time specifications (``@{time}``) using Git's approxidate format (e.g.,
    ``HEAD@{yesterday}``, ``master@{2.weeks.ago}``). (Jelmer Vernooij, #1783)
 
+ * Add ``dulwich stripspace`` command to remove unnecessary whitespace from text.
+   (Jelmer Vernooij, #1838)
+
 0.24.7	2025-10-23
 
  * Add sparse index support for improved performance with large repositories.

+ 54 - 0
dulwich/cli.py

@@ -1566,6 +1566,59 @@ class cmd_interpret_trailers(Command):
         sys.stdout.buffer.write(result)
 
 
+class cmd_stripspace(Command):
+    """Clean up whitespace in text read from a file or from standard input."""
+
+    def run(self, args: Sequence[str]) -> None:
+        """Parse the command line, filter the input and write the result.
+
+        The input is read as raw bytes (from the optional ``file`` argument,
+        otherwise from stdin), handed to ``porcelain.stripspace`` and the
+        returned bytes are written unmodified to stdout.
+
+        Args:
+            args: Command line arguments
+        """
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "file",
+            nargs="?",
+            help="File to read text from. If not specified, reads from stdin.",
+        )
+        # The two boolean switches share the same shape, so register them
+        # from a small table (order matters only for the --help listing).
+        for short_flag, long_flag, description in (
+            ("-s", "--strip-comments", "Strip lines that begin with comment character"),
+            ("-c", "--comment-lines", "Prepend comment character to each line"),
+        ):
+            parser.add_argument(
+                short_flag,
+                long_flag,
+                action="store_true",
+                help=description,
+            )
+        parser.add_argument(
+            "--comment-char",
+            default="#",
+            help="Comment character to use (default: #)",
+        )
+        options = parser.parse_args(args)
+
+        # A missing (or empty) file argument means "read from stdin".
+        if not options.file:
+            raw = sys.stdin.buffer.read()
+        else:
+            with open(options.file, "rb") as source:
+                raw = source.read()
+
+        cleaned = porcelain.stripspace(
+            raw,
+            strip_comments=options.strip_comments,
+            comment_char=options.comment_char,
+            comment_lines=options.comment_lines,
+        )
+
+        # Bytes in, bytes out: bypass the text layer of stdout.
+        sys.stdout.buffer.write(cleaned)
+
+
 class cmd_init(Command):
     """Create an empty Git repository or reinitialize an existing one."""
 
@@ -6070,6 +6123,7 @@ commands = {
     "show-ref": cmd_show_ref,
     "stash": cmd_stash,
     "status": cmd_status,
+    "stripspace": cmd_stripspace,
     "shortlog": cmd_shortlog,
     "symbolic-ref": cmd_symbolic_ref,
     "submodule": cmd_submodule,

+ 61 - 0
dulwich/porcelain.py

@@ -970,6 +970,67 @@ def interpret_trailers(
         return message_bytes
 
 
+def stripspace(
+    text: Union[str, bytes],
+    *,
+    strip_comments: bool = False,
+    comment_char: str = "#",
+    comment_lines: bool = False,
+) -> bytes:
+    r"""Strip unnecessary whitespace from text.
+
+    This function implements the functionality of `git stripspace`, commonly
+    used to clean up commit messages and other text content. It is a thin
+    wrapper that encodes ``str`` arguments as UTF-8 and delegates to
+    ``dulwich.stripspace.stripspace``.
+
+    Args:
+        text: The text to process (string or bytes)
+        strip_comments: If True, remove lines that begin with comment_char
+        comment_char: The comment character to use (default: "#")
+        comment_lines: If True, prepend comment_char to each non-blank line
+
+    Returns:
+        The processed text as bytes
+
+    The function performs the following operations:
+        1. Strip leading and trailing whitespace from each line
+        2. If comment_lines is True, prepend comment_char + space to each
+           non-blank line
+        3. If strip_comments is True, remove lines starting with comment_char
+        4. Remove leading blank lines
+        5. Remove trailing blank lines
+        6. Collapse multiple consecutive blank lines into a single blank line
+        7. Ensure the text ends with a newline (unless empty)
+
+    Examples:
+        >>> stripspace(b"  hello  \n\n\nworld  \n\n")
+        b'hello\n\nworld\n'
+
+        >>> stripspace(b"# comment\ntext\n", strip_comments=True)
+        b'text\n'
+
+        >>> stripspace(b"line\n", comment_lines=True)
+        b'# line\n'
+    """
+    # Imported lazily, inside the function, as in the rest of this wrapper's
+    # original implementation.
+    from dulwich.stripspace import stripspace as _stripspace
+
+    # The implementation works on bytes only; encode str input as UTF-8.
+    text_bytes = text.encode("utf-8") if isinstance(text, str) else text
+
+    # The annotation says str, but a bytes comment_char is passed through
+    # unchanged rather than rejected.
+    comment_char_bytes = (
+        comment_char.encode("utf-8") if isinstance(comment_char, str) else comment_char
+    )
+
+    return _stripspace(
+        text_bytes,
+        strip_comments=strip_comments,
+        comment_char=comment_char_bytes,
+        comment_lines=comment_lines,
+    )
+
+
 def init(
     path: Union[str, os.PathLike[str]] = ".",
     *,

+ 127 - 0
dulwich/stripspace.py

@@ -0,0 +1,127 @@
+# stripspace.py -- Git stripspace functionality
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+
+"""Git stripspace functionality for cleaning up text and commit messages."""
+
+
+def stripspace(
+    text: bytes,
+    *,
+    strip_comments: bool = False,
+    comment_char: bytes = b"#",
+    comment_lines: bool = False,
+) -> bytes:
+    """Strip unnecessary whitespace from text.
+
+    This function mimics the behavior of `git stripspace`, which is commonly
+    used to clean up commit messages and other text content.
+
+    Args:
+      text: The text to process (as bytes)
+      strip_comments: If True, remove lines whose first non-whitespace bytes
+        are comment_char
+      comment_char: The comment character to use (default: b"#")
+      comment_lines: If True, prepend comment_char and a space to each
+        non-blank line
+
+    Returns:
+      The processed text as bytes; b"" when the input is empty or only blank
+      (or removed comment) lines remain
+
+    The function performs the following operations (in order):
+      1. Strip leading and trailing whitespace from each line, keeping the
+         line's own terminator (LF, CRLF or CR)
+      2. If comment_lines is True, prepend comment_char + space to each
+         non-blank line
+      3. If strip_comments is True, remove lines starting with comment_char
+      4. Remove leading blank lines
+      5. Remove trailing blank lines
+      6. Collapse multiple consecutive blank lines into a single blank line
+      7. Ensure the text ends with a newline (unless empty)
+    """
+    if not text:
+        return b""
+
+    # Steps 1 & 2: normalise every line individually.
+    processed_lines = []
+    for line in text.splitlines(keepends=True):
+        # Remember the terminator so it can be re-attached unchanged; only
+        # the final line of the input can lack one.
+        if line.endswith(b"\r\n"):
+            line_ending = b"\r\n"
+        elif line.endswith(b"\n"):
+            line_ending = b"\n"
+        elif line.endswith(b"\r"):
+            line_ending = b"\r"
+        else:
+            line_ending = b""
+
+        # NOTE(review): leading whitespace is removed as well; git's own
+        # stripspace is documented as trimming only trailing whitespace --
+        # confirm this difference is intended.
+        stripped_content = line.rstrip(b"\r\n\t ").lstrip()
+
+        # Blank lines are deliberately left uncommented.
+        if comment_lines and stripped_content:
+            stripped_content = comment_char + b" " + stripped_content
+
+        processed_lines.append(stripped_content + line_ending)
+
+    # Step 3: drop comment lines if requested.
+    if strip_comments:
+        processed_lines = [
+            line
+            for line in processed_lines
+            if not line.lstrip().startswith(comment_char)
+        ]
+
+    def is_blank(line: bytes) -> bool:
+        return not line.strip()
+
+    # Steps 4 & 5: trim blank lines at both ends by moving indices instead of
+    # repeatedly popping from the front of the list, which would be quadratic
+    # for inputs with many leading blank lines.
+    start = 0
+    end = len(processed_lines)
+    while start < end and is_blank(processed_lines[start]):
+        start += 1
+    while end > start and is_blank(processed_lines[end - 1]):
+        end -= 1
+
+    # Step 6: collapse runs of blank lines, keeping the first of each run.
+    collapsed_lines = []
+    prev_was_blank = False
+    for line in processed_lines[start:end]:
+        is_current_blank = is_blank(line)
+        if is_current_blank and prev_was_blank:
+            continue
+        collapsed_lines.append(line)
+        prev_was_blank = is_current_blank
+
+    if not collapsed_lines:
+        return b""
+
+    # Step 7: make sure the output is terminated. A CRLF ending is already
+    # covered by the b"\n" check; a lone trailing CR is accepted as-is.
+    result = b"".join(collapsed_lines)
+    if not result.endswith((b"\n", b"\r")):
+        result += b"\n"
+
+    return result

+ 1 - 0
tests/__init__.py

@@ -184,6 +184,7 @@ def self_test_suite() -> unittest.TestSuite:
         "server",
         "sparse_patterns",
         "stash",
+        "stripspace",
         "submodule",
         "trailers",
         "utils",

+ 91 - 0
tests/test_cli.py

@@ -3936,5 +3936,96 @@ class ReplaceCommandTest(DulwichCliTestCase):
         self.assertEqual(result, 1)
 
 
+class StripspaceCommandTest(DulwichCliTestCase):
+    """End-to-end checks of the ``stripspace`` CLI command."""
+
+    def _write_message(self, payload):
+        """Write ``payload`` to ``message.txt`` in the test dir; return the path."""
+        path = os.path.join(self.test_dir, "message.txt")
+        with open(path, "wb") as handle:
+            handle.write(payload)
+        return path
+
+    def _assert_stripspace(self, payload, expected, *options):
+        """Run ``stripspace [options] <file>`` on ``payload`` and check stdout.
+
+        The command is expected to return None and to print ``expected``.
+        """
+        path = self._write_message(payload)
+        result, stdout, _stderr = self._run_cli("stripspace", *options, path)
+        self.assertIsNone(result)
+        self.assertEqual(stdout, expected)
+
+    def test_stripspace_from_file(self):
+        """Surrounding whitespace and surplus blank lines are cleaned up."""
+        self._assert_stripspace(
+            b"  hello  \n\n\n\n  world  \n\n",
+            "hello\n\nworld\n",
+        )
+
+    def test_stripspace_simple(self):
+        """Already clean input passes through unchanged."""
+        self._assert_stripspace(b"hello\nworld\n", "hello\nworld\n")
+
+    def test_stripspace_trailing_whitespace(self):
+        """Trailing spaces and tabs are dropped."""
+        self._assert_stripspace(b"hello  \nworld\t\n", "hello\nworld\n")
+
+    def test_stripspace_strip_comments(self):
+        """``--strip-comments`` removes ``#`` lines."""
+        self._assert_stripspace(
+            b"# comment\nhello\n# another comment\nworld\n",
+            "hello\nworld\n",
+            "--strip-comments",
+        )
+
+    def test_stripspace_comment_lines(self):
+        """``--comment-lines`` prefixes each line with ``# ``."""
+        self._assert_stripspace(
+            b"hello\nworld\n",
+            "# hello\n# world\n",
+            "--comment-lines",
+        )
+
+    def test_stripspace_custom_comment_char(self):
+        """``--comment-char`` changes which lines count as comments."""
+        self._assert_stripspace(
+            b"; comment\nhello\n; another comment\nworld\n",
+            "hello\nworld\n",
+            "--strip-comments",
+            "--comment-char",
+            ";",
+        )
+
+    def test_stripspace_leading_trailing_blanks(self):
+        """Blank lines at the start and end are removed."""
+        self._assert_stripspace(b"\n\nhello\nworld\n\n\n", "hello\nworld\n")
+
+    def test_stripspace_collapse_blank_lines(self):
+        """A run of blank lines shrinks to a single blank line."""
+        self._assert_stripspace(b"hello\n\n\n\nworld\n", "hello\n\nworld\n")
+
+
 if __name__ == "__main__":
     unittest.main()

+ 179 - 0
tests/test_stripspace.py

@@ -0,0 +1,179 @@
+# test_stripspace.py -- Tests for stripspace functionality
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+
+"""Tests for stripspace functionality."""
+
+import unittest
+
+from dulwich.stripspace import stripspace
+
+
+class StripspaceTests(unittest.TestCase):
+    """Unit tests for ``dulwich.stripspace.stripspace``."""
+
+    def _check(self, raw, wanted, **options):
+        """Assert that ``stripspace(raw, **options)`` equals ``wanted``."""
+        self.assertEqual(stripspace(raw, **options), wanted)
+
+    def test_empty_input(self):
+        """Empty input yields empty output."""
+        self._check(b"", b"")
+
+    def test_simple_text(self):
+        """Clean text is returned unchanged."""
+        self._check(b"hello\nworld\n", b"hello\nworld\n")
+
+    def test_trailing_whitespace(self):
+        """Trailing spaces and tabs are removed."""
+        self._check(b"hello  \nworld\t\n", b"hello\nworld\n")
+
+    def test_multiple_blank_lines(self):
+        """Several blank lines in a row collapse to one."""
+        self._check(b"hello\n\n\n\nworld\n", b"hello\n\nworld\n")
+
+    def test_leading_blank_lines(self):
+        """Blank lines before the first content line are dropped."""
+        self._check(b"\n\n\nhello\nworld\n", b"hello\nworld\n")
+
+    def test_trailing_blank_lines(self):
+        """Blank lines after the last content line are dropped."""
+        self._check(b"hello\nworld\n\n\n", b"hello\nworld\n")
+
+    def test_leading_and_trailing_blank_lines(self):
+        """Blank lines are dropped at both ends at once."""
+        self._check(b"\n\nhello\nworld\n\n", b"hello\nworld\n")
+
+    def test_adds_final_newline(self):
+        """A missing final newline is appended."""
+        self._check(b"hello\nworld", b"hello\nworld\n")
+
+    def test_complex_whitespace(self):
+        """Indentation, trailing spaces and blank runs are all cleaned."""
+        self._check(b"  hello  \n\n\n\n  world  \n\n", b"hello\n\nworld\n")
+
+    def test_strip_comments(self):
+        """Lines starting with the comment character are removed."""
+        self._check(
+            b"# comment\nhello\n# another comment\nworld\n",
+            b"hello\nworld\n",
+            strip_comments=True,
+        )
+
+    def test_strip_comments_indented(self):
+        """Comment lines preceded by spaces or tabs are removed too."""
+        self._check(
+            b"  # comment\nhello\n\t# another comment\nworld\n",
+            b"hello\nworld\n",
+            strip_comments=True,
+        )
+
+    def test_strip_comments_custom_char(self):
+        """A custom comment character is honoured when stripping."""
+        self._check(
+            b"; comment\nhello\n; another comment\nworld\n",
+            b"hello\nworld\n",
+            strip_comments=True,
+            comment_char=b";",
+        )
+
+    def test_comment_lines(self):
+        """Each line gets the comment character prepended."""
+        self._check(b"hello\nworld\n", b"# hello\n# world\n", comment_lines=True)
+
+    def test_comment_lines_custom_char(self):
+        """A custom comment character is honoured when commenting."""
+        self._check(
+            b"hello\nworld\n",
+            b"; hello\n; world\n",
+            comment_lines=True,
+            comment_char=b";",
+        )
+
+    def test_only_whitespace(self):
+        """Whitespace-only input reduces to nothing."""
+        self._check(b"  \n\t\n  \n", b"")
+
+    def test_only_blank_lines(self):
+        """Input made solely of newlines reduces to nothing."""
+        self._check(b"\n\n\n", b"")
+
+    def test_crlf_line_endings(self):
+        """Windows-style line endings are preserved."""
+        self._check(b"hello  \r\nworld\t\r\n", b"hello\r\nworld\r\n")
+
+    def test_mixed_line_endings(self):
+        """Each line keeps its own style of line ending."""
+        self._check(b"hello  \r\nworld\t\n", b"hello\r\nworld\n")
+
+    def test_commit_message_example(self):
+        """A realistic commit message is tidied as expected."""
+        raw = (
+            b"\n"
+            b"\n"
+            b"Add new feature\n"
+            b"\n"
+            b"This commit adds a new feature to the project.\n"
+            b"\n"
+            b"\n"
+            b"Signed-off-by: Alice <alice@example.com>\n"
+            b"\n"
+        )
+        wanted = (
+            b"Add new feature\n"
+            b"\n"
+            b"This commit adds a new feature to the project.\n"
+            b"\n"
+            b"Signed-off-by: Alice <alice@example.com>\n"
+        )
+        self._check(raw, wanted)
+
+    def test_strip_comments_preserves_non_comment_hash(self):
+        """A ``#`` in the middle of a line does not make it a comment."""
+        self._check(
+            b"# comment\nhello # world\n",
+            b"hello # world\n",
+            strip_comments=True,
+        )
+
+    def test_comment_lines_then_strip_whitespace(self):
+        """Commenting combines with whitespace cleanup; blank lines stay bare."""
+        self._check(
+            b"hello  \n\n\nworld  \n",
+            b"# hello\n\n# world\n",
+            comment_lines=True,
+        )
+
+    def test_only_comments(self):
+        """Stripping comments from comment-only input leaves nothing."""
+        self._check(
+            b"# comment 1\n# comment 2\n# comment 3\n",
+            b"",
+            strip_comments=True,
+        )
+
+    def test_blank_line_between_content(self):
+        """A single separating blank line is kept."""
+        self._check(b"hello\n\nworld\n", b"hello\n\nworld\n")