stripspace.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. # stripspace.py -- Git stripspace functionality
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Git stripspace functionality for cleaning up text and commit messages."""
  22. __all__ = [
  23. "stripspace",
  24. ]
  25. def stripspace(
  26. text: bytes,
  27. *,
  28. strip_comments: bool = False,
  29. comment_char: bytes = b"#",
  30. comment_lines: bool = False,
  31. ) -> bytes:
  32. """Strip unnecessary whitespace from text.
  33. This function mimics the behavior of ``git stripspace``, which is commonly
  34. used to clean up commit messages and other text content.
  35. Args:
  36. text: The text to process (as bytes)
  37. strip_comments: If True, remove lines that begin with comment_char
  38. comment_char: The comment character to use (default: b"#")
  39. comment_lines: If True, prepend comment_char to each line
  40. Returns:
  41. The processed text as bytes
  42. The function performs the following operations (in order):
  43. 1. If comment_lines is True, prepend comment_char + space to each line
  44. 2. Strip trailing whitespace from each line
  45. 3. If strip_comments is True, remove lines starting with comment_char
  46. 4. Collapse multiple consecutive blank lines into a single blank line
  47. 5. Remove leading blank lines
  48. 6. Remove trailing blank lines
  49. 7. Ensure the text ends with a newline (unless empty)
  50. """
  51. if not text:
  52. return b""
  53. # Split into lines (preserving line endings for processing)
  54. lines = text.splitlines(keepends=True)
  55. # Step 1 & 2: Strip leading and trailing whitespace from each line (but keep newlines)
  56. processed_lines = []
  57. for line in lines:
  58. # Determine line ending
  59. line_ending = b""
  60. if line.endswith(b"\r\n"):
  61. line_ending = b"\r\n"
  62. elif line.endswith(b"\n"):
  63. line_ending = b"\n"
  64. elif line.endswith(b"\r"):
  65. line_ending = b"\r"
  66. # Strip all whitespace from the line content
  67. stripped_content = line.rstrip(b"\r\n\t ").lstrip()
  68. # If comment_lines is True, prepend comment char to non-empty lines only
  69. if comment_lines and stripped_content:
  70. stripped_content = comment_char + b" " + stripped_content
  71. # Reassemble with line ending
  72. processed_lines.append(stripped_content + line_ending)
  73. # Step 3: Strip comments if requested
  74. if strip_comments:
  75. processed_lines = [
  76. line
  77. for line in processed_lines
  78. if not line.lstrip().startswith(comment_char)
  79. ]
  80. if not processed_lines:
  81. return b""
  82. # Step 4 & 5 & 6: Collapse multiple blank lines, remove leading/trailing blank lines
  83. # First, identify blank lines (lines that are just whitespace/newline)
  84. def is_blank(line: bytes) -> bool:
  85. return line.strip() == b""
  86. # Remove leading blank lines
  87. while processed_lines and is_blank(processed_lines[0]):
  88. processed_lines.pop(0)
  89. # Remove trailing blank lines
  90. while processed_lines and is_blank(processed_lines[-1]):
  91. processed_lines.pop()
  92. # Collapse consecutive blank lines
  93. collapsed_lines = []
  94. prev_was_blank = False
  95. for line in processed_lines:
  96. is_current_blank = is_blank(line)
  97. if is_current_blank and prev_was_blank:
  98. # Skip this blank line
  99. continue
  100. collapsed_lines.append(line)
  101. prev_was_blank = is_current_blank
  102. if not collapsed_lines:
  103. return b""
  104. # Step 7: Ensure text ends with newline
  105. # Join all lines together
  106. result = b"".join(collapsed_lines)
  107. # If the result doesn't end with a newline, add one
  108. if not result.endswith((b"\n", b"\r\n", b"\r")):
  109. result += b"\n"
  110. return result