trailers.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. # trailers.py -- Git trailers parsing and manipulation
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Git trailers parsing and manipulation.
  21. This module provides functionality for parsing and manipulating Git trailers,
  22. which are structured information blocks appended to commit messages.
  23. Trailers follow the format:
  24. Token: value
  25. Token: value
  26. They are similar to RFC 822 email headers and appear at the end of commit
  27. messages after free-form content.
  28. """
  29. class Trailer:
  30. """Represents a single Git trailer.
  31. Args:
  32. key: The trailer key/token (e.g., "Signed-off-by")
  33. value: The trailer value
  34. separator: The separator character used (default ':')
  35. """
  36. def __init__(self, key: str, value: str, separator: str = ":") -> None:
  37. """Initialize a Trailer instance.
  38. Args:
  39. key: The trailer key/token
  40. value: The trailer value
  41. separator: The separator character (default ':')
  42. """
  43. self.key = key
  44. self.value = value
  45. self.separator = separator
  46. def __eq__(self, other: object) -> bool:
  47. """Compare two Trailer instances for equality.
  48. Args:
  49. other: The object to compare with
  50. Returns:
  51. True if trailers have the same key, value, and separator
  52. """
  53. if not isinstance(other, Trailer):
  54. return NotImplemented
  55. return (
  56. self.key == other.key
  57. and self.value == other.value
  58. and self.separator == other.separator
  59. )
  60. def __repr__(self) -> str:
  61. """Return a string representation suitable for debugging.
  62. Returns:
  63. A string showing the trailer's key, value, and separator
  64. """
  65. return f"Trailer(key={self.key!r}, value={self.value!r}, separator={self.separator!r})"
  66. def __str__(self) -> str:
  67. """Return the trailer formatted as it would appear in a commit message.
  68. Returns:
  69. The trailer in the format "key: value"
  70. """
  71. return f"{self.key}{self.separator} {self.value}"
  72. def parse_trailers(
  73. message: bytes,
  74. separators: str = ":",
  75. ) -> tuple[bytes, list[Trailer]]:
  76. """Parse trailers from a commit message.
  77. Trailers are extracted from the input by looking for a group of one or more
  78. lines that (i) is all trailers, or (ii) contains at least one Git-generated
  79. or user-configured trailer and consists of at least 25% trailers.
  80. The group must be preceded by one or more empty (or whitespace-only) lines.
  81. The group must either be at the end of the input or be the last non-whitespace
  82. lines before a line that starts with '---'.
  83. Args:
  84. message: The commit message as bytes
  85. separators: Characters to recognize as trailer separators (default ':')
  86. Returns:
  87. A tuple of (message_without_trailers, list_of_trailers)
  88. """
  89. if not message:
  90. return (b"", [])
  91. # Decode message
  92. try:
  93. text = message.decode("utf-8")
  94. except UnicodeDecodeError:
  95. text = message.decode("latin-1")
  96. lines = text.splitlines(keepends=True)
  97. # Find the trailer block by searching backwards
  98. # Look for a blank line followed by trailer-like lines
  99. trailer_start = None
  100. cutoff_line = None
  101. # First, check if there's a "---" line that marks the end of the message
  102. for i in range(len(lines) - 1, -1, -1):
  103. if lines[i].lstrip().startswith("---"):
  104. cutoff_line = i
  105. break
  106. # Determine the search range
  107. search_end = cutoff_line if cutoff_line is not None else len(lines)
  108. # Search backwards for the trailer block
  109. # A trailer block must be preceded by a blank line and extend to the end
  110. for i in range(search_end - 1, -1, -1):
  111. line = lines[i].rstrip()
  112. # Check if this is a blank line
  113. if not line:
  114. # Check if the lines after this blank line are trailers
  115. potential_trailers = lines[i + 1 : search_end]
  116. # Remove trailing blank lines from potential trailers
  117. while potential_trailers and not potential_trailers[-1].strip():
  118. potential_trailers = potential_trailers[:-1]
  119. # Check if these lines form a trailer block and extend to search_end
  120. if potential_trailers and _is_trailer_block(potential_trailers, separators):
  121. # Verify these trailers extend to the end (search_end)
  122. # by checking there are no non-blank lines after them
  123. last_trailer_index = i + 1 + len(potential_trailers)
  124. has_content_after = False
  125. for j in range(last_trailer_index, search_end):
  126. if lines[j].strip():
  127. has_content_after = True
  128. break
  129. if not has_content_after:
  130. trailer_start = i + 1
  131. break
  132. if trailer_start is None:
  133. # No trailer block found
  134. return (message, [])
  135. # Parse the trailers
  136. trailer_lines = lines[trailer_start:search_end]
  137. trailers = _parse_trailer_lines(trailer_lines, separators)
  138. # Reconstruct the message without trailers
  139. # Keep everything before the blank line that precedes the trailers
  140. message_lines = lines[:trailer_start]
  141. # Remove trailing blank lines from the message
  142. while message_lines and not message_lines[-1].strip():
  143. message_lines.pop()
  144. message_without_trailers = "".join(message_lines)
  145. if message_without_trailers and not message_without_trailers.endswith("\n"):
  146. message_without_trailers += "\n"
  147. return (message_without_trailers.encode("utf-8"), trailers)
  148. def _is_trailer_block(lines: list[str], separators: str) -> bool:
  149. """Check if a group of lines forms a valid trailer block.
  150. A trailer block must be composed entirely of trailer lines (with possible
  151. blank lines and continuation lines). A single non-trailer line invalidates
  152. the entire block.
  153. Args:
  154. lines: The lines to check
  155. separators: Valid separator characters
  156. Returns:
  157. True if the lines form a valid trailer block
  158. """
  159. if not lines:
  160. return False
  161. # Remove empty lines at the end
  162. while lines and not lines[-1].strip():
  163. lines = lines[:-1]
  164. if not lines:
  165. return False
  166. has_any_trailer = False
  167. i = 0
  168. while i < len(lines):
  169. line = lines[i].rstrip()
  170. if not line:
  171. # Empty lines are allowed within the trailer block
  172. i += 1
  173. continue
  174. # Check if this line is a continuation (starts with whitespace)
  175. if line and line[0].isspace():
  176. # This is a continuation of the previous line
  177. i += 1
  178. continue
  179. # Check if this is a trailer line
  180. is_trailer = False
  181. for sep in separators:
  182. if sep in line:
  183. key_part = line.split(sep, 1)[0]
  184. # Key must not contain whitespace
  185. if key_part and not any(c.isspace() for c in key_part):
  186. is_trailer = True
  187. has_any_trailer = True
  188. break
  189. # If this is not a trailer line, the block is invalid
  190. if not is_trailer:
  191. return False
  192. i += 1
  193. # Must have at least one trailer
  194. return has_any_trailer
  195. def _parse_trailer_lines(lines: list[str], separators: str) -> list[Trailer]:
  196. """Parse individual trailer lines.
  197. Args:
  198. lines: The trailer lines to parse
  199. separators: Valid separator characters
  200. Returns:
  201. List of parsed Trailer objects
  202. """
  203. trailers: list[Trailer] = []
  204. current_trailer: Trailer | None = None
  205. for line in lines:
  206. stripped = line.rstrip()
  207. if not stripped:
  208. # Empty line - finalize current trailer if any
  209. if current_trailer:
  210. trailers.append(current_trailer)
  211. current_trailer = None
  212. continue
  213. # Check if this is a continuation line (starts with whitespace)
  214. if stripped[0].isspace():
  215. if current_trailer:
  216. # Append to the current trailer value
  217. continuation = stripped.lstrip()
  218. current_trailer.value += " " + continuation
  219. continue
  220. # Finalize the previous trailer if any
  221. if current_trailer:
  222. trailers.append(current_trailer)
  223. current_trailer = None
  224. # Try to parse as a new trailer
  225. for sep in separators:
  226. if sep in stripped:
  227. parts = stripped.split(sep, 1)
  228. key = parts[0]
  229. # Key must not contain whitespace
  230. if key and not any(c.isspace() for c in key):
  231. value = parts[1].strip() if len(parts) > 1 else ""
  232. current_trailer = Trailer(key, value, sep)
  233. break
  234. # Don't forget the last trailer
  235. if current_trailer:
  236. trailers.append(current_trailer)
  237. return trailers
  238. def format_trailers(trailers: list[Trailer]) -> bytes:
  239. """Format a list of trailers as bytes.
  240. Args:
  241. trailers: List of Trailer objects
  242. Returns:
  243. Formatted trailers as bytes
  244. """
  245. if not trailers:
  246. return b""
  247. lines = [str(trailer) for trailer in trailers]
  248. return "\n".join(lines).encode("utf-8") + b"\n"
  249. def add_trailer_to_message(
  250. message: bytes,
  251. key: str,
  252. value: str,
  253. separator: str = ":",
  254. where: str = "end",
  255. if_exists: str = "addIfDifferentNeighbor",
  256. if_missing: str = "add",
  257. ) -> bytes:
  258. """Add a trailer to a commit message.
  259. Args:
  260. message: The original commit message
  261. key: The trailer key
  262. value: The trailer value
  263. separator: The separator to use
  264. where: Where to add the trailer ('end', 'start', 'after', 'before')
  265. if_exists: How to handle existing trailers with the same key
  266. - 'add': Always add
  267. - 'replace': Replace all existing
  268. - 'addIfDifferent': Add only if value is different from all existing
  269. - 'addIfDifferentNeighbor': Add only if value differs from neighbors
  270. - 'doNothing': Don't add if key exists
  271. if_missing: What to do if the key doesn't exist
  272. - 'add': Add the trailer
  273. - 'doNothing': Don't add the trailer
  274. Returns:
  275. The message with the trailer added
  276. """
  277. message_body, existing_trailers = parse_trailers(message, separator)
  278. new_trailer = Trailer(key, value, separator)
  279. # Check if the key exists
  280. key_exists = any(t.key == key for t in existing_trailers)
  281. if not key_exists:
  282. if if_missing == "doNothing":
  283. return message
  284. # Add the new trailer
  285. updated_trailers = [*existing_trailers, new_trailer]
  286. else:
  287. # Key exists - apply if_exists logic
  288. if if_exists == "doNothing":
  289. return message
  290. elif if_exists == "replace":
  291. # Replace all trailers with this key
  292. updated_trailers = [t for t in existing_trailers if t.key != key]
  293. updated_trailers.append(new_trailer)
  294. elif if_exists == "addIfDifferent":
  295. # Add only if no existing trailer has the same value
  296. has_same_value = any(
  297. t.key == key and t.value == value for t in existing_trailers
  298. )
  299. if has_same_value:
  300. return message
  301. updated_trailers = [*existing_trailers, new_trailer]
  302. elif if_exists == "addIfDifferentNeighbor":
  303. # Add only if adjacent trailers with same key have different values
  304. should_add = True
  305. # Check if there's a neighboring trailer with the same key and value
  306. for i, t in enumerate(existing_trailers):
  307. if t.key == key and t.value == value:
  308. # Check if it's a neighbor (last trailer with this key)
  309. is_neighbor = True
  310. for j in range(i + 1, len(existing_trailers)):
  311. if existing_trailers[j].key == key:
  312. is_neighbor = False
  313. break
  314. if is_neighbor:
  315. should_add = False
  316. break
  317. if not should_add:
  318. return message
  319. updated_trailers = [*existing_trailers, new_trailer]
  320. else: # 'add'
  321. updated_trailers = [*existing_trailers, new_trailer]
  322. # Apply where logic
  323. if where == "start":
  324. updated_trailers = [new_trailer] + [
  325. t for t in updated_trailers if t != new_trailer
  326. ]
  327. elif where == "before":
  328. # Insert before the first trailer with the same key
  329. result = []
  330. inserted = False
  331. for t in updated_trailers:
  332. if not inserted and t.key == key and t != new_trailer:
  333. result.append(new_trailer)
  334. inserted = True
  335. if t != new_trailer:
  336. result.append(t)
  337. if not inserted:
  338. result.append(new_trailer)
  339. updated_trailers = result
  340. elif where == "after":
  341. # Insert after the last trailer with the same key
  342. result = []
  343. last_key_index = -1
  344. for i, t in enumerate(updated_trailers):
  345. if t.key == key and t != new_trailer:
  346. last_key_index = len(result)
  347. if t != new_trailer:
  348. result.append(t)
  349. if last_key_index >= 0:
  350. result.insert(last_key_index + 1, new_trailer)
  351. else:
  352. result.append(new_trailer)
  353. updated_trailers = result
  354. # 'end' is the default - trailer is already at the end
  355. # Reconstruct the message
  356. result_message = message_body
  357. if result_message and not result_message.endswith(b"\n"):
  358. result_message += b"\n"
  359. if updated_trailers:
  360. result_message += b"\n"
  361. result_message += format_trailers(updated_trailers)
  362. return result_message