patch.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. # patch.py -- For dealing with packed-style patches.
  2. # Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Classes for dealing with git am-style patches.
  22. These patches are basically unified diffs with some extra metadata tacked
  23. on.
  24. """
  25. import email.parser
  26. import time
  27. from difflib import SequenceMatcher
  28. from typing import BinaryIO, Optional, TextIO, Union
  29. from .objects import S_ISGITLINK, Blob, Commit
  30. from .pack import ObjectContainer
# Number of leading bytes that is_binary() scans for a NUL byte.
FIRST_FEW_BYTES = 8000
  32. def write_commit_patch(
  33. f, commit, contents, progress, version=None, encoding=None
  34. ) -> None:
  35. """Write a individual file patch.
  36. Args:
  37. commit: Commit object
  38. progress: Tuple with current patch number and total.
  39. Returns:
  40. tuple with filename and contents
  41. """
  42. encoding = encoding or getattr(f, "encoding", "ascii")
  43. if isinstance(contents, str):
  44. contents = contents.encode(encoding)
  45. (num, total) = progress
  46. f.write(
  47. b"From "
  48. + commit.id
  49. + b" "
  50. + time.ctime(commit.commit_time).encode(encoding)
  51. + b"\n"
  52. )
  53. f.write(b"From: " + commit.author + b"\n")
  54. f.write(
  55. b"Date: " + time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n"
  56. )
  57. f.write(
  58. (f"Subject: [PATCH {num}/{total}] ").encode(encoding) + commit.message + b"\n"
  59. )
  60. f.write(b"\n")
  61. f.write(b"---\n")
  62. try:
  63. import subprocess
  64. p = subprocess.Popen(
  65. ["diffstat"], stdout=subprocess.PIPE, stdin=subprocess.PIPE
  66. )
  67. except (ImportError, OSError):
  68. pass # diffstat not available?
  69. else:
  70. (diffstat, _) = p.communicate(contents)
  71. f.write(diffstat)
  72. f.write(b"\n")
  73. f.write(contents)
  74. f.write(b"-- \n")
  75. if version is None:
  76. from dulwich import __version__ as dulwich_version
  77. f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
  78. else:
  79. f.write(version.encode(encoding) + b"\n")
  80. def get_summary(commit):
  81. """Determine the summary line for use in a filename.
  82. Args:
  83. commit: Commit
  84. Returns: Summary string
  85. """
  86. decoded = commit.message.decode(errors="replace")
  87. return decoded.splitlines()[0].replace(" ", "-")
  88. # Unified Diff
  89. def _format_range_unified(start, stop) -> str:
  90. """Convert range to the "ed" format."""
  91. # Per the diff spec at http://www.unix.org/single_unix_specification/
  92. beginning = start + 1 # lines start numbering with one
  93. length = stop - start
  94. if length == 1:
  95. return f"{beginning}"
  96. if not length:
  97. beginning -= 1 # empty ranges begin at line just before the range
  98. return f"{beginning},{length}"
  99. def unified_diff(
  100. a,
  101. b,
  102. fromfile="",
  103. tofile="",
  104. fromfiledate="",
  105. tofiledate="",
  106. n=3,
  107. lineterm="\n",
  108. tree_encoding="utf-8",
  109. output_encoding="utf-8",
  110. ):
  111. """difflib.unified_diff that can detect "No newline at end of file" as
  112. original "git diff" does.
  113. Based on the same function in Python2.7 difflib.py
  114. """
  115. started = False
  116. for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
  117. if not started:
  118. started = True
  119. fromdate = f"\t{fromfiledate}" if fromfiledate else ""
  120. todate = f"\t{tofiledate}" if tofiledate else ""
  121. yield f"--- {fromfile.decode(tree_encoding)}{fromdate}{lineterm}".encode(
  122. output_encoding
  123. )
  124. yield f"+++ {tofile.decode(tree_encoding)}{todate}{lineterm}".encode(
  125. output_encoding
  126. )
  127. first, last = group[0], group[-1]
  128. file1_range = _format_range_unified(first[1], last[2])
  129. file2_range = _format_range_unified(first[3], last[4])
  130. yield f"@@ -{file1_range} +{file2_range} @@{lineterm}".encode(output_encoding)
  131. for tag, i1, i2, j1, j2 in group:
  132. if tag == "equal":
  133. for line in a[i1:i2]:
  134. yield b" " + line
  135. continue
  136. if tag in ("replace", "delete"):
  137. for line in a[i1:i2]:
  138. if not line[-1:] == b"\n":
  139. line += b"\n\\ No newline at end of file\n"
  140. yield b"-" + line
  141. if tag in ("replace", "insert"):
  142. for line in b[j1:j2]:
  143. if not line[-1:] == b"\n":
  144. line += b"\n\\ No newline at end of file\n"
  145. yield b"+" + line
  146. def is_binary(content):
  147. """See if the first few bytes contain any null characters.
  148. Args:
  149. content: Bytestring to check for binary content
  150. """
  151. return b"\0" in content[:FIRST_FEW_BYTES]
  152. def shortid(hexsha):
  153. if hexsha is None:
  154. return b"0" * 7
  155. else:
  156. return hexsha[:7]
  157. def patch_filename(p, root):
  158. if p is None:
  159. return b"/dev/null"
  160. else:
  161. return root + b"/" + p
  162. def write_object_diff(
  163. f, store: ObjectContainer, old_file, new_file, diff_binary=False
  164. ) -> None:
  165. """Write the diff for an object.
  166. Args:
  167. f: File-like object to write to
  168. store: Store to retrieve objects from, if necessary
  169. old_file: (path, mode, hexsha) tuple
  170. new_file: (path, mode, hexsha) tuple
  171. diff_binary: Whether to diff files even if they
  172. are considered binary files by is_binary().
  173. Note: the tuple elements should be None for nonexistent files
  174. """
  175. (old_path, old_mode, old_id) = old_file
  176. (new_path, new_mode, new_id) = new_file
  177. patched_old_path = patch_filename(old_path, b"a")
  178. patched_new_path = patch_filename(new_path, b"b")
  179. def content(mode, hexsha):
  180. if hexsha is None:
  181. return Blob.from_string(b"")
  182. elif S_ISGITLINK(mode):
  183. return Blob.from_string(b"Subproject commit " + hexsha + b"\n")
  184. else:
  185. return store[hexsha]
  186. def lines(content):
  187. if not content:
  188. return []
  189. else:
  190. return content.splitlines()
  191. f.writelines(
  192. gen_diff_header((old_path, new_path), (old_mode, new_mode), (old_id, new_id))
  193. )
  194. old_content = content(old_mode, old_id)
  195. new_content = content(new_mode, new_id)
  196. if not diff_binary and (is_binary(old_content.data) or is_binary(new_content.data)):
  197. binary_diff = (
  198. b"Binary files "
  199. + patched_old_path
  200. + b" and "
  201. + patched_new_path
  202. + b" differ\n"
  203. )
  204. f.write(binary_diff)
  205. else:
  206. f.writelines(
  207. unified_diff(
  208. lines(old_content),
  209. lines(new_content),
  210. patched_old_path,
  211. patched_new_path,
  212. )
  213. )
  214. # TODO(jelmer): Support writing unicode, rather than bytes.
  215. def gen_diff_header(paths, modes, shas):
  216. """Write a blob diff header.
  217. Args:
  218. paths: Tuple with old and new path
  219. modes: Tuple with old and new modes
  220. shas: Tuple with old and new shas
  221. """
  222. (old_path, new_path) = paths
  223. (old_mode, new_mode) = modes
  224. (old_sha, new_sha) = shas
  225. if old_path is None and new_path is not None:
  226. old_path = new_path
  227. if new_path is None and old_path is not None:
  228. new_path = old_path
  229. old_path = patch_filename(old_path, b"a")
  230. new_path = patch_filename(new_path, b"b")
  231. yield b"diff --git " + old_path + b" " + new_path + b"\n"
  232. if old_mode != new_mode:
  233. if new_mode is not None:
  234. if old_mode is not None:
  235. yield (f"old file mode {old_mode:o}\n").encode("ascii")
  236. yield (f"new file mode {new_mode:o}\n").encode("ascii")
  237. else:
  238. yield (f"deleted file mode {old_mode:o}\n").encode("ascii")
  239. yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
  240. if new_mode is not None and old_mode is not None:
  241. yield (f" {new_mode:o}").encode("ascii")
  242. yield b"\n"
  243. # TODO(jelmer): Support writing unicode, rather than bytes.
  244. def write_blob_diff(f, old_file, new_file) -> None:
  245. """Write blob diff.
  246. Args:
  247. f: File-like object to write to
  248. old_file: (path, mode, hexsha) tuple (None if nonexisting)
  249. new_file: (path, mode, hexsha) tuple (None if nonexisting)
  250. Note: The use of write_object_diff is recommended over this function.
  251. """
  252. (old_path, old_mode, old_blob) = old_file
  253. (new_path, new_mode, new_blob) = new_file
  254. patched_old_path = patch_filename(old_path, b"a")
  255. patched_new_path = patch_filename(new_path, b"b")
  256. def lines(blob):
  257. if blob is not None:
  258. return blob.splitlines()
  259. else:
  260. return []
  261. f.writelines(
  262. gen_diff_header(
  263. (old_path, new_path),
  264. (old_mode, new_mode),
  265. (getattr(old_blob, "id", None), getattr(new_blob, "id", None)),
  266. )
  267. )
  268. old_contents = lines(old_blob)
  269. new_contents = lines(new_blob)
  270. f.writelines(
  271. unified_diff(old_contents, new_contents, patched_old_path, patched_new_path)
  272. )
  273. def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False) -> None:
  274. """Write tree diff.
  275. Args:
  276. f: File-like object to write to.
  277. old_tree: Old tree id
  278. new_tree: New tree id
  279. diff_binary: Whether to diff files even if they
  280. are considered binary files by is_binary().
  281. """
  282. changes = store.tree_changes(old_tree, new_tree)
  283. for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
  284. write_object_diff(
  285. f,
  286. store,
  287. (oldpath, oldmode, oldsha),
  288. (newpath, newmode, newsha),
  289. diff_binary=diff_binary,
  290. )
  291. def git_am_patch_split(f: Union[TextIO, BinaryIO], encoding: Optional[str] = None):
  292. """Parse a git-am-style patch and split it up into bits.
  293. Args:
  294. f: File-like object to parse
  295. encoding: Encoding to use when creating Git objects
  296. Returns: Tuple with commit object, diff contents and git version
  297. """
  298. encoding = encoding or getattr(f, "encoding", "ascii")
  299. encoding = encoding or "ascii"
  300. contents = f.read()
  301. if isinstance(contents, bytes):
  302. bparser = email.parser.BytesParser()
  303. msg = bparser.parsebytes(contents)
  304. else:
  305. uparser = email.parser.Parser()
  306. msg = uparser.parsestr(contents)
  307. return parse_patch_message(msg, encoding)
  308. def parse_patch_message(msg, encoding=None):
  309. """Extract a Commit object and patch from an e-mail message.
  310. Args:
  311. msg: An email message (email.message.Message)
  312. encoding: Encoding to use to encode Git commits
  313. Returns: Tuple with commit object, diff contents and git version
  314. """
  315. c = Commit()
  316. c.author = msg["from"].encode(encoding)
  317. c.committer = msg["from"].encode(encoding)
  318. try:
  319. patch_tag_start = msg["subject"].index("[PATCH")
  320. except ValueError:
  321. subject = msg["subject"]
  322. else:
  323. close = msg["subject"].index("] ", patch_tag_start)
  324. subject = msg["subject"][close + 2 :]
  325. c.message = (subject.replace("\n", "") + "\n").encode(encoding)
  326. first = True
  327. body = msg.get_payload(decode=True)
  328. lines = body.splitlines(True)
  329. line_iter = iter(lines)
  330. for line in line_iter:
  331. if line == b"---\n":
  332. break
  333. if first:
  334. if line.startswith(b"From: "):
  335. c.author = line[len(b"From: ") :].rstrip()
  336. else:
  337. c.message += b"\n" + line
  338. first = False
  339. else:
  340. c.message += line
  341. diff = b""
  342. for line in line_iter:
  343. if line == b"-- \n":
  344. break
  345. diff += line
  346. try:
  347. version = next(line_iter).rstrip(b"\n")
  348. except StopIteration:
  349. version = None
  350. return c, diff, version