patch.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. # patch.py -- For dealing with packed-style patches.
  2. # Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) a later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Functions for dealing with git am-style patches.
  19. These patches are basically unified diffs with some extra metadata tacked
  20. on.
  21. """
  22. from io import BytesIO
  23. from difflib import SequenceMatcher
  24. import email.parser
  25. import time
  26. from dulwich.objects import (
  27. Commit,
  28. S_ISGITLINK,
  29. )
# Size of the leading slice of a file's content that is_binary() scans
# for NUL characters.
FIRST_FEW_BYTES = 8000
  31. def write_commit_patch(f, commit, contents, progress, version=None):
  32. """Write a individual file patch.
  33. :param commit: Commit object
  34. :param progress: Tuple with current patch number and total.
  35. :return: tuple with filename and contents
  36. """
  37. (num, total) = progress
  38. f.write("From %s %s\n" % (commit.id, time.ctime(commit.commit_time)))
  39. f.write("From: %s\n" % commit.author)
  40. f.write("Date: %s\n" % time.strftime("%a, %d %b %Y %H:%M:%S %Z"))
  41. f.write("Subject: [PATCH %d/%d] %s\n" % (num, total, commit.message))
  42. f.write("\n")
  43. f.write("---\n")
  44. try:
  45. import subprocess
  46. p = subprocess.Popen(["diffstat"], stdout=subprocess.PIPE,
  47. stdin=subprocess.PIPE)
  48. except (ImportError, OSError):
  49. pass # diffstat not available?
  50. else:
  51. (diffstat, _) = p.communicate(contents)
  52. f.write(diffstat)
  53. f.write("\n")
  54. f.write(contents)
  55. f.write("-- \n")
  56. if version is None:
  57. from dulwich import __version__ as dulwich_version
  58. f.write("Dulwich %d.%d.%d\n" % dulwich_version)
  59. else:
  60. f.write("%s\n" % version)
  61. def get_summary(commit):
  62. """Determine the summary line for use in a filename.
  63. :param commit: Commit
  64. :return: Summary string
  65. """
  66. return commit.message.splitlines()[0].replace(" ", "-")
  67. def unified_diff(a, b, fromfile='', tofile='', n=3):
  68. """difflib.unified_diff that doesn't write any dates or trailing spaces.
  69. Based on the same function in Python2.6.5-rc2's difflib.py
  70. """
  71. started = False
  72. for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
  73. if not started:
  74. yield '--- %s\n' % fromfile
  75. yield '+++ %s\n' % tofile
  76. started = True
  77. i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
  78. yield "@@ -%d,%d +%d,%d @@\n" % (i1+1, i2-i1, j1+1, j2-j1)
  79. for tag, i1, i2, j1, j2 in group:
  80. if tag == 'equal':
  81. for line in a[i1:i2]:
  82. yield ' ' + line
  83. continue
  84. if tag == 'replace' or tag == 'delete':
  85. for line in a[i1:i2]:
  86. if not line[-1] == '\n':
  87. line += '\n\\ No newline at end of file\n'
  88. yield '-' + line
  89. if tag == 'replace' or tag == 'insert':
  90. for line in b[j1:j2]:
  91. if not line[-1] == '\n':
  92. line += '\n\\ No newline at end of file\n'
  93. yield '+' + line
  94. def is_binary(content):
  95. """See if the first few bytes contain any null characters.
  96. :param content: Bytestring to check for binary content
  97. """
  98. return '\0' in content[:FIRST_FEW_BYTES]
  99. def write_object_diff(f, store, old_file, new_file, diff_binary=False):
  100. """Write the diff for an object.
  101. :param f: File-like object to write to
  102. :param store: Store to retrieve objects from, if necessary
  103. :param old_file: (path, mode, hexsha) tuple
  104. :param new_file: (path, mode, hexsha) tuple
  105. :param diff_binary: Whether to diff files even if they
  106. are considered binary files by is_binary().
  107. :note: the tuple elements should be None for nonexistant files
  108. """
  109. (old_path, old_mode, old_id) = old_file
  110. (new_path, new_mode, new_id) = new_file
  111. def shortid(hexsha):
  112. if hexsha is None:
  113. return "0" * 7
  114. else:
  115. return hexsha[:7]
  116. def content(mode, hexsha):
  117. if hexsha is None:
  118. return ''
  119. elif S_ISGITLINK(mode):
  120. return "Submodule commit " + hexsha + "\n"
  121. else:
  122. return store[hexsha].data
  123. def lines(content):
  124. if not content:
  125. return []
  126. else:
  127. return content.splitlines(True)
  128. if old_path is None:
  129. old_path = "/dev/null"
  130. else:
  131. old_path = "a/%s" % old_path
  132. if new_path is None:
  133. new_path = "/dev/null"
  134. else:
  135. new_path = "b/%s" % new_path
  136. f.write("diff --git %s %s\n" % (old_path, new_path))
  137. if old_mode != new_mode:
  138. if new_mode is not None:
  139. if old_mode is not None:
  140. f.write("old mode %o\n" % old_mode)
  141. f.write("new mode %o\n" % new_mode)
  142. else:
  143. f.write("deleted mode %o\n" % old_mode)
  144. f.write("index %s..%s" % (shortid(old_id), shortid(new_id)))
  145. if new_mode is not None:
  146. f.write(" %o" % new_mode)
  147. f.write("\n")
  148. old_content = content(old_mode, old_id)
  149. new_content = content(new_mode, new_id)
  150. if not diff_binary and (is_binary(old_content) or is_binary(new_content)):
  151. f.write("Binary files %s and %s differ\n" % (old_path, new_path))
  152. else:
  153. f.writelines(unified_diff(lines(old_content), lines(new_content),
  154. old_path, new_path))
  155. def write_blob_diff(f, old_file, new_file):
  156. """Write diff file header.
  157. :param f: File-like object to write to
  158. :param old_file: (path, mode, hexsha) tuple (None if nonexisting)
  159. :param new_file: (path, mode, hexsha) tuple (None if nonexisting)
  160. :note: The use of write_object_diff is recommended over this function.
  161. """
  162. (old_path, old_mode, old_blob) = old_file
  163. (new_path, new_mode, new_blob) = new_file
  164. def blob_id(blob):
  165. if blob is None:
  166. return "0" * 7
  167. else:
  168. return blob.id[:7]
  169. def lines(blob):
  170. if blob is not None:
  171. return blob.data.splitlines(True)
  172. else:
  173. return []
  174. if old_path is None:
  175. old_path = "/dev/null"
  176. else:
  177. old_path = "a/%s" % old_path
  178. if new_path is None:
  179. new_path = "/dev/null"
  180. else:
  181. new_path = "b/%s" % new_path
  182. f.write("diff --git %s %s\n" % (old_path, new_path))
  183. if old_mode != new_mode:
  184. if new_mode is not None:
  185. if old_mode is not None:
  186. f.write("old mode %o\n" % old_mode)
  187. f.write("new mode %o\n" % new_mode)
  188. else:
  189. f.write("deleted mode %o\n" % old_mode)
  190. f.write("index %s..%s" % (blob_id(old_blob), blob_id(new_blob)))
  191. if new_mode is not None:
  192. f.write(" %o" % new_mode)
  193. f.write("\n")
  194. old_contents = lines(old_blob)
  195. new_contents = lines(new_blob)
  196. f.writelines(unified_diff(old_contents, new_contents,
  197. old_path, new_path))
  198. def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
  199. """Write tree diff.
  200. :param f: File-like object to write to.
  201. :param old_tree: Old tree id
  202. :param new_tree: New tree id
  203. :param diff_binary: Whether to diff files even if they
  204. are considered binary files by is_binary().
  205. """
  206. changes = store.tree_changes(old_tree, new_tree)
  207. for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
  208. write_object_diff(f, store, (oldpath, oldmode, oldsha),
  209. (newpath, newmode, newsha),
  210. diff_binary=diff_binary)
  211. def git_am_patch_split(f):
  212. """Parse a git-am-style patch and split it up into bits.
  213. :param f: File-like object to parse
  214. :return: Tuple with commit object, diff contents and git version
  215. """
  216. parser = email.parser.Parser()
  217. msg = parser.parse(f)
  218. c = Commit()
  219. c.author = msg["from"]
  220. c.committer = msg["from"]
  221. try:
  222. patch_tag_start = msg["subject"].index("[PATCH")
  223. except ValueError:
  224. subject = msg["subject"]
  225. else:
  226. close = msg["subject"].index("] ", patch_tag_start)
  227. subject = msg["subject"][close+2:]
  228. c.message = subject.replace("\n", "") + "\n"
  229. first = True
  230. body = BytesIO(msg.get_payload())
  231. for l in body:
  232. if l == "---\n":
  233. break
  234. if first:
  235. if l.startswith("From: "):
  236. c.author = l[len("From: "):].rstrip()
  237. else:
  238. c.message += "\n" + l
  239. first = False
  240. else:
  241. c.message += l
  242. diff = ""
  243. for l in body:
  244. if l == "-- \n":
  245. break
  246. diff += l
  247. try:
  248. version = next(body).rstrip("\n")
  249. except StopIteration:
  250. version = None
  251. return c, diff, version