2
0

patch.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. # patch.py -- For dealing with packed-style patches.
  2. # Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Classes for dealing with git am-style patches.
  21. These patches are basically unified diffs with some extra metadata tacked
  22. on.
  23. """
  24. from difflib import SequenceMatcher
  25. import email.parser
  26. import time
  27. from dulwich.objects import (
  28. Blob,
  29. Commit,
  30. S_ISGITLINK,
  31. )
# Number of leading bytes that is_binary() scans for NUL characters.
FIRST_FEW_BYTES = 8000
  33. def write_commit_patch(f, commit, contents, progress, version=None,
  34. encoding=None):
  35. """Write a individual file patch.
  36. :param commit: Commit object
  37. :param progress: Tuple with current patch number and total.
  38. :return: tuple with filename and contents
  39. """
  40. encoding = encoding or getattr(f, "encoding", "ascii")
  41. if isinstance(contents, str):
  42. contents = contents.encode(encoding)
  43. (num, total) = progress
  44. f.write(b"From " + commit.id + b" " +
  45. time.ctime(commit.commit_time).encode(encoding) + b"\n")
  46. f.write(b"From: " + commit.author + b"\n")
  47. f.write(b"Date: " +
  48. time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n")
  49. f.write(("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding) +
  50. commit.message + b"\n")
  51. f.write(b"\n")
  52. f.write(b"---\n")
  53. try:
  54. import subprocess
  55. p = subprocess.Popen(["diffstat"], stdout=subprocess.PIPE,
  56. stdin=subprocess.PIPE)
  57. except (ImportError, OSError):
  58. pass # diffstat not available?
  59. else:
  60. (diffstat, _) = p.communicate(contents)
  61. f.write(diffstat)
  62. f.write(b"\n")
  63. f.write(contents)
  64. f.write(b"-- \n")
  65. if version is None:
  66. from dulwich import __version__ as dulwich_version
  67. f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
  68. else:
  69. f.write(version.encode(encoding) + b"\n")
  70. def get_summary(commit):
  71. """Determine the summary line for use in a filename.
  72. :param commit: Commit
  73. :return: Summary string
  74. """
  75. return commit.message.splitlines()[0].replace(" ", "-")
  76. def unified_diff(a, b, fromfile, tofile, n=3):
  77. """difflib.unified_diff that doesn't write any dates or trailing spaces.
  78. Based on the same function in Python2.6.5-rc2's difflib.py
  79. """
  80. started = False
  81. for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
  82. if not started:
  83. yield b'--- ' + fromfile + b'\n'
  84. yield b'+++ ' + tofile + b'\n'
  85. started = True
  86. i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
  87. sizes = "@@ -%d,%d +%d,%d @@\n" % (i1+1, i2-i1, j1+1, j2-j1)
  88. yield sizes.encode('ascii')
  89. for tag, i1, i2, j1, j2 in group:
  90. if tag == 'equal':
  91. for line in a[i1:i2]:
  92. yield b' ' + line
  93. continue
  94. if tag == 'replace' or tag == 'delete':
  95. for line in a[i1:i2]:
  96. if not line[-1:] == b'\n':
  97. line += b'\n\\ No newline at end of file\n'
  98. yield b'-' + line
  99. if tag == 'replace' or tag == 'insert':
  100. for line in b[j1:j2]:
  101. if not line[-1:] == b'\n':
  102. line += b'\n\\ No newline at end of file\n'
  103. yield b'+' + line
  104. def is_binary(content):
  105. """See if the first few bytes contain any null characters.
  106. :param content: Bytestring to check for binary content
  107. """
  108. return b'\0' in content[:FIRST_FEW_BYTES]
  109. def shortid(hexsha):
  110. if hexsha is None:
  111. return b"0" * 7
  112. else:
  113. return hexsha[:7]
  114. def patch_filename(p, root):
  115. if p is None:
  116. return b"/dev/null"
  117. else:
  118. return root + b"/" + p
  119. def write_object_diff(f, store, old_file, new_file, diff_binary=False):
  120. """Write the diff for an object.
  121. :param f: File-like object to write to
  122. :param store: Store to retrieve objects from, if necessary
  123. :param old_file: (path, mode, hexsha) tuple
  124. :param new_file: (path, mode, hexsha) tuple
  125. :param diff_binary: Whether to diff files even if they
  126. are considered binary files by is_binary().
  127. :note: the tuple elements should be None for nonexistant files
  128. """
  129. (old_path, old_mode, old_id) = old_file
  130. (new_path, new_mode, new_id) = new_file
  131. old_path = patch_filename(old_path, b"a")
  132. new_path = patch_filename(new_path, b"b")
  133. def content(mode, hexsha):
  134. if hexsha is None:
  135. return Blob.from_string(b'')
  136. elif S_ISGITLINK(mode):
  137. return Blob.from_string(b"Submodule commit " + hexsha + b"\n")
  138. else:
  139. return store[hexsha]
  140. def lines(content):
  141. if not content:
  142. return []
  143. else:
  144. return content.splitlines()
  145. f.writelines(gen_diff_header(
  146. (old_path, new_path), (old_mode, new_mode), (old_id, new_id)))
  147. old_content = content(old_mode, old_id)
  148. new_content = content(new_mode, new_id)
  149. if not diff_binary and (
  150. is_binary(old_content.data) or is_binary(new_content.data)):
  151. f.write(b"Binary files " + old_path + b" and " + new_path +
  152. b" differ\n")
  153. else:
  154. f.writelines(unified_diff(lines(old_content), lines(new_content),
  155. old_path, new_path))
  156. # TODO(jelmer): Support writing unicode, rather than bytes.
  157. def gen_diff_header(paths, modes, shas):
  158. """Write a blob diff header.
  159. :param paths: Tuple with old and new path
  160. :param modes: Tuple with old and new modes
  161. :param shas: Tuple with old and new shas
  162. """
  163. (old_path, new_path) = paths
  164. (old_mode, new_mode) = modes
  165. (old_sha, new_sha) = shas
  166. yield b"diff --git " + old_path + b" " + new_path + b"\n"
  167. if old_mode != new_mode:
  168. if new_mode is not None:
  169. if old_mode is not None:
  170. yield ("old mode %o\n" % old_mode).encode('ascii')
  171. yield ("new mode %o\n" % new_mode).encode('ascii')
  172. else:
  173. yield ("deleted mode %o\n" % old_mode).encode('ascii')
  174. yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
  175. if new_mode is not None:
  176. yield (" %o" % new_mode).encode('ascii')
  177. yield b"\n"
  178. # TODO(jelmer): Support writing unicode, rather than bytes.
  179. def write_blob_diff(f, old_file, new_file):
  180. """Write blob diff.
  181. :param f: File-like object to write to
  182. :param old_file: (path, mode, hexsha) tuple (None if nonexisting)
  183. :param new_file: (path, mode, hexsha) tuple (None if nonexisting)
  184. :note: The use of write_object_diff is recommended over this function.
  185. """
  186. (old_path, old_mode, old_blob) = old_file
  187. (new_path, new_mode, new_blob) = new_file
  188. old_path = patch_filename(old_path, b"a")
  189. new_path = patch_filename(new_path, b"b")
  190. def lines(blob):
  191. if blob is not None:
  192. return blob.splitlines()
  193. else:
  194. return []
  195. f.writelines(gen_diff_header(
  196. (old_path, new_path), (old_mode, new_mode),
  197. (getattr(old_blob, "id", None), getattr(new_blob, "id", None))))
  198. old_contents = lines(old_blob)
  199. new_contents = lines(new_blob)
  200. f.writelines(unified_diff(old_contents, new_contents,
  201. old_path, new_path))
  202. def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
  203. """Write tree diff.
  204. :param f: File-like object to write to.
  205. :param old_tree: Old tree id
  206. :param new_tree: New tree id
  207. :param diff_binary: Whether to diff files even if they
  208. are considered binary files by is_binary().
  209. """
  210. changes = store.tree_changes(old_tree, new_tree)
  211. for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
  212. write_object_diff(f, store, (oldpath, oldmode, oldsha),
  213. (newpath, newmode, newsha), diff_binary=diff_binary)
  214. def git_am_patch_split(f, encoding=None):
  215. """Parse a git-am-style patch and split it up into bits.
  216. :param f: File-like object to parse
  217. :param encoding: Encoding to use when creating Git objects
  218. :return: Tuple with commit object, diff contents and git version
  219. """
  220. encoding = encoding or getattr(f, "encoding", "ascii")
  221. contents = f.read()
  222. if (isinstance(contents, bytes) and
  223. getattr(email.parser, "BytesParser", None)):
  224. parser = email.parser.BytesParser()
  225. msg = parser.parsebytes(contents)
  226. else:
  227. parser = email.parser.Parser()
  228. msg = parser.parsestr(contents)
  229. return parse_patch_message(msg, encoding)
  230. def parse_patch_message(msg, encoding=None):
  231. """Extract a Commit object and patch from an e-mail message.
  232. :param msg: An email message (email.message.Message)
  233. :param encoding: Encoding to use to encode Git commits
  234. :return: Tuple with commit object, diff contents and git version
  235. """
  236. c = Commit()
  237. c.author = msg["from"].encode(encoding)
  238. c.committer = msg["from"].encode(encoding)
  239. try:
  240. patch_tag_start = msg["subject"].index("[PATCH")
  241. except ValueError:
  242. subject = msg["subject"]
  243. else:
  244. close = msg["subject"].index("] ", patch_tag_start)
  245. subject = msg["subject"][close+2:]
  246. c.message = (subject.replace("\n", "") + "\n").encode(encoding)
  247. first = True
  248. body = msg.get_payload(decode=True)
  249. lines = body.splitlines(True)
  250. line_iter = iter(lines)
  251. for l in line_iter:
  252. if l == b"---\n":
  253. break
  254. if first:
  255. if l.startswith(b"From: "):
  256. c.author = l[len(b"From: "):].rstrip()
  257. else:
  258. c.message += b"\n" + l
  259. first = False
  260. else:
  261. c.message += l
  262. diff = b""
  263. for l in line_iter:
  264. if l == b"-- \n":
  265. break
  266. diff += l
  267. try:
  268. version = next(line_iter).rstrip(b"\n")
  269. except StopIteration:
  270. version = None
  271. return c, diff, version