fastexport.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. # fastexport.py -- Fast export/import functionality
  2. # Copyright (C) 2010-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # of the License or (at your option) any later version of
  8. # the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Fast export/import functionality."""
  20. from dulwich.index import (
  21. commit_tree,
  22. )
  23. from dulwich.objects import (
  24. Blob,
  25. Commit,
  26. Tag,
  27. )
  28. from fastimport import (
  29. commands,
  30. errors as fastimport_errors,
  31. parser,
  32. processor,
  33. )
  34. import stat
  35. def split_email(text):
  36. (name, email) = text.rsplit(" <", 1)
  37. return (name, email.rstrip(">"))
  38. class GitFastExporter(object):
  39. """Generate a fast-export output stream for Git objects."""
  40. def __init__(self, outf, store):
  41. self.outf = outf
  42. self.store = store
  43. self.markers = {}
  44. self._marker_idx = 0
  45. def print_cmd(self, cmd):
  46. self.outf.write("%r\n" % cmd)
  47. def _allocate_marker(self):
  48. self._marker_idx+=1
  49. return str(self._marker_idx)
  50. def _export_blob(self, blob):
  51. marker = self._allocate_marker()
  52. self.markers[marker] = blob.id
  53. return (commands.BlobCommand(marker, blob.data), marker)
  54. def emit_blob(self, blob):
  55. (cmd, marker) = self._export_blob(blob)
  56. self.print_cmd(cmd)
  57. return marker
  58. def _iter_files(self, base_tree, new_tree):
  59. for ((old_path, new_path), (old_mode, new_mode),
  60. (old_hexsha, new_hexsha)) in \
  61. self.store.tree_changes(base_tree, new_tree):
  62. if new_path is None:
  63. yield commands.FileDeleteCommand(old_path)
  64. continue
  65. if not stat.S_ISDIR(new_mode):
  66. blob = self.store[new_hexsha]
  67. marker = self.emit_blob(blob)
  68. if old_path != new_path and old_path is not None:
  69. yield commands.FileRenameCommand(old_path, new_path)
  70. if old_mode != new_mode or old_hexsha != new_hexsha:
  71. yield commands.FileModifyCommand(new_path, new_mode, marker,
  72. None)
  73. def _export_commit(self, commit, ref, base_tree=None):
  74. file_cmds = list(self._iter_files(base_tree, commit.tree))
  75. marker = self._allocate_marker()
  76. if commit.parents:
  77. from_ = commit.parents[0]
  78. merges = commit.parents[1:]
  79. else:
  80. from_ = None
  81. merges = []
  82. author, author_email = split_email(commit.author)
  83. committer, committer_email = split_email(commit.committer)
  84. cmd = commands.CommitCommand(ref, marker,
  85. (author, author_email, commit.author_time, commit.author_timezone),
  86. (committer, committer_email, commit.commit_time,
  87. commit.commit_timezone),
  88. commit.message, from_, merges, file_cmds)
  89. return (cmd, marker)
  90. def emit_commit(self, commit, ref, base_tree=None):
  91. cmd, marker = self._export_commit(commit, ref, base_tree)
  92. self.print_cmd(cmd)
  93. return marker
  94. class GitImportProcessor(processor.ImportProcessor):
  95. """An import processor that imports into a Git repository using Dulwich.
  96. """
  97. # FIXME: Batch creation of objects?
  98. def __init__(self, repo, params=None, verbose=False, outf=None):
  99. processor.ImportProcessor.__init__(self, params, verbose)
  100. self.repo = repo
  101. self.last_commit = None
  102. self.markers = {}
  103. self._contents = {}
  104. def import_stream(self, stream):
  105. p = parser.ImportParser(stream)
  106. self.process(p.iter_commands)
  107. return self.markers
  108. def blob_handler(self, cmd):
  109. """Process a BlobCommand."""
  110. blob = Blob.from_string(cmd.data)
  111. self.repo.object_store.add_object(blob)
  112. if cmd.mark:
  113. self.markers[cmd.mark] = blob.id
  114. def checkpoint_handler(self, cmd):
  115. """Process a CheckpointCommand."""
  116. pass
  117. def commit_handler(self, cmd):
  118. """Process a CommitCommand."""
  119. commit = Commit()
  120. if cmd.author is not None:
  121. author = cmd.author
  122. else:
  123. author = cmd.committer
  124. (author_name, author_email, author_timestamp, author_timezone) = author
  125. (committer_name, committer_email, commit_timestamp,
  126. commit_timezone) = cmd.committer
  127. commit.author = "%s <%s>" % (author_name, author_email)
  128. commit.author_timezone = author_timezone
  129. commit.author_time = int(author_timestamp)
  130. commit.committer = "%s <%s>" % (committer_name, committer_email)
  131. commit.commit_timezone = commit_timezone
  132. commit.commit_time = int(commit_timestamp)
  133. commit.message = cmd.message
  134. commit.parents = []
  135. if cmd.from_:
  136. self._reset_base(cmd.from_)
  137. for filecmd in cmd.iter_files():
  138. if filecmd.name == "filemodify":
  139. if filecmd.data is not None:
  140. blob = Blob.from_string(filecmd.data)
  141. self.repo.object_store.add(blob)
  142. blob_id = blob.id
  143. else:
  144. assert filecmd.dataref[0] == ":", \
  145. "non-marker refs not supported yet"
  146. blob_id = self.markers[filecmd.dataref[1:]]
  147. self._contents[filecmd.path] = (filecmd.mode, blob_id)
  148. elif filecmd.name == "filedelete":
  149. del self._contents[filecmd.path]
  150. elif filecmd.name == "filecopy":
  151. self._contents[filecmd.dest_path] = self._contents[
  152. filecmd.src_path]
  153. elif filecmd.name == "filerename":
  154. self._contents[filecmd.new_path] = self._contents[
  155. filecmd.old_path]
  156. del self._contents[filecmd.old_path]
  157. elif filecmd.name == "filedeleteall":
  158. self._contents = {}
  159. else:
  160. raise Exception("Command %s not supported" % filecmd.name)
  161. commit.tree = commit_tree(self.repo.object_store,
  162. ((path, hexsha, mode) for (path, (mode, hexsha)) in
  163. self._contents.iteritems()))
  164. if self.last_commit is not None:
  165. commit.parents.append(self.last_commit)
  166. commit.parents += cmd.merges
  167. self.repo.object_store.add_object(commit)
  168. self.repo[cmd.ref] = commit.id
  169. self.last_commit = commit.id
  170. if cmd.mark:
  171. self.markers[cmd.mark] = commit.id
  172. def progress_handler(self, cmd):
  173. """Process a ProgressCommand."""
  174. pass
  175. def _reset_base(self, commit_id):
  176. if self.last_commit == commit_id:
  177. return
  178. self.last_commit = commit_id
  179. self._contents = {}
  180. tree_id = self.repo[commit_id].tree
  181. for (path, mode, hexsha) in (
  182. self.repo.object_store.iter_tree_contents(tree_id)):
  183. self._contents[path] = (mode, hexsha)
  184. def reset_handler(self, cmd):
  185. """Process a ResetCommand."""
  186. self._reset_base(cmd.from_)
  187. self.repo.refs[cmd.ref] = cmd.from_
  188. def tag_handler(self, cmd):
  189. """Process a TagCommand."""
  190. tag = Tag()
  191. tag.tagger = cmd.tagger
  192. tag.message = cmd.message
  193. tag.name = cmd.tag
  194. self.repo.add_object(tag)
  195. self.repo.refs["refs/tags/" + tag.name] = tag.id
  196. def feature_handler(self, cmd):
  197. """Process a FeatureCommand."""
  198. raise fastimport_errors.UnknownFeature(cmd.feature_name)