8.6 KB

  1. # -- Fast export/import functionality
  2. # Copyright (C) 2010-2013 Jelmer Vernooij <>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Fast export/import functionality."""
  21. import sys
  22. from dulwich.index import (
  23. commit_tree,
  24. )
  25. from dulwich.objects import (
  26. Blob,
  27. Commit,
  28. Tag,
  29. ZERO_SHA,
  30. )
  31. from fastimport import ( # noqa: E402
  32. commands,
  33. errors as fastimport_errors,
  34. parser,
  35. processor,
  36. )
  37. import stat # noqa: E402
  38. def split_email(text):
  39. (name, email) = text.rsplit(b" <", 1)
  40. return (name, email.rstrip(b">"))
  41. class GitFastExporter(object):
  42. """Generate a fast-export output stream for Git objects."""
  43. def __init__(self, outf, store):
  44. self.outf = outf
  45. = store
  46. self.markers = {}
  47. self._marker_idx = 0
  48. def print_cmd(self, cmd):
  49. self.outf.write(getattr(cmd, "__bytes__", cmd.__repr__)() + b"\n")
  50. def _allocate_marker(self):
  51. self._marker_idx += 1
  52. return ("%d" % (self._marker_idx,)).encode('ascii')
  53. def _export_blob(self, blob):
  54. marker = self._allocate_marker()
  55. self.markers[marker] =
  56. return (commands.BlobCommand(marker,, marker)
  57. def emit_blob(self, blob):
  58. (cmd, marker) = self._export_blob(blob)
  59. self.print_cmd(cmd)
  60. return marker
  61. def _iter_files(self, base_tree, new_tree):
  62. for ((old_path, new_path), (old_mode, new_mode),
  63. (old_hexsha, new_hexsha)) in \
  64., new_tree):
  65. if new_path is None:
  66. yield commands.FileDeleteCommand(old_path)
  67. continue
  68. if not stat.S_ISDIR(new_mode):
  69. blob =[new_hexsha]
  70. marker = self.emit_blob(blob)
  71. if old_path != new_path and old_path is not None:
  72. yield commands.FileRenameCommand(old_path, new_path)
  73. if old_mode != new_mode or old_hexsha != new_hexsha:
  74. prefixed_marker = b':' + marker
  75. yield commands.FileModifyCommand(
  76. new_path, new_mode, prefixed_marker, None
  77. )
  78. def _export_commit(self, commit, ref, base_tree=None):
  79. file_cmds = list(self._iter_files(base_tree, commit.tree))
  80. marker = self._allocate_marker()
  81. if commit.parents:
  82. from_ = commit.parents[0]
  83. merges = commit.parents[1:]
  84. else:
  85. from_ = None
  86. merges = []
  87. author, author_email = split_email(
  88. committer, committer_email = split_email(commit.committer)
  89. cmd = commands.CommitCommand(
  90. ref, marker,
  91. (author, author_email, commit.author_time, commit.author_timezone),
  92. (committer, committer_email, commit.commit_time,
  93. commit.commit_timezone),
  94. commit.message, from_, merges, file_cmds)
  95. return (cmd, marker)
  96. def emit_commit(self, commit, ref, base_tree=None):
  97. cmd, marker = self._export_commit(commit, ref, base_tree)
  98. self.print_cmd(cmd)
  99. return marker
  100. class GitImportProcessor(processor.ImportProcessor):
  101. """An import processor that imports into a Git repository using Dulwich.
  102. """
  103. # FIXME: Batch creation of objects?
  104. def __init__(self, repo, params=None, verbose=False, outf=None):
  105. processor.ImportProcessor.__init__(self, params, verbose)
  106. self.repo = repo
  107. self.last_commit = ZERO_SHA
  108. self.markers = {}
  109. self._contents = {}
  110. def lookup_object(self, objectish):
  111. if objectish.startswith(b":"):
  112. return self.markers[objectish[1:]]
  113. return objectish
  114. def import_stream(self, stream):
  115. p = parser.ImportParser(stream)
  116. self.process(p.iter_commands)
  117. return self.markers
  118. def blob_handler(self, cmd):
  119. """Process a BlobCommand."""
  120. blob = Blob.from_string(
  121. self.repo.object_store.add_object(blob)
  122. if cmd.mark:
  123. self.markers[cmd.mark] =
  124. def checkpoint_handler(self, cmd):
  125. """Process a CheckpointCommand."""
  126. pass
  127. def commit_handler(self, cmd):
  128. """Process a CommitCommand."""
  129. commit = Commit()
  130. if is not None:
  131. author =
  132. else:
  133. author = cmd.committer
  134. (author_name, author_email, author_timestamp, author_timezone) = author
  135. (committer_name, committer_email, commit_timestamp,
  136. commit_timezone) = cmd.committer
  137. = author_name + b" <" + author_email + b">"
  138. commit.author_timezone = author_timezone
  139. commit.author_time = int(author_timestamp)
  140. commit.committer = committer_name + b" <" + committer_email + b">"
  141. commit.commit_timezone = commit_timezone
  142. commit.commit_time = int(commit_timestamp)
  143. commit.message = cmd.message
  144. commit.parents = []
  145. if cmd.from_:
  146. cmd.from_ = self.lookup_object(cmd.from_)
  147. self._reset_base(cmd.from_)
  148. for filecmd in cmd.iter_files():
  149. if == b"filemodify":
  150. if is not None:
  151. blob = Blob.from_string(
  152. self.repo.object_store.add(blob)
  153. blob_id =
  154. else:
  155. blob_id = self.lookup_object(filecmd.dataref)
  156. self._contents[filecmd.path] = (filecmd.mode, blob_id)
  157. elif == b"filedelete":
  158. del self._contents[filecmd.path]
  159. elif == b"filecopy":
  160. self._contents[filecmd.dest_path] = self._contents[
  161. filecmd.src_path]
  162. elif == b"filerename":
  163. self._contents[filecmd.new_path] = self._contents[
  164. filecmd.old_path]
  165. del self._contents[filecmd.old_path]
  166. elif == b"filedeleteall":
  167. self._contents = {}
  168. else:
  169. raise Exception("Command %s not supported" %
  170. commit.tree = commit_tree(
  171. self.repo.object_store,
  172. ((path, hexsha, mode) for (path, (mode, hexsha)) in
  173. self._contents.items()))
  174. if self.last_commit != ZERO_SHA:
  175. commit.parents.append(self.last_commit)
  176. for merge in cmd.merges:
  177. commit.parents.append(self.lookup_object(merge))
  178. self.repo.object_store.add_object(commit)
  179. self.repo[cmd.ref] =
  180. self.last_commit =
  181. if cmd.mark:
  182. self.markers[cmd.mark] =
  183. def progress_handler(self, cmd):
  184. """Process a ProgressCommand."""
  185. pass
  186. def _reset_base(self, commit_id):
  187. if self.last_commit == commit_id:
  188. return
  189. self._contents = {}
  190. self.last_commit = commit_id
  191. if commit_id != ZERO_SHA:
  192. tree_id = self.repo[commit_id].tree
  193. for (path, mode, hexsha) in (
  194. self.repo.object_store.iter_tree_contents(tree_id)):
  195. self._contents[path] = (mode, hexsha)
  196. def reset_handler(self, cmd):
  197. """Process a ResetCommand."""
  198. if cmd.from_ is None:
  199. from_ = ZERO_SHA
  200. else:
  201. from_ = self.lookup_object(cmd.from_)
  202. self._reset_base(from_)
  203. self.repo.refs[cmd.ref] = from_
  204. def tag_handler(self, cmd):
  205. """Process a TagCommand."""
  206. tag = Tag()
  207. tag.tagger = cmd.tagger
  208. tag.message = cmd.message
  209. = cmd.tag
  210. self.repo.add_object(tag)
  211. self.repo.refs["refs/tags/" +] =
  212. def feature_handler(self, cmd):
  213. """Process a FeatureCommand."""
  214. raise fastimport_errors.UnknownFeature(cmd.feature_name)