2
0

fastexport.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. # fastexport.py -- Fast export/import functionality
  2. # Copyright (C) 2010-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Fast export/import functionality."""
  21. import sys
  22. from dulwich.index import (
  23. commit_tree,
  24. )
  25. from dulwich.objects import (
  26. Blob,
  27. Commit,
  28. Tag,
  29. ZERO_SHA,
  30. )
  31. from fastimport import __version__ as fastimport_version
  32. if (fastimport_version <= (0, 9, 5) and
  33. sys.version_info[0] == 3 and sys.version_info[1] < 5):
  34. raise ImportError("Older versions of fastimport don't support python3<3.5")
  35. from fastimport import ( # noqa: E402
  36. commands,
  37. errors as fastimport_errors,
  38. parser,
  39. processor,
  40. )
  41. import stat # noqa: E402
  42. def split_email(text):
  43. (name, email) = text.rsplit(b" <", 1)
  44. return (name, email.rstrip(b">"))
  45. class GitFastExporter(object):
  46. """Generate a fast-export output stream for Git objects."""
  47. def __init__(self, outf, store):
  48. self.outf = outf
  49. self.store = store
  50. self.markers = {}
  51. self._marker_idx = 0
  52. def print_cmd(self, cmd):
  53. self.outf.write(getattr(cmd, "__bytes__", cmd.__repr__)() + b"\n")
  54. def _allocate_marker(self):
  55. self._marker_idx += 1
  56. return ("%d" % (self._marker_idx,)).encode('ascii')
  57. def _export_blob(self, blob):
  58. marker = self._allocate_marker()
  59. self.markers[marker] = blob.id
  60. return (commands.BlobCommand(marker, blob.data), marker)
  61. def emit_blob(self, blob):
  62. (cmd, marker) = self._export_blob(blob)
  63. self.print_cmd(cmd)
  64. return marker
  65. def _iter_files(self, base_tree, new_tree):
  66. for ((old_path, new_path), (old_mode, new_mode),
  67. (old_hexsha, new_hexsha)) in \
  68. self.store.tree_changes(base_tree, new_tree):
  69. if new_path is None:
  70. yield commands.FileDeleteCommand(old_path)
  71. continue
  72. if not stat.S_ISDIR(new_mode):
  73. blob = self.store[new_hexsha]
  74. marker = self.emit_blob(blob)
  75. if old_path != new_path and old_path is not None:
  76. yield commands.FileRenameCommand(old_path, new_path)
  77. if old_mode != new_mode or old_hexsha != new_hexsha:
  78. prefixed_marker = b':' + marker
  79. yield commands.FileModifyCommand(
  80. new_path, new_mode, prefixed_marker, None
  81. )
  82. def _export_commit(self, commit, ref, base_tree=None):
  83. file_cmds = list(self._iter_files(base_tree, commit.tree))
  84. marker = self._allocate_marker()
  85. if commit.parents:
  86. from_ = commit.parents[0]
  87. merges = commit.parents[1:]
  88. else:
  89. from_ = None
  90. merges = []
  91. author, author_email = split_email(commit.author)
  92. committer, committer_email = split_email(commit.committer)
  93. cmd = commands.CommitCommand(
  94. ref, marker,
  95. (author, author_email, commit.author_time, commit.author_timezone),
  96. (committer, committer_email, commit.commit_time,
  97. commit.commit_timezone),
  98. commit.message, from_, merges, file_cmds)
  99. return (cmd, marker)
  100. def emit_commit(self, commit, ref, base_tree=None):
  101. cmd, marker = self._export_commit(commit, ref, base_tree)
  102. self.print_cmd(cmd)
  103. return marker
  104. class GitImportProcessor(processor.ImportProcessor):
  105. """An import processor that imports into a Git repository using Dulwich.
  106. """
  107. # FIXME: Batch creation of objects?
  108. def __init__(self, repo, params=None, verbose=False, outf=None):
  109. processor.ImportProcessor.__init__(self, params, verbose)
  110. self.repo = repo
  111. self.last_commit = ZERO_SHA
  112. self.markers = {}
  113. self._contents = {}
  114. def lookup_object(self, objectish):
  115. if objectish.startswith(b":"):
  116. return self.markers[objectish[1:]]
  117. return objectish
  118. def import_stream(self, stream):
  119. p = parser.ImportParser(stream)
  120. self.process(p.iter_commands)
  121. return self.markers
  122. def blob_handler(self, cmd):
  123. """Process a BlobCommand."""
  124. blob = Blob.from_string(cmd.data)
  125. self.repo.object_store.add_object(blob)
  126. if cmd.mark:
  127. self.markers[cmd.mark] = blob.id
  128. def checkpoint_handler(self, cmd):
  129. """Process a CheckpointCommand."""
  130. pass
  131. def commit_handler(self, cmd):
  132. """Process a CommitCommand."""
  133. commit = Commit()
  134. if cmd.author is not None:
  135. author = cmd.author
  136. else:
  137. author = cmd.committer
  138. (author_name, author_email, author_timestamp, author_timezone) = author
  139. (committer_name, committer_email, commit_timestamp,
  140. commit_timezone) = cmd.committer
  141. commit.author = author_name + b" <" + author_email + b">"
  142. commit.author_timezone = author_timezone
  143. commit.author_time = int(author_timestamp)
  144. commit.committer = committer_name + b" <" + committer_email + b">"
  145. commit.commit_timezone = commit_timezone
  146. commit.commit_time = int(commit_timestamp)
  147. commit.message = cmd.message
  148. commit.parents = []
  149. if cmd.from_:
  150. cmd.from_ = self.lookup_object(cmd.from_)
  151. self._reset_base(cmd.from_)
  152. for filecmd in cmd.iter_files():
  153. if filecmd.name == b"filemodify":
  154. if filecmd.data is not None:
  155. blob = Blob.from_string(filecmd.data)
  156. self.repo.object_store.add(blob)
  157. blob_id = blob.id
  158. else:
  159. blob_id = self.lookup_object(filecmd.dataref)
  160. self._contents[filecmd.path] = (filecmd.mode, blob_id)
  161. elif filecmd.name == b"filedelete":
  162. del self._contents[filecmd.path]
  163. elif filecmd.name == b"filecopy":
  164. self._contents[filecmd.dest_path] = self._contents[
  165. filecmd.src_path]
  166. elif filecmd.name == b"filerename":
  167. self._contents[filecmd.new_path] = self._contents[
  168. filecmd.old_path]
  169. del self._contents[filecmd.old_path]
  170. elif filecmd.name == b"filedeleteall":
  171. self._contents = {}
  172. else:
  173. raise Exception("Command %s not supported" % filecmd.name)
  174. commit.tree = commit_tree(
  175. self.repo.object_store,
  176. ((path, hexsha, mode) for (path, (mode, hexsha)) in
  177. self._contents.items()))
  178. if self.last_commit != ZERO_SHA:
  179. commit.parents.append(self.last_commit)
  180. for merge in cmd.merges:
  181. commit.parents.append(self.lookup_object(merge))
  182. self.repo.object_store.add_object(commit)
  183. self.repo[cmd.ref] = commit.id
  184. self.last_commit = commit.id
  185. if cmd.mark:
  186. self.markers[cmd.mark] = commit.id
  187. def progress_handler(self, cmd):
  188. """Process a ProgressCommand."""
  189. pass
  190. def _reset_base(self, commit_id):
  191. if self.last_commit == commit_id:
  192. return
  193. self._contents = {}
  194. self.last_commit = commit_id
  195. if commit_id != ZERO_SHA:
  196. tree_id = self.repo[commit_id].tree
  197. for (path, mode, hexsha) in (
  198. self.repo.object_store.iter_tree_contents(tree_id)):
  199. self._contents[path] = (mode, hexsha)
  200. def reset_handler(self, cmd):
  201. """Process a ResetCommand."""
  202. if cmd.from_ is None:
  203. from_ = ZERO_SHA
  204. else:
  205. from_ = self.lookup_object(cmd.from_)
  206. self._reset_base(from_)
  207. self.repo.refs[cmd.ref] = from_
  208. def tag_handler(self, cmd):
  209. """Process a TagCommand."""
  210. tag = Tag()
  211. tag.tagger = cmd.tagger
  212. tag.message = cmd.message
  213. tag.name = cmd.tag
  214. self.repo.add_object(tag)
  215. self.repo.refs["refs/tags/" + tag.name] = tag.id
  216. def feature_handler(self, cmd):
  217. """Process a FeatureCommand."""
  218. raise fastimport_errors.UnknownFeature(cmd.feature_name)