fastexport.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. # fastexport.py -- Fast export/import functionality
  2. # Copyright (C) 2010-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Fast export/import functionality."""
  22. import stat
  23. from collections.abc import Generator
  24. from typing import TYPE_CHECKING, Any, BinaryIO, Optional
  25. from fastimport import commands, parser, processor
  26. from fastimport import errors as fastimport_errors
  27. from .index import commit_tree
  28. from .object_store import iter_tree_contents
  29. from .objects import ZERO_SHA, Blob, Commit, ObjectID, Tag
  30. from .refs import Ref
  31. if TYPE_CHECKING:
  32. from .object_store import BaseObjectStore
  33. from .repo import BaseRepo
  34. def split_email(text: bytes) -> tuple[bytes, bytes]:
  35. # TODO(jelmer): Dedupe this and the same functionality in
  36. # format_annotate_line.
  37. (name, email) = text.rsplit(b" <", 1)
  38. return (name, email.rstrip(b">"))
  39. class GitFastExporter:
  40. """Generate a fast-export output stream for Git objects."""
  41. def __init__(self, outf: BinaryIO, store: "BaseObjectStore") -> None:
  42. self.outf = outf
  43. self.store = store
  44. self.markers: dict[bytes, bytes] = {}
  45. self._marker_idx = 0
  46. def print_cmd(self, cmd: object) -> None:
  47. if hasattr(cmd, "__bytes__"):
  48. output = cmd.__bytes__()
  49. else:
  50. output = cmd.__repr__().encode("utf-8")
  51. self.outf.write(output + b"\n")
  52. def _allocate_marker(self) -> bytes:
  53. self._marker_idx += 1
  54. return str(self._marker_idx).encode("ascii")
  55. def _export_blob(self, blob: Blob) -> tuple[Any, bytes]:
  56. marker = self._allocate_marker()
  57. self.markers[marker] = blob.id
  58. return (commands.BlobCommand(marker, blob.data), marker)
  59. def emit_blob(self, blob: Blob) -> bytes:
  60. (cmd, marker) = self._export_blob(blob)
  61. self.print_cmd(cmd)
  62. return marker
  63. def _iter_files(
  64. self, base_tree: Optional[bytes], new_tree: Optional[bytes]
  65. ) -> Generator[Any, None, None]:
  66. for (
  67. (old_path, new_path),
  68. (old_mode, new_mode),
  69. (old_hexsha, new_hexsha),
  70. ) in self.store.tree_changes(base_tree, new_tree):
  71. if new_path is None:
  72. if old_path is not None:
  73. yield commands.FileDeleteCommand(old_path)
  74. continue
  75. marker = b""
  76. if new_mode is not None and not stat.S_ISDIR(new_mode):
  77. if new_hexsha is not None:
  78. blob = self.store[new_hexsha]
  79. from .objects import Blob
  80. if isinstance(blob, Blob):
  81. marker = self.emit_blob(blob)
  82. if old_path != new_path and old_path is not None:
  83. yield commands.FileRenameCommand(old_path, new_path)
  84. if old_mode != new_mode or old_hexsha != new_hexsha:
  85. prefixed_marker = b":" + marker
  86. yield commands.FileModifyCommand(
  87. new_path, new_mode, prefixed_marker, None
  88. )
  89. def _export_commit(
  90. self, commit: Commit, ref: Ref, base_tree: Optional[ObjectID] = None
  91. ) -> tuple[Any, bytes]:
  92. file_cmds = list(self._iter_files(base_tree, commit.tree))
  93. marker = self._allocate_marker()
  94. if commit.parents:
  95. from_ = commit.parents[0]
  96. merges = commit.parents[1:]
  97. else:
  98. from_ = None
  99. merges = []
  100. author, author_email = split_email(commit.author)
  101. committer, committer_email = split_email(commit.committer)
  102. cmd = commands.CommitCommand(
  103. ref,
  104. marker,
  105. (author, author_email, commit.author_time, commit.author_timezone),
  106. (
  107. committer,
  108. committer_email,
  109. commit.commit_time,
  110. commit.commit_timezone,
  111. ),
  112. commit.message,
  113. from_,
  114. merges,
  115. file_cmds,
  116. )
  117. return (cmd, marker)
  118. def emit_commit(
  119. self, commit: Commit, ref: Ref, base_tree: Optional[ObjectID] = None
  120. ) -> bytes:
  121. cmd, marker = self._export_commit(commit, ref, base_tree)
  122. self.print_cmd(cmd)
  123. return marker
  124. class GitImportProcessor(processor.ImportProcessor):
  125. """An import processor that imports into a Git repository using Dulwich."""
  126. # FIXME: Batch creation of objects?
  127. def __init__(
  128. self,
  129. repo: "BaseRepo",
  130. params: Optional[Any] = None, # noqa: ANN401
  131. verbose: bool = False,
  132. outf: Optional[BinaryIO] = None,
  133. ) -> None:
  134. processor.ImportProcessor.__init__(self, params, verbose)
  135. self.repo = repo
  136. self.last_commit = ZERO_SHA
  137. self.markers: dict[bytes, bytes] = {}
  138. self._contents: dict[bytes, tuple[int, bytes]] = {}
  139. def lookup_object(self, objectish: bytes) -> ObjectID:
  140. if objectish.startswith(b":"):
  141. return self.markers[objectish[1:]]
  142. return objectish
  143. def import_stream(self, stream: BinaryIO) -> dict[bytes, bytes]:
  144. p = parser.ImportParser(stream)
  145. self.process(p.iter_commands)
  146. return self.markers
  147. def blob_handler(self, cmd: commands.BlobCommand) -> None:
  148. """Process a BlobCommand."""
  149. blob = Blob.from_string(cmd.data)
  150. self.repo.object_store.add_object(blob)
  151. if cmd.mark:
  152. self.markers[cmd.mark] = blob.id
  153. def checkpoint_handler(self, cmd: commands.CheckpointCommand) -> None:
  154. """Process a CheckpointCommand."""
  155. def commit_handler(self, cmd: commands.CommitCommand) -> None:
  156. """Process a CommitCommand."""
  157. commit = Commit()
  158. if cmd.author is not None:
  159. author = cmd.author
  160. else:
  161. author = cmd.committer
  162. (author_name, author_email, author_timestamp, author_timezone) = author
  163. (
  164. committer_name,
  165. committer_email,
  166. commit_timestamp,
  167. commit_timezone,
  168. ) = cmd.committer
  169. commit.author = author_name + b" <" + author_email + b">"
  170. commit.author_timezone = author_timezone
  171. commit.author_time = int(author_timestamp)
  172. commit.committer = committer_name + b" <" + committer_email + b">"
  173. commit.commit_timezone = commit_timezone
  174. commit.commit_time = int(commit_timestamp)
  175. commit.message = cmd.message
  176. commit.parents = []
  177. if cmd.from_:
  178. cmd.from_ = self.lookup_object(cmd.from_)
  179. self._reset_base(cmd.from_)
  180. for filecmd in cmd.iter_files():
  181. if filecmd.name == b"filemodify":
  182. if filecmd.data is not None:
  183. blob = Blob.from_string(filecmd.data)
  184. self.repo.object_store.add_object(blob)
  185. blob_id = blob.id
  186. else:
  187. blob_id = self.lookup_object(filecmd.dataref)
  188. self._contents[filecmd.path] = (filecmd.mode, blob_id)
  189. elif filecmd.name == b"filedelete":
  190. del self._contents[filecmd.path]
  191. elif filecmd.name == b"filecopy":
  192. self._contents[filecmd.dest_path] = self._contents[filecmd.src_path]
  193. elif filecmd.name == b"filerename":
  194. self._contents[filecmd.new_path] = self._contents[filecmd.old_path]
  195. del self._contents[filecmd.old_path]
  196. elif filecmd.name == b"filedeleteall":
  197. self._contents = {}
  198. else:
  199. raise Exception(f"Command {filecmd.name} not supported")
  200. commit.tree = commit_tree(
  201. self.repo.object_store,
  202. ((path, hexsha, mode) for (path, (mode, hexsha)) in self._contents.items()),
  203. )
  204. if self.last_commit != ZERO_SHA:
  205. commit.parents.append(self.last_commit)
  206. for merge in cmd.merges:
  207. commit.parents.append(self.lookup_object(merge))
  208. self.repo.object_store.add_object(commit)
  209. self.repo[cmd.ref] = commit.id
  210. self.last_commit = commit.id
  211. if cmd.mark:
  212. self.markers[cmd.mark] = commit.id
  213. def progress_handler(self, cmd: commands.ProgressCommand) -> None:
  214. """Process a ProgressCommand."""
  215. def _reset_base(self, commit_id: ObjectID) -> None:
  216. if self.last_commit == commit_id:
  217. return
  218. self._contents = {}
  219. self.last_commit = commit_id
  220. if commit_id != ZERO_SHA:
  221. from .objects import Commit
  222. commit = self.repo[commit_id]
  223. tree_id = commit.tree if isinstance(commit, Commit) else None
  224. if tree_id is None:
  225. return
  226. for (
  227. path,
  228. mode,
  229. hexsha,
  230. ) in iter_tree_contents(self.repo.object_store, tree_id):
  231. self._contents[path] = (mode, hexsha)
  232. def reset_handler(self, cmd: commands.ResetCommand) -> None:
  233. """Process a ResetCommand."""
  234. if cmd.from_ is None:
  235. from_ = ZERO_SHA
  236. else:
  237. from_ = self.lookup_object(cmd.from_)
  238. self._reset_base(from_)
  239. self.repo.refs[cmd.ref] = from_
  240. def tag_handler(self, cmd: commands.TagCommand) -> None:
  241. """Process a TagCommand."""
  242. tag = Tag()
  243. tag.tagger = cmd.tagger
  244. tag.message = cmd.message
  245. tag.name = cmd.from_
  246. self.repo.object_store.add_object(tag)
  247. self.repo.refs["refs/tags/" + tag.name] = tag.id
  248. def feature_handler(self, cmd: commands.FeatureCommand) -> None:
  249. """Process a FeatureCommand."""
  250. raise fastimport_errors.UnknownFeature(cmd.feature_name)