bundle.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. # bundle.py -- Bundle format support
  2. # Copyright (C) 2020 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Bundle format support."""
  22. __all__ = [
  23. "Bundle",
  24. "PackDataLike",
  25. "create_bundle_from_repo",
  26. "read_bundle",
  27. "write_bundle",
  28. ]
  29. from collections.abc import Callable, Iterator, Sequence
  30. from typing import (
  31. TYPE_CHECKING,
  32. BinaryIO,
  33. Protocol,
  34. cast,
  35. runtime_checkable,
  36. )
  37. if TYPE_CHECKING:
  38. from .object_format import ObjectFormat
  39. from .objects import ObjectID
  40. from .pack import PackData, UnpackedObject, write_pack_data
  41. from .refs import Ref
  42. @runtime_checkable
  43. class PackDataLike(Protocol):
  44. """Protocol for objects that behave like PackData."""
  45. object_format: "ObjectFormat"
  46. def __len__(self) -> int:
  47. """Return the number of objects in the pack."""
  48. ...
  49. def iter_unpacked(self) -> Iterator[UnpackedObject]:
  50. """Iterate over unpacked objects in the pack."""
  51. ...
  52. if TYPE_CHECKING:
  53. from .object_store import BaseObjectStore
  54. from .repo import BaseRepo
  55. class Bundle:
  56. """Git bundle object representation."""
  57. version: int | None
  58. capabilities: dict[str, str | None]
  59. prerequisites: list[tuple[ObjectID, bytes]]
  60. references: dict[Ref, ObjectID]
  61. pack_data: PackDataLike | None
  62. def __repr__(self) -> str:
  63. """Return string representation of Bundle."""
  64. return (
  65. f"<{type(self).__name__}(version={self.version}, "
  66. f"capabilities={self.capabilities}, "
  67. f"prerequisites={self.prerequisites}, "
  68. f"references={self.references})>"
  69. )
  70. def __eq__(self, other: object) -> bool:
  71. """Check equality with another Bundle."""
  72. if not isinstance(other, type(self)):
  73. return False
  74. if self.version != other.version:
  75. return False
  76. if self.capabilities != other.capabilities:
  77. return False
  78. if self.prerequisites != other.prerequisites:
  79. return False
  80. if self.references != other.references:
  81. return False
  82. if self.pack_data != other.pack_data:
  83. return False
  84. return True
  85. def store_objects(
  86. self,
  87. object_store: "BaseObjectStore",
  88. progress: Callable[[str], None] | None = None,
  89. ) -> None:
  90. """Store all objects from this bundle into an object store.
  91. Args:
  92. object_store: The object store to add objects to
  93. progress: Optional progress callback function
  94. """
  95. from .objects import ShaFile
  96. if self.pack_data is None:
  97. raise ValueError("pack_data is not loaded")
  98. count = 0
  99. for unpacked in self.pack_data.iter_unpacked():
  100. # Convert the unpacked object to a proper git object
  101. if unpacked.decomp_chunks and unpacked.obj_type_num is not None:
  102. git_obj = ShaFile.from_raw_chunks(
  103. unpacked.obj_type_num, unpacked.decomp_chunks
  104. )
  105. object_store.add_object(git_obj)
  106. count += 1
  107. if progress and count % 100 == 0:
  108. progress(f"Stored {count} objects")
  109. if progress:
  110. progress(f"Stored {count} objects total")
  111. def _read_bundle(f: BinaryIO, version: int) -> Bundle:
  112. capabilities = {}
  113. prerequisites = []
  114. references: dict[Ref, ObjectID] = {}
  115. line = f.readline()
  116. if version >= 3:
  117. while line.startswith(b"@"):
  118. line = line[1:].rstrip(b"\n")
  119. try:
  120. key, value_bytes = line.split(b"=", 1)
  121. value = value_bytes.decode("utf-8")
  122. except ValueError:
  123. key = line
  124. value = None
  125. capabilities[key.decode("utf-8")] = value
  126. line = f.readline()
  127. while line.startswith(b"-"):
  128. (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
  129. prerequisites.append((ObjectID(obj_id), comment))
  130. line = f.readline()
  131. while line != b"\n":
  132. (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
  133. references[Ref(ref)] = ObjectID(obj_id)
  134. line = f.readline()
  135. # Extract pack data to separate stream since PackData expects
  136. # the file to start with PACK header at position 0
  137. pack_bytes = f.read()
  138. if not pack_bytes:
  139. raise ValueError("Bundle file contains no pack data")
  140. from io import BytesIO
  141. from .object_format import DEFAULT_OBJECT_FORMAT
  142. pack_file = BytesIO(pack_bytes)
  143. # TODO: Support specifying object format based on bundle metadata
  144. pack_data = PackData.from_file(pack_file, object_format=DEFAULT_OBJECT_FORMAT)
  145. ret = Bundle()
  146. ret.references = references
  147. ret.capabilities = capabilities
  148. ret.prerequisites = prerequisites
  149. ret.pack_data = pack_data
  150. ret.version = version
  151. return ret
  152. def read_bundle(f: BinaryIO) -> Bundle:
  153. """Read a bundle file.
  154. Args:
  155. f: A seekable binary file-like object. The file must remain open
  156. for the lifetime of the returned Bundle object.
  157. """
  158. if not hasattr(f, "seek"):
  159. raise ValueError("Bundle file must be seekable")
  160. firstline = f.readline()
  161. if firstline == b"# v2 git bundle\n":
  162. return _read_bundle(f, 2)
  163. if firstline == b"# v3 git bundle\n":
  164. return _read_bundle(f, 3)
  165. raise AssertionError(f"unsupported bundle format header: {firstline!r}")
  166. def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
  167. """Write a bundle to a file.
  168. Args:
  169. f: File-like object to write to
  170. bundle: Bundle object to write
  171. """
  172. version = bundle.version
  173. if version is None:
  174. if bundle.capabilities:
  175. version = 3
  176. else:
  177. version = 2
  178. if version == 2:
  179. f.write(b"# v2 git bundle\n")
  180. elif version == 3:
  181. f.write(b"# v3 git bundle\n")
  182. else:
  183. raise AssertionError(f"unknown version {version}")
  184. if version == 3:
  185. for key, value in bundle.capabilities.items():
  186. f.write(b"@" + key.encode("utf-8"))
  187. if value is not None:
  188. f.write(b"=" + value.encode("utf-8"))
  189. f.write(b"\n")
  190. for obj_id, comment in bundle.prerequisites:
  191. f.write(b"-" + obj_id + b" " + comment + b"\n")
  192. for ref, obj_id in bundle.references.items():
  193. f.write(obj_id + b" " + ref + b"\n")
  194. f.write(b"\n")
  195. if bundle.pack_data is None:
  196. raise ValueError("bundle.pack_data is not loaded")
  197. write_pack_data(
  198. cast(Callable[[bytes], None], f.write),
  199. num_records=len(bundle.pack_data),
  200. records=bundle.pack_data.iter_unpacked(),
  201. object_format=bundle.pack_data.object_format,
  202. )
  203. def create_bundle_from_repo(
  204. repo: "BaseRepo",
  205. refs: Sequence[Ref] | None = None,
  206. prerequisites: Sequence[bytes] | None = None,
  207. version: int | None = None,
  208. capabilities: dict[str, str | None] | None = None,
  209. progress: Callable[[str], None] | None = None,
  210. ) -> Bundle:
  211. """Create a bundle from a repository.
  212. Args:
  213. repo: Repository object to create bundle from
  214. refs: List of refs to include (defaults to all refs)
  215. prerequisites: List of commit SHAs that are prerequisites
  216. version: Bundle version (2 or 3, auto-detected if None)
  217. capabilities: Bundle capabilities (for v3 bundles)
  218. progress: Optional progress reporting function
  219. Returns:
  220. Bundle object ready for writing
  221. """
  222. if refs is None:
  223. refs = list(repo.refs.keys())
  224. if prerequisites is None:
  225. prerequisites = []
  226. if capabilities is None:
  227. capabilities = {}
  228. # Build the references dictionary for the bundle
  229. bundle_refs: dict[Ref, ObjectID] = {}
  230. want_objects: set[ObjectID] = set()
  231. for ref in refs:
  232. if ref in repo.refs:
  233. ref_value = repo.refs[ref]
  234. # Handle peeled refs
  235. try:
  236. peeled_value = repo.refs.get_peeled(ref)
  237. if peeled_value is not None and peeled_value != ref_value:
  238. bundle_refs[ref] = peeled_value
  239. else:
  240. bundle_refs[ref] = ref_value
  241. except KeyError:
  242. bundle_refs[ref] = ref_value
  243. want_objects.add(bundle_refs[ref])
  244. # Convert prerequisites to proper format
  245. bundle_prerequisites = []
  246. have_objects: set[ObjectID] = set()
  247. for prereq in prerequisites:
  248. if not isinstance(prereq, bytes):
  249. raise TypeError(
  250. f"Invalid prerequisite type: {type(prereq)}, expected bytes"
  251. )
  252. if len(prereq) != 40:
  253. raise ValueError(
  254. f"Invalid prerequisite SHA length: {len(prereq)}, expected 40 hex characters"
  255. )
  256. try:
  257. # Validate it's actually hex
  258. bytes.fromhex(prereq.decode("utf-8"))
  259. except ValueError:
  260. raise ValueError(f"Invalid prerequisite format: {prereq!r}")
  261. # Store hex in bundle and for pack generation
  262. bundle_prerequisites.append((ObjectID(prereq), b""))
  263. have_objects.add(ObjectID(prereq))
  264. # Generate pack data containing all objects needed for the refs
  265. pack_count, pack_objects = repo.generate_pack_data(
  266. have=have_objects,
  267. want=want_objects,
  268. progress=progress,
  269. )
  270. # Store the pack objects directly, we'll write them when saving the bundle
  271. # For now, create a simple wrapper to hold the data
  272. class _BundlePackData:
  273. def __init__(
  274. self,
  275. count: int,
  276. objects: Iterator[UnpackedObject],
  277. object_format: "ObjectFormat",
  278. ) -> None:
  279. self._count = count
  280. self._objects = list(objects) # Materialize the iterator
  281. self.object_format = object_format
  282. def __len__(self) -> int:
  283. return self._count
  284. def iter_unpacked(self) -> Iterator[UnpackedObject]:
  285. return iter(self._objects)
  286. pack_data = _BundlePackData(pack_count, pack_objects, repo.object_format)
  287. # Create bundle object
  288. bundle = Bundle()
  289. bundle.version = version
  290. bundle.capabilities = capabilities
  291. bundle.prerequisites = bundle_prerequisites
  292. bundle.references = bundle_refs
  293. bundle.pack_data = pack_data
  294. return bundle