bundle.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. # bundle.py -- Bundle format support
  2. # Copyright (C) 2020 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Bundle format support."""
  22. from collections.abc import Iterator, Sequence
  23. from typing import (
  24. TYPE_CHECKING,
  25. BinaryIO,
  26. Callable,
  27. Optional,
  28. Protocol,
  29. cast,
  30. runtime_checkable,
  31. )
  32. from .pack import PackData, UnpackedObject, write_pack_data
  33. @runtime_checkable
  34. class PackDataLike(Protocol):
  35. """Protocol for objects that behave like PackData."""
  36. def __len__(self) -> int:
  37. """Return the number of objects in the pack."""
  38. ...
  39. def iter_unpacked(self) -> Iterator[UnpackedObject]:
  40. """Iterate over unpacked objects in the pack."""
  41. ...
  42. if TYPE_CHECKING:
  43. from .object_store import BaseObjectStore
  44. from .repo import BaseRepo
  45. class Bundle:
  46. """Git bundle object representation."""
  47. version: Optional[int]
  48. capabilities: dict[str, Optional[str]]
  49. prerequisites: list[tuple[bytes, bytes]]
  50. references: dict[bytes, bytes]
  51. pack_data: Optional[PackDataLike]
  52. def __repr__(self) -> str:
  53. """Return string representation of Bundle."""
  54. return (
  55. f"<{type(self).__name__}(version={self.version}, "
  56. f"capabilities={self.capabilities}, "
  57. f"prerequisites={self.prerequisites}, "
  58. f"references={self.references})>"
  59. )
  60. def __eq__(self, other: object) -> bool:
  61. """Check equality with another Bundle."""
  62. if not isinstance(other, type(self)):
  63. return False
  64. if self.version != other.version:
  65. return False
  66. if self.capabilities != other.capabilities:
  67. return False
  68. if self.prerequisites != other.prerequisites:
  69. return False
  70. if self.references != other.references:
  71. return False
  72. if self.pack_data != other.pack_data:
  73. return False
  74. return True
  75. def store_objects(
  76. self,
  77. object_store: "BaseObjectStore",
  78. progress: Optional[Callable[[str], None]] = None,
  79. ) -> None:
  80. """Store all objects from this bundle into an object store.
  81. Args:
  82. object_store: The object store to add objects to
  83. progress: Optional progress callback function
  84. """
  85. from .objects import ShaFile
  86. if self.pack_data is None:
  87. raise ValueError("pack_data is not loaded")
  88. count = 0
  89. for unpacked in self.pack_data.iter_unpacked():
  90. # Convert the unpacked object to a proper git object
  91. if unpacked.decomp_chunks and unpacked.obj_type_num is not None:
  92. git_obj = ShaFile.from_raw_chunks(
  93. unpacked.obj_type_num, unpacked.decomp_chunks
  94. )
  95. object_store.add_object(git_obj)
  96. count += 1
  97. if progress and count % 100 == 0:
  98. progress(f"Stored {count} objects")
  99. if progress:
  100. progress(f"Stored {count} objects total")
  101. def _read_bundle(f: BinaryIO, version: int) -> Bundle:
  102. capabilities = {}
  103. prerequisites = []
  104. references = {}
  105. line = f.readline()
  106. if version >= 3:
  107. while line.startswith(b"@"):
  108. line = line[1:].rstrip(b"\n")
  109. try:
  110. key, value_bytes = line.split(b"=", 1)
  111. value = value_bytes.decode("utf-8")
  112. except ValueError:
  113. key = line
  114. value = None
  115. capabilities[key.decode("utf-8")] = value
  116. line = f.readline()
  117. while line.startswith(b"-"):
  118. (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
  119. prerequisites.append((obj_id, comment))
  120. line = f.readline()
  121. while line != b"\n":
  122. (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
  123. references[ref] = obj_id
  124. line = f.readline()
  125. # Extract pack data to separate stream since PackData expects
  126. # the file to start with PACK header at position 0
  127. pack_bytes = f.read()
  128. if not pack_bytes:
  129. raise ValueError("Bundle file contains no pack data")
  130. from io import BytesIO
  131. pack_file = BytesIO(pack_bytes)
  132. pack_data = PackData.from_file(pack_file)
  133. ret = Bundle()
  134. ret.references = references
  135. ret.capabilities = capabilities
  136. ret.prerequisites = prerequisites
  137. ret.pack_data = pack_data
  138. ret.version = version
  139. return ret
  140. def read_bundle(f: BinaryIO) -> Bundle:
  141. """Read a bundle file.
  142. Args:
  143. f: A seekable binary file-like object. The file must remain open
  144. for the lifetime of the returned Bundle object.
  145. """
  146. if not hasattr(f, "seek"):
  147. raise ValueError("Bundle file must be seekable")
  148. firstline = f.readline()
  149. if firstline == b"# v2 git bundle\n":
  150. return _read_bundle(f, 2)
  151. if firstline == b"# v3 git bundle\n":
  152. return _read_bundle(f, 3)
  153. raise AssertionError(f"unsupported bundle format header: {firstline!r}")
  154. def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
  155. """Write a bundle to a file.
  156. Args:
  157. f: File-like object to write to
  158. bundle: Bundle object to write
  159. """
  160. version = bundle.version
  161. if version is None:
  162. if bundle.capabilities:
  163. version = 3
  164. else:
  165. version = 2
  166. if version == 2:
  167. f.write(b"# v2 git bundle\n")
  168. elif version == 3:
  169. f.write(b"# v3 git bundle\n")
  170. else:
  171. raise AssertionError(f"unknown version {version}")
  172. if version == 3:
  173. for key, value in bundle.capabilities.items():
  174. f.write(b"@" + key.encode("utf-8"))
  175. if value is not None:
  176. f.write(b"=" + value.encode("utf-8"))
  177. f.write(b"\n")
  178. for obj_id, comment in bundle.prerequisites:
  179. f.write(b"-" + obj_id + b" " + comment + b"\n")
  180. for ref, obj_id in bundle.references.items():
  181. f.write(obj_id + b" " + ref + b"\n")
  182. f.write(b"\n")
  183. if bundle.pack_data is None:
  184. raise ValueError("bundle.pack_data is not loaded")
  185. write_pack_data(
  186. cast(Callable[[bytes], None], f.write),
  187. num_records=len(bundle.pack_data),
  188. records=bundle.pack_data.iter_unpacked(),
  189. )
  190. def create_bundle_from_repo(
  191. repo: "BaseRepo",
  192. refs: Optional[Sequence[bytes]] = None,
  193. prerequisites: Optional[Sequence[bytes]] = None,
  194. version: Optional[int] = None,
  195. capabilities: Optional[dict[str, Optional[str]]] = None,
  196. progress: Optional[Callable[[str], None]] = None,
  197. ) -> Bundle:
  198. """Create a bundle from a repository.
  199. Args:
  200. repo: Repository object to create bundle from
  201. refs: List of refs to include (defaults to all refs)
  202. prerequisites: List of commit SHAs that are prerequisites
  203. version: Bundle version (2 or 3, auto-detected if None)
  204. capabilities: Bundle capabilities (for v3 bundles)
  205. progress: Optional progress reporting function
  206. Returns:
  207. Bundle object ready for writing
  208. """
  209. if refs is None:
  210. refs = list(repo.refs.keys())
  211. if prerequisites is None:
  212. prerequisites = []
  213. if capabilities is None:
  214. capabilities = {}
  215. # Build the references dictionary for the bundle
  216. bundle_refs = {}
  217. want_objects = set()
  218. for ref in refs:
  219. if ref in repo.refs:
  220. ref_value = repo.refs[ref]
  221. # Handle peeled refs
  222. try:
  223. peeled_value = repo.refs.get_peeled(ref)
  224. if peeled_value is not None and peeled_value != ref_value:
  225. bundle_refs[ref] = peeled_value
  226. else:
  227. bundle_refs[ref] = ref_value
  228. except KeyError:
  229. bundle_refs[ref] = ref_value
  230. want_objects.add(bundle_refs[ref])
  231. # Convert prerequisites to proper format
  232. bundle_prerequisites = []
  233. have_objects = set()
  234. for prereq in prerequisites:
  235. if not isinstance(prereq, bytes):
  236. raise TypeError(
  237. f"Invalid prerequisite type: {type(prereq)}, expected bytes"
  238. )
  239. if len(prereq) != 40:
  240. raise ValueError(
  241. f"Invalid prerequisite SHA length: {len(prereq)}, expected 40 hex characters"
  242. )
  243. try:
  244. # Validate it's actually hex
  245. bytes.fromhex(prereq.decode("utf-8"))
  246. except ValueError:
  247. raise ValueError(f"Invalid prerequisite format: {prereq!r}")
  248. # Store hex in bundle and for pack generation
  249. bundle_prerequisites.append((prereq, b""))
  250. have_objects.add(prereq)
  251. # Generate pack data containing all objects needed for the refs
  252. pack_count, pack_objects = repo.generate_pack_data(
  253. have=have_objects,
  254. want=want_objects,
  255. progress=progress,
  256. )
  257. # Store the pack objects directly, we'll write them when saving the bundle
  258. # For now, create a simple wrapper to hold the data
  259. class _BundlePackData:
  260. def __init__(self, count: int, objects: Iterator[UnpackedObject]) -> None:
  261. self._count = count
  262. self._objects = list(objects) # Materialize the iterator
  263. def __len__(self) -> int:
  264. return self._count
  265. def iter_unpacked(self) -> Iterator[UnpackedObject]:
  266. return iter(self._objects)
  267. pack_data = _BundlePackData(pack_count, pack_objects)
  268. # Create bundle object
  269. bundle = Bundle()
  270. bundle.version = version
  271. bundle.capabilities = capabilities
  272. bundle.prerequisites = bundle_prerequisites
  273. bundle.references = bundle_refs
  274. bundle.pack_data = pack_data
  275. return bundle