bundle.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. # bundle.py -- Bundle format support
  2. # Copyright (C) 2020 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Bundle format support."""
  22. from collections.abc import Iterator
  23. from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Optional
  24. from .pack import PackData, write_pack_data
  25. if TYPE_CHECKING:
  26. from .object_store import BaseObjectStore
  27. from .repo import BaseRepo
  28. class Bundle:
  29. """Git bundle object representation."""
  30. version: Optional[int]
  31. capabilities: dict[str, Optional[str]]
  32. prerequisites: list[tuple[bytes, bytes]]
  33. references: dict[bytes, bytes]
  34. pack_data: PackData
  35. def __repr__(self) -> str:
  36. """Return string representation of Bundle."""
  37. return (
  38. f"<{type(self).__name__}(version={self.version}, "
  39. f"capabilities={self.capabilities}, "
  40. f"prerequisites={self.prerequisites}, "
  41. f"references={self.references})>"
  42. )
  43. def __eq__(self, other: object) -> bool:
  44. """Check equality with another Bundle."""
  45. if not isinstance(other, type(self)):
  46. return False
  47. if self.version != other.version:
  48. return False
  49. if self.capabilities != other.capabilities:
  50. return False
  51. if self.prerequisites != other.prerequisites:
  52. return False
  53. if self.references != other.references:
  54. return False
  55. if self.pack_data != other.pack_data:
  56. return False
  57. return True
  58. def store_objects(
  59. self,
  60. object_store: "BaseObjectStore",
  61. progress: Optional[Callable[[str], None]] = None,
  62. ) -> None:
  63. """Store all objects from this bundle into an object store.
  64. Args:
  65. object_store: The object store to add objects to
  66. progress: Optional progress callback function
  67. """
  68. from .objects import ShaFile
  69. count = 0
  70. for unpacked in self.pack_data.iter_unpacked():
  71. # Convert the unpacked object to a proper git object
  72. if unpacked.decomp_chunks:
  73. git_obj = ShaFile.from_raw_chunks(
  74. unpacked.obj_type_num, unpacked.decomp_chunks
  75. )
  76. object_store.add_object(git_obj)
  77. count += 1
  78. if progress and count % 100 == 0:
  79. progress(f"Stored {count} objects")
  80. if progress:
  81. progress(f"Stored {count} objects total")
  82. def _read_bundle(f: BinaryIO, version: int) -> Bundle:
  83. capabilities = {}
  84. prerequisites = []
  85. references = {}
  86. line = f.readline()
  87. if version >= 3:
  88. while line.startswith(b"@"):
  89. line = line[1:].rstrip(b"\n")
  90. try:
  91. key, value_bytes = line.split(b"=", 1)
  92. value = value_bytes.decode("utf-8")
  93. except ValueError:
  94. key = line
  95. value = None
  96. capabilities[key.decode("utf-8")] = value
  97. line = f.readline()
  98. while line.startswith(b"-"):
  99. (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
  100. prerequisites.append((obj_id, comment))
  101. line = f.readline()
  102. while line != b"\n":
  103. (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
  104. references[ref] = obj_id
  105. line = f.readline()
  106. # Extract pack data to separate stream since PackData expects
  107. # the file to start with PACK header at position 0
  108. pack_bytes = f.read()
  109. if not pack_bytes:
  110. raise ValueError("Bundle file contains no pack data")
  111. from io import BytesIO
  112. pack_file = BytesIO(pack_bytes)
  113. pack_data = PackData.from_file(pack_file)
  114. ret = Bundle()
  115. ret.references = references
  116. ret.capabilities = capabilities
  117. ret.prerequisites = prerequisites
  118. ret.pack_data = pack_data
  119. ret.version = version
  120. return ret
  121. def read_bundle(f: BinaryIO) -> Bundle:
  122. """Read a bundle file.
  123. Args:
  124. f: A seekable binary file-like object. The file must remain open
  125. for the lifetime of the returned Bundle object.
  126. """
  127. if not hasattr(f, "seek"):
  128. raise ValueError("Bundle file must be seekable")
  129. firstline = f.readline()
  130. if firstline == b"# v2 git bundle\n":
  131. return _read_bundle(f, 2)
  132. if firstline == b"# v3 git bundle\n":
  133. return _read_bundle(f, 3)
  134. raise AssertionError(f"unsupported bundle format header: {firstline!r}")
  135. def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
  136. """Write a bundle to a file.
  137. Args:
  138. f: File-like object to write to
  139. bundle: Bundle object to write
  140. """
  141. version = bundle.version
  142. if version is None:
  143. if bundle.capabilities:
  144. version = 3
  145. else:
  146. version = 2
  147. if version == 2:
  148. f.write(b"# v2 git bundle\n")
  149. elif version == 3:
  150. f.write(b"# v3 git bundle\n")
  151. else:
  152. raise AssertionError(f"unknown version {version}")
  153. if version == 3:
  154. for key, value in bundle.capabilities.items():
  155. f.write(b"@" + key.encode("utf-8"))
  156. if value is not None:
  157. f.write(b"=" + value.encode("utf-8"))
  158. f.write(b"\n")
  159. for obj_id, comment in bundle.prerequisites:
  160. f.write(b"-" + obj_id + b" " + comment + b"\n")
  161. for ref, obj_id in bundle.references.items():
  162. f.write(obj_id + b" " + ref + b"\n")
  163. f.write(b"\n")
  164. write_pack_data(
  165. f.write,
  166. num_records=len(bundle.pack_data),
  167. records=bundle.pack_data.iter_unpacked(),
  168. )
  169. def create_bundle_from_repo(
  170. repo: "BaseRepo",
  171. refs: Optional[list[bytes]] = None,
  172. prerequisites: Optional[list[bytes]] = None,
  173. version: Optional[int] = None,
  174. capabilities: Optional[dict[str, Optional[str]]] = None,
  175. progress: Optional[Callable[[str], None]] = None,
  176. ) -> Bundle:
  177. """Create a bundle from a repository.
  178. Args:
  179. repo: Repository object to create bundle from
  180. refs: List of refs to include (defaults to all refs)
  181. prerequisites: List of commit SHAs that are prerequisites
  182. version: Bundle version (2 or 3, auto-detected if None)
  183. capabilities: Bundle capabilities (for v3 bundles)
  184. progress: Optional progress reporting function
  185. Returns:
  186. Bundle object ready for writing
  187. """
  188. if refs is None:
  189. refs = list(repo.refs.keys())
  190. if prerequisites is None:
  191. prerequisites = []
  192. if capabilities is None:
  193. capabilities = {}
  194. # Build the references dictionary for the bundle
  195. bundle_refs = {}
  196. want_objects = []
  197. for ref in refs:
  198. if ref in repo.refs:
  199. ref_value = repo.refs[ref]
  200. # Handle peeled refs
  201. try:
  202. peeled_value = repo.refs.get_peeled(ref)
  203. if peeled_value is not None and peeled_value != ref_value:
  204. bundle_refs[ref] = peeled_value
  205. else:
  206. bundle_refs[ref] = ref_value
  207. except KeyError:
  208. bundle_refs[ref] = ref_value
  209. want_objects.append(bundle_refs[ref])
  210. # Convert prerequisites to proper format
  211. bundle_prerequisites = []
  212. have_objects = []
  213. for prereq in prerequisites:
  214. if isinstance(prereq, str):
  215. prereq = prereq.encode("utf-8")
  216. if isinstance(prereq, bytes):
  217. if len(prereq) == 40: # SHA1 hex string
  218. try:
  219. # Validate it's actually hex
  220. bytes.fromhex(prereq.decode("utf-8"))
  221. # Store hex in bundle and for pack generation
  222. bundle_prerequisites.append((prereq, b""))
  223. have_objects.append(prereq)
  224. except ValueError:
  225. # Not a valid hex string, invalid prerequisite
  226. raise ValueError(f"Invalid prerequisite format: {prereq!r}")
  227. elif len(prereq) == 20:
  228. # Binary SHA, convert to hex for both bundle and pack generation
  229. hex_prereq = prereq.hex().encode("ascii")
  230. bundle_prerequisites.append((hex_prereq, b""))
  231. have_objects.append(hex_prereq)
  232. else:
  233. # Invalid length
  234. raise ValueError(f"Invalid prerequisite SHA length: {len(prereq)}")
  235. else:
  236. # Assume it's already a binary SHA
  237. hex_prereq = prereq.hex().encode("ascii")
  238. bundle_prerequisites.append((hex_prereq, b""))
  239. have_objects.append(hex_prereq)
  240. # Generate pack data containing all objects needed for the refs
  241. pack_count, pack_objects = repo.generate_pack_data(
  242. have=have_objects,
  243. want=want_objects,
  244. progress=progress,
  245. )
  246. # Store the pack objects directly, we'll write them when saving the bundle
  247. # For now, create a simple wrapper to hold the data
  248. class _BundlePackData:
  249. def __init__(self, count: int, objects: Iterator[Any]) -> None:
  250. self._count = count
  251. self._objects = list(objects) # Materialize the iterator
  252. def __len__(self) -> int:
  253. return self._count
  254. def iter_unpacked(self) -> Iterator[Any]:
  255. return iter(self._objects)
  256. pack_data = _BundlePackData(pack_count, pack_objects)
  257. # Create bundle object
  258. bundle = Bundle()
  259. bundle.version = version
  260. bundle.capabilities = capabilities
  261. bundle.prerequisites = bundle_prerequisites
  262. bundle.references = bundle_refs
  263. bundle.pack_data = pack_data # type: ignore[assignment]
  264. return bundle