bundle.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. # bundle.py -- Bundle format support
  2. # Copyright (C) 2020 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Bundle format support."""
  22. from typing import BinaryIO, Callable, Optional
  23. from .pack import PackData, write_pack_data
  24. class Bundle:
  25. version: Optional[int]
  26. capabilities: dict[str, Optional[str]]
  27. prerequisites: list[tuple[bytes, bytes]]
  28. references: dict[bytes, bytes]
  29. pack_data: PackData
  30. def __repr__(self) -> str:
  31. return (
  32. f"<{type(self).__name__}(version={self.version}, "
  33. f"capabilities={self.capabilities}, "
  34. f"prerequisites={self.prerequisites}, "
  35. f"references={self.references})>"
  36. )
  37. def __eq__(self, other: object) -> bool:
  38. if not isinstance(other, type(self)):
  39. return False
  40. if self.version != other.version:
  41. return False
  42. if self.capabilities != other.capabilities:
  43. return False
  44. if self.prerequisites != other.prerequisites:
  45. return False
  46. if self.references != other.references:
  47. return False
  48. if self.pack_data != other.pack_data:
  49. return False
  50. return True
  51. def store_objects(
  52. self, object_store, progress: Optional[Callable[[str], None]] = None
  53. ):
  54. """Store all objects from this bundle into an object store.
  55. Args:
  56. object_store: The object store to add objects to
  57. progress: Optional progress callback function
  58. """
  59. from .objects import ShaFile
  60. count = 0
  61. for unpacked in self.pack_data.iter_unpacked():
  62. # Convert the unpacked object to a proper git object
  63. if unpacked.decomp_chunks:
  64. git_obj = ShaFile.from_raw_chunks(
  65. unpacked.obj_type_num, unpacked.decomp_chunks
  66. )
  67. object_store.add_object(git_obj)
  68. count += 1
  69. if progress and count % 100 == 0:
  70. progress(f"Stored {count} objects")
  71. if progress:
  72. progress(f"Stored {count} objects total")
  73. def _read_bundle(f: BinaryIO, version: int) -> Bundle:
  74. capabilities = {}
  75. prerequisites = []
  76. references = {}
  77. line = f.readline()
  78. if version >= 3:
  79. while line.startswith(b"@"):
  80. line = line[1:].rstrip(b"\n")
  81. try:
  82. key, value_bytes = line.split(b"=", 1)
  83. value = value_bytes.decode("utf-8")
  84. except ValueError:
  85. key = line
  86. value = None
  87. capabilities[key.decode("utf-8")] = value
  88. line = f.readline()
  89. while line.startswith(b"-"):
  90. (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
  91. prerequisites.append((obj_id, comment))
  92. line = f.readline()
  93. while line != b"\n":
  94. (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
  95. references[ref] = obj_id
  96. line = f.readline()
  97. # Extract pack data to separate stream since PackData expects
  98. # the file to start with PACK header at position 0
  99. pack_bytes = f.read()
  100. if not pack_bytes:
  101. raise ValueError("Bundle file contains no pack data")
  102. from io import BytesIO
  103. pack_file = BytesIO(pack_bytes)
  104. pack_data = PackData.from_file(pack_file)
  105. ret = Bundle()
  106. ret.references = references
  107. ret.capabilities = capabilities
  108. ret.prerequisites = prerequisites
  109. ret.pack_data = pack_data
  110. ret.version = version
  111. return ret
  112. def read_bundle(f: BinaryIO) -> Bundle:
  113. """Read a bundle file.
  114. Args:
  115. f: A seekable binary file-like object. The file must remain open
  116. for the lifetime of the returned Bundle object.
  117. """
  118. if not hasattr(f, "seek"):
  119. raise ValueError("Bundle file must be seekable")
  120. firstline = f.readline()
  121. if firstline == b"# v2 git bundle\n":
  122. return _read_bundle(f, 2)
  123. if firstline == b"# v3 git bundle\n":
  124. return _read_bundle(f, 3)
  125. raise AssertionError(f"unsupported bundle format header: {firstline!r}")
  126. def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
  127. version = bundle.version
  128. if version is None:
  129. if bundle.capabilities:
  130. version = 3
  131. else:
  132. version = 2
  133. if version == 2:
  134. f.write(b"# v2 git bundle\n")
  135. elif version == 3:
  136. f.write(b"# v3 git bundle\n")
  137. else:
  138. raise AssertionError(f"unknown version {version}")
  139. if version == 3:
  140. for key, value in bundle.capabilities.items():
  141. f.write(b"@" + key.encode("utf-8"))
  142. if value is not None:
  143. f.write(b"=" + value.encode("utf-8"))
  144. f.write(b"\n")
  145. for obj_id, comment in bundle.prerequisites:
  146. f.write(b"-" + obj_id + b" " + comment + b"\n")
  147. for ref, obj_id in bundle.references.items():
  148. f.write(obj_id + b" " + ref + b"\n")
  149. f.write(b"\n")
  150. write_pack_data(
  151. f.write,
  152. num_records=len(bundle.pack_data),
  153. records=bundle.pack_data.iter_unpacked(),
  154. )
  155. def create_bundle_from_repo(
  156. repo,
  157. refs: Optional[list[bytes]] = None,
  158. prerequisites: Optional[list[bytes]] = None,
  159. version: Optional[int] = None,
  160. capabilities: Optional[dict[str, Optional[str]]] = None,
  161. progress: Optional[Callable[[str], None]] = None,
  162. ) -> Bundle:
  163. """Create a bundle from a repository.
  164. Args:
  165. repo: Repository object to create bundle from
  166. refs: List of refs to include (defaults to all refs)
  167. prerequisites: List of commit SHAs that are prerequisites
  168. version: Bundle version (2 or 3, auto-detected if None)
  169. capabilities: Bundle capabilities (for v3 bundles)
  170. progress: Optional progress reporting function
  171. Returns:
  172. Bundle object ready for writing
  173. """
  174. if refs is None:
  175. refs = list(repo.refs.keys())
  176. if prerequisites is None:
  177. prerequisites = []
  178. if capabilities is None:
  179. capabilities = {}
  180. # Build the references dictionary for the bundle
  181. bundle_refs = {}
  182. want_objects = []
  183. for ref in refs:
  184. if ref in repo.refs:
  185. ref_value = repo.refs[ref]
  186. # Handle peeled refs
  187. try:
  188. peeled_value = repo.refs.get_peeled(ref)
  189. if peeled_value is not None and peeled_value != ref_value:
  190. bundle_refs[ref] = peeled_value
  191. else:
  192. bundle_refs[ref] = ref_value
  193. except KeyError:
  194. bundle_refs[ref] = ref_value
  195. want_objects.append(bundle_refs[ref])
  196. # Convert prerequisites to proper format
  197. bundle_prerequisites = []
  198. have_objects = []
  199. for prereq in prerequisites:
  200. if isinstance(prereq, str):
  201. prereq = prereq.encode("utf-8")
  202. if isinstance(prereq, bytes):
  203. if len(prereq) == 40: # SHA1 hex string
  204. try:
  205. # Validate it's actually hex
  206. bytes.fromhex(prereq.decode("utf-8"))
  207. # Store hex in bundle and for pack generation
  208. bundle_prerequisites.append((prereq, b""))
  209. have_objects.append(prereq)
  210. except ValueError:
  211. # Not a valid hex string, invalid prerequisite
  212. raise ValueError(f"Invalid prerequisite format: {prereq!r}")
  213. elif len(prereq) == 20:
  214. # Binary SHA, convert to hex for both bundle and pack generation
  215. hex_prereq = prereq.hex().encode("ascii")
  216. bundle_prerequisites.append((hex_prereq, b""))
  217. have_objects.append(hex_prereq)
  218. else:
  219. # Invalid length
  220. raise ValueError(f"Invalid prerequisite SHA length: {len(prereq)}")
  221. else:
  222. # Assume it's already a binary SHA
  223. hex_prereq = prereq.hex().encode("ascii")
  224. bundle_prerequisites.append((hex_prereq, b""))
  225. have_objects.append(hex_prereq)
  226. # Generate pack data containing all objects needed for the refs
  227. pack_count, pack_objects = repo.generate_pack_data(
  228. have=have_objects,
  229. want=want_objects,
  230. progress=progress,
  231. )
  232. # Store the pack objects directly, we'll write them when saving the bundle
  233. # For now, create a simple wrapper to hold the data
  234. class _BundlePackData:
  235. def __init__(self, count, objects):
  236. self._count = count
  237. self._objects = list(objects) # Materialize the iterator
  238. def __len__(self):
  239. return self._count
  240. def iter_unpacked(self):
  241. return iter(self._objects)
  242. pack_data = _BundlePackData(pack_count, pack_objects)
  243. # Create bundle object
  244. bundle = Bundle()
  245. bundle.version = version
  246. bundle.capabilities = capabilities
  247. bundle.prerequisites = bundle_prerequisites
  248. bundle.references = bundle_refs
  249. bundle.pack_data = pack_data # type: ignore[assignment]
  250. return bundle