objectspec.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. # objectspec.py -- Object specification
  2. # Copyright (C) 2014 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Object specification."""
  22. from typing import TYPE_CHECKING, Optional, Union
  23. from .objects import Commit, ShaFile, Tag, Tree
  24. if TYPE_CHECKING:
  25. from .object_store import BaseObjectStore
  26. from .refs import Ref, RefsContainer
  27. from .repo import Repo
  28. def to_bytes(text: Union[str, bytes]) -> bytes:
  29. if getattr(text, "encode", None) is not None:
  30. text = text.encode("ascii") # type: ignore
  31. return text # type: ignore
  32. def _resolve_object(repo: "Repo", ref: bytes) -> "ShaFile":
  33. """Resolve a reference to an object using multiple strategies."""
  34. try:
  35. return repo[ref]
  36. except KeyError:
  37. try:
  38. ref_sha = parse_ref(repo, ref)
  39. return repo[ref_sha]
  40. except KeyError:
  41. try:
  42. return repo.object_store[ref]
  43. except (KeyError, ValueError):
  44. # Re-raise original KeyError for consistency
  45. raise KeyError(ref)
  46. def _parse_number_suffix(suffix: bytes) -> tuple[int, bytes]:
  47. """Parse a number from the start of suffix, return (number, remaining)."""
  48. if not suffix or not suffix[0:1].isdigit():
  49. return 1, suffix
  50. end = 1
  51. while end < len(suffix) and suffix[end : end + 1].isdigit():
  52. end += 1
  53. return int(suffix[:end]), suffix[end:]
  54. def parse_object(repo: "Repo", objectish: Union[bytes, str]) -> "ShaFile":
  55. """Parse a string referring to an object.
  56. Args:
  57. repo: A `Repo` object
  58. objectish: A string referring to an object
  59. Returns: A git object
  60. Raises:
  61. KeyError: If the object can not be found
  62. """
  63. objectish = to_bytes(objectish)
  64. # Handle :<path> - lookup path in tree
  65. if b":" in objectish:
  66. rev, path = objectish.split(b":", 1)
  67. if not rev:
  68. raise NotImplementedError("Index path lookup (:path) not yet supported")
  69. tree = parse_tree(repo, rev)
  70. mode, sha = tree.lookup_path(repo.object_store.__getitem__, path)
  71. return repo[sha]
  72. # Handle @{N} - reflog lookup
  73. if b"@{" in objectish:
  74. base, rest = objectish.split(b"@{", 1)
  75. if not rest.endswith(b"}"):
  76. raise ValueError("Invalid @{} syntax")
  77. spec = rest[:-1]
  78. if not spec.isdigit():
  79. raise NotImplementedError(f"Only @{{N}} supported, not @{{{spec!r}}}")
  80. ref = base if base else b"HEAD"
  81. entries = list(repo.read_reflog(ref))
  82. entries.reverse() # Git uses reverse chronological order
  83. index = int(spec)
  84. if index >= len(entries):
  85. raise ValueError(f"Reflog for {ref!r} has only {len(entries)} entries")
  86. return repo[entries[index].new_sha]
  87. # Handle ^{} - tag dereferencing
  88. if objectish.endswith(b"^{}"):
  89. obj = _resolve_object(repo, objectish[:-3])
  90. while isinstance(obj, Tag):
  91. obj_type, obj_sha = obj.object
  92. obj = repo[obj_sha]
  93. return obj
  94. # Handle ~ and ^ operators
  95. for sep in [b"~", b"^"]:
  96. if sep in objectish:
  97. base, suffix = objectish.split(sep, 1)
  98. if not base:
  99. raise ValueError(f"Empty base before {sep!r}")
  100. obj = _resolve_object(repo, base)
  101. num, suffix = _parse_number_suffix(suffix)
  102. if sep == b"~":
  103. # Follow first parent N times
  104. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  105. for _ in range(num):
  106. if not commit.parents:
  107. raise ValueError(
  108. f"Commit {commit.id.decode('ascii', 'replace')} has no parents"
  109. )
  110. commit = repo[commit.parents[0]]
  111. obj = commit
  112. else: # sep == b"^"
  113. # Get N-th parent (or commit itself if N=0)
  114. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  115. if num == 0:
  116. obj = commit
  117. elif num > len(commit.parents):
  118. raise ValueError(
  119. f"Commit {commit.id.decode('ascii', 'replace')} does not have parent #{num}"
  120. )
  121. else:
  122. obj = repo[commit.parents[num - 1]]
  123. # Process remaining operators recursively
  124. return parse_object(repo, obj.id + suffix) if suffix else obj
  125. # No operators, just return the object
  126. return _resolve_object(repo, objectish)
  127. def parse_tree(repo: "Repo", treeish: Union[bytes, str, Tree, Commit, Tag]) -> "Tree":
  128. """Parse a string referring to a tree.
  129. Args:
  130. repo: A `Repo` object
  131. treeish: A string referring to a tree, or a Tree, Commit, or Tag object
  132. Returns: A Tree object
  133. Raises:
  134. KeyError: If the object can not be found
  135. """
  136. # If already a Tree, return it directly
  137. if isinstance(treeish, Tree):
  138. return treeish
  139. # If it's a Commit, return its tree
  140. if isinstance(treeish, Commit):
  141. return repo[treeish.tree]
  142. # For Tag objects or strings, use the existing logic
  143. if isinstance(treeish, Tag):
  144. treeish = treeish.id
  145. else:
  146. treeish = to_bytes(treeish)
  147. try:
  148. treeish = parse_ref(repo, treeish)
  149. except KeyError: # treeish is commit sha
  150. pass
  151. try:
  152. o = repo[treeish]
  153. except KeyError:
  154. # Try parsing as commit (handles short hashes)
  155. try:
  156. commit = parse_commit(repo, treeish)
  157. return repo[commit.tree]
  158. except KeyError:
  159. raise KeyError(treeish)
  160. if o.type_name == b"commit":
  161. return repo[o.tree]
  162. elif o.type_name == b"tag":
  163. # Tag handling - dereference and recurse
  164. obj_type, obj_sha = o.object
  165. return parse_tree(repo, obj_sha)
  166. return o
  167. def parse_ref(
  168. container: Union["Repo", "RefsContainer"], refspec: Union[str, bytes]
  169. ) -> "Ref":
  170. """Parse a string referring to a reference.
  171. Args:
  172. container: A RefsContainer object
  173. refspec: A string referring to a ref
  174. Returns: A ref
  175. Raises:
  176. KeyError: If the ref can not be found
  177. """
  178. refspec = to_bytes(refspec)
  179. possible_refs = [
  180. refspec,
  181. b"refs/" + refspec,
  182. b"refs/tags/" + refspec,
  183. b"refs/heads/" + refspec,
  184. b"refs/remotes/" + refspec,
  185. b"refs/remotes/" + refspec + b"/HEAD",
  186. ]
  187. for ref in possible_refs:
  188. if ref in container:
  189. return ref
  190. raise KeyError(refspec)
  191. def parse_reftuple(
  192. lh_container: Union["Repo", "RefsContainer"],
  193. rh_container: Union["Repo", "RefsContainer"],
  194. refspec: Union[str, bytes],
  195. force: bool = False,
  196. ) -> tuple[Optional["Ref"], Optional["Ref"], bool]:
  197. """Parse a reftuple spec.
  198. Args:
  199. lh_container: A RefsContainer object
  200. rh_container: A RefsContainer object
  201. refspec: A string
  202. Returns: A tuple with left and right ref
  203. Raises:
  204. KeyError: If one of the refs can not be found
  205. """
  206. refspec = to_bytes(refspec)
  207. if refspec.startswith(b"+"):
  208. force = True
  209. refspec = refspec[1:]
  210. lh: Optional[bytes]
  211. rh: Optional[bytes]
  212. if b":" in refspec:
  213. (lh, rh) = refspec.split(b":")
  214. else:
  215. lh = rh = refspec
  216. if lh == b"":
  217. lh = None
  218. else:
  219. lh = parse_ref(lh_container, lh)
  220. if rh == b"":
  221. rh = None
  222. else:
  223. try:
  224. rh = parse_ref(rh_container, rh)
  225. except KeyError:
  226. # TODO: check force?
  227. if b"/" not in rh:
  228. rh = b"refs/heads/" + rh
  229. return (lh, rh, force)
  230. def parse_reftuples(
  231. lh_container: Union["Repo", "RefsContainer"],
  232. rh_container: Union["Repo", "RefsContainer"],
  233. refspecs: Union[bytes, list[bytes]],
  234. force: bool = False,
  235. ) -> list[tuple[Optional["Ref"], Optional["Ref"], bool]]:
  236. """Parse a list of reftuple specs to a list of reftuples.
  237. Args:
  238. lh_container: A RefsContainer object
  239. rh_container: A RefsContainer object
  240. refspecs: A list of refspecs or a string
  241. force: Force overwriting for all reftuples
  242. Returns: A list of refs
  243. Raises:
  244. KeyError: If one of the refs can not be found
  245. """
  246. if not isinstance(refspecs, list):
  247. refspecs = [refspecs]
  248. ret = []
  249. # TODO: Support * in refspecs
  250. for refspec in refspecs:
  251. ret.append(parse_reftuple(lh_container, rh_container, refspec, force=force))
  252. return ret
  253. def parse_refs(
  254. container: Union["Repo", "RefsContainer"],
  255. refspecs: Union[bytes, str, list[Union[bytes, str]]],
  256. ) -> list["Ref"]:
  257. """Parse a list of refspecs to a list of refs.
  258. Args:
  259. container: A RefsContainer object
  260. refspecs: A list of refspecs or a string
  261. Returns: A list of refs
  262. Raises:
  263. KeyError: If one of the refs can not be found
  264. """
  265. # TODO: Support * in refspecs
  266. if not isinstance(refspecs, list):
  267. refspecs = [refspecs]
  268. ret = []
  269. for refspec in refspecs:
  270. ret.append(parse_ref(container, refspec))
  271. return ret
  272. def parse_commit_range(
  273. repo: "Repo", committish: Union[str, bytes]
  274. ) -> Optional[tuple["Commit", "Commit"]]:
  275. """Parse a string referring to a commit range.
  276. Args:
  277. repo: A `Repo` object
  278. committish: A string referring to a commit or range (e.g., "HEAD~3..HEAD")
  279. Returns:
  280. None if committish is a single commit reference
  281. A tuple of (start_commit, end_commit) if it's a range
  282. Raises:
  283. KeyError: When the commits can not be found
  284. ValueError: If the range can not be parsed
  285. """
  286. committish = to_bytes(committish)
  287. if b".." not in committish:
  288. return None
  289. parts = committish.split(b"..", 1)
  290. if len(parts) != 2:
  291. raise ValueError(f"Invalid commit range: {committish.decode('utf-8')}")
  292. start_ref = parts[0]
  293. end_ref = parts[1] if parts[1] else b"HEAD"
  294. start_commit = parse_commit(repo, start_ref)
  295. end_commit = parse_commit(repo, end_ref)
  296. return (start_commit, end_commit)
  297. class AmbiguousShortId(Exception):
  298. """The short id is ambiguous."""
  299. def __init__(self, prefix: bytes, options: list[ShaFile]) -> None:
  300. self.prefix = prefix
  301. self.options = options
  302. def scan_for_short_id(
  303. object_store: "BaseObjectStore", prefix: bytes, tp: type[ShaFile]
  304. ) -> ShaFile:
  305. """Scan an object store for a short id."""
  306. ret = []
  307. for object_id in object_store.iter_prefix(prefix):
  308. o = object_store[object_id]
  309. if isinstance(o, tp):
  310. ret.append(o)
  311. if not ret:
  312. raise KeyError(prefix)
  313. if len(ret) == 1:
  314. return ret[0]
  315. raise AmbiguousShortId(prefix, ret)
  316. def parse_commit(repo: "Repo", committish: Union[str, bytes, Commit, Tag]) -> "Commit":
  317. """Parse a string referring to a single commit.
  318. Args:
  319. repo: A` Repo` object
  320. committish: A string referring to a single commit, or a Commit or Tag object.
  321. Returns: A Commit object
  322. Raises:
  323. KeyError: When the reference commits can not be found
  324. ValueError: If the range can not be parsed
  325. """
  326. def dereference_tag(obj: ShaFile) -> "Commit":
  327. """Follow tag references until we reach a non-tag object."""
  328. while isinstance(obj, Tag):
  329. obj_type, obj_sha = obj.object
  330. try:
  331. obj = repo.object_store[obj_sha]
  332. except KeyError:
  333. # Tag points to a missing object
  334. raise KeyError(obj_sha)
  335. if not isinstance(obj, Commit):
  336. raise ValueError(
  337. f"Expected commit, got {obj.type_name.decode('ascii', 'replace')}"
  338. )
  339. return obj
  340. # If already a Commit object, return it directly
  341. if isinstance(committish, Commit):
  342. return committish
  343. # If it's a Tag object, dereference it
  344. if isinstance(committish, Tag):
  345. return dereference_tag(committish)
  346. committish = to_bytes(committish)
  347. try:
  348. obj = repo[committish]
  349. except KeyError:
  350. pass
  351. else:
  352. return dereference_tag(obj)
  353. try:
  354. obj = repo[parse_ref(repo, committish)]
  355. except KeyError:
  356. pass
  357. else:
  358. return dereference_tag(obj)
  359. if len(committish) >= 4 and len(committish) < 40:
  360. try:
  361. int(committish, 16)
  362. except ValueError:
  363. pass
  364. else:
  365. try:
  366. obj = scan_for_short_id(repo.object_store, committish, Commit)
  367. except KeyError:
  368. pass
  369. else:
  370. return dereference_tag(obj)
  371. raise KeyError(committish)
  372. # TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation