objectspec.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. # objectspec.py -- Object specification
  2. # Copyright (C) 2014 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Object specification."""
  22. from typing import TYPE_CHECKING, Optional, Union
  23. from .objects import Commit, ShaFile, Tag, Tree
  24. from .repo import BaseRepo
  25. if TYPE_CHECKING:
  26. from .object_store import BaseObjectStore
  27. from .refs import Ref, RefsContainer
  28. from .repo import Repo
  29. def to_bytes(text: Union[str, bytes]) -> bytes:
  30. """Convert text to bytes.
  31. Args:
  32. text: Text to convert (str or bytes)
  33. Returns:
  34. Bytes representation of text
  35. """
  36. if isinstance(text, str):
  37. return text.encode("ascii")
  38. return text
  39. def _resolve_object(repo: "Repo", ref: bytes) -> "ShaFile":
  40. """Resolve a reference to an object using multiple strategies."""
  41. try:
  42. return repo[ref]
  43. except KeyError:
  44. try:
  45. ref_sha = parse_ref(repo, ref)
  46. return repo[ref_sha]
  47. except KeyError:
  48. try:
  49. return repo.object_store[ref]
  50. except (KeyError, ValueError):
  51. # Re-raise original KeyError for consistency
  52. raise KeyError(ref)
  53. def _parse_number_suffix(suffix: bytes) -> tuple[int, bytes]:
  54. """Parse a number from the start of suffix, return (number, remaining)."""
  55. if not suffix or not suffix[0:1].isdigit():
  56. return 1, suffix
  57. end = 1
  58. while end < len(suffix) and suffix[end : end + 1].isdigit():
  59. end += 1
  60. return int(suffix[:end]), suffix[end:]
  61. def parse_object(repo: "Repo", objectish: Union[bytes, str]) -> "ShaFile":
  62. """Parse a string referring to an object.
  63. Args:
  64. repo: A `Repo` object
  65. objectish: A string referring to an object
  66. Returns: A git object
  67. Raises:
  68. KeyError: If the object can not be found
  69. """
  70. objectish = to_bytes(objectish)
  71. # Handle :<path> - lookup path in tree
  72. if b":" in objectish:
  73. rev, path = objectish.split(b":", 1)
  74. if not rev:
  75. raise NotImplementedError("Index path lookup (:path) not yet supported")
  76. tree = parse_tree(repo, rev)
  77. mode, sha = tree.lookup_path(repo.object_store.__getitem__, path)
  78. return repo[sha]
  79. # Handle @{N} - reflog lookup
  80. if b"@{" in objectish:
  81. base, rest = objectish.split(b"@{", 1)
  82. if not rest.endswith(b"}"):
  83. raise ValueError("Invalid @{} syntax")
  84. spec = rest[:-1]
  85. if not spec.isdigit():
  86. raise NotImplementedError(f"Only @{{N}} supported, not @{{{spec!r}}}")
  87. ref = base if base else b"HEAD"
  88. entries = list(repo.read_reflog(ref))
  89. entries.reverse() # Git uses reverse chronological order
  90. index = int(spec)
  91. if index >= len(entries):
  92. raise ValueError(f"Reflog for {ref!r} has only {len(entries)} entries")
  93. return repo[entries[index].new_sha]
  94. # Handle ^{} - tag dereferencing
  95. if objectish.endswith(b"^{}"):
  96. obj = _resolve_object(repo, objectish[:-3])
  97. while isinstance(obj, Tag):
  98. obj_type, obj_sha = obj.object
  99. obj = repo[obj_sha]
  100. return obj
  101. # Handle ~ and ^ operators
  102. for sep in [b"~", b"^"]:
  103. if sep in objectish:
  104. base, suffix = objectish.split(sep, 1)
  105. if not base:
  106. raise ValueError(f"Empty base before {sep!r}")
  107. obj = _resolve_object(repo, base)
  108. num, suffix = _parse_number_suffix(suffix)
  109. if sep == b"~":
  110. # Follow first parent N times
  111. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  112. for _ in range(num):
  113. if not commit.parents:
  114. raise ValueError(
  115. f"Commit {commit.id.decode('ascii', 'replace')} has no parents"
  116. )
  117. parent_obj = repo[commit.parents[0]]
  118. assert isinstance(parent_obj, Commit)
  119. commit = parent_obj
  120. obj = commit
  121. else: # sep == b"^"
  122. # Get N-th parent (or commit itself if N=0)
  123. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  124. if num == 0:
  125. obj = commit
  126. elif num > len(commit.parents):
  127. raise ValueError(
  128. f"Commit {commit.id.decode('ascii', 'replace')} does not have parent #{num}"
  129. )
  130. else:
  131. obj = repo[commit.parents[num - 1]]
  132. # Process remaining operators recursively
  133. return parse_object(repo, obj.id + suffix) if suffix else obj
  134. # No operators, just return the object
  135. return _resolve_object(repo, objectish)
  136. def parse_tree(
  137. repo: "BaseRepo", treeish: Union[bytes, str, Tree, Commit, Tag]
  138. ) -> "Tree":
  139. """Parse a string referring to a tree.
  140. Args:
  141. repo: A repository object
  142. treeish: A string referring to a tree, or a Tree, Commit, or Tag object
  143. Returns: A Tree object
  144. Raises:
  145. KeyError: If the object can not be found
  146. """
  147. # If already a Tree, return it directly
  148. if isinstance(treeish, Tree):
  149. return treeish
  150. # If it's a Commit, return its tree
  151. if isinstance(treeish, Commit):
  152. tree = repo[treeish.tree]
  153. assert isinstance(tree, Tree)
  154. return tree
  155. # For Tag objects or strings, use the existing logic
  156. if isinstance(treeish, Tag):
  157. treeish = treeish.id
  158. else:
  159. treeish = to_bytes(treeish)
  160. try:
  161. treeish = parse_ref(repo.refs, treeish)
  162. except KeyError: # treeish is commit sha
  163. pass
  164. try:
  165. o = repo[treeish]
  166. except KeyError:
  167. # Try parsing as commit (handles short hashes)
  168. try:
  169. commit = parse_commit(repo, treeish)
  170. assert isinstance(commit, Commit)
  171. tree = repo[commit.tree]
  172. assert isinstance(tree, Tree)
  173. return tree
  174. except KeyError:
  175. raise KeyError(treeish)
  176. if isinstance(o, Commit):
  177. tree = repo[o.tree]
  178. assert isinstance(tree, Tree)
  179. return tree
  180. elif isinstance(o, Tag):
  181. # Tag handling - dereference and recurse
  182. obj_type, obj_sha = o.object
  183. return parse_tree(repo, obj_sha)
  184. assert isinstance(o, Tree)
  185. return o
  186. def parse_ref(
  187. container: Union["Repo", "RefsContainer"], refspec: Union[str, bytes]
  188. ) -> "Ref":
  189. """Parse a string referring to a reference.
  190. Args:
  191. container: A RefsContainer object
  192. refspec: A string referring to a ref
  193. Returns: A ref
  194. Raises:
  195. KeyError: If the ref can not be found
  196. """
  197. refspec = to_bytes(refspec)
  198. possible_refs = [
  199. refspec,
  200. b"refs/" + refspec,
  201. b"refs/tags/" + refspec,
  202. b"refs/heads/" + refspec,
  203. b"refs/remotes/" + refspec,
  204. b"refs/remotes/" + refspec + b"/HEAD",
  205. ]
  206. for ref in possible_refs:
  207. if ref in container:
  208. return ref
  209. raise KeyError(refspec)
  210. def parse_reftuple(
  211. lh_container: Union["Repo", "RefsContainer"],
  212. rh_container: Union["Repo", "RefsContainer"],
  213. refspec: Union[str, bytes],
  214. force: bool = False,
  215. ) -> tuple[Optional["Ref"], Optional["Ref"], bool]:
  216. """Parse a reftuple spec.
  217. Args:
  218. lh_container: A RefsContainer object
  219. rh_container: A RefsContainer object
  220. refspec: A string
  221. force: Whether to force the operation
  222. Returns: A tuple with left and right ref
  223. Raises:
  224. KeyError: If one of the refs can not be found
  225. """
  226. refspec = to_bytes(refspec)
  227. if refspec.startswith(b"+"):
  228. force = True
  229. refspec = refspec[1:]
  230. lh: Optional[bytes]
  231. rh: Optional[bytes]
  232. if b":" in refspec:
  233. (lh, rh) = refspec.split(b":")
  234. else:
  235. lh = rh = refspec
  236. if lh == b"":
  237. lh = None
  238. else:
  239. lh = parse_ref(lh_container, lh)
  240. if rh == b"":
  241. rh = None
  242. else:
  243. try:
  244. rh = parse_ref(rh_container, rh)
  245. except KeyError:
  246. # TODO: check force?
  247. if b"/" not in rh:
  248. rh = b"refs/heads/" + rh
  249. return (lh, rh, force)
  250. def parse_reftuples(
  251. lh_container: Union["Repo", "RefsContainer"],
  252. rh_container: Union["Repo", "RefsContainer"],
  253. refspecs: Union[bytes, list[bytes]],
  254. force: bool = False,
  255. ) -> list[tuple[Optional["Ref"], Optional["Ref"], bool]]:
  256. """Parse a list of reftuple specs to a list of reftuples.
  257. Args:
  258. lh_container: A RefsContainer object
  259. rh_container: A RefsContainer object
  260. refspecs: A list of refspecs or a string
  261. force: Force overwriting for all reftuples
  262. Returns: A list of refs
  263. Raises:
  264. KeyError: If one of the refs can not be found
  265. """
  266. if not isinstance(refspecs, list):
  267. refspecs = [refspecs]
  268. ret = []
  269. # TODO: Support * in refspecs
  270. for refspec in refspecs:
  271. ret.append(parse_reftuple(lh_container, rh_container, refspec, force=force))
  272. return ret
  273. def parse_refs(
  274. container: Union["Repo", "RefsContainer"],
  275. refspecs: Union[bytes, str, list[Union[bytes, str]]],
  276. ) -> list["Ref"]:
  277. """Parse a list of refspecs to a list of refs.
  278. Args:
  279. container: A RefsContainer object
  280. refspecs: A list of refspecs or a string
  281. Returns: A list of refs
  282. Raises:
  283. KeyError: If one of the refs can not be found
  284. """
  285. # TODO: Support * in refspecs
  286. if not isinstance(refspecs, list):
  287. refspecs = [refspecs]
  288. ret = []
  289. for refspec in refspecs:
  290. ret.append(parse_ref(container, refspec))
  291. return ret
  292. def parse_commit_range(
  293. repo: "Repo", committish: Union[str, bytes]
  294. ) -> Optional[tuple["Commit", "Commit"]]:
  295. """Parse a string referring to a commit range.
  296. Args:
  297. repo: A `Repo` object
  298. committish: A string referring to a commit or range (e.g., "HEAD~3..HEAD")
  299. Returns:
  300. None if committish is a single commit reference
  301. A tuple of (start_commit, end_commit) if it's a range
  302. Raises:
  303. KeyError: When the commits can not be found
  304. ValueError: If the range can not be parsed
  305. """
  306. committish = to_bytes(committish)
  307. if b".." not in committish:
  308. return None
  309. parts = committish.split(b"..", 1)
  310. if len(parts) != 2:
  311. raise ValueError(f"Invalid commit range: {committish.decode('utf-8')}")
  312. start_ref = parts[0]
  313. end_ref = parts[1] if parts[1] else b"HEAD"
  314. start_commit = parse_commit(repo, start_ref)
  315. end_commit = parse_commit(repo, end_ref)
  316. return (start_commit, end_commit)
  317. class AmbiguousShortId(Exception):
  318. """The short id is ambiguous."""
  319. def __init__(self, prefix: bytes, options: list[ShaFile]) -> None:
  320. """Initialize AmbiguousShortId.
  321. Args:
  322. prefix: The ambiguous prefix
  323. options: List of matching objects
  324. """
  325. self.prefix = prefix
  326. self.options = options
  327. def scan_for_short_id(
  328. object_store: "BaseObjectStore", prefix: bytes, tp: type[ShaFile]
  329. ) -> ShaFile:
  330. """Scan an object store for a short id."""
  331. ret = []
  332. for object_id in object_store.iter_prefix(prefix):
  333. o = object_store[object_id]
  334. if isinstance(o, tp):
  335. ret.append(o)
  336. if not ret:
  337. raise KeyError(prefix)
  338. if len(ret) == 1:
  339. return ret[0]
  340. raise AmbiguousShortId(prefix, ret)
  341. def parse_commit(
  342. repo: "BaseRepo", committish: Union[str, bytes, Commit, Tag]
  343. ) -> "Commit":
  344. """Parse a string referring to a single commit.
  345. Args:
  346. repo: A repository object
  347. committish: A string referring to a single commit, or a Commit or Tag object.
  348. Returns: A Commit object
  349. Raises:
  350. KeyError: When the reference commits can not be found
  351. ValueError: If the range can not be parsed
  352. """
  353. def dereference_tag(obj: ShaFile) -> "Commit":
  354. """Follow tag references until we reach a non-tag object."""
  355. while isinstance(obj, Tag):
  356. obj_type, obj_sha = obj.object
  357. try:
  358. obj = repo.object_store[obj_sha]
  359. except KeyError:
  360. # Tag points to a missing object
  361. raise KeyError(obj_sha)
  362. if not isinstance(obj, Commit):
  363. raise ValueError(
  364. f"Expected commit, got {obj.type_name.decode('ascii', 'replace')}"
  365. )
  366. return obj
  367. # If already a Commit object, return it directly
  368. if isinstance(committish, Commit):
  369. return committish
  370. # If it's a Tag object, dereference it
  371. if isinstance(committish, Tag):
  372. return dereference_tag(committish)
  373. committish = to_bytes(committish)
  374. try:
  375. obj = repo[committish]
  376. except KeyError:
  377. pass
  378. else:
  379. return dereference_tag(obj)
  380. try:
  381. obj = repo[parse_ref(repo.refs, committish)]
  382. except KeyError:
  383. pass
  384. else:
  385. return dereference_tag(obj)
  386. if len(committish) >= 4 and len(committish) < 40:
  387. try:
  388. int(committish, 16)
  389. except ValueError:
  390. pass
  391. else:
  392. try:
  393. obj = scan_for_short_id(repo.object_store, committish, Commit)
  394. except KeyError:
  395. pass
  396. else:
  397. return dereference_tag(obj)
  398. raise KeyError(committish)
  399. # TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation