objectspec.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. # objectspec.py -- Object specification
  2. # Copyright (C) 2014 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Object specification."""
  22. from collections.abc import Sequence
  23. from typing import TYPE_CHECKING
  24. from .objects import Commit, ShaFile, Tag, Tree
  25. from .refs import local_branch_name, local_tag_name
  26. from .repo import BaseRepo
  27. if TYPE_CHECKING:
  28. from .object_store import BaseObjectStore
  29. from .refs import Ref, RefsContainer
  30. from .repo import Repo
  31. def to_bytes(text: str | bytes) -> bytes:
  32. """Convert text to bytes.
  33. Args:
  34. text: Text to convert (str or bytes)
  35. Returns:
  36. Bytes representation of text
  37. """
  38. if isinstance(text, str):
  39. return text.encode("ascii")
  40. return text
  41. def _resolve_object(repo: "Repo", ref: bytes) -> "ShaFile":
  42. """Resolve a reference to an object using multiple strategies."""
  43. try:
  44. return repo[ref]
  45. except KeyError:
  46. try:
  47. ref_sha = parse_ref(repo, ref)
  48. return repo[ref_sha]
  49. except KeyError:
  50. try:
  51. return repo.object_store[ref]
  52. except (KeyError, ValueError):
  53. # Re-raise original KeyError for consistency
  54. raise KeyError(ref)
  55. def _parse_number_suffix(suffix: bytes) -> tuple[int, bytes]:
  56. """Parse a number from the start of suffix, return (number, remaining)."""
  57. if not suffix or not suffix[0:1].isdigit():
  58. return 1, suffix
  59. end = 1
  60. while end < len(suffix) and suffix[end : end + 1].isdigit():
  61. end += 1
  62. return int(suffix[:end]), suffix[end:]
  63. def parse_object(repo: "Repo", objectish: bytes | str) -> "ShaFile":
  64. """Parse a string referring to an object.
  65. Args:
  66. repo: A `Repo` object
  67. objectish: A string referring to an object
  68. Returns: A git object
  69. Raises:
  70. KeyError: If the object can not be found
  71. """
  72. objectish = to_bytes(objectish)
  73. # Handle :<path> - lookup path in tree or index
  74. if b":" in objectish:
  75. rev, path = objectish.split(b":", 1)
  76. if not rev:
  77. # Index path lookup: :path or :N:path where N is stage 0-3
  78. stage = 0
  79. if path and path[0:1].isdigit() and len(path) > 2 and path[1:2] == b":":
  80. stage = int(path[0:1])
  81. if stage > 3:
  82. raise ValueError(f"Invalid stage number: {stage}. Must be 0-3.")
  83. path = path[2:]
  84. # Open the index and look up the path
  85. try:
  86. index = repo.open_index()
  87. except AttributeError:
  88. raise NotImplementedError(
  89. "Index path lookup requires a non-bare repository"
  90. )
  91. if path not in index:
  92. raise KeyError(f"Path {path!r} not found in index")
  93. entry = index[path]
  94. # Handle ConflictedIndexEntry (merge stages)
  95. from .index import ConflictedIndexEntry
  96. if isinstance(entry, ConflictedIndexEntry):
  97. if stage == 0:
  98. raise ValueError(
  99. f"Path {path!r} has unresolved conflicts. "
  100. "Use :1:path, :2:path, or :3:path to access specific stages."
  101. )
  102. elif stage == 1:
  103. if entry.ancestor is None:
  104. raise KeyError(f"Path {path!r} has no ancestor (stage 1)")
  105. return repo[entry.ancestor.sha]
  106. elif stage == 2:
  107. if entry.this is None:
  108. raise KeyError(f"Path {path!r} has no 'this' version (stage 2)")
  109. return repo[entry.this.sha]
  110. elif stage == 3:
  111. if entry.other is None:
  112. raise KeyError(
  113. f"Path {path!r} has no 'other' version (stage 3)"
  114. )
  115. return repo[entry.other.sha]
  116. else:
  117. # Regular IndexEntry - only stage 0 is valid
  118. if stage != 0:
  119. raise ValueError(
  120. f"Path {path!r} has no conflicts. Only :0:{path!r} or :{path!r} is valid."
  121. )
  122. return repo[entry.sha]
  123. # Regular tree lookup: rev:path
  124. tree = parse_tree(repo, rev)
  125. _mode, sha = tree.lookup_path(repo.object_store.__getitem__, path)
  126. return repo[sha]
  127. # Handle @{N} or @{time} - reflog lookup
  128. if b"@{" in objectish:
  129. base, rest = objectish.split(b"@{", 1)
  130. if not rest.endswith(b"}"):
  131. raise ValueError("Invalid @{} syntax")
  132. spec = rest[:-1]
  133. ref = base if base else b"HEAD"
  134. entries = list(repo.read_reflog(ref))
  135. entries.reverse() # Git uses reverse chronological order
  136. if spec.isdigit():
  137. # Check if it's a small index or a timestamp
  138. # Git treats values < number of entries as indices, larger values as timestamps
  139. num = int(spec)
  140. if num < len(entries):
  141. # Treat as numeric index: @{N}
  142. return repo[entries[num].new_sha]
  143. # Otherwise fall through to treat as timestamp
  144. # Time specification: @{time} (includes large numeric values)
  145. from .approxidate import parse_approxidate
  146. target_time = parse_approxidate(spec)
  147. # Find the most recent entry at or before the target time
  148. for reflog_entry in entries:
  149. if reflog_entry.timestamp <= target_time:
  150. return repo[reflog_entry.new_sha]
  151. # If no entry is old enough, raise an error
  152. if entries:
  153. oldest_time = entries[-1].timestamp
  154. raise ValueError(
  155. f"Reflog for {ref!r} has no entries at or before {spec!r}. "
  156. f"Oldest entry is at timestamp {oldest_time}"
  157. )
  158. else:
  159. raise ValueError(f"Reflog for {ref!r} is empty")
  160. # Handle ^{} - tag dereferencing
  161. if objectish.endswith(b"^{}"):
  162. obj = _resolve_object(repo, objectish[:-3])
  163. while isinstance(obj, Tag):
  164. _obj_type, obj_sha = obj.object
  165. obj = repo[obj_sha]
  166. return obj
  167. # Handle ~ and ^ operators
  168. for sep in [b"~", b"^"]:
  169. if sep in objectish:
  170. base, suffix = objectish.split(sep, 1)
  171. if not base:
  172. raise ValueError(f"Empty base before {sep!r}")
  173. obj = _resolve_object(repo, base)
  174. num, suffix = _parse_number_suffix(suffix)
  175. if sep == b"~":
  176. # Follow first parent N times
  177. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  178. for _ in range(num):
  179. if not commit.parents:
  180. raise ValueError(
  181. f"Commit {commit.id.decode('ascii', 'replace')} has no parents"
  182. )
  183. parent_obj = repo[commit.parents[0]]
  184. assert isinstance(parent_obj, Commit)
  185. commit = parent_obj
  186. obj = commit
  187. else: # sep == b"^"
  188. # Get N-th parent (or commit itself if N=0)
  189. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  190. if num == 0:
  191. obj = commit
  192. elif num > len(commit.parents):
  193. raise ValueError(
  194. f"Commit {commit.id.decode('ascii', 'replace')} does not have parent #{num}"
  195. )
  196. else:
  197. obj = repo[commit.parents[num - 1]]
  198. # Process remaining operators recursively
  199. return parse_object(repo, obj.id + suffix) if suffix else obj
  200. # No operators, just return the object
  201. return _resolve_object(repo, objectish)
  202. def parse_tree(repo: "BaseRepo", treeish: bytes | str | Tree | Commit | Tag) -> "Tree":
  203. """Parse a string referring to a tree.
  204. Args:
  205. repo: A repository object
  206. treeish: A string referring to a tree, or a Tree, Commit, or Tag object
  207. Returns: A Tree object
  208. Raises:
  209. KeyError: If the object can not be found
  210. """
  211. # If already a Tree, return it directly
  212. if isinstance(treeish, Tree):
  213. return treeish
  214. # If it's a Commit, return its tree
  215. if isinstance(treeish, Commit):
  216. tree = repo[treeish.tree]
  217. assert isinstance(tree, Tree)
  218. return tree
  219. # For Tag objects or strings, use the existing logic
  220. if isinstance(treeish, Tag):
  221. treeish = treeish.id
  222. else:
  223. treeish = to_bytes(treeish)
  224. try:
  225. treeish = parse_ref(repo.refs, treeish)
  226. except KeyError: # treeish is commit sha
  227. pass
  228. try:
  229. o = repo[treeish]
  230. except KeyError:
  231. # Try parsing as commit (handles short hashes)
  232. try:
  233. commit = parse_commit(repo, treeish)
  234. assert isinstance(commit, Commit)
  235. tree = repo[commit.tree]
  236. assert isinstance(tree, Tree)
  237. return tree
  238. except KeyError:
  239. raise KeyError(treeish)
  240. if isinstance(o, Commit):
  241. tree = repo[o.tree]
  242. assert isinstance(tree, Tree)
  243. return tree
  244. elif isinstance(o, Tag):
  245. # Tag handling - dereference and recurse
  246. _obj_type, obj_sha = o.object
  247. return parse_tree(repo, obj_sha)
  248. assert isinstance(o, Tree)
  249. return o
  250. def parse_ref(container: "Repo" | "RefsContainer", refspec: str | bytes) -> "Ref":
  251. """Parse a string referring to a reference.
  252. Args:
  253. container: A RefsContainer object
  254. refspec: A string referring to a ref
  255. Returns: A ref
  256. Raises:
  257. KeyError: If the ref can not be found
  258. """
  259. refspec = to_bytes(refspec)
  260. possible_refs = [
  261. refspec,
  262. b"refs/" + refspec,
  263. local_tag_name(refspec),
  264. local_branch_name(refspec),
  265. b"refs/remotes/" + refspec,
  266. b"refs/remotes/" + refspec + b"/HEAD",
  267. ]
  268. for ref in possible_refs:
  269. if ref in container:
  270. return ref
  271. raise KeyError(refspec)
  272. def parse_reftuple(
  273. lh_container: "Repo" | "RefsContainer",
  274. rh_container: "Repo" | "RefsContainer",
  275. refspec: str | bytes,
  276. force: bool = False,
  277. ) -> tuple["Ref" | None, "Ref" | None, bool]:
  278. """Parse a reftuple spec.
  279. Args:
  280. lh_container: A RefsContainer object
  281. rh_container: A RefsContainer object
  282. refspec: A string
  283. force: Whether to force the operation
  284. Returns: A tuple with left and right ref
  285. Raises:
  286. KeyError: If one of the refs can not be found
  287. """
  288. refspec = to_bytes(refspec)
  289. if refspec.startswith(b"+"):
  290. force = True
  291. refspec = refspec[1:]
  292. lh: bytes | None
  293. rh: bytes | None
  294. if b":" in refspec:
  295. (lh, rh) = refspec.split(b":")
  296. else:
  297. lh = rh = refspec
  298. if lh == b"":
  299. lh = None
  300. else:
  301. lh = parse_ref(lh_container, lh)
  302. if rh == b"":
  303. rh = None
  304. else:
  305. try:
  306. rh = parse_ref(rh_container, rh)
  307. except KeyError:
  308. # TODO: check force?
  309. if b"/" not in rh:
  310. rh = local_branch_name(rh)
  311. return (lh, rh, force)
  312. def parse_reftuples(
  313. lh_container: "Repo" | "RefsContainer",
  314. rh_container: "Repo" | "RefsContainer",
  315. refspecs: bytes | Sequence[bytes],
  316. force: bool = False,
  317. ) -> list[tuple["Ref" | None, "Ref" | None, bool]]:
  318. """Parse a list of reftuple specs to a list of reftuples.
  319. Args:
  320. lh_container: A RefsContainer object
  321. rh_container: A RefsContainer object
  322. refspecs: A list of refspecs or a string
  323. force: Force overwriting for all reftuples
  324. Returns: A list of refs
  325. Raises:
  326. KeyError: If one of the refs can not be found
  327. """
  328. if isinstance(refspecs, bytes):
  329. refspecs = [refspecs]
  330. ret = []
  331. # TODO: Support * in refspecs
  332. for refspec in refspecs:
  333. ret.append(parse_reftuple(lh_container, rh_container, refspec, force=force))
  334. return ret
  335. def parse_refs(
  336. container: "Repo" | "RefsContainer",
  337. refspecs: bytes | str | Sequence[bytes | str],
  338. ) -> list["Ref"]:
  339. """Parse a list of refspecs to a list of refs.
  340. Args:
  341. container: A RefsContainer object
  342. refspecs: A list of refspecs or a string
  343. Returns: A list of refs
  344. Raises:
  345. KeyError: If one of the refs can not be found
  346. """
  347. # TODO: Support * in refspecs
  348. if isinstance(refspecs, (bytes, str)):
  349. refspecs = [refspecs]
  350. ret = []
  351. for refspec in refspecs:
  352. ret.append(parse_ref(container, refspec))
  353. return ret
  354. def parse_commit_range(
  355. repo: "Repo", committish: str | bytes
  356. ) -> tuple["Commit", "Commit"] | None:
  357. """Parse a string referring to a commit range.
  358. Args:
  359. repo: A `Repo` object
  360. committish: A string referring to a commit or range (e.g., "HEAD~3..HEAD")
  361. Returns:
  362. None if committish is a single commit reference
  363. A tuple of (start_commit, end_commit) if it's a range
  364. Raises:
  365. KeyError: When the commits can not be found
  366. ValueError: If the range can not be parsed
  367. """
  368. committish = to_bytes(committish)
  369. if b".." not in committish:
  370. return None
  371. parts = committish.split(b"..", 1)
  372. if len(parts) != 2:
  373. raise ValueError(f"Invalid commit range: {committish.decode('utf-8')}")
  374. start_ref = parts[0]
  375. end_ref = parts[1] if parts[1] else b"HEAD"
  376. start_commit = parse_commit(repo, start_ref)
  377. end_commit = parse_commit(repo, end_ref)
  378. return (start_commit, end_commit)
  379. class AmbiguousShortId(Exception):
  380. """The short id is ambiguous."""
  381. def __init__(self, prefix: bytes, options: list[ShaFile]) -> None:
  382. """Initialize AmbiguousShortId.
  383. Args:
  384. prefix: The ambiguous prefix
  385. options: List of matching objects
  386. """
  387. self.prefix = prefix
  388. self.options = options
  389. def scan_for_short_id(
  390. object_store: "BaseObjectStore", prefix: bytes, tp: type[ShaFile]
  391. ) -> ShaFile:
  392. """Scan an object store for a short id."""
  393. ret = []
  394. for object_id in object_store.iter_prefix(prefix):
  395. o = object_store[object_id]
  396. if isinstance(o, tp):
  397. ret.append(o)
  398. if not ret:
  399. raise KeyError(prefix)
  400. if len(ret) == 1:
  401. return ret[0]
  402. raise AmbiguousShortId(prefix, ret)
  403. def parse_commit(repo: "BaseRepo", committish: str | bytes | Commit | Tag) -> "Commit":
  404. """Parse a string referring to a single commit.
  405. Args:
  406. repo: A repository object
  407. committish: A string referring to a single commit, or a Commit or Tag object.
  408. Returns: A Commit object
  409. Raises:
  410. KeyError: When the reference commits can not be found
  411. ValueError: If the range can not be parsed
  412. """
  413. def dereference_tag(obj: ShaFile) -> "Commit":
  414. """Follow tag references until we reach a non-tag object."""
  415. while isinstance(obj, Tag):
  416. _obj_type, obj_sha = obj.object
  417. try:
  418. obj = repo.object_store[obj_sha]
  419. except KeyError:
  420. # Tag points to a missing object
  421. raise KeyError(obj_sha)
  422. if not isinstance(obj, Commit):
  423. raise ValueError(
  424. f"Expected commit, got {obj.type_name.decode('ascii', 'replace')}"
  425. )
  426. return obj
  427. # If already a Commit object, return it directly
  428. if isinstance(committish, Commit):
  429. return committish
  430. # If it's a Tag object, dereference it
  431. if isinstance(committish, Tag):
  432. return dereference_tag(committish)
  433. committish = to_bytes(committish)
  434. try:
  435. obj = repo[committish]
  436. except KeyError:
  437. pass
  438. else:
  439. return dereference_tag(obj)
  440. try:
  441. obj = repo[parse_ref(repo.refs, committish)]
  442. except KeyError:
  443. pass
  444. else:
  445. return dereference_tag(obj)
  446. if len(committish) >= 4 and len(committish) < 40:
  447. try:
  448. int(committish, 16)
  449. except ValueError:
  450. pass
  451. else:
  452. try:
  453. obj = scan_for_short_id(repo.object_store, committish, Commit)
  454. except KeyError:
  455. pass
  456. else:
  457. return dereference_tag(obj)
  458. raise KeyError(committish)
  459. # TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation