objectspec.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. # objectspec.py -- Object specification
  2. # Copyright (C) 2014 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Object specification."""
  22. from collections.abc import Sequence
  23. from typing import TYPE_CHECKING
  24. from .objects import Commit, ObjectID, RawObjectID, ShaFile, Tag, Tree
  25. from .refs import Ref, local_branch_name, local_tag_name
  26. from .repo import BaseRepo
  27. if TYPE_CHECKING:
  28. from .object_store import BaseObjectStore
  29. from .refs import Ref, RefsContainer
  30. from .repo import Repo
  31. def to_bytes(text: str | bytes) -> bytes:
  32. """Convert text to bytes.
  33. Args:
  34. text: Text to convert (str or bytes)
  35. Returns:
  36. Bytes representation of text
  37. """
  38. if isinstance(text, str):
  39. return text.encode("ascii")
  40. return text
  41. def _resolve_object(
  42. repo: "Repo", ref: Ref | ObjectID | RawObjectID | bytes
  43. ) -> "ShaFile":
  44. """Resolve a reference to an object using multiple strategies."""
  45. try:
  46. return repo[ref]
  47. except KeyError:
  48. try:
  49. ref_sha = parse_ref(repo, ref)
  50. return repo[ref_sha]
  51. except KeyError:
  52. try:
  53. return repo.object_store[ref] # type: ignore[index]
  54. except (KeyError, ValueError):
  55. # Re-raise original KeyError for consistency
  56. raise KeyError(ref)
  57. def _parse_number_suffix(suffix: bytes) -> tuple[int, bytes]:
  58. """Parse a number from the start of suffix, return (number, remaining)."""
  59. if not suffix or not suffix[0:1].isdigit():
  60. return 1, suffix
  61. end = 1
  62. while end < len(suffix) and suffix[end : end + 1].isdigit():
  63. end += 1
  64. return int(suffix[:end]), suffix[end:]
  65. def parse_object(repo: "Repo", objectish: bytes | str) -> "ShaFile":
  66. """Parse a string referring to an object.
  67. Args:
  68. repo: A `Repo` object
  69. objectish: A string referring to an object
  70. Returns: A git object
  71. Raises:
  72. KeyError: If the object can not be found
  73. """
  74. objectish = to_bytes(objectish)
  75. # Handle :<path> - lookup path in tree or index
  76. if b":" in objectish:
  77. rev, path = objectish.split(b":", 1)
  78. if not rev:
  79. # Index path lookup: :path or :N:path where N is stage 0-3
  80. stage = 0
  81. if path and path[0:1].isdigit() and len(path) > 2 and path[1:2] == b":":
  82. stage = int(path[0:1])
  83. if stage > 3:
  84. raise ValueError(f"Invalid stage number: {stage}. Must be 0-3.")
  85. path = path[2:]
  86. # Open the index and look up the path
  87. try:
  88. index = repo.open_index()
  89. except AttributeError:
  90. raise NotImplementedError(
  91. "Index path lookup requires a non-bare repository"
  92. )
  93. if path not in index:
  94. raise KeyError(f"Path {path!r} not found in index")
  95. entry = index[path]
  96. # Handle ConflictedIndexEntry (merge stages)
  97. from .index import ConflictedIndexEntry
  98. if isinstance(entry, ConflictedIndexEntry):
  99. if stage == 0:
  100. raise ValueError(
  101. f"Path {path!r} has unresolved conflicts. "
  102. "Use :1:path, :2:path, or :3:path to access specific stages."
  103. )
  104. elif stage == 1:
  105. if entry.ancestor is None:
  106. raise KeyError(f"Path {path!r} has no ancestor (stage 1)")
  107. return repo[entry.ancestor.sha]
  108. elif stage == 2:
  109. if entry.this is None:
  110. raise KeyError(f"Path {path!r} has no 'this' version (stage 2)")
  111. return repo[entry.this.sha]
  112. elif stage == 3:
  113. if entry.other is None:
  114. raise KeyError(
  115. f"Path {path!r} has no 'other' version (stage 3)"
  116. )
  117. return repo[entry.other.sha]
  118. else:
  119. # Regular IndexEntry - only stage 0 is valid
  120. if stage != 0:
  121. raise ValueError(
  122. f"Path {path!r} has no conflicts. Only :0:{path!r} or :{path!r} is valid."
  123. )
  124. return repo[entry.sha]
  125. # Regular tree lookup: rev:path
  126. tree = parse_tree(repo, rev)
  127. _mode, sha = tree.lookup_path(repo.object_store.__getitem__, path)
  128. return repo[sha]
  129. # Handle @{N} or @{time} - reflog lookup
  130. if b"@{" in objectish:
  131. base, rest = objectish.split(b"@{", 1)
  132. if not rest.endswith(b"}"):
  133. raise ValueError("Invalid @{} syntax")
  134. spec = rest[:-1]
  135. ref = base if base else b"HEAD"
  136. entries = list(repo.read_reflog(ref))
  137. entries.reverse() # Git uses reverse chronological order
  138. if spec.isdigit():
  139. # Check if it's a small index or a timestamp
  140. # Git treats values < number of entries as indices, larger values as timestamps
  141. num = int(spec)
  142. if num < len(entries):
  143. # Treat as numeric index: @{N}
  144. return repo[entries[num].new_sha]
  145. # Otherwise fall through to treat as timestamp
  146. # Time specification: @{time} (includes large numeric values)
  147. from .approxidate import parse_approxidate
  148. target_time = parse_approxidate(spec)
  149. # Find the most recent entry at or before the target time
  150. for reflog_entry in entries:
  151. if reflog_entry.timestamp <= target_time:
  152. return repo[reflog_entry.new_sha]
  153. # If no entry is old enough, raise an error
  154. if entries:
  155. oldest_time = entries[-1].timestamp
  156. raise ValueError(
  157. f"Reflog for {ref!r} has no entries at or before {spec!r}. "
  158. f"Oldest entry is at timestamp {oldest_time}"
  159. )
  160. else:
  161. raise ValueError(f"Reflog for {ref!r} is empty")
  162. # Handle ^{} - tag dereferencing
  163. if objectish.endswith(b"^{}"):
  164. obj = _resolve_object(repo, objectish[:-3])
  165. while isinstance(obj, Tag):
  166. _obj_type, obj_sha = obj.object
  167. obj = repo[obj_sha]
  168. return obj
  169. # Handle ~ and ^ operators
  170. for sep in [b"~", b"^"]:
  171. if sep in objectish:
  172. base, suffix = objectish.split(sep, 1)
  173. if not base:
  174. raise ValueError(f"Empty base before {sep!r}")
  175. obj = _resolve_object(repo, base)
  176. num, suffix = _parse_number_suffix(suffix)
  177. if sep == b"~":
  178. # Follow first parent N times
  179. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  180. for _ in range(num):
  181. if not commit.parents:
  182. raise ValueError(
  183. f"Commit {commit.id.decode('ascii', 'replace')} has no parents"
  184. )
  185. parent_obj = repo[commit.parents[0]]
  186. assert isinstance(parent_obj, Commit)
  187. commit = parent_obj
  188. obj = commit
  189. else: # sep == b"^"
  190. # Get N-th parent (or commit itself if N=0)
  191. commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
  192. if num == 0:
  193. obj = commit
  194. elif num > len(commit.parents):
  195. raise ValueError(
  196. f"Commit {commit.id.decode('ascii', 'replace')} does not have parent #{num}"
  197. )
  198. else:
  199. obj = repo[commit.parents[num - 1]]
  200. # Process remaining operators recursively
  201. return parse_object(repo, obj.id + suffix) if suffix else obj
  202. # No operators, just return the object
  203. return _resolve_object(repo, objectish)
  204. def parse_tree(repo: "BaseRepo", treeish: bytes | str | Tree | Commit | Tag) -> "Tree":
  205. """Parse a string referring to a tree.
  206. Args:
  207. repo: A repository object
  208. treeish: A string referring to a tree, or a Tree, Commit, or Tag object
  209. Returns: A Tree object
  210. Raises:
  211. KeyError: If the object can not be found
  212. """
  213. # If already a Tree, return it directly
  214. if isinstance(treeish, Tree):
  215. return treeish
  216. # If it's a Commit, return its tree
  217. if isinstance(treeish, Commit):
  218. tree = repo[treeish.tree]
  219. assert isinstance(tree, Tree)
  220. return tree
  221. # For Tag objects or strings, use the existing logic
  222. if isinstance(treeish, Tag):
  223. treeish = treeish.id
  224. else:
  225. treeish = to_bytes(treeish)
  226. treeish_typed: Ref | ObjectID
  227. try:
  228. treeish_typed = parse_ref(repo.refs, treeish)
  229. except KeyError: # treeish is commit sha
  230. treeish_typed = ObjectID(treeish)
  231. try:
  232. o = repo[treeish_typed]
  233. except KeyError:
  234. # Try parsing as commit (handles short hashes)
  235. try:
  236. commit = parse_commit(repo, treeish)
  237. assert isinstance(commit, Commit)
  238. tree = repo[commit.tree]
  239. assert isinstance(tree, Tree)
  240. return tree
  241. except KeyError:
  242. raise KeyError(treeish)
  243. if isinstance(o, Commit):
  244. tree = repo[o.tree]
  245. assert isinstance(tree, Tree)
  246. return tree
  247. elif isinstance(o, Tag):
  248. # Tag handling - dereference and recurse
  249. _obj_type, obj_sha = o.object
  250. return parse_tree(repo, obj_sha)
  251. assert isinstance(o, Tree)
  252. return o
  253. def parse_ref(container: "Repo | RefsContainer", refspec: str | bytes) -> "Ref":
  254. """Parse a string referring to a reference.
  255. Args:
  256. container: A RefsContainer object
  257. refspec: A string referring to a ref
  258. Returns: A ref
  259. Raises:
  260. KeyError: If the ref can not be found
  261. """
  262. refspec = to_bytes(refspec)
  263. possible_refs = [
  264. refspec,
  265. b"refs/" + refspec,
  266. local_tag_name(refspec),
  267. local_branch_name(refspec),
  268. b"refs/remotes/" + refspec,
  269. b"refs/remotes/" + refspec + b"/HEAD",
  270. ]
  271. for ref in possible_refs:
  272. if ref in container:
  273. return Ref(ref)
  274. raise KeyError(refspec)
  275. def parse_reftuple(
  276. lh_container: "Repo | RefsContainer",
  277. rh_container: "Repo | RefsContainer",
  278. refspec: str | bytes,
  279. force: bool = False,
  280. ) -> tuple["Ref | None", "Ref | None", bool]:
  281. """Parse a reftuple spec.
  282. Args:
  283. lh_container: A RefsContainer object
  284. rh_container: A RefsContainer object
  285. refspec: A string
  286. force: Whether to force the operation
  287. Returns: A tuple with left and right ref
  288. Raises:
  289. KeyError: If one of the refs can not be found
  290. """
  291. refspec = to_bytes(refspec)
  292. if refspec.startswith(b"+"):
  293. force = True
  294. refspec = refspec[1:]
  295. lh_bytes: bytes | None
  296. rh_bytes: bytes | None
  297. if b":" in refspec:
  298. (lh_bytes, rh_bytes) = refspec.split(b":")
  299. else:
  300. lh_bytes = rh_bytes = refspec
  301. lh: Ref | None
  302. if lh_bytes == b"":
  303. lh = None
  304. else:
  305. lh = parse_ref(lh_container, lh_bytes)
  306. rh: Ref | None
  307. if rh_bytes == b"":
  308. rh = None
  309. else:
  310. try:
  311. rh = parse_ref(rh_container, rh_bytes)
  312. except KeyError:
  313. # TODO: check force?
  314. if b"/" not in rh_bytes:
  315. rh = Ref(local_branch_name(rh_bytes))
  316. else:
  317. rh = Ref(rh_bytes)
  318. return (lh, rh, force)
  319. def parse_reftuples(
  320. lh_container: "Repo | RefsContainer",
  321. rh_container: "Repo | RefsContainer",
  322. refspecs: bytes | Sequence[bytes],
  323. force: bool = False,
  324. ) -> list[tuple["Ref | None", "Ref | None", bool]]:
  325. """Parse a list of reftuple specs to a list of reftuples.
  326. Args:
  327. lh_container: A RefsContainer object
  328. rh_container: A RefsContainer object
  329. refspecs: A list of refspecs or a string
  330. force: Force overwriting for all reftuples
  331. Returns: A list of refs
  332. Raises:
  333. KeyError: If one of the refs can not be found
  334. """
  335. if isinstance(refspecs, bytes):
  336. refspecs = [refspecs]
  337. ret = []
  338. # TODO: Support * in refspecs
  339. for refspec in refspecs:
  340. ret.append(parse_reftuple(lh_container, rh_container, refspec, force=force))
  341. return ret
  342. def parse_refs(
  343. container: "Repo | RefsContainer",
  344. refspecs: bytes | str | Sequence[bytes | str],
  345. ) -> list["Ref"]:
  346. """Parse a list of refspecs to a list of refs.
  347. Args:
  348. container: A RefsContainer object
  349. refspecs: A list of refspecs or a string
  350. Returns: A list of refs
  351. Raises:
  352. KeyError: If one of the refs can not be found
  353. """
  354. # TODO: Support * in refspecs
  355. if isinstance(refspecs, (bytes, str)):
  356. refspecs = [refspecs]
  357. ret = []
  358. for refspec in refspecs:
  359. ret.append(parse_ref(container, refspec))
  360. return ret
  361. def parse_commit_range(
  362. repo: "Repo", committish: str | bytes
  363. ) -> tuple["Commit", "Commit"] | None:
  364. """Parse a string referring to a commit range.
  365. Args:
  366. repo: A `Repo` object
  367. committish: A string referring to a commit or range (e.g., "HEAD~3..HEAD")
  368. Returns:
  369. None if committish is a single commit reference
  370. A tuple of (start_commit, end_commit) if it's a range
  371. Raises:
  372. KeyError: When the commits can not be found
  373. ValueError: If the range can not be parsed
  374. """
  375. committish = to_bytes(committish)
  376. if b".." not in committish:
  377. return None
  378. parts = committish.split(b"..", 1)
  379. if len(parts) != 2:
  380. raise ValueError(f"Invalid commit range: {committish.decode('utf-8')}")
  381. start_ref = parts[0]
  382. end_ref = parts[1] if parts[1] else b"HEAD"
  383. start_commit = parse_commit(repo, start_ref)
  384. end_commit = parse_commit(repo, end_ref)
  385. return (start_commit, end_commit)
  386. class AmbiguousShortId(Exception):
  387. """The short id is ambiguous."""
  388. def __init__(self, prefix: bytes, options: list[ShaFile]) -> None:
  389. """Initialize AmbiguousShortId.
  390. Args:
  391. prefix: The ambiguous prefix
  392. options: List of matching objects
  393. """
  394. self.prefix = prefix
  395. self.options = options
  396. def scan_for_short_id(
  397. object_store: "BaseObjectStore", prefix: bytes, tp: type[ShaFile]
  398. ) -> ShaFile:
  399. """Scan an object store for a short id."""
  400. ret = []
  401. for object_id in object_store.iter_prefix(prefix):
  402. o = object_store[object_id]
  403. if isinstance(o, tp):
  404. ret.append(o)
  405. if not ret:
  406. raise KeyError(prefix)
  407. if len(ret) == 1:
  408. return ret[0]
  409. raise AmbiguousShortId(prefix, ret)
  410. def parse_commit(repo: "BaseRepo", committish: str | bytes | Commit | Tag) -> "Commit":
  411. """Parse a string referring to a single commit.
  412. Args:
  413. repo: A repository object
  414. committish: A string referring to a single commit, or a Commit or Tag object.
  415. Returns: A Commit object
  416. Raises:
  417. KeyError: When the reference commits can not be found
  418. ValueError: If the range can not be parsed
  419. """
  420. def dereference_tag(obj: ShaFile) -> "Commit":
  421. """Follow tag references until we reach a non-tag object."""
  422. while isinstance(obj, Tag):
  423. _obj_type, obj_sha = obj.object
  424. try:
  425. obj = repo.object_store[obj_sha]
  426. except KeyError:
  427. # Tag points to a missing object
  428. raise KeyError(obj_sha)
  429. if not isinstance(obj, Commit):
  430. raise ValueError(
  431. f"Expected commit, got {obj.type_name.decode('ascii', 'replace')}"
  432. )
  433. return obj
  434. # If already a Commit object, return it directly
  435. if isinstance(committish, Commit):
  436. return committish
  437. # If it's a Tag object, dereference it
  438. if isinstance(committish, Tag):
  439. return dereference_tag(committish)
  440. committish = to_bytes(committish)
  441. try:
  442. obj = repo[committish]
  443. except KeyError:
  444. pass
  445. else:
  446. return dereference_tag(obj)
  447. try:
  448. obj = repo[parse_ref(repo.refs, committish)]
  449. except KeyError:
  450. pass
  451. else:
  452. return dereference_tag(obj)
  453. if len(committish) >= 4 and len(committish) < 40:
  454. try:
  455. int(committish, 16)
  456. except ValueError:
  457. pass
  458. else:
  459. try:
  460. obj = scan_for_short_id(repo.object_store, committish, Commit)
  461. except KeyError:
  462. pass
  463. else:
  464. return dereference_tag(obj)
  465. raise KeyError(committish)
  466. # TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation