client.py 151 KB


  1. # client.py -- Implementation of the client side git protocols
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Client side support for the Git protocol.
  22. The Dulwich client supports the following capabilities:
  23. * thin-pack
  24. * multi_ack_detailed
  25. * multi_ack
  26. * side-band-64k
  27. * ofs-delta
  28. * quiet
  29. * report-status
  30. * delete-refs
  31. * shallow
  32. Known capabilities that are not supported:
  33. * no-progress
  34. * include-tag
  35. """
  36. import copy
  37. import functools
  38. import logging
  39. import os
  40. import select
  41. import socket
  42. import subprocess
  43. import sys
  44. from collections.abc import Iterable, Iterator, Mapping, Sequence, Set
  45. from contextlib import closing
  46. from io import BufferedReader, BytesIO
  47. from typing import (
  48. IO,
  49. TYPE_CHECKING,
  50. Any,
  51. Callable,
  52. ClassVar,
  53. Optional,
  54. Union,
  55. )
  56. from urllib.parse import ParseResult, urljoin, urlparse, urlunparse, urlunsplit
  57. from urllib.parse import quote as urlquote
  58. if TYPE_CHECKING:
  59. import urllib3
  60. import dulwich
if TYPE_CHECKING:
    # Everything in this branch exists only for static type checkers; none of
    # these names are defined at runtime, so annotations that mention them
    # elsewhere in the module must be quoted.
    from typing import Protocol as TypingProtocol

    from .pack import UnpackedObject

    class HTTPResponse(TypingProtocol):
        """Protocol for HTTP response objects.

        Describes the attributes and methods this module expects an HTTP
        response object to offer.
        """

        # Redirect target reported by the response, if any.
        redirect_location: Optional[str]
        # Content type reported by the response, if any.
        content_type: Optional[str]

        def close(self) -> None:
            """Close the response."""
            ...

    class GeneratePackDataFunc(TypingProtocol):
        """Protocol for generate_pack_data functions."""

        def __call__(
            self,
            have: Set[bytes],
            want: Set[bytes],
            ofs_delta: bool = False,
        ) -> tuple[int, Iterator[UnpackedObject]]:
            """Generate pack data for the given have and want sets.

            Args:
              have: Object ids in the "have" set
              want: Object ids in the "want" set
              ofs_delta: Flag passed by the caller; defaults to False

            Returns:
              Tuple of an integer and an iterator over UnpackedObject
              instances
            """
            ...

    class DetermineWantsFunc(TypingProtocol):
        """Protocol for determine_wants functions."""

        def __call__(
            self,
            refs: Mapping[bytes, bytes],
            depth: Optional[int] = None,
        ) -> list[bytes]:
            """Determine the objects to fetch from the given refs.

            Args:
              refs: Mapping of ref name to object id
              depth: Optional depth; defaults to None

            Returns:
              List of object ids to fetch
            """
            ...
  90. from .bundle import Bundle
  91. from .config import Config, apply_instead_of, get_xdg_config_home_path
  92. from .errors import GitProtocolError, HangupException, NotGitRepository, SendPackError
  93. from .object_store import GraphWalker
  94. from .pack import (
  95. PACK_SPOOL_FILE_MAX_SIZE,
  96. PackChunkGenerator,
  97. PackData,
  98. write_pack_from_container,
  99. )
  100. from .protocol import (
  101. _RBUFSIZE,
  102. CAPABILITIES_REF,
  103. CAPABILITY_AGENT,
  104. CAPABILITY_DELETE_REFS,
  105. CAPABILITY_FETCH,
  106. CAPABILITY_FILTER,
  107. CAPABILITY_INCLUDE_TAG,
  108. CAPABILITY_MULTI_ACK,
  109. CAPABILITY_MULTI_ACK_DETAILED,
  110. CAPABILITY_OFS_DELTA,
  111. CAPABILITY_QUIET,
  112. CAPABILITY_REPORT_STATUS,
  113. CAPABILITY_SHALLOW,
  114. CAPABILITY_SIDE_BAND_64K,
  115. CAPABILITY_SYMREF,
  116. CAPABILITY_THIN_PACK,
  117. COMMAND_DEEPEN,
  118. COMMAND_DONE,
  119. COMMAND_HAVE,
  120. COMMAND_SHALLOW,
  121. COMMAND_UNSHALLOW,
  122. COMMAND_WANT,
  123. DEFAULT_GIT_PROTOCOL_VERSION_FETCH,
  124. DEFAULT_GIT_PROTOCOL_VERSION_SEND,
  125. GIT_PROTOCOL_VERSIONS,
  126. KNOWN_RECEIVE_CAPABILITIES,
  127. KNOWN_UPLOAD_CAPABILITIES,
  128. SIDE_BAND_CHANNEL_DATA,
  129. SIDE_BAND_CHANNEL_FATAL,
  130. SIDE_BAND_CHANNEL_PROGRESS,
  131. TCP_GIT_PORT,
  132. ZERO_SHA,
  133. PktLineParser,
  134. Protocol,
  135. agent_string,
  136. capability_agent,
  137. extract_capabilities,
  138. extract_capability_names,
  139. parse_capability,
  140. pkt_line,
  141. pkt_seq,
  142. )
  143. from .refs import (
  144. PEELED_TAG_SUFFIX,
  145. SYMREF,
  146. Ref,
  147. _import_remote_refs,
  148. _set_default_branch,
  149. _set_head,
  150. _set_origin_head,
  151. filter_ref_prefix,
  152. read_info_refs,
  153. split_peeled_refs,
  154. )
  155. from .repo import BaseRepo, Repo
  156. # Default ref prefix, used if none is specified.
  157. # GitHub defaults to just sending HEAD if no ref-prefix is
  158. # specified, so explicitly request all refs to match
  159. # behaviour with v1 when no ref-prefix is specified.
DEFAULT_REF_PREFIX = [b"HEAD", b"refs/"]

# Alias for the type used to carry object ids in this module: plain bytes.
ObjectID = bytes

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  163. class InvalidWants(Exception):
  164. """Invalid wants."""
  165. def __init__(self, wants: Set[bytes]) -> None:
  166. """Initialize InvalidWants exception.
  167. Args:
  168. wants: List of invalid wants
  169. """
  170. Exception.__init__(
  171. self, f"requested wants not in server provided refs: {wants!r}"
  172. )
  173. class HTTPUnauthorized(Exception):
  174. """Raised when authentication fails."""
  175. def __init__(self, www_authenticate: Optional[str], url: str) -> None:
  176. """Initialize HTTPUnauthorized exception.
  177. Args:
  178. www_authenticate: WWW-Authenticate header value
  179. url: URL that requires authentication
  180. """
  181. Exception.__init__(self, "No valid credentials provided")
  182. self.www_authenticate = www_authenticate
  183. self.url = url
  184. def _to_optional_dict(refs: Mapping[bytes, bytes]) -> dict[bytes, Optional[bytes]]:
  185. """Convert a dict[bytes, bytes] to dict[bytes, Optional[bytes]].
  186. This is needed for compatibility with result types that expect Optional values.
  187. """
  188. return {k: v for k, v in refs.items()}
  189. class HTTPProxyUnauthorized(Exception):
  190. """Raised when proxy authentication fails."""
  191. def __init__(self, proxy_authenticate: Optional[str], url: str) -> None:
  192. """Initialize HTTPProxyUnauthorized exception.
  193. Args:
  194. proxy_authenticate: Proxy-Authenticate header value
  195. url: URL that requires proxy authentication
  196. """
  197. Exception.__init__(self, "No valid proxy credentials provided")
  198. self.proxy_authenticate = proxy_authenticate
  199. self.url = url
  200. def _fileno_can_read(fileno: int) -> bool:
  201. """Check if a file descriptor is readable."""
  202. return len(select.select([fileno], [], [], 0)[0]) > 0
  203. def _win32_peek_avail(handle: int) -> int:
  204. """Wrapper around PeekNamedPipe to check how many bytes are available."""
  205. from ctypes import ( # type: ignore[attr-defined,unused-ignore]
  206. byref,
  207. windll,
  208. wintypes,
  209. )
  210. c_avail = wintypes.DWORD()
  211. c_message = wintypes.DWORD()
  212. success = windll.kernel32.PeekNamedPipe(
  213. handle, None, 0, None, byref(c_avail), byref(c_message)
  214. )
  215. if not success:
  216. from ctypes import GetLastError # type: ignore[attr-defined,unused-ignore]
  217. raise OSError(GetLastError())
  218. return c_avail.value
# Capabilities included in both the upload (fetch) and receive (send) lists
# below.
COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]

# Starting set of capabilities for fetching; GitClient.__init__ copies this
# list into its fetch capabilities and then adjusts it per instance.
UPLOAD_CAPABILITIES = [
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_SHALLOW,
    *COMMON_CAPABILITIES,
]

# Starting set of capabilities for sending; GitClient.__init__ copies this
# list into its send capabilities and then adjusts it per instance.
RECEIVE_CAPABILITIES = [
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    *COMMON_CAPABILITIES,
]
  232. class ReportStatusParser:
  233. """Handle status as reported by servers with 'report-status' capability."""
  234. def __init__(self) -> None:
  235. """Initialize ReportStatusParser."""
  236. self._done = False
  237. self._pack_status: Optional[bytes] = None
  238. self._ref_statuses: list[bytes] = []
  239. def check(self) -> Iterator[tuple[bytes, Optional[str]]]:
  240. """Check if there were any errors and, if so, raise exceptions.
  241. Raises:
  242. SendPackError: Raised when the server could not unpack
  243. Returns:
  244. iterator over refs
  245. """
  246. if self._pack_status not in (b"unpack ok", None):
  247. raise SendPackError(self._pack_status)
  248. for status in self._ref_statuses:
  249. try:
  250. status, rest = status.split(b" ", 1)
  251. except ValueError:
  252. # malformed response, move on to the next one
  253. continue
  254. if status == b"ng":
  255. ref, error = rest.split(b" ", 1)
  256. yield ref, error.decode("utf-8")
  257. elif status == b"ok":
  258. yield rest, None
  259. else:
  260. raise GitProtocolError(f"invalid ref status {status!r}")
  261. def handle_packet(self, pkt: Optional[bytes]) -> None:
  262. """Handle a packet.
  263. Raises:
  264. GitProtocolError: Raised when packets are received after a flush
  265. packet.
  266. """
  267. if self._done:
  268. raise GitProtocolError("received more data after status report")
  269. if pkt is None:
  270. self._done = True
  271. return
  272. if self._pack_status is None:
  273. self._pack_status = pkt.strip()
  274. else:
  275. ref_status = pkt.strip()
  276. self._ref_statuses.append(ref_status)
  277. def negotiate_protocol_version(proto: Protocol) -> int:
  278. """Negotiate protocol version with the server."""
  279. pkt = proto.read_pkt_line()
  280. if pkt is not None and pkt.strip() == b"version 2":
  281. return 2
  282. proto.unread_pkt_line(pkt)
  283. return 0
  284. def read_server_capabilities(pkt_seq: Iterable[bytes]) -> set[bytes]:
  285. """Read server capabilities from packet sequence."""
  286. server_capabilities = []
  287. for pkt in pkt_seq:
  288. server_capabilities.append(pkt)
  289. return set(server_capabilities)
  290. def read_pkt_refs_v2(
  291. pkt_seq: Iterable[bytes],
  292. ) -> tuple[dict[bytes, Optional[bytes]], dict[bytes, bytes], dict[bytes, bytes]]:
  293. """Read references using protocol version 2."""
  294. refs: dict[bytes, Optional[bytes]] = {}
  295. symrefs = {}
  296. peeled = {}
  297. # Receive refs from server
  298. for pkt in pkt_seq:
  299. parts = pkt.rstrip(b"\n").split(b" ")
  300. sha: Optional[bytes] = parts[0]
  301. if sha == b"unborn":
  302. sha = None
  303. ref = parts[1]
  304. for part in parts[2:]:
  305. if part.startswith(b"peeled:"):
  306. peeled[ref] = part[7:]
  307. elif part.startswith(b"symref-target:"):
  308. symrefs[ref] = part[14:]
  309. else:
  310. logging.warning("unknown part in pkt-ref: %s", part)
  311. refs[ref] = sha
  312. return refs, symrefs, peeled
  313. def read_pkt_refs_v1(
  314. pkt_seq: Iterable[bytes],
  315. ) -> tuple[dict[bytes, bytes], set[bytes]]:
  316. """Read references using protocol version 1."""
  317. server_capabilities = None
  318. refs: dict[bytes, bytes] = {}
  319. # Receive refs from server
  320. for pkt in pkt_seq:
  321. (sha, ref) = pkt.rstrip(b"\n").split(None, 1)
  322. if sha == b"ERR":
  323. raise GitProtocolError(ref.decode("utf-8", "replace"))
  324. if server_capabilities is None:
  325. (ref, server_capabilities) = extract_capabilities(ref)
  326. refs[ref] = sha
  327. if len(refs) == 0:
  328. return {}, set()
  329. if refs == {CAPABILITIES_REF: ZERO_SHA}:
  330. refs = {}
  331. assert server_capabilities is not None
  332. return refs, set(server_capabilities)
  333. class _DeprecatedDictProxy:
  334. """Base class for result objects that provide deprecated dict-like interface."""
  335. refs: dict[bytes, Optional[bytes]] # To be overridden by subclasses
  336. _FORWARDED_ATTRS: ClassVar[set[str]] = {
  337. "clear",
  338. "copy",
  339. "fromkeys",
  340. "get",
  341. "items",
  342. "keys",
  343. "pop",
  344. "popitem",
  345. "setdefault",
  346. "update",
  347. "values",
  348. "viewitems",
  349. "viewkeys",
  350. "viewvalues",
  351. }
  352. def _warn_deprecated(self) -> None:
  353. import warnings
  354. warnings.warn(
  355. f"Use {self.__class__.__name__}.refs instead.",
  356. DeprecationWarning,
  357. stacklevel=3,
  358. )
  359. def __contains__(self, name: bytes) -> bool:
  360. self._warn_deprecated()
  361. return name in self.refs
  362. def __getitem__(self, name: bytes) -> Optional[bytes]:
  363. self._warn_deprecated()
  364. return self.refs[name]
  365. def __len__(self) -> int:
  366. self._warn_deprecated()
  367. return len(self.refs)
  368. def __iter__(self) -> Iterator[bytes]:
  369. self._warn_deprecated()
  370. return iter(self.refs)
  371. def __getattribute__(self, name: str) -> object:
  372. # Avoid infinite recursion by checking against class variable directly
  373. if name != "_FORWARDED_ATTRS" and name in type(self)._FORWARDED_ATTRS:
  374. self._warn_deprecated()
  375. # Direct attribute access to avoid recursion
  376. refs = object.__getattribute__(self, "refs")
  377. return getattr(refs, name)
  378. return super().__getattribute__(name)
  379. class FetchPackResult(_DeprecatedDictProxy):
  380. """Result of a fetch-pack operation.
  381. Attributes:
  382. refs: Dictionary with all remote refs
  383. symrefs: Dictionary with remote symrefs
  384. agent: User agent string
  385. """
  386. symrefs: dict[bytes, bytes]
  387. agent: Optional[bytes]
  388. def __init__(
  389. self,
  390. refs: dict[bytes, Optional[bytes]],
  391. symrefs: dict[bytes, bytes],
  392. agent: Optional[bytes],
  393. new_shallow: Optional[set[bytes]] = None,
  394. new_unshallow: Optional[set[bytes]] = None,
  395. ) -> None:
  396. """Initialize FetchPackResult.
  397. Args:
  398. refs: Dictionary with all remote refs
  399. symrefs: Dictionary with remote symrefs
  400. agent: User agent string
  401. new_shallow: New shallow commits
  402. new_unshallow: New unshallow commits
  403. """
  404. self.refs = refs
  405. self.symrefs = symrefs
  406. self.agent = agent
  407. self.new_shallow = new_shallow
  408. self.new_unshallow = new_unshallow
  409. def __eq__(self, other: object) -> bool:
  410. """Check equality with another object."""
  411. if isinstance(other, dict):
  412. self._warn_deprecated()
  413. return self.refs == other
  414. if not isinstance(other, FetchPackResult):
  415. return False
  416. return (
  417. self.refs == other.refs
  418. and self.symrefs == other.symrefs
  419. and self.agent == other.agent
  420. )
  421. def __repr__(self) -> str:
  422. """Return string representation of FetchPackResult."""
  423. return f"{self.__class__.__name__}({self.refs!r}, {self.symrefs!r}, {self.agent!r})"
  424. class LsRemoteResult(_DeprecatedDictProxy):
  425. """Result of a ls-remote operation.
  426. Attributes:
  427. refs: Dictionary with all remote refs
  428. symrefs: Dictionary with remote symrefs
  429. """
  430. symrefs: dict[bytes, bytes]
  431. def __init__(
  432. self, refs: dict[bytes, Optional[bytes]], symrefs: dict[bytes, bytes]
  433. ) -> None:
  434. """Initialize LsRemoteResult.
  435. Args:
  436. refs: Dictionary with all remote refs
  437. symrefs: Dictionary with remote symrefs
  438. """
  439. self.refs = refs
  440. self.symrefs = symrefs
  441. def _warn_deprecated(self) -> None:
  442. import warnings
  443. warnings.warn(
  444. "Treating LsRemoteResult as a dictionary is deprecated. "
  445. "Use result.refs instead.",
  446. DeprecationWarning,
  447. stacklevel=3,
  448. )
  449. def __eq__(self, other: object) -> bool:
  450. """Check equality with another object."""
  451. if isinstance(other, dict):
  452. self._warn_deprecated()
  453. return self.refs == other
  454. if not isinstance(other, LsRemoteResult):
  455. return False
  456. return self.refs == other.refs and self.symrefs == other.symrefs
  457. def __repr__(self) -> str:
  458. """Return string representation of LsRemoteResult."""
  459. return f"{self.__class__.__name__}({self.refs!r}, {self.symrefs!r})"
  460. class SendPackResult(_DeprecatedDictProxy):
  461. """Result of a upload-pack operation.
  462. Attributes:
  463. refs: Dictionary with all remote refs
  464. agent: User agent string
  465. ref_status: Optional dictionary mapping ref name to error message (if it
  466. failed to update), or None if it was updated successfully
  467. """
  468. def __init__(
  469. self,
  470. refs: dict[bytes, Optional[bytes]],
  471. agent: Optional[bytes] = None,
  472. ref_status: Optional[dict[bytes, Optional[str]]] = None,
  473. ) -> None:
  474. """Initialize SendPackResult.
  475. Args:
  476. refs: Dictionary with all remote refs
  477. agent: User agent string
  478. ref_status: Optional dictionary mapping ref name to error message
  479. """
  480. self.refs = refs
  481. self.agent = agent
  482. self.ref_status = ref_status
  483. def __eq__(self, other: object) -> bool:
  484. """Check equality with another object."""
  485. if isinstance(other, dict):
  486. self._warn_deprecated()
  487. return self.refs == other
  488. if not isinstance(other, SendPackResult):
  489. return False
  490. return self.refs == other.refs and self.agent == other.agent
  491. def __repr__(self) -> str:
  492. """Return string representation of SendPackResult."""
  493. return f"{self.__class__.__name__}({self.refs!r}, {self.agent!r})"
  494. def _read_shallow_updates(pkt_seq: Iterable[bytes]) -> tuple[set[bytes], set[bytes]]:
  495. new_shallow = set()
  496. new_unshallow = set()
  497. for pkt in pkt_seq:
  498. if pkt == b"shallow-info\n": # Git-protocol v2
  499. continue
  500. try:
  501. cmd, sha = pkt.split(b" ", 1)
  502. except ValueError:
  503. raise GitProtocolError(f"unknown command {pkt!r}")
  504. if cmd == COMMAND_SHALLOW:
  505. new_shallow.add(sha.strip())
  506. elif cmd == COMMAND_UNSHALLOW:
  507. new_unshallow.add(sha.strip())
  508. else:
  509. raise GitProtocolError(f"unknown command {pkt!r}")
  510. return (new_shallow, new_unshallow)
  511. class _v1ReceivePackHeader:
  512. def __init__(
  513. self,
  514. capabilities: Sequence[bytes],
  515. old_refs: Mapping[bytes, bytes],
  516. new_refs: Mapping[bytes, bytes],
  517. ) -> None:
  518. self.want: set[bytes] = set()
  519. self.have: set[bytes] = set()
  520. self._it = self._handle_receive_pack_head(capabilities, old_refs, new_refs)
  521. self.sent_capabilities = False
  522. def __iter__(self) -> Iterator[Optional[bytes]]:
  523. return self._it
  524. def _handle_receive_pack_head(
  525. self,
  526. capabilities: Sequence[bytes],
  527. old_refs: Mapping[bytes, bytes],
  528. new_refs: Mapping[bytes, bytes],
  529. ) -> Iterator[Optional[bytes]]:
  530. """Handle the head of a 'git-receive-pack' request.
  531. Args:
  532. capabilities: List of negotiated capabilities
  533. old_refs: Old refs, as received from the server
  534. new_refs: Refs to change
  535. Returns:
  536. (have, want) tuple
  537. """
  538. self.have = {x for x in old_refs.values() if not x == ZERO_SHA}
  539. for refname in new_refs:
  540. if not isinstance(refname, bytes):
  541. raise TypeError(f"refname is not a bytestring: {refname!r}")
  542. old_sha1 = old_refs.get(refname, ZERO_SHA)
  543. if not isinstance(old_sha1, bytes):
  544. raise TypeError(
  545. f"old sha1 for {refname!r} is not a bytestring: {old_sha1!r}"
  546. )
  547. new_sha1 = new_refs.get(refname, ZERO_SHA)
  548. if not isinstance(new_sha1, bytes):
  549. raise TypeError(
  550. f"old sha1 for {refname!r} is not a bytestring {new_sha1!r}"
  551. )
  552. if old_sha1 != new_sha1:
  553. logger.debug(
  554. "Sending updated ref %r: %r -> %r", refname, old_sha1, new_sha1
  555. )
  556. if self.sent_capabilities:
  557. yield old_sha1 + b" " + new_sha1 + b" " + refname
  558. else:
  559. yield (
  560. old_sha1
  561. + b" "
  562. + new_sha1
  563. + b" "
  564. + refname
  565. + b"\0"
  566. + b" ".join(sorted(capabilities))
  567. )
  568. self.sent_capabilities = True
  569. if new_sha1 not in self.have and new_sha1 != ZERO_SHA:
  570. self.want.add(new_sha1)
  571. yield None
  572. def _read_side_band64k_data(pkt_seq: Iterable[bytes]) -> Iterator[tuple[int, bytes]]:
  573. """Read per-channel data.
  574. This requires the side-band-64k capability.
  575. Args:
  576. pkt_seq: Sequence of packets to read
  577. """
  578. for pkt in pkt_seq:
  579. channel = ord(pkt[:1])
  580. yield channel, pkt[1:]
  581. def find_capability(
  582. capabilities: Iterable[bytes], key: bytes, value: Optional[bytes]
  583. ) -> Optional[bytes]:
  584. """Find a capability with a specific key and value."""
  585. for capability in capabilities:
  586. k, v = parse_capability(capability)
  587. if k != key:
  588. continue
  589. if value and v and value not in v.split(b" "):
  590. continue
  591. return capability
  592. return None
  593. def _handle_upload_pack_head(
  594. proto: Protocol,
  595. capabilities: Iterable[bytes],
  596. graph_walker: GraphWalker,
  597. wants: list[bytes],
  598. can_read: Optional[Callable[[], bool]],
  599. depth: Optional[int],
  600. protocol_version: Optional[int],
  601. ) -> tuple[Optional[set[bytes]], Optional[set[bytes]]]:
  602. """Handle the head of a 'git-upload-pack' request.
  603. Args:
  604. proto: Protocol object to read from
  605. capabilities: List of negotiated capabilities
  606. graph_walker: GraphWalker instance to call .ack() on
  607. wants: List of commits to fetch
  608. can_read: function that returns a boolean that indicates
  609. whether there is extra graph data to read on proto
  610. depth: Depth for request
  611. protocol_version: Neogiated Git protocol version.
  612. """
  613. new_shallow: Optional[set[bytes]]
  614. new_unshallow: Optional[set[bytes]]
  615. assert isinstance(wants, list) and isinstance(wants[0], bytes)
  616. wantcmd = COMMAND_WANT + b" " + wants[0]
  617. if protocol_version is None:
  618. protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
  619. if protocol_version != 2:
  620. wantcmd += b" " + b" ".join(sorted(capabilities))
  621. wantcmd += b"\n"
  622. proto.write_pkt_line(wantcmd)
  623. for want in wants[1:]:
  624. proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
  625. if depth not in (0, None) or (
  626. hasattr(graph_walker, "shallow") and graph_walker.shallow
  627. ):
  628. if protocol_version == 2:
  629. if not find_capability(capabilities, CAPABILITY_FETCH, CAPABILITY_SHALLOW):
  630. raise GitProtocolError(
  631. "server does not support shallow capability required for depth"
  632. )
  633. elif CAPABILITY_SHALLOW not in capabilities:
  634. raise GitProtocolError(
  635. "server does not support shallow capability required for depth"
  636. )
  637. if hasattr(graph_walker, "shallow"):
  638. for sha in graph_walker.shallow:
  639. proto.write_pkt_line(COMMAND_SHALLOW + b" " + sha + b"\n")
  640. if depth is not None:
  641. proto.write_pkt_line(
  642. COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
  643. )
  644. if protocol_version != 2:
  645. proto.write_pkt_line(None)
  646. have = next(graph_walker)
  647. while have:
  648. proto.write_pkt_line(COMMAND_HAVE + b" " + have + b"\n")
  649. if can_read is not None and can_read():
  650. pkt = proto.read_pkt_line()
  651. assert pkt is not None
  652. parts = pkt.rstrip(b"\n").split(b" ")
  653. if parts[0] == b"ACK":
  654. graph_walker.ack(parts[1])
  655. if parts[2] in (b"continue", b"common"):
  656. pass
  657. elif parts[2] == b"ready":
  658. break
  659. else:
  660. raise AssertionError(
  661. f"{parts[2]!r} not in ('continue', 'ready', 'common)"
  662. )
  663. have = next(graph_walker)
  664. proto.write_pkt_line(COMMAND_DONE + b"\n")
  665. if protocol_version == 2:
  666. proto.write_pkt_line(None)
  667. if depth not in (0, None):
  668. if can_read is not None:
  669. (new_shallow, new_unshallow) = _read_shallow_updates(proto.read_pkt_seq())
  670. else:
  671. new_shallow = None
  672. new_unshallow = None
  673. else:
  674. new_shallow = new_unshallow = set()
  675. return (new_shallow, new_unshallow)
  676. def _handle_upload_pack_tail(
  677. proto: "Protocol",
  678. capabilities: Set[bytes],
  679. graph_walker: "GraphWalker",
  680. pack_data: Callable[[bytes], int],
  681. progress: Optional[Callable[[bytes], None]] = None,
  682. rbufsize: int = _RBUFSIZE,
  683. protocol_version: int = 0,
  684. ) -> None:
  685. """Handle the tail of a 'git-upload-pack' request.
  686. Args:
  687. proto: Protocol object to read from
  688. capabilities: List of negotiated capabilities
  689. graph_walker: GraphWalker instance to call .ack() on
  690. pack_data: Function to call with pack data
  691. progress: Optional progress reporting function
  692. rbufsize: Read buffer size
  693. protocol_version: Neogiated Git protocol version.
  694. """
  695. pkt = proto.read_pkt_line()
  696. while pkt:
  697. parts = pkt.rstrip(b"\n").split(b" ")
  698. if protocol_version == 2 and parts[0] != b"packfile":
  699. break
  700. else:
  701. if parts[0] == b"ACK":
  702. graph_walker.ack(parts[1])
  703. if parts[0] == b"NAK":
  704. graph_walker.nak()
  705. if len(parts) < 3 or parts[2] not in (
  706. b"ready",
  707. b"continue",
  708. b"common",
  709. ):
  710. break
  711. pkt = proto.read_pkt_line()
  712. if CAPABILITY_SIDE_BAND_64K in capabilities or protocol_version == 2:
  713. if progress is None:
  714. # Just ignore progress data
  715. def progress(x: bytes) -> None:
  716. pass
  717. for chan, data in _read_side_band64k_data(proto.read_pkt_seq()):
  718. if chan == SIDE_BAND_CHANNEL_DATA:
  719. pack_data(data)
  720. elif chan == SIDE_BAND_CHANNEL_PROGRESS:
  721. progress(data)
  722. else:
  723. raise AssertionError(f"Invalid sideband channel {chan}")
  724. else:
  725. while True:
  726. data = proto.read(rbufsize)
  727. if data == b"":
  728. break
  729. pack_data(data)
  730. def _extract_symrefs_and_agent(
  731. capabilities: Iterable[bytes],
  732. ) -> tuple[dict[bytes, bytes], Optional[bytes]]:
  733. """Extract symrefs and agent from capabilities.
  734. Args:
  735. capabilities: List of capabilities
  736. Returns:
  737. (symrefs, agent) tuple
  738. """
  739. symrefs = {}
  740. agent = None
  741. for capability in capabilities:
  742. k, v = parse_capability(capability)
  743. if k == CAPABILITY_SYMREF:
  744. assert v is not None
  745. (src, dst) = v.split(b":", 1)
  746. symrefs[src] = dst
  747. if k == CAPABILITY_AGENT:
  748. agent = v
  749. return (symrefs, agent)
  750. # TODO(durin42): this doesn't correctly degrade if the server doesn't
  751. # support some capabilities. This should work properly with servers
  752. # that don't support multi_ack.
  753. class GitClient:
  754. """Git smart server client."""
  755. def __init__(
  756. self,
  757. thin_packs: bool = True,
  758. report_activity: Optional[Callable[[int, str], None]] = None,
  759. quiet: bool = False,
  760. include_tags: bool = False,
  761. ) -> None:
  762. """Create a new GitClient instance.
  763. Args:
  764. thin_packs: Whether or not thin packs should be retrieved
  765. report_activity: Optional callback for reporting transport
  766. activity.
  767. quiet: Whether to suppress output
  768. include_tags: send annotated tags when sending the objects they point
  769. to
  770. """
  771. self._report_activity = report_activity
  772. self._report_status_parser: Optional[ReportStatusParser] = None
  773. self._fetch_capabilities = set(UPLOAD_CAPABILITIES)
  774. self._fetch_capabilities.add(capability_agent())
  775. self._send_capabilities = set(RECEIVE_CAPABILITIES)
  776. self._send_capabilities.add(capability_agent())
  777. if quiet:
  778. self._send_capabilities.add(CAPABILITY_QUIET)
  779. if not thin_packs:
  780. self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
  781. if include_tags:
  782. self._fetch_capabilities.add(CAPABILITY_INCLUDE_TAG)
  783. self.protocol_version = 0 # will be overridden later
    def get_url(self, path: str) -> str:
        """Retrieves full url to given path.

        This base implementation only raises; clone() calls this method for
        clients that are not local or subprocess clients, so those clients
        presumably override it.

        Args:
          path: Repository path (as string)

        Returns:
          Url to path (as string)

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(self.get_url)
    @classmethod
    def from_parsedurl(
        cls,
        parsedurl: ParseResult,
        thin_packs: bool = True,
        report_activity: Optional[Callable[[int, str], None]] = None,
        quiet: bool = False,
        include_tags: bool = False,
        dumb: bool = False,
        username: Optional[str] = None,
        password: Optional[str] = None,
        config: Optional[Config] = None,
    ) -> "GitClient":
        """Create an instance of this client from a urlparse.parsed object.

        This base implementation only raises.

        Args:
          parsedurl: Result of urlparse()
          thin_packs: Whether or not thin packs should be retrieved
          report_activity: Optional callback for reporting transport activity
          quiet: Whether to suppress progress output
          include_tags: Whether to include tags
          dumb: Whether to use dumb HTTP transport (only for HTTP)
          username: Optional username for authentication (only for HTTP)
          password: Optional password for authentication (only for HTTP)
          config: Optional configuration object

        Returns:
          A `GitClient` object

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(cls.from_parsedurl)
    def send_pack(
        self,
        path: bytes,
        update_refs: Callable[[dict[bytes, bytes]], dict[bytes, bytes]],
        generate_pack_data: "GeneratePackDataFunc",
        progress: Optional[Callable[[bytes], None]] = None,
    ) -> SendPackResult:
        """Upload a pack to a remote repository.

        This base implementation only raises.

        Args:
          path: Repository path (as bytestring)
          update_refs: Function to determine changes to remote refs. Receive
            dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
          generate_pack_data: Function that can return a tuple
            with number of objects and list of pack data to include
          progress: Optional progress function

        Returns:
          SendPackResult object

        Raises:
          SendPackError: if server rejects the pack data
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(self.send_pack)
  842. def clone(
  843. self,
  844. path: str,
  845. target_path: str,
  846. mkdir: bool = True,
  847. bare: bool = False,
  848. origin: Optional[str] = "origin",
  849. checkout: Optional[bool] = None,
  850. branch: Optional[str] = None,
  851. progress: Optional[Callable[[bytes], None]] = None,
  852. depth: Optional[int] = None,
  853. ref_prefix: Optional[Sequence[Ref]] = None,
  854. filter_spec: Optional[bytes] = None,
  855. protocol_version: Optional[int] = None,
  856. ) -> Repo:
  857. """Clone a repository."""
  858. if mkdir:
  859. os.mkdir(target_path)
  860. try:
  861. target = None
  862. if not bare:
  863. target = Repo.init(target_path)
  864. if checkout is None:
  865. checkout = True
  866. else:
  867. if checkout:
  868. raise ValueError("checkout and bare are incompatible")
  869. target = Repo.init_bare(target_path)
  870. # TODO(jelmer): abstract method for get_location?
  871. if isinstance(self, (LocalGitClient, SubprocessGitClient)):
  872. encoded_path = path.encode("utf-8")
  873. else:
  874. encoded_path = self.get_url(path).encode("utf-8")
  875. assert target is not None
  876. if origin is not None:
  877. target_config = target.get_config()
  878. target_config.set(
  879. (b"remote", origin.encode("utf-8")), b"url", encoded_path
  880. )
  881. target_config.set(
  882. (b"remote", origin.encode("utf-8")),
  883. b"fetch",
  884. b"+refs/heads/*:refs/remotes/" + origin.encode("utf-8") + b"/*",
  885. )
  886. target_config.write_to_path()
  887. ref_message = b"clone: from " + encoded_path
  888. result = self.fetch(
  889. path.encode("utf-8"),
  890. target,
  891. progress=progress,
  892. depth=depth,
  893. ref_prefix=ref_prefix,
  894. filter_spec=filter_spec,
  895. protocol_version=protocol_version,
  896. )
  897. if origin is not None:
  898. _import_remote_refs(
  899. target.refs, origin, result.refs, message=ref_message
  900. )
  901. origin_head = result.symrefs.get(b"HEAD")
  902. origin_sha = result.refs.get(b"HEAD")
  903. if origin is None or (origin_sha and not origin_head):
  904. # set detached HEAD
  905. if origin_sha is not None:
  906. target.refs[b"HEAD"] = origin_sha
  907. head = origin_sha
  908. else:
  909. head = None
  910. else:
  911. _set_origin_head(target.refs, origin.encode("utf-8"), origin_head)
  912. head_ref = _set_default_branch(
  913. target.refs,
  914. origin.encode("utf-8"),
  915. origin_head,
  916. branch.encode("utf-8") if branch is not None else None,
  917. ref_message,
  918. )
  919. # Update target head
  920. if head_ref:
  921. head = _set_head(target.refs, head_ref, ref_message)
  922. else:
  923. head = None
  924. if checkout and head is not None:
  925. target.get_worktree().reset_index()
  926. except BaseException:
  927. if target is not None:
  928. target.close()
  929. if mkdir:
  930. import shutil
  931. shutil.rmtree(target_path)
  932. raise
  933. return target
  934. def fetch(
  935. self,
  936. path: bytes,
  937. target: BaseRepo,
  938. determine_wants: Optional["DetermineWantsFunc"] = None,
  939. progress: Optional[Callable[[bytes], None]] = None,
  940. depth: Optional[int] = None,
  941. ref_prefix: Optional[Sequence[Ref]] = None,
  942. filter_spec: Optional[bytes] = None,
  943. protocol_version: Optional[int] = None,
  944. ) -> FetchPackResult:
  945. """Fetch into a target repository.
  946. Args:
  947. path: Path to fetch from (as bytestring)
  948. target: Target repository to fetch into
  949. determine_wants: Optional function to determine what refs to fetch.
  950. Receives dictionary of name->sha, should return
  951. list of shas to fetch. Defaults to all shas.
  952. progress: Optional progress function
  953. depth: Depth to fetch at
  954. ref_prefix: List of prefixes of desired references, as a list of
  955. bytestrings. Filtering is done by the server if supported, and
  956. client side otherwise.
  957. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  958. Only used if the server supports the Git protocol-v2 'filter'
  959. feature, and ignored otherwise.
  960. protocol_version: Desired Git protocol version. By default the highest
  961. mutually supported protocol version will be used.
  962. Returns:
  963. Dictionary with all remote refs (not just those fetched)
  964. """
  965. if determine_wants is None:
  966. determine_wants = target.object_store.determine_wants_all
  967. if CAPABILITY_THIN_PACK in self._fetch_capabilities:
  968. from tempfile import SpooledTemporaryFile
  969. f: IO[bytes] = SpooledTemporaryFile(
  970. max_size=PACK_SPOOL_FILE_MAX_SIZE,
  971. prefix="incoming-",
  972. dir=getattr(target.object_store, "path", None),
  973. )
  974. def commit() -> None:
  975. if f.tell():
  976. f.seek(0)
  977. target.object_store.add_thin_pack(f.read, None, progress=progress) # type: ignore
  978. f.close()
  979. def abort() -> None:
  980. f.close()
  981. else:
  982. f, commit, abort = target.object_store.add_pack()
  983. try:
  984. result = self.fetch_pack(
  985. path,
  986. determine_wants,
  987. target.get_graph_walker(),
  988. f.write,
  989. progress=progress,
  990. depth=depth,
  991. ref_prefix=ref_prefix,
  992. filter_spec=filter_spec,
  993. protocol_version=protocol_version,
  994. )
  995. except BaseException:
  996. abort()
  997. raise
  998. else:
  999. commit()
  1000. target.update_shallow(result.new_shallow, result.new_unshallow)
  1001. return result
  1002. def fetch_pack(
  1003. self,
  1004. path: bytes,
  1005. determine_wants: "DetermineWantsFunc",
  1006. graph_walker: GraphWalker,
  1007. pack_data: Callable[[bytes], int],
  1008. *,
  1009. progress: Optional[Callable[[bytes], None]] = None,
  1010. depth: Optional[int] = None,
  1011. ref_prefix: Optional[Sequence[Ref]] = None,
  1012. filter_spec: Optional[bytes] = None,
  1013. protocol_version: Optional[int] = None,
  1014. ) -> FetchPackResult:
  1015. """Retrieve a pack from a git smart server.
  1016. Args:
  1017. path: Remote path to fetch from
  1018. determine_wants: Function determine what refs
  1019. to fetch. Receives dictionary of name->sha, should return
  1020. list of shas to fetch.
  1021. graph_walker: Object with next() and ack().
  1022. pack_data: Callback called for each bit of data in the pack
  1023. progress: Callback for progress reports (strings)
  1024. depth: Shallow fetch depth
  1025. ref_prefix: List of prefixes of desired references, as a list of
  1026. bytestrings. Filtering is done by the server if supported, and
  1027. client side otherwise.
  1028. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  1029. Only used if the server supports the Git protocol-v2 'filter'
  1030. feature, and ignored otherwise.
  1031. protocol_version: Desired Git protocol version. By default the highest
  1032. mutually supported protocol version will be used.
  1033. Returns:
  1034. FetchPackResult object
  1035. """
  1036. raise NotImplementedError(self.fetch_pack)
  1037. def get_refs(
  1038. self,
  1039. path: bytes,
  1040. protocol_version: Optional[int] = None,
  1041. ref_prefix: Optional[Sequence[Ref]] = None,
  1042. ) -> LsRemoteResult:
  1043. """Retrieve the current refs from a git smart server.
  1044. Args:
  1045. path: Path to the repo to fetch from. (as bytestring)
  1046. protocol_version: Desired Git protocol version.
  1047. ref_prefix: Prefix filter for refs.
  1048. Returns:
  1049. LsRemoteResult object with refs and symrefs
  1050. """
  1051. raise NotImplementedError(self.get_refs)
  1052. @staticmethod
  1053. def _should_send_pack(new_refs: Mapping[bytes, bytes]) -> bool:
  1054. # The packfile MUST NOT be sent if the only command used is delete.
  1055. return any(sha != ZERO_SHA for sha in new_refs.values())
  1056. def _negotiate_receive_pack_capabilities(
  1057. self, server_capabilities: set[bytes]
  1058. ) -> tuple[set[bytes], Optional[bytes]]:
  1059. negotiated_capabilities = self._send_capabilities & server_capabilities
  1060. (_symrefs, agent) = _extract_symrefs_and_agent(server_capabilities)
  1061. (extract_capability_names(server_capabilities) - KNOWN_RECEIVE_CAPABILITIES)
  1062. # TODO(jelmer): warn about unknown capabilities
  1063. return (negotiated_capabilities, agent)
  1064. def _handle_receive_pack_tail(
  1065. self,
  1066. proto: Protocol,
  1067. capabilities: Set[bytes],
  1068. progress: Optional[Callable[[bytes], None]] = None,
  1069. ) -> Optional[dict[bytes, Optional[str]]]:
  1070. """Handle the tail of a 'git-receive-pack' request.
  1071. Args:
  1072. proto: Protocol object to read from
  1073. capabilities: List of negotiated capabilities
  1074. progress: Optional progress reporting function
  1075. Returns:
  1076. dict mapping ref name to:
  1077. error message if the ref failed to update
  1078. None if it was updated successfully
  1079. """
  1080. if CAPABILITY_SIDE_BAND_64K in capabilities or self.protocol_version == 2:
  1081. if progress is None:
  1082. def progress(x: bytes) -> None:
  1083. pass
  1084. if CAPABILITY_REPORT_STATUS in capabilities:
  1085. assert self._report_status_parser is not None
  1086. pktline_parser = PktLineParser(self._report_status_parser.handle_packet)
  1087. for chan, data in _read_side_band64k_data(proto.read_pkt_seq()):
  1088. if chan == SIDE_BAND_CHANNEL_DATA:
  1089. if CAPABILITY_REPORT_STATUS in capabilities:
  1090. pktline_parser.parse(data)
  1091. elif chan == SIDE_BAND_CHANNEL_PROGRESS:
  1092. progress(data)
  1093. else:
  1094. raise AssertionError(f"Invalid sideband channel {chan}")
  1095. else:
  1096. if CAPABILITY_REPORT_STATUS in capabilities:
  1097. assert self._report_status_parser
  1098. for pkt in proto.read_pkt_seq():
  1099. self._report_status_parser.handle_packet(pkt)
  1100. if self._report_status_parser is not None:
  1101. return dict(self._report_status_parser.check())
  1102. return None
  1103. def _negotiate_upload_pack_capabilities(
  1104. self, server_capabilities: set[bytes]
  1105. ) -> tuple[set[bytes], dict[bytes, bytes], Optional[bytes]]:
  1106. (extract_capability_names(server_capabilities) - KNOWN_UPLOAD_CAPABILITIES)
  1107. # TODO(jelmer): warn about unknown capabilities
  1108. fetch_capa = None
  1109. for capability in server_capabilities:
  1110. k, v = parse_capability(capability)
  1111. if self.protocol_version == 2 and k == CAPABILITY_FETCH:
  1112. fetch_capa = CAPABILITY_FETCH
  1113. fetch_features = []
  1114. assert v is not None
  1115. v_list = v.strip().split(b" ")
  1116. if b"shallow" in v_list:
  1117. fetch_features.append(CAPABILITY_SHALLOW)
  1118. if b"filter" in v_list:
  1119. fetch_features.append(CAPABILITY_FILTER)
  1120. for i in range(len(fetch_features)):
  1121. if i == 0:
  1122. fetch_capa += b"="
  1123. else:
  1124. fetch_capa += b" "
  1125. fetch_capa += fetch_features[i]
  1126. (symrefs, agent) = _extract_symrefs_and_agent(server_capabilities)
  1127. negotiated_capabilities = self._fetch_capabilities & server_capabilities
  1128. if fetch_capa:
  1129. negotiated_capabilities.add(fetch_capa)
  1130. return (negotiated_capabilities, symrefs, agent)
  1131. def archive(
  1132. self,
  1133. path: bytes,
  1134. committish: bytes,
  1135. write_data: Callable[[bytes], None],
  1136. progress: Optional[Callable[[bytes], None]] = None,
  1137. write_error: Optional[Callable[[bytes], None]] = None,
  1138. format: Optional[bytes] = None,
  1139. subdirs: Optional[Sequence[bytes]] = None,
  1140. prefix: Optional[bytes] = None,
  1141. ) -> None:
  1142. """Retrieve an archive of the specified tree."""
  1143. raise NotImplementedError(self.archive)
  1144. @staticmethod
  1145. def _warn_filter_objects() -> None:
  1146. import warnings
  1147. warnings.warn(
  1148. "object filtering not recognized by server, ignoring",
  1149. UserWarning,
  1150. )
  1151. def check_wants(wants: Set[bytes], refs: Mapping[bytes, bytes]) -> None:
  1152. """Check that a set of wants is valid.
  1153. Args:
  1154. wants: Set of object SHAs to fetch
  1155. refs: Refs dictionary to check against
  1156. """
  1157. missing = set(wants) - {
  1158. v for (k, v) in refs.items() if not k.endswith(PEELED_TAG_SUFFIX)
  1159. }
  1160. if missing:
  1161. raise InvalidWants(missing)
  1162. def _remote_error_from_stderr(stderr: Optional[IO[bytes]]) -> Exception:
  1163. if stderr is None:
  1164. return HangupException()
  1165. lines = [line.rstrip(b"\n") for line in stderr.readlines()]
  1166. for line in lines:
  1167. if line.startswith(b"ERROR: "):
  1168. return GitProtocolError(line[len(b"ERROR: ") :].decode("utf-8", "replace"))
  1169. return HangupException(lines)
  1170. class TraditionalGitClient(GitClient):
  1171. """Traditional Git client."""
  1172. DEFAULT_ENCODING = "utf-8"
  1173. def __init__(
  1174. self,
  1175. path_encoding: str = DEFAULT_ENCODING,
  1176. thin_packs: bool = True,
  1177. report_activity: Optional[Callable[[int, str], None]] = None,
  1178. quiet: bool = False,
  1179. include_tags: bool = False,
  1180. ) -> None:
  1181. """Initialize a TraditionalGitClient.
  1182. Args:
  1183. path_encoding: Encoding for paths (default: utf-8)
  1184. thin_packs: Whether or not thin packs should be retrieved
  1185. report_activity: Optional callback for reporting transport activity
  1186. quiet: Whether to suppress progress output
  1187. include_tags: Whether to include tags
  1188. """
  1189. self._remote_path_encoding = path_encoding
  1190. super().__init__(
  1191. thin_packs=thin_packs,
  1192. report_activity=report_activity,
  1193. quiet=quiet,
  1194. include_tags=include_tags,
  1195. )
  1196. def _connect(
  1197. self,
  1198. cmd: bytes,
  1199. path: Union[str, bytes],
  1200. protocol_version: Optional[int] = None,
  1201. ) -> tuple[Protocol, Callable[[], bool], Optional[IO[bytes]]]:
  1202. """Create a connection to the server.
  1203. This method is abstract - concrete implementations should
  1204. implement their own variant which connects to the server and
  1205. returns an initialized Protocol object with the service ready
  1206. for use and a can_read function which may be used to see if
  1207. reads would block.
  1208. Args:
  1209. cmd: The git service name to which we should connect.
  1210. path: The path we should pass to the service. (as bytestirng)
  1211. protocol_version: Desired Git protocol version. By default the highest
  1212. mutually supported protocol version will be used.
  1213. """
  1214. raise NotImplementedError
  1215. def send_pack(
  1216. self,
  1217. path: bytes,
  1218. update_refs: Callable[[dict[bytes, bytes]], dict[bytes, bytes]],
  1219. generate_pack_data: "GeneratePackDataFunc",
  1220. progress: Optional[Callable[[bytes], None]] = None,
  1221. ) -> SendPackResult:
  1222. """Upload a pack to a remote repository.
  1223. Args:
  1224. path: Repository path (as bytestring)
  1225. update_refs: Function to determine changes to remote refs.
  1226. Receive dict with existing remote refs, returns dict with
  1227. changed refs (name -> sha, where sha=ZERO_SHA for deletions)
  1228. generate_pack_data: Function that can return a tuple with
  1229. number of objects and pack data to upload.
  1230. progress: Optional callback called with progress updates
  1231. Returns:
  1232. SendPackResult
  1233. Raises:
  1234. SendPackError: if server rejects the pack data
  1235. """
  1236. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
  1237. proto, _unused_can_read, stderr = self._connect(b"receive-pack", path)
  1238. with proto:
  1239. try:
  1240. old_refs, server_capabilities = read_pkt_refs_v1(proto.read_pkt_seq())
  1241. except HangupException as exc:
  1242. raise _remote_error_from_stderr(stderr) from exc
  1243. (
  1244. negotiated_capabilities,
  1245. agent,
  1246. ) = self._negotiate_receive_pack_capabilities(server_capabilities)
  1247. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  1248. self._report_status_parser = ReportStatusParser()
  1249. report_status_parser = self._report_status_parser
  1250. try:
  1251. new_refs = orig_new_refs = update_refs(old_refs)
  1252. except BaseException:
  1253. proto.write_pkt_line(None)
  1254. raise
  1255. if set(new_refs.items()).issubset(set(old_refs.items())):
  1256. proto.write_pkt_line(None)
  1257. # Convert new_refs to match SendPackResult expected type
  1258. return SendPackResult(
  1259. _to_optional_dict(new_refs), agent=agent, ref_status={}
  1260. )
  1261. if CAPABILITY_DELETE_REFS not in server_capabilities:
  1262. # Server does not support deletions. Fail later.
  1263. new_refs = dict(orig_new_refs)
  1264. for ref, sha in orig_new_refs.items():
  1265. if sha == ZERO_SHA:
  1266. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  1267. assert report_status_parser is not None
  1268. report_status_parser._ref_statuses.append(
  1269. b"ng " + ref + b" remote does not support deleting refs"
  1270. )
  1271. del new_refs[ref]
  1272. if new_refs is None:
  1273. proto.write_pkt_line(None)
  1274. return SendPackResult(old_refs, agent=agent, ref_status={})
  1275. if len(new_refs) == 0 and orig_new_refs:
  1276. # NOOP - Original new refs filtered out by policy
  1277. proto.write_pkt_line(None)
  1278. if report_status_parser is not None:
  1279. ref_status = dict(report_status_parser.check())
  1280. else:
  1281. ref_status = None
  1282. # Convert to Optional type for SendPackResult
  1283. return SendPackResult(
  1284. _to_optional_dict(old_refs), agent=agent, ref_status=ref_status
  1285. )
  1286. header_handler = _v1ReceivePackHeader(
  1287. list(negotiated_capabilities),
  1288. old_refs,
  1289. new_refs,
  1290. )
  1291. for pkt in header_handler:
  1292. proto.write_pkt_line(pkt)
  1293. pack_data_count, pack_data = generate_pack_data(
  1294. header_handler.have,
  1295. header_handler.want,
  1296. (CAPABILITY_OFS_DELTA in negotiated_capabilities),
  1297. )
  1298. if self._should_send_pack(new_refs):
  1299. for chunk in PackChunkGenerator(pack_data_count, pack_data):
  1300. proto.write(chunk)
  1301. ref_status = self._handle_receive_pack_tail(
  1302. proto, negotiated_capabilities, progress
  1303. )
  1304. refs_with_optional_2: dict[bytes, Optional[bytes]] = {
  1305. k: v for k, v in new_refs.items()
  1306. }
  1307. return SendPackResult(
  1308. refs_with_optional_2, agent=agent, ref_status=ref_status
  1309. )
  1310. def fetch_pack(
  1311. self,
  1312. path: bytes,
  1313. determine_wants: "DetermineWantsFunc",
  1314. graph_walker: GraphWalker,
  1315. pack_data: Callable[[bytes], int],
  1316. progress: Optional[Callable[[bytes], None]] = None,
  1317. depth: Optional[int] = None,
  1318. ref_prefix: Optional[Sequence[Ref]] = None,
  1319. filter_spec: Optional[bytes] = None,
  1320. protocol_version: Optional[int] = None,
  1321. ) -> FetchPackResult:
  1322. """Retrieve a pack from a git smart server.
  1323. Args:
  1324. path: Remote path to fetch from
  1325. determine_wants: Function determine what refs
  1326. to fetch. Receives dictionary of name->sha, should return
  1327. list of shas to fetch.
  1328. graph_walker: Object with next() and ack().
  1329. pack_data: Callback called for each bit of data in the pack
  1330. progress: Callback for progress reports (strings)
  1331. depth: Shallow fetch depth
  1332. ref_prefix: List of prefixes of desired references, as a list of
  1333. bytestrings. Filtering is done by the server if supported, and
  1334. client side otherwise.
  1335. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  1336. Only used if the server supports the Git protocol-v2 'filter'
  1337. feature, and ignored otherwise.
  1338. protocol_version: Desired Git protocol version. By default the highest
  1339. mutually supported protocol version will be used.
  1340. Returns:
  1341. FetchPackResult object
  1342. """
  1343. if (
  1344. protocol_version is not None
  1345. and protocol_version not in GIT_PROTOCOL_VERSIONS
  1346. ):
  1347. raise ValueError(f"unknown Git protocol version {protocol_version}")
  1348. proto, can_read, stderr = self._connect(b"upload-pack", path, protocol_version)
  1349. server_protocol_version = negotiate_protocol_version(proto)
  1350. if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
  1351. raise ValueError(
  1352. f"unknown Git protocol version {server_protocol_version} used by server"
  1353. )
  1354. if protocol_version and server_protocol_version > protocol_version:
  1355. raise ValueError(
  1356. f"bad Git protocol version {server_protocol_version} used by server"
  1357. )
  1358. self.protocol_version = server_protocol_version
  1359. with proto:
  1360. # refs may have None values in v2 but not in v1
  1361. refs: dict[bytes, Optional[bytes]]
  1362. if self.protocol_version == 2:
  1363. try:
  1364. server_capabilities = read_server_capabilities(proto.read_pkt_seq())
  1365. except HangupException as exc:
  1366. raise _remote_error_from_stderr(stderr) from exc
  1367. (
  1368. negotiated_capabilities,
  1369. symrefs,
  1370. agent,
  1371. ) = self._negotiate_upload_pack_capabilities(server_capabilities)
  1372. proto.write_pkt_line(b"command=ls-refs\n")
  1373. proto.write(b"0001") # delim-pkt
  1374. proto.write_pkt_line(b"symrefs")
  1375. proto.write_pkt_line(b"peel")
  1376. if ref_prefix is None:
  1377. ref_prefix = DEFAULT_REF_PREFIX
  1378. for prefix in ref_prefix:
  1379. proto.write_pkt_line(b"ref-prefix " + prefix)
  1380. proto.write_pkt_line(None)
  1381. refs, symrefs, _peeled = read_pkt_refs_v2(proto.read_pkt_seq())
  1382. else:
  1383. try:
  1384. refs_v1, server_capabilities = read_pkt_refs_v1(
  1385. proto.read_pkt_seq()
  1386. )
  1387. # v1 refs never have None values, but we need Optional type for compatibility
  1388. refs = _to_optional_dict(refs_v1)
  1389. except HangupException as exc:
  1390. raise _remote_error_from_stderr(stderr) from exc
  1391. (
  1392. negotiated_capabilities,
  1393. symrefs,
  1394. agent,
  1395. ) = self._negotiate_upload_pack_capabilities(server_capabilities)
  1396. if ref_prefix is not None:
  1397. refs = filter_ref_prefix(refs, ref_prefix)
  1398. if refs is None:
  1399. proto.write_pkt_line(None)
  1400. return FetchPackResult(refs, symrefs, agent)
  1401. try:
  1402. # Filter out None values (shouldn't be any in v1 protocol)
  1403. refs_no_none = {k: v for k, v in refs.items() if v is not None}
  1404. # Handle both old and new style determine_wants
  1405. try:
  1406. wants = determine_wants(refs_no_none, depth)
  1407. except TypeError:
  1408. # Old-style determine_wants that doesn't accept depth
  1409. wants = determine_wants(refs_no_none)
  1410. except BaseException:
  1411. proto.write_pkt_line(None)
  1412. raise
  1413. if wants is not None:
  1414. wants = [cid for cid in wants if cid != ZERO_SHA]
  1415. if not wants:
  1416. proto.write_pkt_line(None)
  1417. return FetchPackResult(refs, symrefs, agent)
  1418. if self.protocol_version == 2:
  1419. proto.write_pkt_line(b"command=fetch\n")
  1420. proto.write(b"0001") # delim-pkt
  1421. if CAPABILITY_THIN_PACK in self._fetch_capabilities:
  1422. proto.write(pkt_line(b"thin-pack\n"))
  1423. if (
  1424. find_capability(
  1425. list(negotiated_capabilities),
  1426. CAPABILITY_FETCH,
  1427. CAPABILITY_FILTER,
  1428. )
  1429. and filter_spec
  1430. ):
  1431. proto.write(pkt_line(b"filter %s\n" % filter_spec))
  1432. elif filter_spec:
  1433. self._warn_filter_objects()
  1434. elif filter_spec:
  1435. self._warn_filter_objects()
  1436. (new_shallow, new_unshallow) = _handle_upload_pack_head(
  1437. proto,
  1438. list(negotiated_capabilities),
  1439. graph_walker,
  1440. wants,
  1441. can_read,
  1442. depth=depth,
  1443. protocol_version=self.protocol_version,
  1444. )
  1445. _handle_upload_pack_tail(
  1446. proto,
  1447. negotiated_capabilities,
  1448. graph_walker,
  1449. pack_data,
  1450. progress,
  1451. protocol_version=self.protocol_version,
  1452. )
  1453. return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
  1454. def get_refs(
  1455. self,
  1456. path: bytes,
  1457. protocol_version: Optional[int] = None,
  1458. ref_prefix: Optional[Sequence[Ref]] = None,
  1459. ) -> LsRemoteResult:
  1460. """Retrieve the current refs from a git smart server."""
  1461. # stock `git ls-remote` uses upload-pack
  1462. if (
  1463. protocol_version is not None
  1464. and protocol_version not in GIT_PROTOCOL_VERSIONS
  1465. ):
  1466. raise ValueError(f"unknown Git protocol version {protocol_version}")
  1467. proto, _, stderr = self._connect(b"upload-pack", path, protocol_version)
  1468. server_protocol_version = negotiate_protocol_version(proto)
  1469. if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
  1470. raise ValueError(
  1471. f"unknown Git protocol version {server_protocol_version} used by server"
  1472. )
  1473. if protocol_version and server_protocol_version > protocol_version:
  1474. raise ValueError(
  1475. f"bad Git protocol version {server_protocol_version} used by server"
  1476. )
  1477. self.protocol_version = server_protocol_version
  1478. if self.protocol_version == 2:
  1479. server_capabilities = read_server_capabilities(proto.read_pkt_seq())
  1480. proto.write_pkt_line(b"command=ls-refs\n")
  1481. proto.write(b"0001") # delim-pkt
  1482. proto.write_pkt_line(b"symrefs")
  1483. proto.write_pkt_line(b"peel")
  1484. if ref_prefix is None:
  1485. ref_prefix = DEFAULT_REF_PREFIX
  1486. for prefix in ref_prefix:
  1487. proto.write_pkt_line(b"ref-prefix " + prefix)
  1488. proto.write_pkt_line(None)
  1489. with proto:
  1490. try:
  1491. refs, symrefs, peeled = read_pkt_refs_v2(proto.read_pkt_seq())
  1492. except HangupException as exc:
  1493. raise _remote_error_from_stderr(stderr) from exc
  1494. proto.write_pkt_line(None)
  1495. for refname, refvalue in peeled.items():
  1496. refs[refname + PEELED_TAG_SUFFIX] = refvalue
  1497. return LsRemoteResult(refs, symrefs)
  1498. else:
  1499. with proto:
  1500. try:
  1501. refs_v1, server_capabilities = read_pkt_refs_v1(
  1502. proto.read_pkt_seq()
  1503. )
  1504. # v1 refs never have None values, but we need Optional type for compatibility
  1505. refs = _to_optional_dict(refs_v1)
  1506. except HangupException as exc:
  1507. raise _remote_error_from_stderr(stderr) from exc
  1508. proto.write_pkt_line(None)
  1509. (symrefs, _agent) = _extract_symrefs_and_agent(server_capabilities)
  1510. if ref_prefix is not None:
  1511. refs = filter_ref_prefix(refs, ref_prefix)
  1512. return LsRemoteResult(refs, symrefs)
  1513. def archive(
  1514. self,
  1515. path: bytes,
  1516. committish: bytes,
  1517. write_data: Callable[[bytes], None],
  1518. progress: Optional[Callable[[bytes], None]] = None,
  1519. write_error: Optional[Callable[[bytes], None]] = None,
  1520. format: Optional[bytes] = None,
  1521. subdirs: Optional[Sequence[bytes]] = None,
  1522. prefix: Optional[bytes] = None,
  1523. ) -> None:
  1524. """Request an archive of a specific commit.
  1525. Args:
  1526. path: Repository path
  1527. committish: Commit ID or ref to archive
  1528. write_data: Function to write archive data
  1529. progress: Optional progress callback
  1530. write_error: Optional error callback
  1531. format: Optional archive format
  1532. subdirs: Optional subdirectories to include
  1533. prefix: Optional prefix for archived files
  1534. """
  1535. proto, _can_read, stderr = self._connect(b"upload-archive", path)
  1536. with proto:
  1537. if format is not None:
  1538. proto.write_pkt_line(b"argument --format=" + format)
  1539. proto.write_pkt_line(b"argument " + committish)
  1540. if subdirs is not None:
  1541. for subdir in subdirs:
  1542. proto.write_pkt_line(b"argument " + subdir)
  1543. if prefix is not None:
  1544. proto.write_pkt_line(b"argument --prefix=" + prefix)
  1545. proto.write_pkt_line(None)
  1546. try:
  1547. pkt = proto.read_pkt_line()
  1548. except HangupException as exc:
  1549. raise _remote_error_from_stderr(stderr) from exc
  1550. if pkt == b"NACK\n" or pkt == b"NACK":
  1551. return
  1552. elif pkt == b"ACK\n" or pkt == b"ACK":
  1553. pass
  1554. elif pkt and pkt.startswith(b"ERR "):
  1555. raise GitProtocolError(pkt[4:].rstrip(b"\n").decode("utf-8", "replace"))
  1556. else:
  1557. raise AssertionError(f"invalid response {pkt!r}")
  1558. ret = proto.read_pkt_line()
  1559. if ret is not None:
  1560. raise AssertionError("expected pkt tail")
  1561. for chan, data in _read_side_band64k_data(proto.read_pkt_seq()):
  1562. if chan == SIDE_BAND_CHANNEL_DATA:
  1563. write_data(data)
  1564. elif chan == SIDE_BAND_CHANNEL_PROGRESS:
  1565. if progress is not None:
  1566. progress(data)
  1567. elif chan == SIDE_BAND_CHANNEL_FATAL:
  1568. if write_error is not None:
  1569. write_error(data)
  1570. else:
  1571. raise AssertionError(f"Invalid sideband channel {chan}")
  1572. class TCPGitClient(TraditionalGitClient):
  1573. """A Git Client that works over TCP directly (i.e. git://)."""
  1574. def __init__(
  1575. self,
  1576. host: str,
  1577. port: Optional[int] = None,
  1578. thin_packs: bool = True,
  1579. report_activity: Optional[Callable[[int, str], None]] = None,
  1580. quiet: bool = False,
  1581. include_tags: bool = False,
  1582. ) -> None:
  1583. """Initialize a TCPGitClient.
  1584. Args:
  1585. host: Hostname or IP address to connect to
  1586. port: Port number (defaults to TCP_GIT_PORT)
  1587. thin_packs: Whether or not thin packs should be retrieved
  1588. report_activity: Optional callback for reporting transport activity
  1589. quiet: Whether to suppress progress output
  1590. include_tags: Whether to include tags
  1591. """
  1592. if port is None:
  1593. port = TCP_GIT_PORT
  1594. self._host = host
  1595. self._port = port
  1596. super().__init__(
  1597. thin_packs=thin_packs,
  1598. report_activity=report_activity,
  1599. quiet=quiet,
  1600. include_tags=include_tags,
  1601. )
  1602. @classmethod
  1603. def from_parsedurl(
  1604. cls,
  1605. parsedurl: ParseResult,
  1606. thin_packs: bool = True,
  1607. report_activity: Optional[Callable[[int, str], None]] = None,
  1608. quiet: bool = False,
  1609. include_tags: bool = False,
  1610. dumb: bool = False,
  1611. username: Optional[str] = None,
  1612. password: Optional[str] = None,
  1613. config: Optional[Config] = None,
  1614. ) -> "TCPGitClient":
  1615. """Create an instance of TCPGitClient from a parsed URL.
  1616. Args:
  1617. parsedurl: Result of urlparse()
  1618. thin_packs: Whether or not thin packs should be retrieved
  1619. report_activity: Optional callback for reporting transport activity
  1620. quiet: Whether to suppress progress output
  1621. include_tags: Whether to include tags
  1622. dumb: Whether to use dumb protocol (not used for TCPGitClient)
  1623. username: Username for authentication (not used for TCPGitClient)
  1624. password: Password for authentication (not used for TCPGitClient)
  1625. config: Configuration object (not used for TCPGitClient)
  1626. Returns:
  1627. A TCPGitClient instance
  1628. """
  1629. assert parsedurl.hostname is not None
  1630. return cls(
  1631. parsedurl.hostname,
  1632. port=parsedurl.port,
  1633. thin_packs=thin_packs,
  1634. report_activity=report_activity,
  1635. quiet=quiet,
  1636. include_tags=include_tags,
  1637. )
  1638. def get_url(self, path: str) -> str:
  1639. r"""Get the URL for a TCP git connection.
  1640. Args:
  1641. path: Repository path
  1642. Returns:
  1643. ``git://`` URL for the path
  1644. """
  1645. # IPv6 addresses contain colons and need to be wrapped in brackets
  1646. if ":" in self._host:
  1647. netloc = f"[{self._host}]"
  1648. else:
  1649. netloc = self._host
  1650. if self._port is not None and self._port != TCP_GIT_PORT:
  1651. netloc += f":{self._port}"
  1652. return urlunsplit(("git", netloc, path, "", ""))
  1653. def _connect(
  1654. self,
  1655. cmd: bytes,
  1656. path: Union[str, bytes],
  1657. protocol_version: Optional[int] = None,
  1658. ) -> tuple[Protocol, Callable[[], bool], Optional[IO[bytes]]]:
  1659. if not isinstance(cmd, bytes):
  1660. raise TypeError(cmd)
  1661. if not isinstance(path, bytes):
  1662. path = path.encode(self._remote_path_encoding)
  1663. sockaddrs = socket.getaddrinfo(
  1664. self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM
  1665. )
  1666. s = None
  1667. err = OSError(f"no address found for {self._host}")
  1668. for family, socktype, protof, canonname, sockaddr in sockaddrs:
  1669. s = socket.socket(family, socktype, protof)
  1670. s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  1671. try:
  1672. s.connect(sockaddr)
  1673. break
  1674. except OSError as e:
  1675. err = e
  1676. if s is not None:
  1677. s.close()
  1678. s = None
  1679. if s is None:
  1680. raise err
  1681. # -1 means system default buffering
  1682. rfile = s.makefile("rb", -1)
  1683. # 0 means unbuffered
  1684. wfile = s.makefile("wb", 0)
  1685. def close() -> None:
  1686. rfile.close()
  1687. wfile.close()
  1688. s.close()
  1689. proto = Protocol(
  1690. rfile.read,
  1691. wfile.write,
  1692. close,
  1693. report_activity=self._report_activity,
  1694. )
  1695. if path.startswith(b"/~"):
  1696. path = path[1:]
  1697. if cmd == b"upload-pack":
  1698. if protocol_version is None:
  1699. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  1700. else:
  1701. self.protocol_version = protocol_version
  1702. else:
  1703. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
  1704. if cmd == b"upload-pack" and self.protocol_version == 2:
  1705. # Git protocol version advertisement is hidden behind two NUL bytes
  1706. # for compatibility with older Git server implementations, which
  1707. # would crash if something other than a "host=" header was found
  1708. # after the first NUL byte.
  1709. version_str = b"\0\0version=%d\0" % self.protocol_version
  1710. else:
  1711. version_str = b""
  1712. # TODO(jelmer): Alternative to ascii?
  1713. proto.send_cmd(
  1714. b"git-" + cmd, path, b"host=" + self._host.encode("ascii") + version_str
  1715. )
  1716. return proto, lambda: _fileno_can_read(s.fileno()), None
  1717. class SubprocessWrapper:
  1718. """A socket-like object that talks to a subprocess via pipes."""
  1719. def __init__(self, proc: subprocess.Popen[bytes]) -> None:
  1720. """Initialize a SubprocessWrapper.
  1721. Args:
  1722. proc: Subprocess.Popen instance to wrap
  1723. """
  1724. self.proc = proc
  1725. assert proc.stdout is not None
  1726. assert proc.stdin is not None
  1727. self.read = BufferedReader(proc.stdout).read # type: ignore[type-var]
  1728. self.write = proc.stdin.write
  1729. @property
  1730. def stderr(self) -> Optional[IO[bytes]]:
  1731. """Return the stderr stream of the subprocess."""
  1732. return self.proc.stderr
  1733. def can_read(self) -> bool:
  1734. """Check if there is data available to read.
  1735. Returns: True if data is available, False otherwise
  1736. """
  1737. if sys.platform == "win32":
  1738. from msvcrt import get_osfhandle
  1739. assert self.proc.stdout is not None
  1740. handle = get_osfhandle(self.proc.stdout.fileno())
  1741. return _win32_peek_avail(handle) != 0
  1742. else:
  1743. assert self.proc.stdout is not None
  1744. return _fileno_can_read(self.proc.stdout.fileno())
  1745. def close(self, timeout: Optional[int] = 60) -> None:
  1746. """Close the subprocess and wait for it to terminate.
  1747. Args:
  1748. timeout: Maximum time to wait for subprocess to terminate (seconds)
  1749. Raises:
  1750. GitProtocolError: If subprocess doesn't terminate within timeout
  1751. """
  1752. if self.proc.stdin:
  1753. self.proc.stdin.close()
  1754. if self.proc.stdout:
  1755. self.proc.stdout.close()
  1756. if self.proc.stderr:
  1757. self.proc.stderr.close()
  1758. try:
  1759. self.proc.wait(timeout=timeout)
  1760. except subprocess.TimeoutExpired as e:
  1761. self.proc.kill()
  1762. self.proc.wait()
  1763. raise GitProtocolError(
  1764. f"Git subprocess did not terminate within {timeout} seconds; killed it."
  1765. ) from e
  1766. def find_git_command() -> list[str]:
  1767. """Find command to run for system Git (usually C Git)."""
  1768. if sys.platform == "win32": # support .exe, .bat and .cmd
  1769. try: # to avoid overhead
  1770. import pywintypes
  1771. import win32api
  1772. except ImportError: # run through cmd.exe with some overhead
  1773. return ["cmd", "/c", "git"]
  1774. else:
  1775. try:
  1776. _status, git = win32api.FindExecutable("git")
  1777. return [git]
  1778. except pywintypes.error:
  1779. return ["cmd", "/c", "git"]
  1780. else:
  1781. return ["git"]
  1782. class SubprocessGitClient(TraditionalGitClient):
  1783. """Git client that talks to a server using a subprocess."""
  1784. @classmethod
  1785. def from_parsedurl(
  1786. cls,
  1787. parsedurl: ParseResult,
  1788. thin_packs: bool = True,
  1789. report_activity: Optional[Callable[[int, str], None]] = None,
  1790. quiet: bool = False,
  1791. include_tags: bool = False,
  1792. dumb: bool = False,
  1793. username: Optional[str] = None,
  1794. password: Optional[str] = None,
  1795. config: Optional[Config] = None,
  1796. ) -> "SubprocessGitClient":
  1797. """Create an instance of SubprocessGitClient from a parsed URL.
  1798. Args:
  1799. parsedurl: Result of urlparse()
  1800. thin_packs: Whether or not thin packs should be retrieved
  1801. report_activity: Optional callback for reporting transport activity
  1802. quiet: Whether to suppress progress output
  1803. include_tags: Whether to include tags
  1804. dumb: Whether to use dumb protocol (not used for SubprocessGitClient)
  1805. username: Username for authentication (not used for SubprocessGitClient)
  1806. password: Password for authentication (not used for SubprocessGitClient)
  1807. config: Configuration object (not used for SubprocessGitClient)
  1808. Returns:
  1809. A SubprocessGitClient instance
  1810. """
  1811. return cls(
  1812. thin_packs=thin_packs,
  1813. report_activity=report_activity,
  1814. quiet=quiet,
  1815. include_tags=include_tags,
  1816. )
  1817. git_command: Optional[str] = None
  1818. def _connect(
  1819. self,
  1820. service: bytes,
  1821. path: Union[bytes, str],
  1822. protocol_version: Optional[int] = None,
  1823. ) -> tuple[Protocol, Callable[[], bool], Optional[IO[bytes]]]:
  1824. if not isinstance(service, bytes):
  1825. raise TypeError(service)
  1826. if isinstance(path, bytes):
  1827. path = path.decode(self._remote_path_encoding)
  1828. if self.git_command is None:
  1829. git_command = find_git_command()
  1830. argv = [*git_command, service.decode("ascii"), path]
  1831. p = subprocess.Popen(
  1832. argv,
  1833. bufsize=0,
  1834. stdin=subprocess.PIPE,
  1835. stdout=subprocess.PIPE,
  1836. stderr=subprocess.PIPE,
  1837. )
  1838. pw = SubprocessWrapper(p)
  1839. return (
  1840. Protocol(
  1841. pw.read,
  1842. pw.write,
  1843. pw.close,
  1844. report_activity=self._report_activity,
  1845. ),
  1846. pw.can_read,
  1847. p.stderr,
  1848. )
  1849. class LocalGitClient(GitClient):
  1850. """Git Client that just uses a local on-disk repository."""
  1851. def __init__(
  1852. self,
  1853. thin_packs: bool = True,
  1854. report_activity: Optional[Callable[[int, str], None]] = None,
  1855. config: Optional[Config] = None,
  1856. quiet: bool = False,
  1857. include_tags: bool = False,
  1858. ) -> None:
  1859. """Create a new LocalGitClient instance.
  1860. Args:
  1861. thin_packs: Whether or not thin packs should be retrieved
  1862. report_activity: Optional callback for reporting transport
  1863. activity.
  1864. config: Optional configuration object
  1865. quiet: Whether to suppress progress output
  1866. include_tags: Whether to include tags
  1867. """
  1868. self._report_activity = report_activity
  1869. self._quiet = quiet
  1870. self._include_tags = include_tags
  1871. # Ignore the thin_packs argument
  1872. def get_url(self, path: str) -> str:
  1873. """Get the URL for a local file path.
  1874. Args:
  1875. path: Local file path
  1876. Returns:
  1877. file:// URL for the path
  1878. """
  1879. return urlunsplit(("file", "", path, "", ""))
  1880. @classmethod
  1881. def from_parsedurl(
  1882. cls,
  1883. parsedurl: ParseResult,
  1884. thin_packs: bool = True,
  1885. report_activity: Optional[Callable[[int, str], None]] = None,
  1886. quiet: bool = False,
  1887. include_tags: bool = False,
  1888. dumb: bool = False,
  1889. username: Optional[str] = None,
  1890. password: Optional[str] = None,
  1891. config: Optional[Config] = None,
  1892. ) -> "LocalGitClient":
  1893. """Create an instance of LocalGitClient from a parsed URL.
  1894. Args:
  1895. parsedurl: Result of urlparse()
  1896. thin_packs: Whether or not thin packs should be retrieved
  1897. report_activity: Optional callback for reporting transport activity
  1898. quiet: Whether to suppress progress output
  1899. include_tags: Whether to include tags
  1900. dumb: Whether to use dumb protocol (not used for LocalGitClient)
  1901. username: Username for authentication (not used for LocalGitClient)
  1902. password: Password for authentication (not used for LocalGitClient)
  1903. config: Optional configuration object
  1904. Returns:
  1905. A LocalGitClient instance
  1906. """
  1907. return cls(
  1908. thin_packs=thin_packs,
  1909. report_activity=report_activity,
  1910. quiet=quiet,
  1911. include_tags=include_tags,
  1912. config=config,
  1913. )
  1914. @classmethod
  1915. def _open_repo(cls, path: Union[str, bytes]) -> "closing[Repo]":
  1916. """Open a local repository.
  1917. Args:
  1918. path: Repository path (as bytes or str)
  1919. Returns:
  1920. Repo instance wrapped in a closing context manager
  1921. """
  1922. if not isinstance(path, str):
  1923. path = os.fsdecode(path)
  1924. return closing(Repo(path))
  1925. def send_pack(
  1926. self,
  1927. path: Union[str, bytes],
  1928. update_refs: Callable[[dict[bytes, bytes]], dict[bytes, bytes]],
  1929. generate_pack_data: "GeneratePackDataFunc",
  1930. progress: Optional[Callable[[bytes], None]] = None,
  1931. ) -> SendPackResult:
  1932. """Upload a pack to a local on-disk repository.
  1933. Args:
  1934. path: Repository path (as bytestring)
  1935. update_refs: Function to determine changes to remote refs.
  1936. Receive dict with existing remote refs, returns dict with
  1937. changed refs (name -> sha, where sha=ZERO_SHA for deletions)
  1938. with number of items and pack data to upload.
  1939. generate_pack_data: Function that generates pack data given
  1940. have and want object sets
  1941. progress: Optional progress function
  1942. Returns:
  1943. SendPackResult
  1944. Raises:
  1945. SendPackError: if server rejects the pack data
  1946. """
  1947. if not progress:
  1948. def progress(x: bytes) -> None:
  1949. pass
  1950. with self._open_repo(path) as target:
  1951. old_refs = target.get_refs()
  1952. new_refs = update_refs(dict(old_refs))
  1953. have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
  1954. want = []
  1955. for refname, new_sha1 in new_refs.items():
  1956. if (
  1957. new_sha1 not in have
  1958. and new_sha1 not in want
  1959. and new_sha1 != ZERO_SHA
  1960. ):
  1961. want.append(new_sha1)
  1962. if not want and set(new_refs.items()).issubset(set(old_refs.items())):
  1963. return SendPackResult(_to_optional_dict(new_refs), ref_status={})
  1964. target.object_store.add_pack_data(*generate_pack_data(set(have), set(want)))
  1965. ref_status: dict[bytes, Optional[str]] = {}
  1966. for refname, new_sha1 in new_refs.items():
  1967. old_sha1 = old_refs.get(refname, ZERO_SHA)
  1968. if new_sha1 != ZERO_SHA:
  1969. if not target.refs.set_if_equals(refname, old_sha1, new_sha1):
  1970. msg = f"unable to set {refname!r} to {new_sha1!r}"
  1971. progress(msg.encode())
  1972. ref_status[refname] = msg
  1973. else:
  1974. if not target.refs.remove_if_equals(refname, old_sha1):
  1975. progress(f"unable to remove {refname!r}".encode())
  1976. ref_status[refname] = "unable to remove"
  1977. return SendPackResult(_to_optional_dict(new_refs), ref_status=ref_status)
  1978. def fetch(
  1979. self,
  1980. path: bytes,
  1981. target: BaseRepo,
  1982. determine_wants: Optional["DetermineWantsFunc"] = None,
  1983. progress: Optional[Callable[[bytes], None]] = None,
  1984. depth: Optional[int] = None,
  1985. ref_prefix: Optional[Sequence[bytes]] = None,
  1986. filter_spec: Optional[bytes] = None,
  1987. protocol_version: Optional[int] = None,
  1988. ) -> FetchPackResult:
  1989. """Fetch into a target repository.
  1990. Args:
  1991. path: Path to fetch from (as bytestring)
  1992. target: Target repository to fetch into
  1993. determine_wants: Optional function determine what refs
  1994. to fetch. Receives dictionary of name->sha, should return
  1995. list of shas to fetch. Defaults to all shas.
  1996. progress: Optional progress function
  1997. depth: Shallow fetch depth
  1998. ref_prefix: List of prefixes of desired references, as a list of
  1999. bytestrings. Filtering is done by the server if supported, and
  2000. client side otherwise.
  2001. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  2002. Only used if the server supports the Git protocol-v2 'filter'
  2003. feature, and ignored otherwise.
  2004. protocol_version: Optional Git protocol version
  2005. Returns:
  2006. FetchPackResult object
  2007. """
  2008. with self._open_repo(path) as r:
  2009. refs = r.fetch(
  2010. target,
  2011. determine_wants=determine_wants,
  2012. progress=progress,
  2013. depth=depth,
  2014. )
  2015. return FetchPackResult(
  2016. _to_optional_dict(refs), r.refs.get_symrefs(), agent_string()
  2017. )
  2018. def fetch_pack(
  2019. self,
  2020. path: Union[str, bytes],
  2021. determine_wants: "DetermineWantsFunc",
  2022. graph_walker: GraphWalker,
  2023. pack_data: Callable[[bytes], int],
  2024. progress: Optional[Callable[[bytes], None]] = None,
  2025. depth: Optional[int] = None,
  2026. ref_prefix: Optional[Sequence[Ref]] = None,
  2027. filter_spec: Optional[bytes] = None,
  2028. protocol_version: Optional[int] = None,
  2029. ) -> FetchPackResult:
  2030. """Retrieve a pack from a local on-disk repository.
  2031. Args:
  2032. path: Remote path to fetch from
  2033. determine_wants: Function determine what refs
  2034. to fetch. Receives dictionary of name->sha, should return
  2035. list of shas to fetch.
  2036. graph_walker: Object with next() and ack().
  2037. pack_data: Callback called for each bit of data in the pack
  2038. progress: Callback for progress reports (strings)
  2039. depth: Shallow fetch depth
  2040. ref_prefix: List of prefixes of desired references, as a list of
  2041. bytestrings. Filtering is done by the server if supported, and
  2042. client side otherwise.
  2043. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  2044. Only used if the server supports the Git protocol-v2 'filter'
  2045. feature, and ignored otherwise.
  2046. protocol_version: Optional Git protocol version
  2047. Returns:
  2048. FetchPackResult object
  2049. """
  2050. with self._open_repo(path) as r:
  2051. missing_objects = r.find_missing_objects(
  2052. determine_wants, graph_walker, progress=progress, depth=depth
  2053. )
  2054. if missing_objects is None:
  2055. other_haves = set()
  2056. object_ids = []
  2057. else:
  2058. other_haves = missing_objects.get_remote_has()
  2059. object_ids = list(missing_objects)
  2060. symrefs = r.refs.get_symrefs()
  2061. agent = agent_string()
  2062. # Did the process short-circuit (e.g. in a stateless RPC call)?
  2063. # Note that the client still expects a 0-object pack in most cases.
  2064. if object_ids is None:
  2065. return FetchPackResult(None, symrefs, agent)
  2066. write_pack_from_container(
  2067. pack_data, # type: ignore[arg-type]
  2068. r.object_store,
  2069. object_ids,
  2070. other_haves=other_haves,
  2071. )
  2072. # Convert refs to Optional type for FetchPackResult
  2073. return FetchPackResult(_to_optional_dict(r.get_refs()), symrefs, agent)
  2074. def get_refs(
  2075. self,
  2076. path: Union[str, bytes],
  2077. protocol_version: Optional[int] = None,
  2078. ref_prefix: Optional[Sequence[Ref]] = None,
  2079. ) -> LsRemoteResult:
  2080. """Retrieve the current refs from a local on-disk repository."""
  2081. with self._open_repo(path) as target:
  2082. refs_dict = target.get_refs()
  2083. refs = _to_optional_dict(refs_dict)
  2084. # Extract symrefs from the local repository
  2085. symrefs: dict[bytes, bytes] = {}
  2086. for ref in refs:
  2087. try:
  2088. # Check if this ref is symbolic by reading it directly
  2089. ref_value = target.refs.read_ref(ref)
  2090. if ref_value and ref_value.startswith(SYMREF):
  2091. # Extract the target from the symref
  2092. symrefs[ref] = ref_value[len(SYMREF) :]
  2093. except (KeyError, ValueError):
  2094. # Not a symbolic ref or error reading it
  2095. pass
  2096. return LsRemoteResult(refs, symrefs)
  2097. class BundleClient(GitClient):
  2098. """Git Client that reads from a bundle file."""
  2099. def __init__(
  2100. self,
  2101. thin_packs: bool = True,
  2102. report_activity: Optional[Callable[[int, str], None]] = None,
  2103. config: Optional[Config] = None,
  2104. quiet: bool = False,
  2105. include_tags: bool = False,
  2106. ) -> None:
  2107. """Create a new BundleClient instance.
  2108. Args:
  2109. thin_packs: Whether or not thin packs should be retrieved
  2110. report_activity: Optional callback for reporting transport
  2111. activity.
  2112. config: Optional configuration object
  2113. quiet: Whether to suppress progress output
  2114. include_tags: Whether to include tags
  2115. """
  2116. self._report_activity = report_activity
  2117. self._quiet = quiet
  2118. self._include_tags = include_tags
  2119. def get_url(self, path: str) -> str:
  2120. """Get the URL for a bundle file path.
  2121. Args:
  2122. path: Bundle file path
  2123. Returns:
  2124. The path unchanged (bundle files use local paths)
  2125. """
  2126. return path
  2127. @classmethod
  2128. def from_parsedurl(
  2129. cls,
  2130. parsedurl: ParseResult,
  2131. thin_packs: bool = True,
  2132. report_activity: Optional[Callable[[int, str], None]] = None,
  2133. quiet: bool = False,
  2134. include_tags: bool = False,
  2135. dumb: bool = False,
  2136. username: Optional[str] = None,
  2137. password: Optional[str] = None,
  2138. config: Optional[Config] = None,
  2139. ) -> "BundleClient":
  2140. """Create an instance of BundleClient from a parsed URL.
  2141. Args:
  2142. parsedurl: Result of urlparse()
  2143. thin_packs: Whether or not thin packs should be retrieved
  2144. report_activity: Optional callback for reporting transport activity
  2145. quiet: Whether to suppress progress output
  2146. include_tags: Whether to include tags
  2147. dumb: Whether to use dumb protocol (not used for BundleClient)
  2148. username: Username for authentication (not used for BundleClient)
  2149. password: Password for authentication (not used for BundleClient)
  2150. config: Configuration object (not used for BundleClient)
  2151. Returns:
  2152. A BundleClient instance
  2153. """
  2154. return cls(
  2155. thin_packs=thin_packs,
  2156. report_activity=report_activity,
  2157. quiet=quiet,
  2158. include_tags=include_tags,
  2159. )
  2160. @classmethod
  2161. def _is_bundle_file(cls, path: str) -> bool:
  2162. """Check if a file is a git bundle by reading the first line."""
  2163. try:
  2164. with open(path, "rb") as f:
  2165. first_line = f.readline()
  2166. return first_line in (b"# v2 git bundle\n", b"# v3 git bundle\n")
  2167. except OSError:
  2168. return False
  2169. @classmethod
  2170. def _open_bundle(cls, path: Union[str, bytes]) -> "Bundle":
  2171. """Open and parse a bundle file.
  2172. Args:
  2173. path: Path to the bundle file (bytes or str)
  2174. Returns:
  2175. Bundle object with parsed metadata
  2176. Raises:
  2177. AssertionError: If bundle format is unsupported
  2178. """
  2179. if not isinstance(path, str):
  2180. path = os.fsdecode(path)
  2181. # Read bundle metadata without PackData to avoid file handle issues
  2182. with open(path, "rb") as f:
  2183. from dulwich.bundle import Bundle
  2184. version = None
  2185. firstline = f.readline()
  2186. if firstline == b"# v2 git bundle\n":
  2187. version = 2
  2188. elif firstline == b"# v3 git bundle\n":
  2189. version = 3
  2190. else:
  2191. raise AssertionError(f"unsupported bundle format header: {firstline!r}")
  2192. capabilities = {}
  2193. prerequisites = []
  2194. references = {}
  2195. line = f.readline()
  2196. if version >= 3:
  2197. while line.startswith(b"@"):
  2198. line = line[1:].rstrip(b"\n")
  2199. try:
  2200. key, value_bytes = line.split(b"=", 1)
  2201. value = value_bytes.decode("utf-8")
  2202. except ValueError:
  2203. key = line
  2204. value = None
  2205. capabilities[key.decode("utf-8")] = value
  2206. line = f.readline()
  2207. while line.startswith(b"-"):
  2208. (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
  2209. prerequisites.append((obj_id, comment))
  2210. line = f.readline()
  2211. while line != b"\n":
  2212. (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
  2213. references[ref] = obj_id
  2214. line = f.readline()
  2215. # Don't read PackData here, we'll do it later
  2216. bundle = Bundle()
  2217. bundle.version = version
  2218. bundle.capabilities = capabilities
  2219. bundle.prerequisites = prerequisites
  2220. bundle.references = references
  2221. bundle.pack_data = None # Will be read on demand
  2222. return bundle
  2223. @staticmethod
  2224. def _skip_to_pack_data(f: IO[bytes], version: int) -> None:
  2225. """Skip to the pack data section in a bundle file.
  2226. Args:
  2227. f: File object positioned at the beginning of the bundle
  2228. version: Bundle format version (2 or 3)
  2229. Raises:
  2230. AssertionError: If bundle header is invalid
  2231. """
  2232. # Skip header
  2233. header = f.readline()
  2234. if header not in (b"# v2 git bundle\n", b"# v3 git bundle\n"):
  2235. raise AssertionError(f"Invalid bundle header: {header!r}")
  2236. line = f.readline()
  2237. # Skip capabilities (v3 only)
  2238. if version >= 3:
  2239. while line.startswith(b"@"):
  2240. line = f.readline()
  2241. # Skip prerequisites
  2242. while line.startswith(b"-"):
  2243. line = f.readline()
  2244. # Skip references
  2245. while line != b"\n":
  2246. line = f.readline()
  2247. # Now at pack data
    def send_pack(
        self,
        path: Union[str, bytes],
        update_refs: Callable[[dict[bytes, bytes]], dict[bytes, bytes]],
        generate_pack_data: "GeneratePackDataFunc",
        progress: Optional[Callable[[bytes], None]] = None,
    ) -> SendPackResult:
        """Upload is not supported for bundle files.

        Args:
          path: Bundle file path (unused)
          update_refs: Function to determine changes to refs (unused)
          generate_pack_data: Function that generates pack data (unused)
          progress: Optional progress function (unused)

        Raises:
          NotImplementedError: Always, because bundle files are read-only
        """
        raise NotImplementedError("Bundle files are read-only")
  2257. def fetch(
  2258. self,
  2259. path: bytes,
  2260. target: BaseRepo,
  2261. determine_wants: Optional["DetermineWantsFunc"] = None,
  2262. progress: Optional[Callable[[bytes], None]] = None,
  2263. depth: Optional[int] = None,
  2264. ref_prefix: Optional[Sequence[Ref]] = None,
  2265. filter_spec: Optional[bytes] = None,
  2266. protocol_version: Optional[int] = None,
  2267. ) -> FetchPackResult:
  2268. """Fetch into a target repository from a bundle file."""
  2269. bundle = self._open_bundle(path)
  2270. # Get references from bundle
  2271. refs = dict(bundle.references)
  2272. # Determine what we want to fetch
  2273. if determine_wants is None:
  2274. _ = list(refs.values())
  2275. else:
  2276. _ = determine_wants(refs, None)
  2277. # Add pack data to target repository
  2278. # Need to reopen the file for pack data access
  2279. with open(path, "rb") as pack_file:
  2280. # Skip to pack data section
  2281. assert bundle.version is not None
  2282. BundleClient._skip_to_pack_data(pack_file, bundle.version)
  2283. # Read pack data into memory to avoid file positioning issues
  2284. pack_bytes = pack_file.read()
  2285. # Create PackData from in-memory bytes
  2286. from io import BytesIO
  2287. pack_io = BytesIO(pack_bytes)
  2288. pack_data = PackData.from_file(pack_io)
  2289. target.object_store.add_pack_data(len(pack_data), pack_data.iter_unpacked())
  2290. # Apply ref filtering if specified
  2291. if ref_prefix:
  2292. filtered_refs = {}
  2293. for ref_name, ref_value in refs.items():
  2294. for prefix in ref_prefix:
  2295. if ref_name.startswith(prefix):
  2296. filtered_refs[ref_name] = ref_value
  2297. break
  2298. refs = filtered_refs
  2299. return FetchPackResult(_to_optional_dict(refs), {}, agent_string())
  2300. def fetch_pack(
  2301. self,
  2302. path: Union[str, bytes],
  2303. determine_wants: "DetermineWantsFunc",
  2304. graph_walker: GraphWalker,
  2305. pack_data: Callable[[bytes], int],
  2306. progress: Optional[Callable[[bytes], None]] = None,
  2307. depth: Optional[int] = None,
  2308. ref_prefix: Optional[Sequence[Ref]] = None,
  2309. filter_spec: Optional[bytes] = None,
  2310. protocol_version: Optional[int] = None,
  2311. ) -> FetchPackResult:
  2312. """Retrieve a pack from a bundle file."""
  2313. bundle = self._open_bundle(path)
  2314. # Get references from bundle
  2315. refs = dict(bundle.references)
  2316. # Determine what we want to fetch
  2317. try:
  2318. _ = determine_wants(refs, depth)
  2319. except TypeError:
  2320. # Old-style determine_wants that doesn't accept depth
  2321. _ = determine_wants(refs)
  2322. # Write pack data to the callback
  2323. # Need to reopen the file for pack data access
  2324. with open(path, "rb") as pack_file:
  2325. # Skip to pack data section
  2326. assert bundle.version is not None
  2327. BundleClient._skip_to_pack_data(pack_file, bundle.version)
  2328. # Read pack data and write it to the callback
  2329. pack_bytes = pack_file.read()
  2330. pack_data(pack_bytes)
  2331. # Apply ref filtering if specified
  2332. if ref_prefix:
  2333. filtered_refs = {}
  2334. for ref_name, ref_value in refs.items():
  2335. for prefix in ref_prefix:
  2336. if ref_name.startswith(prefix):
  2337. filtered_refs[ref_name] = ref_value
  2338. break
  2339. refs = filtered_refs
  2340. return FetchPackResult(_to_optional_dict(refs), {}, agent_string())
  2341. def get_refs(
  2342. self,
  2343. path: Union[str, bytes],
  2344. protocol_version: Optional[int] = None,
  2345. ref_prefix: Optional[Sequence[Ref]] = None,
  2346. ) -> LsRemoteResult:
  2347. """Retrieve the current refs from a bundle file."""
  2348. bundle = self._open_bundle(path)
  2349. refs = dict(bundle.references)
  2350. # Apply ref filtering if specified
  2351. if ref_prefix:
  2352. filtered_refs = {}
  2353. for ref_name, ref_value in refs.items():
  2354. for prefix in ref_prefix:
  2355. if ref_name.startswith(prefix):
  2356. filtered_refs[ref_name] = ref_value
  2357. break
  2358. refs = filtered_refs
  2359. # Bundle refs are always concrete (never None), but LsRemoteResult expects Optional
  2360. return LsRemoteResult(_to_optional_dict(refs), {})
# Git client class to use for local (on-disk) repository access.
default_local_git_client_cls = LocalGitClient
class SSHVendor:
    """A client side SSH implementation.

    This base class only defines the interface; ``run_command`` must be
    provided by a subclass.
    """

    def run_command(
        self,
        host: str,
        command: bytes,
        username: Optional[str] = None,
        port: Optional[int] = None,
        password: Optional[str] = None,
        key_filename: Optional[str] = None,
        ssh_command: Optional[str] = None,
        protocol_version: Optional[int] = None,
    ) -> SubprocessWrapper:
        """Connect to an SSH server.

        Run a command remotely and return a file-like object for interaction
        with the remote command.

        Args:
          host: Host name
          command: Command to run on the remote host
          username: Optional name of user to log in as
          port: Optional SSH port to use
          password: Optional ssh password for login or private key
          key_filename: Optional path to private keyfile
          ssh_command: Optional SSH command
          protocol_version: Desired Git protocol version. By default the highest
            mutually supported protocol version will be used.

        Raises:
          NotImplementedError: Always, in this base class
        """
        raise NotImplementedError(self.run_command)
  2391. class StrangeHostname(Exception):
  2392. """Refusing to connect to strange SSH hostname."""
  2393. def __init__(self, hostname: str) -> None:
  2394. """Initialize StrangeHostname exception.
  2395. Args:
  2396. hostname: The strange hostname that was rejected
  2397. """
  2398. super().__init__(hostname)
  2399. class SubprocessSSHVendor(SSHVendor):
  2400. """SSH vendor that shells out to the local 'ssh' command."""
  2401. def run_command(
  2402. self,
  2403. host: str,
  2404. command: bytes,
  2405. username: Optional[str] = None,
  2406. port: Optional[int] = None,
  2407. password: Optional[str] = None,
  2408. key_filename: Optional[str] = None,
  2409. ssh_command: Optional[str] = None,
  2410. protocol_version: Optional[int] = None,
  2411. ) -> SubprocessWrapper:
  2412. """Run a git command over SSH.
  2413. Args:
  2414. host: SSH host to connect to
  2415. command: Git command to run
  2416. username: Optional username
  2417. port: Optional port number
  2418. password: Optional password (not supported)
  2419. key_filename: Optional SSH key file
  2420. ssh_command: Optional custom SSH command
  2421. protocol_version: Optional Git protocol version
  2422. Returns:
  2423. Tuple of (subprocess.Popen, Protocol, stderr_stream)
  2424. """
  2425. if password is not None:
  2426. raise NotImplementedError(
  2427. "Setting password not supported by SubprocessSSHVendor."
  2428. )
  2429. if ssh_command:
  2430. import shlex
  2431. args = [*shlex.split(ssh_command, posix=sys.platform != "win32"), "-x"]
  2432. else:
  2433. args = ["ssh", "-x"]
  2434. if port:
  2435. args.extend(["-p", str(port)])
  2436. if key_filename:
  2437. args.extend(["-i", str(key_filename)])
  2438. if protocol_version is None:
  2439. protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  2440. if protocol_version > 0:
  2441. args.extend(["-o", f"SetEnv GIT_PROTOCOL=version={protocol_version}"])
  2442. if username:
  2443. host = f"{username}@{host}"
  2444. if host.startswith("-"):
  2445. raise StrangeHostname(hostname=host)
  2446. args.append(host)
  2447. proc = subprocess.Popen(
  2448. [*args, command],
  2449. bufsize=0,
  2450. stdin=subprocess.PIPE,
  2451. stdout=subprocess.PIPE,
  2452. stderr=subprocess.PIPE,
  2453. )
  2454. return SubprocessWrapper(proc)
  2455. class PLinkSSHVendor(SSHVendor):
  2456. """SSH vendor that shells out to the local 'plink' command."""
  2457. def run_command(
  2458. self,
  2459. host: str,
  2460. command: bytes,
  2461. username: Optional[str] = None,
  2462. port: Optional[int] = None,
  2463. password: Optional[str] = None,
  2464. key_filename: Optional[str] = None,
  2465. ssh_command: Optional[str] = None,
  2466. protocol_version: Optional[int] = None,
  2467. ) -> SubprocessWrapper:
  2468. """Run a git command over SSH using PLink.
  2469. Args:
  2470. host: SSH host to connect to
  2471. command: Git command to run
  2472. username: Optional username
  2473. port: Optional port number
  2474. password: Optional password
  2475. key_filename: Optional SSH key file
  2476. ssh_command: Optional custom SSH command
  2477. protocol_version: Optional Git protocol version
  2478. Returns:
  2479. Tuple of (subprocess.Popen, Protocol, stderr_stream)
  2480. """
  2481. if ssh_command:
  2482. import shlex
  2483. args = [*shlex.split(ssh_command, posix=sys.platform != "win32"), "-ssh"]
  2484. elif sys.platform == "win32":
  2485. args = ["plink.exe", "-ssh"]
  2486. else:
  2487. args = ["plink", "-ssh"]
  2488. if password is not None:
  2489. import warnings
  2490. warnings.warn(
  2491. "Invoking PLink with a password exposes the password in the "
  2492. "process list."
  2493. )
  2494. args.extend(["-pw", str(password)])
  2495. if port:
  2496. args.extend(["-P", str(port)])
  2497. if key_filename:
  2498. args.extend(["-i", str(key_filename)])
  2499. if username:
  2500. host = f"{username}@{host}"
  2501. if host.startswith("-"):
  2502. raise StrangeHostname(hostname=host)
  2503. args.append(host)
  2504. # plink.exe does not provide a way to pass environment variables
  2505. # via the command line. The best we can do is set an environment
  2506. # variable and hope that plink will pass it to the server. If this
  2507. # does not work then the server should behave as if we had requested
  2508. # protocol version 0.
  2509. env = copy.deepcopy(os.environ)
  2510. if protocol_version is None:
  2511. protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  2512. if protocol_version > 0:
  2513. env["GIT_PROTOCOL"] = f"version={protocol_version}"
  2514. proc = subprocess.Popen(
  2515. [*args, command],
  2516. bufsize=0,
  2517. stdin=subprocess.PIPE,
  2518. stdout=subprocess.PIPE,
  2519. stderr=subprocess.PIPE,
  2520. env=env,
  2521. )
  2522. return SubprocessWrapper(proc)
# Factory called by SSHGitClient to obtain an SSH vendor when the caller did
# not pass one explicitly.
# Can be overridden by users
get_ssh_vendor: Callable[[], SSHVendor] = SubprocessSSHVendor
  2525. class SSHGitClient(TraditionalGitClient):
  2526. """Git client that connects over SSH."""
  2527. def __init__(
  2528. self,
  2529. host: str,
  2530. port: Optional[int] = None,
  2531. username: Optional[str] = None,
  2532. vendor: Optional[SSHVendor] = None,
  2533. config: Optional[Config] = None,
  2534. password: Optional[str] = None,
  2535. key_filename: Optional[str] = None,
  2536. ssh_command: Optional[str] = None,
  2537. path_encoding: str = TraditionalGitClient.DEFAULT_ENCODING,
  2538. thin_packs: bool = True,
  2539. report_activity: Optional[Callable[[int, str], None]] = None,
  2540. quiet: bool = False,
  2541. include_tags: bool = False,
  2542. ) -> None:
  2543. """Initialize SSHGitClient.
  2544. Args:
  2545. host: SSH hostname
  2546. port: Optional SSH port
  2547. username: Optional username
  2548. vendor: Optional SSH vendor
  2549. config: Optional configuration
  2550. password: Optional password
  2551. key_filename: Optional SSH key file
  2552. ssh_command: Optional custom SSH command
  2553. path_encoding: Encoding for paths (default: utf-8)
  2554. thin_packs: Whether or not thin packs should be retrieved
  2555. report_activity: Optional callback for reporting transport activity
  2556. quiet: Whether to suppress output
  2557. include_tags: Send annotated tags when sending the objects they point to
  2558. """
  2559. self.host = host
  2560. self.port = port
  2561. self.username = username
  2562. self.password = password
  2563. self.key_filename = key_filename
  2564. # Priority: ssh_command parameter, then env vars, then core.sshCommand config
  2565. if ssh_command:
  2566. self.ssh_command = ssh_command
  2567. else:
  2568. # Check environment variables first
  2569. env_ssh_command = os.environ.get("GIT_SSH_COMMAND")
  2570. if env_ssh_command:
  2571. self.ssh_command = env_ssh_command
  2572. else:
  2573. env_ssh = os.environ.get("GIT_SSH")
  2574. if env_ssh:
  2575. self.ssh_command = env_ssh
  2576. else:
  2577. # Fall back to config if no environment variable set
  2578. if config is not None:
  2579. try:
  2580. config_ssh_command = config.get((b"core",), b"sshCommand")
  2581. self.ssh_command = (
  2582. config_ssh_command.decode()
  2583. if config_ssh_command
  2584. else "ssh"
  2585. )
  2586. except KeyError:
  2587. self.ssh_command = "ssh"
  2588. else:
  2589. self.ssh_command = "ssh"
  2590. super().__init__(
  2591. path_encoding=path_encoding,
  2592. thin_packs=thin_packs,
  2593. report_activity=report_activity,
  2594. quiet=quiet,
  2595. include_tags=include_tags,
  2596. )
  2597. self.alternative_paths: dict[bytes, bytes] = {}
  2598. if vendor is not None:
  2599. self.ssh_vendor = vendor
  2600. else:
  2601. self.ssh_vendor = get_ssh_vendor()
  2602. def get_url(self, path: str) -> str:
  2603. """Get the SSH URL for a path."""
  2604. netloc = self.host
  2605. if self.port is not None:
  2606. netloc += f":{self.port}"
  2607. if self.username is not None:
  2608. netloc = urlquote(self.username, "@/:") + "@" + netloc
  2609. return urlunsplit(("ssh", netloc, path, "", ""))
  2610. @classmethod
  2611. def from_parsedurl(
  2612. cls,
  2613. parsedurl: ParseResult,
  2614. thin_packs: bool = True,
  2615. report_activity: Optional[Callable[[int, str], None]] = None,
  2616. quiet: bool = False,
  2617. include_tags: bool = False,
  2618. dumb: bool = False,
  2619. username: Optional[str] = None,
  2620. password: Optional[str] = None,
  2621. config: Optional[Config] = None,
  2622. path_encoding: str = TraditionalGitClient.DEFAULT_ENCODING,
  2623. vendor: Optional[SSHVendor] = None,
  2624. ) -> "SSHGitClient":
  2625. """Create an SSHGitClient from a parsed URL.
  2626. Args:
  2627. parsedurl: Result of urlparse()
  2628. thin_packs: Whether or not thin packs should be retrieved
  2629. report_activity: Optional callback for reporting transport activity
  2630. quiet: Whether to suppress progress output
  2631. include_tags: Whether to include tags
  2632. dumb: Whether to use dumb protocol (not used for SSHGitClient)
  2633. username: SSH username
  2634. password: SSH password
  2635. config: Configuration object
  2636. path_encoding: Encoding for paths
  2637. vendor: SSH implementation to use
  2638. Returns:
  2639. An SSHGitClient instance
  2640. """
  2641. if parsedurl.hostname is None:
  2642. raise ValueError("SSH URL must have a hostname")
  2643. return cls(
  2644. host=parsedurl.hostname,
  2645. port=parsedurl.port,
  2646. username=username or parsedurl.username,
  2647. thin_packs=thin_packs,
  2648. report_activity=report_activity,
  2649. quiet=quiet,
  2650. include_tags=include_tags,
  2651. path_encoding=path_encoding,
  2652. vendor=vendor,
  2653. config=config,
  2654. password=password,
  2655. )
  2656. def _get_cmd_path(self, cmd: bytes) -> bytes:
  2657. cmd = self.alternative_paths.get(cmd, b"git-" + cmd)
  2658. assert isinstance(cmd, bytes)
  2659. return cmd
  2660. def _connect(
  2661. self,
  2662. cmd: bytes,
  2663. path: Union[str, bytes],
  2664. protocol_version: Optional[int] = None,
  2665. ) -> tuple[Protocol, Callable[[], bool], Optional[IO[bytes]]]:
  2666. if not isinstance(cmd, bytes):
  2667. raise TypeError(cmd)
  2668. if isinstance(path, bytes):
  2669. path = path.decode(self._remote_path_encoding)
  2670. if path.startswith("/~"):
  2671. path = path[1:]
  2672. argv = (
  2673. self._get_cmd_path(cmd)
  2674. + b" '"
  2675. + path.encode(self._remote_path_encoding)
  2676. + b"'"
  2677. )
  2678. kwargs = {}
  2679. if self.password is not None:
  2680. kwargs["password"] = self.password
  2681. if self.key_filename is not None:
  2682. kwargs["key_filename"] = self.key_filename
  2683. # GIT_SSH_COMMAND takes precedence over GIT_SSH
  2684. if self.ssh_command is not None:
  2685. kwargs["ssh_command"] = self.ssh_command
  2686. con = self.ssh_vendor.run_command(
  2687. self.host,
  2688. argv,
  2689. port=self.port,
  2690. username=self.username,
  2691. protocol_version=protocol_version,
  2692. )
  2693. return (
  2694. Protocol(
  2695. con.read,
  2696. con.write,
  2697. con.close,
  2698. report_activity=self._report_activity,
  2699. ),
  2700. con.can_read,
  2701. getattr(con, "stderr", None),
  2702. )
  2703. def default_user_agent_string() -> str:
  2704. """Return the default user agent string for Dulwich."""
  2705. # Start user agent with "git/", because GitHub requires this. :-( See
  2706. # https://github.com/jelmer/dulwich/issues/562 for details.
  2707. return "git/dulwich/{}".format(".".join([str(x) for x in dulwich.__version__]))
  2708. def default_urllib3_manager(
  2709. config: Optional[Config],
  2710. pool_manager_cls: Optional[type] = None,
  2711. proxy_manager_cls: Optional[type] = None,
  2712. base_url: Optional[str] = None,
  2713. timeout: Optional[float] = None,
  2714. cert_reqs: Optional[str] = None,
  2715. ) -> Union["urllib3.ProxyManager", "urllib3.PoolManager"]:
  2716. """Return urllib3 connection pool manager.
  2717. Honour detected proxy configurations.
  2718. Args:
  2719. config: `dulwich.config.ConfigDict` instance with Git configuration.
  2720. pool_manager_cls: Pool manager class to use
  2721. proxy_manager_cls: Proxy manager class to use
  2722. base_url: Base URL for proxy bypass checks
  2723. timeout: Timeout for HTTP requests in seconds
  2724. cert_reqs: SSL certificate requirements (e.g. "CERT_REQUIRED", "CERT_NONE")
  2725. Returns:
  2726. Either pool_manager_cls (defaults to `urllib3.ProxyManager`) instance for
  2727. proxy configurations, proxy_manager_cls
  2728. (defaults to `urllib3.PoolManager`) instance otherwise
  2729. """
  2730. proxy_server: Optional[str] = None
  2731. user_agent: Optional[str] = None
  2732. ca_certs: Optional[str] = None
  2733. ssl_verify: Optional[bool] = None
  2734. if proxy_server is None:
  2735. for proxyname in ("https_proxy", "http_proxy", "all_proxy"):
  2736. proxy_server = os.environ.get(proxyname)
  2737. if proxy_server:
  2738. break
  2739. if proxy_server:
  2740. if check_for_proxy_bypass(base_url):
  2741. proxy_server = None
  2742. if config is not None:
  2743. if proxy_server is None:
  2744. try:
  2745. proxy_server_bytes = config.get(b"http", b"proxy")
  2746. if proxy_server_bytes is not None:
  2747. proxy_server = proxy_server_bytes.decode("utf-8")
  2748. except KeyError:
  2749. pass
  2750. try:
  2751. user_agent_bytes = config.get(b"http", b"useragent")
  2752. if user_agent_bytes is not None:
  2753. user_agent = user_agent_bytes.decode("utf-8")
  2754. except KeyError:
  2755. pass
  2756. # TODO(jelmer): Support per-host settings
  2757. try:
  2758. ssl_verify = config.get_boolean(b"http", b"sslVerify")
  2759. except KeyError:
  2760. ssl_verify = True
  2761. try:
  2762. ca_certs_bytes = config.get(b"http", b"sslCAInfo")
  2763. if ca_certs_bytes is not None:
  2764. ca_certs = ca_certs_bytes.decode("utf-8")
  2765. except KeyError:
  2766. ca_certs = None
  2767. # Check for timeout configuration
  2768. if timeout is None:
  2769. try:
  2770. timeout_bytes = config.get(b"http", b"timeout")
  2771. if timeout_bytes is not None:
  2772. timeout = float(timeout_bytes.decode("utf-8"))
  2773. except KeyError:
  2774. pass
  2775. if user_agent is None:
  2776. user_agent = default_user_agent_string()
  2777. headers = {"User-agent": user_agent}
  2778. # Check for extra headers in config
  2779. if config is not None:
  2780. try:
  2781. # Git allows multiple http.extraHeader entries
  2782. extra_headers = config.get_multivar(b"http", b"extraHeader")
  2783. for extra_header in extra_headers:
  2784. if extra_header and b": " in extra_header:
  2785. # Parse the header (format: "Header-Name: value")
  2786. header_name, header_value = extra_header.split(b": ", 1)
  2787. headers[header_name.decode("utf-8")] = header_value.decode("utf-8")
  2788. except KeyError:
  2789. pass
  2790. kwargs: dict[str, Union[str, float, None]] = {
  2791. "ca_certs": ca_certs,
  2792. }
  2793. # Add timeout if specified
  2794. if timeout is not None:
  2795. kwargs["timeout"] = timeout
  2796. # Handle cert_reqs - allow override from parameter
  2797. if cert_reqs is not None:
  2798. kwargs["cert_reqs"] = cert_reqs
  2799. elif ssl_verify is True:
  2800. kwargs["cert_reqs"] = "CERT_REQUIRED"
  2801. elif ssl_verify is False:
  2802. kwargs["cert_reqs"] = "CERT_NONE"
  2803. else:
  2804. # Default to SSL verification
  2805. kwargs["cert_reqs"] = "CERT_REQUIRED"
  2806. import urllib3
  2807. manager: Union[urllib3.ProxyManager, urllib3.PoolManager]
  2808. if proxy_server is not None:
  2809. if proxy_manager_cls is None:
  2810. proxy_manager_cls = urllib3.ProxyManager
  2811. if not isinstance(proxy_server, str):
  2812. proxy_server = proxy_server.decode()
  2813. proxy_server_url = urlparse(proxy_server)
  2814. if proxy_server_url.username is not None:
  2815. proxy_headers = urllib3.make_headers(
  2816. proxy_basic_auth=f"{proxy_server_url.username}:{proxy_server_url.password or ''}"
  2817. )
  2818. else:
  2819. proxy_headers = {}
  2820. manager = proxy_manager_cls(
  2821. proxy_server, proxy_headers=proxy_headers, headers=headers, **kwargs
  2822. )
  2823. else:
  2824. if pool_manager_cls is None:
  2825. pool_manager_cls = urllib3.PoolManager
  2826. manager = pool_manager_cls(headers=headers, **kwargs)
  2827. return manager
  2828. def check_for_proxy_bypass(base_url: Optional[str]) -> bool:
  2829. """Check if proxy should be bypassed for the given URL."""
  2830. # Check if a proxy bypass is defined with the no_proxy environment variable
  2831. if base_url: # only check if base_url is provided
  2832. no_proxy_str = os.environ.get("no_proxy")
  2833. if no_proxy_str:
  2834. # implementation based on curl behavior: https://curl.se/libcurl/c/CURLOPT_NOPROXY.html
  2835. # get hostname of provided parsed url
  2836. parsed_url = urlparse(base_url)
  2837. hostname = parsed_url.hostname
  2838. if hostname:
  2839. import ipaddress
  2840. # check if hostname is an ip address
  2841. try:
  2842. hostname_ip = ipaddress.ip_address(hostname)
  2843. except ValueError:
  2844. hostname_ip = None
  2845. no_proxy_values = no_proxy_str.split(",")
  2846. for no_proxy_value in no_proxy_values:
  2847. no_proxy_value = no_proxy_value.strip()
  2848. if no_proxy_value:
  2849. no_proxy_value = no_proxy_value.lower()
  2850. no_proxy_value = no_proxy_value.lstrip(
  2851. "."
  2852. ) # ignore leading dots
  2853. if hostname_ip:
  2854. # check if no_proxy_value is a ip network
  2855. try:
  2856. no_proxy_value_network = ipaddress.ip_network(
  2857. no_proxy_value, strict=False
  2858. )
  2859. except ValueError:
  2860. no_proxy_value_network = None
  2861. if no_proxy_value_network:
  2862. # if hostname is a ip address and no_proxy_value is a ip network -> check if ip address is part of network
  2863. if hostname_ip in no_proxy_value_network:
  2864. return True
  2865. if no_proxy_value == "*":
  2866. # '*' is special case for always bypass proxy
  2867. return True
  2868. if hostname == no_proxy_value:
  2869. return True
  2870. no_proxy_value = (
  2871. "." + no_proxy_value
  2872. ) # add a dot to only match complete domains
  2873. if hostname.endswith(no_proxy_value):
  2874. return True
  2875. return False
  2876. class AbstractHttpGitClient(GitClient):
  2877. """Abstract base class for HTTP Git Clients.
  2878. This is agnostic of the actual HTTP implementation.
  2879. Subclasses should provide an implementation of the
  2880. _http_request method.
  2881. """
  2882. def __init__(
  2883. self,
  2884. base_url: str,
  2885. dumb: bool = False,
  2886. thin_packs: bool = True,
  2887. report_activity: Optional[Callable[[int, str], None]] = None,
  2888. quiet: bool = False,
  2889. include_tags: bool = False,
  2890. ) -> None:
  2891. """Initialize AbstractHttpGitClient."""
  2892. self._base_url = base_url.rstrip("/") + "/"
  2893. self.dumb = dumb
  2894. GitClient.__init__(
  2895. self,
  2896. thin_packs=thin_packs,
  2897. report_activity=report_activity,
  2898. quiet=quiet,
  2899. include_tags=include_tags,
  2900. )
    def _http_request(
        self,
        url: str,
        headers: Optional[dict[str, str]] = None,
        data: Optional[Union[bytes, Iterator[bytes]]] = None,
        raise_for_status: bool = True,
    ) -> tuple["HTTPResponse", Callable[[int], bytes]]:
        """Perform HTTP request.

        This base implementation always raises NotImplementedError; it
        documents the contract that implementations of this method follow.

        Args:
          url: Request URL.
          headers: Optional custom headers to override defaults.
          data: Request data.
          raise_for_status: Whether to raise an exception for HTTP errors.

        Returns:
          Tuple (response, read), where response is an urllib3
          response object with additional content_type and
          redirect_location properties, and read is a consumable read
          method for the response data.

        Raises:
          GitProtocolError
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(self._http_request)
    def _discover_references(
        self,
        service: bytes,
        base_url: str,
        protocol_version: Optional[int] = None,
        ref_prefix: Optional[Sequence[Ref]] = None,
    ) -> tuple[
        dict[Ref, Optional[ObjectID]],
        set[bytes],
        str,
        dict[Ref, Ref],
        dict[Ref, ObjectID],
    ]:
        """Request ``info/refs`` and parse the advertised references.

        Updates ``self.dumb`` from the response content type and sets
        ``self.protocol_version`` to the version that ends up being used.

        Args:
          service: Service name, e.g. b"git-upload-pack" or b"git-receive-pack"
          base_url: Repository URL; must end with "/"
          protocol_version: Desired Git protocol version, or None for the default
          ref_prefix: Optional ref prefixes to restrict the result to

        Returns:
          Tuple (refs, server_capabilities, base_url, symrefs, peeled); base_url
          reflects any redirect, and capabilities/symrefs are empty for dumb
          servers.

        Raises:
          ValueError: for an unknown requested or server protocol version, or
            when the server answers with a higher version than requested
          GitProtocolError: for a redirect that drops the info/refs tail or a
            malformed smart response
        """
        if (
            protocol_version is not None
            and protocol_version not in GIT_PROTOCOL_VERSIONS
        ):
            raise ValueError(f"unknown Git protocol version {protocol_version}")
        assert base_url[-1] == "/"
        tail = "info/refs"
        headers = {"Accept": "*/*"}
        # Unless the client is already known to be dumb, ask for the smart service.
        if self.dumb is not True:
            tail += "?service={}".format(service.decode("ascii"))
            # Enable protocol v2 only when fetching, not when pushing.
            # Git does not yet implement push over protocol v2, and as of
            # git version 2.37.3 git-http-backend's behaviour is erratic if
            # we try: It responds with a Git-protocol-v1-style ref listing
            # which lacks the "001f# service=git-receive-pack" marker.
            if service == b"git-upload-pack":
                if protocol_version is None:
                    self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
                else:
                    self.protocol_version = protocol_version
                if self.protocol_version == 2:
                    headers["Git-Protocol"] = "version=2"
            else:
                self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
        url = urljoin(base_url, tail)
        resp, read = self._http_request(url, headers)

        if resp.redirect_location:
            # Something changed (redirect!), so let's update the base URL
            if not resp.redirect_location.endswith(tail):
                raise GitProtocolError(
                    f"Redirected from URL {url} to URL {resp.redirect_location} without {tail}"
                )
            # Strip the info/refs tail to recover the redirected repository URL.
            base_url = urljoin(url, resp.redirect_location[: -len(tail)])

        try:
            # Anything not served as application/x-git-* is treated as dumb.
            self.dumb = resp.content_type is None or not resp.content_type.startswith(
                "application/x-git-"
            )
            if not self.dumb:

                def begin_protocol_v2(
                    proto: Protocol,
                ) -> tuple[set[bytes], Any, Callable[[int], bytes], Protocol]:
                    # Reads the v2 capability advertisement from proto, then
                    # issues an ls-refs command as a separate smart request and
                    # returns a Protocol reading from that new response.
                    nonlocal ref_prefix
                    server_capabilities = read_server_capabilities(proto.read_pkt_seq())
                    if ref_prefix is None:
                        ref_prefix = DEFAULT_REF_PREFIX
                    pkts = [
                        b"symrefs",
                        b"peel",
                    ]
                    for prefix in ref_prefix:
                        pkts.append(b"ref-prefix " + prefix)
                    body = b"".join(
                        [pkt_line(b"command=ls-refs\n"), b"0001", pkt_seq(*pkts)]
                    )
                    resp, read = self._smart_request(
                        service.decode("ascii"), base_url, body
                    )
                    proto = Protocol(read, lambda data: None)
                    return server_capabilities, resp, read, proto

                proto = Protocol(read, lambda data: None)
                server_protocol_version = negotiate_protocol_version(proto)
                if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
                    raise ValueError(
                        f"unknown Git protocol version {server_protocol_version} used by server"
                    )
                if protocol_version and server_protocol_version > protocol_version:
                    raise ValueError(
                        f"bad Git protocol version {server_protocol_version} used by server"
                    )
                self.protocol_version = server_protocol_version
                if self.protocol_version == 2:
                    # NOTE(review): this rebinds resp to the ls-refs response, so
                    # the finally clause below closes that one; the original
                    # info/refs response does not appear to be closed here.
                    server_capabilities, resp, read, proto = begin_protocol_v2(proto)
                    (refs, symrefs, peeled) = read_pkt_refs_v2(proto.read_pkt_seq())
                    return refs, server_capabilities, base_url, symrefs, peeled
                else:
                    # The first packet sequence must be exactly one packet:
                    # the "# service=<name>" marker.
                    try:
                        [pkt] = list(proto.read_pkt_seq())
                    except ValueError as exc:
                        raise GitProtocolError(
                            "unexpected number of packets received"
                        ) from exc
                    if pkt.rstrip(b"\n") != (b"# service=" + service):
                        raise GitProtocolError(
                            f"unexpected first line {pkt!r} from smart server"
                        )
                    # Github sends "version 2" after sending the service name.
                    # Try to negotiate protocol version 2 again.
                    server_protocol_version = negotiate_protocol_version(proto)
                    if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
                        raise ValueError(
                            f"unknown Git protocol version {server_protocol_version} used by server"
                        )
                    if protocol_version and server_protocol_version > protocol_version:
                        raise ValueError(
                            f"bad Git protocol version {server_protocol_version} used by server"
                        )
                    self.protocol_version = server_protocol_version
                    if self.protocol_version == 2:
                        server_capabilities, resp, read, proto = begin_protocol_v2(
                            proto
                        )
                        (refs, symrefs, peeled) = read_pkt_refs_v2(proto.read_pkt_seq())
                    else:
                        (
                            refs_v1,
                            server_capabilities,
                        ) = read_pkt_refs_v1(proto.read_pkt_seq())
                        # Convert v1 refs to Optional type
                        refs = _to_optional_dict(refs_v1)
                        (refs, peeled) = split_peeled_refs(refs)
                        (symrefs, _agent) = _extract_symrefs_and_agent(
                            server_capabilities
                        )
                        # v1 has no server-side ref-prefix filter, so filter here.
                        if ref_prefix is not None:
                            refs = filter_ref_prefix(refs, ref_prefix)
                    return refs, server_capabilities, base_url, symrefs, peeled
            else:
                self.protocol_version = 0  # dumb servers only support protocol v0
                # Read all the response data
                data = b""
                while True:
                    chunk = read(4096)
                    if not chunk:
                        break
                    data += chunk
                from typing import Optional, cast

                info_refs = read_info_refs(BytesIO(data))
                (refs, peeled) = split_peeled_refs(
                    cast(dict[bytes, Optional[bytes]], info_refs)
                )
                if ref_prefix is not None:
                    refs = filter_ref_prefix(refs, ref_prefix)
                # No capabilities or symrefs are reported for a dumb server.
                return refs, set(), base_url, {}, peeled
        finally:
            resp.close()
  3071. def _smart_request(
  3072. self, service: str, url: str, data: Union[bytes, Iterator[bytes]]
  3073. ) -> tuple["HTTPResponse", Callable[[int], bytes]]:
  3074. """Send a 'smart' HTTP request.
  3075. This is a simple wrapper around _http_request that sets
  3076. a couple of extra headers.
  3077. """
  3078. assert url[-1] == "/"
  3079. url = urljoin(url, service)
  3080. result_content_type = f"application/x-{service}-result"
  3081. headers = {
  3082. "Content-Type": f"application/x-{service}-request",
  3083. "Accept": result_content_type,
  3084. }
  3085. if self.protocol_version == 2:
  3086. headers["Git-Protocol"] = "version=2"
  3087. if isinstance(data, bytes):
  3088. headers["Content-Length"] = str(len(data))
  3089. resp, read = self._http_request(url, headers, data)
  3090. if (
  3091. not resp.content_type
  3092. or resp.content_type.split(";")[0] != result_content_type
  3093. ):
  3094. raise GitProtocolError(
  3095. f"Invalid content-type from server: {resp.content_type}"
  3096. )
  3097. return resp, read
  3098. def send_pack(
  3099. self,
  3100. path: Union[str, bytes],
  3101. update_refs: Callable[[dict[bytes, bytes]], dict[bytes, bytes]],
  3102. generate_pack_data: "GeneratePackDataFunc",
  3103. progress: Optional[Callable[[bytes], None]] = None,
  3104. ) -> SendPackResult:
  3105. """Upload a pack to a remote repository.
  3106. Args:
  3107. path: Repository path (as bytestring or string)
  3108. update_refs: Function to determine changes to remote refs.
  3109. Receives dict with existing remote refs, returns dict with
  3110. changed refs (name -> sha, where sha=ZERO_SHA for deletions)
  3111. generate_pack_data: Function that can return a tuple
  3112. with number of elements and pack data to upload.
  3113. progress: Optional progress function
  3114. Returns:
  3115. SendPackResult
  3116. Raises:
  3117. SendPackError: if server rejects the pack data
  3118. """
  3119. url = self._get_url(path)
  3120. old_refs, server_capabilities, url, _symrefs, _peeled = (
  3121. self._discover_references(b"git-receive-pack", url)
  3122. )
  3123. (
  3124. negotiated_capabilities,
  3125. agent,
  3126. ) = self._negotiate_receive_pack_capabilities(server_capabilities)
  3127. negotiated_capabilities.add(capability_agent())
  3128. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  3129. self._report_status_parser = ReportStatusParser()
  3130. # Assert that old_refs has no None values
  3131. assert all(v is not None for v in old_refs.values()), (
  3132. "old_refs should not contain None values"
  3133. )
  3134. old_refs_typed: dict[bytes, bytes] = old_refs # type: ignore[assignment]
  3135. new_refs = update_refs(dict(old_refs_typed))
  3136. if new_refs is None:
  3137. # Determine wants function is aborting the push.
  3138. # Convert to Optional type for SendPackResult
  3139. old_refs_optional: dict[bytes, Optional[bytes]] = old_refs
  3140. return SendPackResult(old_refs_optional, agent=agent, ref_status={})
  3141. if set(new_refs.items()).issubset(set(old_refs_typed.items())):
  3142. # Convert to Optional type for SendPackResult
  3143. return SendPackResult(
  3144. _to_optional_dict(new_refs), agent=agent, ref_status={}
  3145. )
  3146. if self.dumb:
  3147. raise NotImplementedError(self.fetch_pack)
  3148. def body_generator() -> Iterator[bytes]:
  3149. header_handler = _v1ReceivePackHeader(
  3150. list(negotiated_capabilities), old_refs_typed, new_refs
  3151. )
  3152. for pkt in header_handler:
  3153. yield pkt_line(pkt)
  3154. pack_data_count, pack_data = generate_pack_data(
  3155. header_handler.have,
  3156. header_handler.want,
  3157. ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities),
  3158. )
  3159. if self._should_send_pack(new_refs):
  3160. yield from PackChunkGenerator(pack_data_count, pack_data)
  3161. resp, read = self._smart_request("git-receive-pack", url, data=body_generator())
  3162. try:
  3163. resp_proto = Protocol(read, lambda data: None)
  3164. ref_status = self._handle_receive_pack_tail(
  3165. resp_proto, negotiated_capabilities, progress
  3166. )
  3167. # Convert to Optional type for SendPackResult
  3168. return SendPackResult(
  3169. _to_optional_dict(new_refs), agent=agent, ref_status=ref_status
  3170. )
  3171. finally:
  3172. resp.close()
  3173. def fetch_pack(
  3174. self,
  3175. path: Union[str, bytes],
  3176. determine_wants: "DetermineWantsFunc",
  3177. graph_walker: GraphWalker,
  3178. pack_data: Callable[[bytes], int],
  3179. progress: Optional[Callable[[bytes], None]] = None,
  3180. depth: Optional[int] = None,
  3181. ref_prefix: Optional[Sequence[Ref]] = None,
  3182. filter_spec: Optional[bytes] = None,
  3183. protocol_version: Optional[int] = None,
  3184. ) -> FetchPackResult:
  3185. """Retrieve a pack from a git smart server.
  3186. Args:
  3187. path: Path to fetch from
  3188. determine_wants: Callback that returns list of commits to fetch
  3189. graph_walker: Object with next() and ack().
  3190. pack_data: Callback called for each bit of data in the pack
  3191. progress: Callback for progress reports (strings)
  3192. depth: Depth for request
  3193. ref_prefix: List of prefixes of desired references, as a list of
  3194. bytestrings. Filtering is done by the server if supported, and
  3195. client side otherwise.
  3196. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  3197. Only used if the server supports the Git protocol-v2 'filter'
  3198. feature, and ignored otherwise.
  3199. protocol_version: Desired Git protocol version. By default the highest
  3200. mutually supported protocol version will be used.
  3201. Returns:
  3202. FetchPackResult object
  3203. """
  3204. url = self._get_url(path)
  3205. refs, server_capabilities, url, symrefs, _peeled = self._discover_references(
  3206. b"git-upload-pack",
  3207. url,
  3208. protocol_version=protocol_version,
  3209. ref_prefix=ref_prefix,
  3210. )
  3211. (
  3212. negotiated_capabilities,
  3213. capa_symrefs,
  3214. agent,
  3215. ) = self._negotiate_upload_pack_capabilities(server_capabilities)
  3216. if not symrefs and capa_symrefs:
  3217. symrefs = capa_symrefs
  3218. # Filter out None values from refs for determine_wants
  3219. refs_filtered = {k: v for k, v in refs.items() if v is not None}
  3220. if depth is not None:
  3221. wants = determine_wants(refs_filtered, depth=depth)
  3222. else:
  3223. wants = determine_wants(refs_filtered)
  3224. if wants is not None:
  3225. wants = [cid for cid in wants if cid != ZERO_SHA]
  3226. if not wants and not self.dumb:
  3227. return FetchPackResult(refs, symrefs, agent)
  3228. elif self.dumb:
  3229. # Use dumb HTTP protocol
  3230. from .dumb import DumbRemoteHTTPRepo
  3231. # Pass http_request function
  3232. dumb_repo = DumbRemoteHTTPRepo(
  3233. url, functools.partial(self._http_request, raise_for_status=False)
  3234. )
  3235. # Fetch pack data from dumb remote
  3236. pack_data_list = list(
  3237. dumb_repo.fetch_pack_data(
  3238. lambda refs, depth: wants,
  3239. graph_walker,
  3240. progress=progress,
  3241. depth=depth,
  3242. )
  3243. )
  3244. symrefs[b"HEAD"] = dumb_repo.get_head()
  3245. # Write pack data
  3246. if pack_data_list:
  3247. from .pack import write_pack_data
  3248. # Wrap pack_data to match expected signature
  3249. def write_fn(data: bytes) -> None:
  3250. pack_data(data)
  3251. # Write pack data directly using the unpacked objects
  3252. write_pack_data(
  3253. write_fn,
  3254. iter(pack_data_list),
  3255. num_records=len(pack_data_list),
  3256. progress=progress,
  3257. )
  3258. return FetchPackResult(refs, symrefs, agent)
  3259. req_data = BytesIO()
  3260. req_proto = Protocol(None, req_data.write) # type: ignore
  3261. (new_shallow, new_unshallow) = _handle_upload_pack_head(
  3262. req_proto,
  3263. negotiated_capabilities,
  3264. graph_walker,
  3265. wants,
  3266. can_read=None,
  3267. depth=depth,
  3268. protocol_version=self.protocol_version,
  3269. )
  3270. if self.protocol_version == 2:
  3271. data = pkt_line(b"command=fetch\n") + b"0001"
  3272. if CAPABILITY_THIN_PACK in self._fetch_capabilities:
  3273. data += pkt_line(b"thin-pack\n")
  3274. if (
  3275. find_capability(
  3276. negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
  3277. )
  3278. and filter_spec
  3279. ):
  3280. data += pkt_line(b"filter %s\n" % filter_spec)
  3281. elif filter_spec:
  3282. self._warn_filter_objects()
  3283. data += req_data.getvalue()
  3284. else:
  3285. if filter_spec:
  3286. self._warn_filter_objects()
  3287. data = req_data.getvalue()
  3288. resp, read = self._smart_request("git-upload-pack", url, data)
  3289. try:
  3290. resp_proto = Protocol(read, None) # type: ignore
  3291. if new_shallow is None and new_unshallow is None:
  3292. (new_shallow, new_unshallow) = _read_shallow_updates(
  3293. resp_proto.read_pkt_seq()
  3294. )
  3295. _handle_upload_pack_tail(
  3296. resp_proto,
  3297. negotiated_capabilities,
  3298. graph_walker,
  3299. pack_data,
  3300. progress,
  3301. protocol_version=self.protocol_version,
  3302. )
  3303. return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
  3304. finally:
  3305. resp.close()
  3306. def get_refs(
  3307. self,
  3308. path: Union[str, bytes],
  3309. protocol_version: Optional[int] = None,
  3310. ref_prefix: Optional[Sequence[Ref]] = None,
  3311. ) -> LsRemoteResult:
  3312. """Retrieve the current refs from a git smart server."""
  3313. url = self._get_url(path)
  3314. refs, _, _, symrefs, peeled = self._discover_references(
  3315. b"git-upload-pack",
  3316. url,
  3317. protocol_version=protocol_version,
  3318. ref_prefix=ref_prefix,
  3319. )
  3320. for refname, refvalue in peeled.items():
  3321. refs[refname + PEELED_TAG_SUFFIX] = refvalue
  3322. return LsRemoteResult(refs, symrefs)
  3323. def get_url(self, path: str) -> str:
  3324. """Get the HTTP URL for a path."""
  3325. return self._get_url(path).rstrip("/")
  3326. def _get_url(self, path: Union[str, bytes]) -> str:
  3327. path_str = path if isinstance(path, str) else path.decode("utf-8")
  3328. return urljoin(self._base_url, path_str).rstrip("/") + "/"
  3329. @classmethod
  3330. def from_parsedurl(
  3331. cls,
  3332. parsedurl: ParseResult,
  3333. thin_packs: bool = True,
  3334. report_activity: Optional[Callable[[int, str], None]] = None,
  3335. quiet: bool = False,
  3336. include_tags: bool = False,
  3337. dumb: bool = False,
  3338. username: Optional[str] = None,
  3339. password: Optional[str] = None,
  3340. config: Optional[Config] = None,
  3341. ) -> "AbstractHttpGitClient":
  3342. """Create an AbstractHttpGitClient from a parsed URL.
  3343. Args:
  3344. parsedurl: Result of urlparse()
  3345. thin_packs: Whether or not thin packs should be retrieved
  3346. report_activity: Optional callback for reporting transport activity
  3347. quiet: Whether to suppress progress output
  3348. include_tags: Whether to include tags
  3349. dumb: Whether to use dumb HTTP transport
  3350. username: Optional username for authentication
  3351. password: Optional password for authentication
  3352. config: Configuration object
  3353. Returns:
  3354. An AbstractHttpGitClient instance
  3355. """
  3356. # Extract credentials from URL if present
  3357. # ParseResult.username and .password are URL-encoded, need to unquote them
  3358. from urllib.parse import unquote
  3359. url_username = unquote(parsedurl.username) if parsedurl.username else None
  3360. url_password = unquote(parsedurl.password) if parsedurl.password else None
  3361. # Explicit parameters take precedence over URL credentials
  3362. final_username = username if username is not None else url_username
  3363. final_password = password if password is not None else url_password
  3364. # Remove credentials from URL for base_url
  3365. hostname = parsedurl.hostname or ""
  3366. base_parsed = parsedurl._replace(netloc=hostname)
  3367. if parsedurl.port:
  3368. base_parsed = base_parsed._replace(netloc=f"{hostname}:{parsedurl.port}")
  3369. # Pass credentials to constructor if it's a subclass that supports them
  3370. if cls is Urllib3HttpGitClient:
  3371. return cls( # type: ignore[call-arg]
  3372. urlunparse(base_parsed),
  3373. dumb=dumb,
  3374. thin_packs=thin_packs,
  3375. report_activity=report_activity,
  3376. quiet=quiet,
  3377. include_tags=include_tags,
  3378. username=final_username,
  3379. password=final_password,
  3380. config=config,
  3381. )
  3382. else:
  3383. # Base class doesn't support credentials in constructor
  3384. return cls(
  3385. urlunparse(base_parsed),
  3386. dumb=dumb,
  3387. thin_packs=thin_packs,
  3388. report_activity=report_activity,
  3389. quiet=quiet,
  3390. include_tags=include_tags,
  3391. )
  3392. def __repr__(self) -> str:
  3393. """Return string representation of this client."""
  3394. return f"{type(self).__name__}({self._base_url!r}, dumb={self.dumb!r})"
  3395. def _wrap_urllib3_exceptions(
  3396. func: Callable[..., bytes],
  3397. ) -> Callable[..., bytes]:
  3398. from urllib3.exceptions import ProtocolError
  3399. def wrapper(*args: object, **kwargs: object) -> bytes:
  3400. try:
  3401. return func(*args, **kwargs)
  3402. except ProtocolError as error:
  3403. raise GitProtocolError(str(error)) from error
  3404. return wrapper
  3405. class Urllib3HttpGitClient(AbstractHttpGitClient):
  3406. """Git client that uses urllib3 for HTTP(S) connections."""
  3407. def __init__(
  3408. self,
  3409. base_url: str,
  3410. dumb: Optional[bool] = None,
  3411. pool_manager: Optional["urllib3.PoolManager"] = None,
  3412. config: Optional[Config] = None,
  3413. username: Optional[str] = None,
  3414. password: Optional[str] = None,
  3415. timeout: Optional[float] = None,
  3416. extra_headers: Optional[dict[str, str]] = None,
  3417. thin_packs: bool = True,
  3418. report_activity: Optional[Callable[[int, str], None]] = None,
  3419. quiet: bool = False,
  3420. include_tags: bool = False,
  3421. ) -> None:
  3422. """Initialize Urllib3HttpGitClient."""
  3423. self._username = username
  3424. self._password = password
  3425. self._timeout = timeout
  3426. self._extra_headers = extra_headers or {}
  3427. if pool_manager is None:
  3428. self.pool_manager = default_urllib3_manager(
  3429. config, base_url=base_url, timeout=timeout
  3430. )
  3431. else:
  3432. self.pool_manager = pool_manager
  3433. if username is not None:
  3434. # No escaping needed: ":" is not allowed in username:
  3435. # https://tools.ietf.org/html/rfc2617#section-2
  3436. credentials = f"{username}:{password or ''}"
  3437. import urllib3.util
  3438. basic_auth = urllib3.util.make_headers(basic_auth=credentials)
  3439. self.pool_manager.headers.update(basic_auth) # type: ignore
  3440. self.config = config
  3441. super().__init__(
  3442. base_url=base_url,
  3443. dumb=dumb if dumb is not None else False,
  3444. thin_packs=thin_packs,
  3445. report_activity=report_activity,
  3446. quiet=quiet,
  3447. include_tags=include_tags,
  3448. )
  3449. def _get_url(self, path: Union[str, bytes]) -> str:
  3450. if not isinstance(path, str):
  3451. # urllib3.util.url._encode_invalid_chars() converts the path back
  3452. # to bytes using the utf-8 codec.
  3453. path = path.decode("utf-8")
  3454. return urljoin(self._base_url, path).rstrip("/") + "/"
  3455. def _http_request(
  3456. self,
  3457. url: str,
  3458. headers: Optional[dict[str, str]] = None,
  3459. data: Optional[Union[bytes, Iterator[bytes]]] = None,
  3460. raise_for_status: bool = True,
  3461. ) -> tuple["HTTPResponse", Callable[[int], bytes]]:
  3462. import urllib3.exceptions
  3463. req_headers = dict(self.pool_manager.headers)
  3464. if headers is not None:
  3465. req_headers.update(headers)
  3466. req_headers["Pragma"] = "no-cache"
  3467. try:
  3468. request_kwargs = {
  3469. "headers": req_headers,
  3470. "preload_content": False,
  3471. }
  3472. if self._timeout is not None:
  3473. request_kwargs["timeout"] = self._timeout
  3474. if data is None:
  3475. resp = self.pool_manager.request("GET", url, **request_kwargs) # type: ignore[arg-type]
  3476. else:
  3477. request_kwargs["body"] = data
  3478. resp = self.pool_manager.request("POST", url, **request_kwargs) # type: ignore[arg-type]
  3479. except urllib3.exceptions.HTTPError as e:
  3480. raise GitProtocolError(str(e)) from e
  3481. if raise_for_status:
  3482. if resp.status == 404:
  3483. raise NotGitRepository
  3484. if resp.status == 401:
  3485. raise HTTPUnauthorized(resp.headers.get("WWW-Authenticate"), url)
  3486. if resp.status == 407:
  3487. raise HTTPProxyUnauthorized(resp.headers.get("Proxy-Authenticate"), url)
  3488. if resp.status != 200:
  3489. raise GitProtocolError(f"unexpected http resp {resp.status} for {url}")
  3490. resp.content_type = resp.headers.get("Content-Type") # type: ignore[attr-defined]
  3491. # Check if geturl() is available (urllib3 version >= 1.23)
  3492. try:
  3493. resp_url = resp.geturl()
  3494. except AttributeError:
  3495. # get_redirect_location() is available for urllib3 >= 1.1
  3496. resp.redirect_location = resp.get_redirect_location() # type: ignore[attr-defined]
  3497. else:
  3498. resp.redirect_location = resp_url if resp_url != url else "" # type: ignore[attr-defined]
  3499. return resp, _wrap_urllib3_exceptions(resp.read) # type: ignore[return-value]
# Alias for the HTTP client class; this module uses it for http/https URLs.
HttpGitClient = Urllib3HttpGitClient
  3501. def _win32_url_to_path(parsed: ParseResult) -> str:
  3502. """Convert a file: URL to a path.
  3503. https://datatracker.ietf.org/doc/html/rfc8089
  3504. """
  3505. assert parsed.scheme == "file"
  3506. _, netloc, path, _, _, _ = parsed
  3507. if netloc == "localhost" or not netloc:
  3508. netloc = ""
  3509. elif (
  3510. netloc
  3511. and len(netloc) >= 2
  3512. and netloc[0].isalpha()
  3513. and netloc[1:2] in (":", ":/")
  3514. ):
  3515. # file://C:/foo.bar/baz or file://C://foo.bar//baz
  3516. netloc = netloc[:2]
  3517. else:
  3518. raise NotImplementedError("Non-local file URLs are not supported")
  3519. from nturl2path import url2pathname
  3520. return url2pathname(netloc + path)
  3521. def get_transport_and_path_from_url(
  3522. url: str,
  3523. config: Optional[Config] = None,
  3524. operation: Optional[str] = None,
  3525. thin_packs: bool = True,
  3526. report_activity: Optional[Callable[[int, str], None]] = None,
  3527. quiet: bool = False,
  3528. include_tags: bool = False,
  3529. username: Optional[str] = None,
  3530. password: Optional[str] = None,
  3531. key_filename: Optional[str] = None,
  3532. ssh_command: Optional[str] = None,
  3533. ) -> tuple[GitClient, str]:
  3534. """Obtain a git client from a URL.
  3535. Args:
  3536. url: URL to open (a unicode string)
  3537. config: Optional config object
  3538. operation: Kind of operation that'll be performed; "pull" or "push"
  3539. thin_packs: Whether or not thin packs should be retrieved
  3540. report_activity: Optional callback for reporting transport activity
  3541. quiet: Whether to suppress output
  3542. include_tags: Send annotated tags when sending the objects they point to
  3543. username: Optional username for authentication
  3544. password: Optional password for authentication
  3545. key_filename: Optional SSH key file
  3546. ssh_command: Optional custom SSH command
  3547. Returns:
  3548. Tuple with client instance and relative path.
  3549. """
  3550. if config is not None:
  3551. url = apply_instead_of(config, url, push=(operation == "push"))
  3552. return _get_transport_and_path_from_url(
  3553. url,
  3554. config=config,
  3555. operation=operation,
  3556. thin_packs=thin_packs,
  3557. report_activity=report_activity,
  3558. quiet=quiet,
  3559. include_tags=include_tags,
  3560. username=username,
  3561. password=password,
  3562. key_filename=key_filename,
  3563. ssh_command=ssh_command,
  3564. )
  3565. def _get_transport_and_path_from_url(
  3566. url: str,
  3567. config: Optional[Config],
  3568. operation: Optional[str],
  3569. thin_packs: bool = True,
  3570. report_activity: Optional[Callable[[int, str], None]] = None,
  3571. quiet: bool = False,
  3572. include_tags: bool = False,
  3573. username: Optional[str] = None,
  3574. password: Optional[str] = None,
  3575. key_filename: Optional[str] = None,
  3576. ssh_command: Optional[str] = None,
  3577. ) -> tuple[GitClient, str]:
  3578. parsed = urlparse(url)
  3579. if parsed.scheme == "git":
  3580. return (
  3581. TCPGitClient.from_parsedurl(
  3582. parsed,
  3583. thin_packs=thin_packs,
  3584. report_activity=report_activity,
  3585. quiet=quiet,
  3586. include_tags=include_tags,
  3587. ),
  3588. parsed.path,
  3589. )
  3590. elif parsed.scheme in ("git+ssh", "ssh"):
  3591. return SSHGitClient.from_parsedurl(
  3592. parsed,
  3593. config=config,
  3594. username=username,
  3595. password=password,
  3596. thin_packs=thin_packs,
  3597. report_activity=report_activity,
  3598. quiet=quiet,
  3599. include_tags=include_tags,
  3600. ), parsed.path
  3601. elif parsed.scheme in ("http", "https"):
  3602. return (
  3603. HttpGitClient.from_parsedurl(
  3604. parsed,
  3605. config=config,
  3606. username=username,
  3607. password=password,
  3608. thin_packs=thin_packs,
  3609. report_activity=report_activity,
  3610. quiet=quiet,
  3611. include_tags=include_tags,
  3612. ),
  3613. parsed.path,
  3614. )
  3615. elif parsed.scheme == "file":
  3616. if sys.platform == "win32" or os.name == "nt":
  3617. return default_local_git_client_cls(
  3618. thin_packs=thin_packs,
  3619. report_activity=report_activity,
  3620. quiet=quiet,
  3621. include_tags=include_tags,
  3622. ), _win32_url_to_path(parsed)
  3623. return (
  3624. default_local_git_client_cls.from_parsedurl(
  3625. parsed,
  3626. thin_packs=thin_packs,
  3627. report_activity=report_activity,
  3628. quiet=quiet,
  3629. include_tags=include_tags,
  3630. ),
  3631. parsed.path,
  3632. )
  3633. raise ValueError(f"unknown scheme '{parsed.scheme}'")
  3634. def parse_rsync_url(location: str) -> tuple[Optional[str], str, str]:
  3635. """Parse a rsync-style URL."""
  3636. if ":" in location and "@" not in location:
  3637. # SSH with no user@, zero or one leading slash.
  3638. (host, path) = location.split(":", 1)
  3639. user = None
  3640. elif ":" in location:
  3641. # SSH with user@host:foo.
  3642. user_host, path = location.split(":", 1)
  3643. if "@" in user_host:
  3644. user, host = user_host.rsplit("@", 1)
  3645. else:
  3646. user = None
  3647. host = user_host
  3648. else:
  3649. raise ValueError("not a valid rsync-style URL")
  3650. return (user, host, path)
  3651. def get_transport_and_path(
  3652. location: str,
  3653. config: Optional[Config] = None,
  3654. operation: Optional[str] = None,
  3655. thin_packs: bool = True,
  3656. report_activity: Optional[Callable[[int, str], None]] = None,
  3657. quiet: bool = False,
  3658. include_tags: bool = False,
  3659. username: Optional[str] = None,
  3660. password: Optional[str] = None,
  3661. key_filename: Optional[str] = None,
  3662. ssh_command: Optional[str] = None,
  3663. ) -> tuple[GitClient, str]:
  3664. """Obtain a git client from a URL.
  3665. Args:
  3666. location: URL or path (a string)
  3667. config: Optional config object
  3668. operation: Kind of operation that'll be performed; "pull" or "push"
  3669. thin_packs: Whether or not thin packs should be retrieved
  3670. report_activity: Optional callback for reporting transport activity
  3671. quiet: Whether to suppress output
  3672. include_tags: Send annotated tags when sending the objects they point to
  3673. username: Optional username for authentication
  3674. password: Optional password for authentication
  3675. key_filename: Optional SSH key file
  3676. ssh_command: Optional custom SSH command
  3677. Returns:
  3678. Tuple with client instance and relative path.
  3679. """
  3680. if config is not None:
  3681. location = apply_instead_of(config, location, push=(operation == "push"))
  3682. # First, try to parse it as a URL
  3683. try:
  3684. return _get_transport_and_path_from_url(
  3685. location,
  3686. config=config,
  3687. operation=operation,
  3688. thin_packs=thin_packs,
  3689. report_activity=report_activity,
  3690. quiet=quiet,
  3691. include_tags=include_tags,
  3692. username=username,
  3693. password=password,
  3694. key_filename=key_filename,
  3695. ssh_command=ssh_command,
  3696. )
  3697. except ValueError:
  3698. pass
  3699. if sys.platform == "win32" and location[0].isalpha() and location[1:3] == ":\\":
  3700. # Windows local path - but check if it's a bundle file first
  3701. if BundleClient._is_bundle_file(location):
  3702. return BundleClient(
  3703. thin_packs=thin_packs,
  3704. report_activity=report_activity,
  3705. quiet=quiet,
  3706. include_tags=include_tags,
  3707. ), location
  3708. return default_local_git_client_cls(
  3709. thin_packs=thin_packs,
  3710. report_activity=report_activity,
  3711. quiet=quiet,
  3712. include_tags=include_tags,
  3713. ), location
  3714. try:
  3715. (rsync_username, hostname, path) = parse_rsync_url(location)
  3716. except ValueError:
  3717. # Check if it's a bundle file before assuming it's a local path
  3718. if BundleClient._is_bundle_file(location):
  3719. return BundleClient(
  3720. thin_packs=thin_packs,
  3721. report_activity=report_activity,
  3722. quiet=quiet,
  3723. include_tags=include_tags,
  3724. ), location
  3725. # Otherwise, assume it's a local path.
  3726. return default_local_git_client_cls(
  3727. thin_packs=thin_packs,
  3728. report_activity=report_activity,
  3729. quiet=quiet,
  3730. include_tags=include_tags,
  3731. ), location
  3732. else:
  3733. return SSHGitClient(
  3734. hostname,
  3735. username=rsync_username or username,
  3736. config=config,
  3737. password=password,
  3738. key_filename=key_filename,
  3739. ssh_command=ssh_command,
  3740. thin_packs=thin_packs,
  3741. report_activity=report_activity,
  3742. quiet=quiet,
  3743. include_tags=include_tags,
  3744. ), path
# Credential store files consulted by default, in this order: the
# ``.git-credentials`` file in the user's home directory, then the path
# returned by ``get_xdg_config_home_path("git", "credentials")``.
DEFAULT_GIT_CREDENTIALS_PATHS = [
    os.path.expanduser("~/.git-credentials"),
    get_xdg_config_home_path("git", "credentials"),
]
  3749. def get_credentials_from_store(
  3750. scheme: str,
  3751. hostname: str,
  3752. username: Optional[str] = None,
  3753. fnames: list[str] = DEFAULT_GIT_CREDENTIALS_PATHS,
  3754. ) -> Iterator[tuple[str, str]]:
  3755. """Read credentials from a Git credential store."""
  3756. for fname in fnames:
  3757. try:
  3758. with open(fname, "rb") as f:
  3759. for line in f:
  3760. line_str = line.strip().decode("utf-8")
  3761. parsed_line = urlparse(line_str)
  3762. if (
  3763. parsed_line.scheme == scheme
  3764. and parsed_line.hostname == hostname
  3765. and (username is None or parsed_line.username == username)
  3766. ):
  3767. if parsed_line.username and parsed_line.password:
  3768. yield parsed_line.username, parsed_line.password
  3769. except FileNotFoundError:
  3770. # If the file doesn't exist, try the next one.
  3771. continue