client.py 100 KB


  1. # client.py -- Implementation of the client side git protocols
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Client side support for the Git protocol.
  21. The Dulwich client supports the following capabilities:
  22. * thin-pack
  23. * multi_ack_detailed
  24. * multi_ack
  25. * side-band-64k
  26. * ofs-delta
  27. * quiet
  28. * report-status
  29. * delete-refs
  30. * shallow
  31. Known capabilities that are not supported:
  32. * no-progress
  33. * include-tag
  34. """
  35. import copy
  36. import logging
  37. import os
  38. import select
  39. import socket
  40. import subprocess
  41. import sys
  42. from contextlib import closing
  43. from io import BufferedReader, BytesIO
  44. from typing import (
  45. IO,
  46. TYPE_CHECKING,
  47. Callable,
  48. ClassVar,
  49. Dict,
  50. Iterable,
  51. Iterator,
  52. List,
  53. Optional,
  54. Set,
  55. Tuple,
  56. Union,
  57. )
  58. from urllib.parse import quote as urlquote
  59. from urllib.parse import unquote as urlunquote
  60. from urllib.parse import urljoin, urlparse, urlunparse, urlunsplit
  61. if TYPE_CHECKING:
  62. import urllib3
  63. import dulwich
  64. from .config import Config, apply_instead_of, get_xdg_config_home_path
  65. from .errors import GitProtocolError, NotGitRepository, SendPackError
  66. from .pack import (
  67. PACK_SPOOL_FILE_MAX_SIZE,
  68. PackChunkGenerator,
  69. UnpackedObject,
  70. write_pack_from_container,
  71. )
  72. from .protocol import (
  73. _RBUFSIZE,
  74. CAPABILITIES_REF,
  75. CAPABILITY_AGENT,
  76. CAPABILITY_DELETE_REFS,
  77. CAPABILITY_FETCH,
  78. CAPABILITY_FILTER,
  79. CAPABILITY_INCLUDE_TAG,
  80. CAPABILITY_MULTI_ACK,
  81. CAPABILITY_MULTI_ACK_DETAILED,
  82. CAPABILITY_OFS_DELTA,
  83. CAPABILITY_QUIET,
  84. CAPABILITY_REPORT_STATUS,
  85. CAPABILITY_SHALLOW,
  86. CAPABILITY_SIDE_BAND_64K,
  87. CAPABILITY_SYMREF,
  88. CAPABILITY_THIN_PACK,
  89. COMMAND_DEEPEN,
  90. COMMAND_DONE,
  91. COMMAND_HAVE,
  92. COMMAND_SHALLOW,
  93. COMMAND_UNSHALLOW,
  94. COMMAND_WANT,
  95. DEFAULT_GIT_PROTOCOL_VERSION_FETCH,
  96. DEFAULT_GIT_PROTOCOL_VERSION_SEND,
  97. GIT_PROTOCOL_VERSIONS,
  98. KNOWN_RECEIVE_CAPABILITIES,
  99. KNOWN_UPLOAD_CAPABILITIES,
  100. SIDE_BAND_CHANNEL_DATA,
  101. SIDE_BAND_CHANNEL_FATAL,
  102. SIDE_BAND_CHANNEL_PROGRESS,
  103. TCP_GIT_PORT,
  104. ZERO_SHA,
  105. HangupException,
  106. PktLineParser,
  107. Protocol,
  108. agent_string,
  109. capability_agent,
  110. extract_capabilities,
  111. extract_capability_names,
  112. parse_capability,
  113. pkt_line,
  114. )
  115. from .refs import (
  116. PEELED_TAG_SUFFIX,
  117. Ref,
  118. _import_remote_refs,
  119. _set_default_branch,
  120. _set_head,
  121. _set_origin_head,
  122. read_info_refs,
  123. split_peeled_refs,
  124. )
  125. from .repo import Repo
# Alias used in annotations for object ids; they are plain bytestrings.
ObjectID = bytes

# url2pathname is lazily imported
url2pathname = None

# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)
  130. class InvalidWants(Exception):
  131. """Invalid wants."""
  132. def __init__(self, wants) -> None:
  133. Exception.__init__(
  134. self, f"requested wants not in server provided refs: {wants!r}"
  135. )
  136. class HTTPUnauthorized(Exception):
  137. """Raised when authentication fails."""
  138. def __init__(self, www_authenticate, url) -> None:
  139. Exception.__init__(self, "No valid credentials provided")
  140. self.www_authenticate = www_authenticate
  141. self.url = url
  142. class HTTPProxyUnauthorized(Exception):
  143. """Raised when proxy authentication fails."""
  144. def __init__(self, proxy_authenticate, url) -> None:
  145. Exception.__init__(self, "No valid proxy credentials provided")
  146. self.proxy_authenticate = proxy_authenticate
  147. self.url = url
  148. def _fileno_can_read(fileno):
  149. """Check if a file descriptor is readable."""
  150. return len(select.select([fileno], [], [], 0)[0]) > 0
  151. def _win32_peek_avail(handle):
  152. """Wrapper around PeekNamedPipe to check how many bytes are available."""
  153. from ctypes import byref, windll, wintypes
  154. c_avail = wintypes.DWORD()
  155. c_message = wintypes.DWORD()
  156. success = windll.kernel32.PeekNamedPipe(
  157. handle, None, 0, None, byref(c_avail), byref(c_message)
  158. )
  159. if not success:
  160. raise OSError(wintypes.GetLastError())
  161. return c_avail.value
  162. COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]
  163. UPLOAD_CAPABILITIES = [
  164. CAPABILITY_THIN_PACK,
  165. CAPABILITY_MULTI_ACK,
  166. CAPABILITY_MULTI_ACK_DETAILED,
  167. CAPABILITY_SHALLOW,
  168. *COMMON_CAPABILITIES,
  169. ]
  170. RECEIVE_CAPABILITIES = [
  171. CAPABILITY_REPORT_STATUS,
  172. CAPABILITY_DELETE_REFS,
  173. *COMMON_CAPABILITIES,
  174. ]
  175. class ReportStatusParser:
  176. """Handle status as reported by servers with 'report-status' capability."""
  177. def __init__(self) -> None:
  178. self._done = False
  179. self._pack_status = None
  180. self._ref_statuses: List[bytes] = []
  181. def check(self):
  182. """Check if there were any errors and, if so, raise exceptions.
  183. Raises:
  184. SendPackError: Raised when the server could not unpack
  185. Returns:
  186. iterator over refs
  187. """
  188. if self._pack_status not in (b"unpack ok", None):
  189. raise SendPackError(self._pack_status)
  190. for status in self._ref_statuses:
  191. try:
  192. status, rest = status.split(b" ", 1)
  193. except ValueError:
  194. # malformed response, move on to the next one
  195. continue
  196. if status == b"ng":
  197. ref, error = rest.split(b" ", 1)
  198. yield ref, error.decode("utf-8")
  199. elif status == b"ok":
  200. yield rest, None
  201. else:
  202. raise GitProtocolError(f"invalid ref status {status!r}")
  203. def handle_packet(self, pkt):
  204. """Handle a packet.
  205. Raises:
  206. GitProtocolError: Raised when packets are received after a flush
  207. packet.
  208. """
  209. if self._done:
  210. raise GitProtocolError("received more data after status report")
  211. if pkt is None:
  212. self._done = True
  213. return
  214. if self._pack_status is None:
  215. self._pack_status = pkt.strip()
  216. else:
  217. ref_status = pkt.strip()
  218. self._ref_statuses.append(ref_status)
  219. def negotiate_protocol_version(proto):
  220. pkt = proto.read_pkt_line()
  221. if pkt == b"version 2\n":
  222. return 2
  223. proto.unread_pkt_line(pkt)
  224. return 0
  225. def read_server_capabilities(pkt_seq):
  226. server_capabilities = []
  227. for pkt in pkt_seq:
  228. server_capabilities.append(pkt)
  229. return set(server_capabilities)
  230. def read_pkt_refs_v2(
  231. pkt_seq,
  232. ) -> Tuple[Dict[bytes, bytes], Dict[bytes, bytes], Dict[bytes, bytes]]:
  233. refs = {}
  234. symrefs = {}
  235. peeled = {}
  236. # Receive refs from server
  237. for pkt in pkt_seq:
  238. parts = pkt.rstrip(b"\n").split(b" ")
  239. sha = parts[0]
  240. if sha == b"unborn":
  241. sha = None
  242. ref = parts[1]
  243. for part in parts[2:]:
  244. if part.startswith(b"peeled:"):
  245. peeled[ref] = part[7:]
  246. elif part.startswith(b"symref-target:"):
  247. symrefs[ref] = part[14:]
  248. else:
  249. logging.warning("unknown part in pkt-ref: %s", part)
  250. refs[ref] = sha
  251. return refs, symrefs, peeled
  252. def read_pkt_refs_v1(pkt_seq) -> Tuple[Dict[bytes, bytes], Set[bytes]]:
  253. server_capabilities = None
  254. refs = {}
  255. # Receive refs from server
  256. for pkt in pkt_seq:
  257. (sha, ref) = pkt.rstrip(b"\n").split(None, 1)
  258. if sha == b"ERR":
  259. raise GitProtocolError(ref.decode("utf-8", "replace"))
  260. if server_capabilities is None:
  261. (ref, server_capabilities) = extract_capabilities(ref)
  262. refs[ref] = sha
  263. if len(refs) == 0:
  264. return {}, set()
  265. if refs == {CAPABILITIES_REF: ZERO_SHA}:
  266. refs = {}
  267. assert server_capabilities is not None
  268. return refs, set(server_capabilities)
  269. class FetchPackResult:
  270. """Result of a fetch-pack operation.
  271. Attributes:
  272. refs: Dictionary with all remote refs
  273. symrefs: Dictionary with remote symrefs
  274. agent: User agent string
  275. """
  276. _FORWARDED_ATTRS: ClassVar[Set[str]] = {
  277. "clear",
  278. "copy",
  279. "fromkeys",
  280. "get",
  281. "items",
  282. "keys",
  283. "pop",
  284. "popitem",
  285. "setdefault",
  286. "update",
  287. "values",
  288. "viewitems",
  289. "viewkeys",
  290. "viewvalues",
  291. }
  292. def __init__(
  293. self, refs, symrefs, agent, new_shallow=None, new_unshallow=None
  294. ) -> None:
  295. self.refs = refs
  296. self.symrefs = symrefs
  297. self.agent = agent
  298. self.new_shallow = new_shallow
  299. self.new_unshallow = new_unshallow
  300. def _warn_deprecated(self):
  301. import warnings
  302. warnings.warn(
  303. "Use FetchPackResult.refs instead.",
  304. DeprecationWarning,
  305. stacklevel=3,
  306. )
  307. def __eq__(self, other):
  308. if isinstance(other, dict):
  309. self._warn_deprecated()
  310. return self.refs == other
  311. return (
  312. self.refs == other.refs
  313. and self.symrefs == other.symrefs
  314. and self.agent == other.agent
  315. )
  316. def __contains__(self, name) -> bool:
  317. self._warn_deprecated()
  318. return name in self.refs
  319. def __getitem__(self, name):
  320. self._warn_deprecated()
  321. return self.refs[name]
  322. def __len__(self) -> int:
  323. self._warn_deprecated()
  324. return len(self.refs)
  325. def __iter__(self):
  326. self._warn_deprecated()
  327. return iter(self.refs)
  328. def __getattribute__(self, name):
  329. if name in type(self)._FORWARDED_ATTRS:
  330. self._warn_deprecated()
  331. return getattr(self.refs, name)
  332. return super().__getattribute__(name)
  333. def __repr__(self) -> str:
  334. return f"{self.__class__.__name__}({self.refs!r}, {self.symrefs!r}, {self.agent!r})"
  335. class SendPackResult:
  336. """Result of a upload-pack operation.
  337. Attributes:
  338. refs: Dictionary with all remote refs
  339. agent: User agent string
  340. ref_status: Optional dictionary mapping ref name to error message (if it
  341. failed to update), or None if it was updated successfully
  342. """
  343. _FORWARDED_ATTRS: ClassVar[Set[str]] = {
  344. "clear",
  345. "copy",
  346. "fromkeys",
  347. "get",
  348. "items",
  349. "keys",
  350. "pop",
  351. "popitem",
  352. "setdefault",
  353. "update",
  354. "values",
  355. "viewitems",
  356. "viewkeys",
  357. "viewvalues",
  358. }
  359. def __init__(self, refs, agent=None, ref_status=None) -> None:
  360. self.refs = refs
  361. self.agent = agent
  362. self.ref_status = ref_status
  363. def _warn_deprecated(self):
  364. import warnings
  365. warnings.warn(
  366. "Use SendPackResult.refs instead.",
  367. DeprecationWarning,
  368. stacklevel=3,
  369. )
  370. def __eq__(self, other):
  371. if isinstance(other, dict):
  372. self._warn_deprecated()
  373. return self.refs == other
  374. return self.refs == other.refs and self.agent == other.agent
  375. def __contains__(self, name) -> bool:
  376. self._warn_deprecated()
  377. return name in self.refs
  378. def __getitem__(self, name):
  379. self._warn_deprecated()
  380. return self.refs[name]
  381. def __len__(self) -> int:
  382. self._warn_deprecated()
  383. return len(self.refs)
  384. def __iter__(self):
  385. self._warn_deprecated()
  386. return iter(self.refs)
  387. def __getattribute__(self, name):
  388. if name in type(self)._FORWARDED_ATTRS:
  389. self._warn_deprecated()
  390. return getattr(self.refs, name)
  391. return super().__getattribute__(name)
  392. def __repr__(self) -> str:
  393. return f"{self.__class__.__name__}({self.refs!r}, {self.agent!r})"
  394. def _read_shallow_updates(pkt_seq):
  395. new_shallow = set()
  396. new_unshallow = set()
  397. for pkt in pkt_seq:
  398. if pkt == b"shallow-info\n": # Git-protocol v2
  399. continue
  400. try:
  401. cmd, sha = pkt.split(b" ", 1)
  402. except ValueError:
  403. raise GitProtocolError(f"unknown command {pkt}")
  404. if cmd == COMMAND_SHALLOW:
  405. new_shallow.add(sha.strip())
  406. elif cmd == COMMAND_UNSHALLOW:
  407. new_unshallow.add(sha.strip())
  408. else:
  409. raise GitProtocolError(f"unknown command {pkt}")
  410. return (new_shallow, new_unshallow)
  411. class _v1ReceivePackHeader:
  412. def __init__(self, capabilities, old_refs, new_refs) -> None:
  413. self.want: List[bytes] = []
  414. self.have: List[bytes] = []
  415. self._it = self._handle_receive_pack_head(capabilities, old_refs, new_refs)
  416. self.sent_capabilities = False
  417. def __iter__(self):
  418. return self._it
  419. def _handle_receive_pack_head(self, capabilities, old_refs, new_refs):
  420. """Handle the head of a 'git-receive-pack' request.
  421. Args:
  422. capabilities: List of negotiated capabilities
  423. old_refs: Old refs, as received from the server
  424. new_refs: Refs to change
  425. Returns:
  426. (have, want) tuple
  427. """
  428. self.have = [x for x in old_refs.values() if not x == ZERO_SHA]
  429. for refname in new_refs:
  430. if not isinstance(refname, bytes):
  431. raise TypeError(f"refname is not a bytestring: {refname!r}")
  432. old_sha1 = old_refs.get(refname, ZERO_SHA)
  433. if not isinstance(old_sha1, bytes):
  434. raise TypeError(
  435. f"old sha1 for {refname!r} is not a bytestring: {old_sha1!r}"
  436. )
  437. new_sha1 = new_refs.get(refname, ZERO_SHA)
  438. if not isinstance(new_sha1, bytes):
  439. raise TypeError(
  440. f"old sha1 for {refname!r} is not a bytestring {new_sha1!r}"
  441. )
  442. if old_sha1 != new_sha1:
  443. logger.debug(
  444. "Sending updated ref %r: %r -> %r", refname, old_sha1, new_sha1
  445. )
  446. if self.sent_capabilities:
  447. yield old_sha1 + b" " + new_sha1 + b" " + refname
  448. else:
  449. yield (
  450. old_sha1
  451. + b" "
  452. + new_sha1
  453. + b" "
  454. + refname
  455. + b"\0"
  456. + b" ".join(sorted(capabilities))
  457. )
  458. self.sent_capabilities = True
  459. if new_sha1 not in self.have and new_sha1 != ZERO_SHA:
  460. self.want.append(new_sha1)
  461. yield None
  462. def _read_side_band64k_data(pkt_seq: Iterable[bytes]) -> Iterator[Tuple[int, bytes]]:
  463. """Read per-channel data.
  464. This requires the side-band-64k capability.
  465. Args:
  466. pkt_seq: Sequence of packets to read
  467. """
  468. for pkt in pkt_seq:
  469. channel = ord(pkt[:1])
  470. yield channel, pkt[1:]
  471. def find_capability(capabilities, key, value):
  472. for capability in capabilities:
  473. k, v = parse_capability(capability)
  474. if k != key:
  475. continue
  476. if value and value not in v.split(b" "):
  477. continue
  478. return capability
def _handle_upload_pack_head(
    proto,
    capabilities,
    graph_walker,
    wants,
    can_read,
    depth,
    protocol_version,
):
    """Handle the head of a 'git-upload-pack' request.

    Writes the want lines, optional shallow/deepen lines and the have lines
    to ``proto``, acknowledging commits on ``graph_walker`` as the server
    answers, and finishes with the done command.

    Args:
      proto: Protocol object to read from
      capabilities: List of negotiated capabilities
      graph_walker: GraphWalker instance to call .ack() on
      wants: List of commits to fetch
      can_read: function that returns a boolean that indicates
        whether there is extra graph data to read on proto
      depth: Depth for request
      protocol_version: Negotiated Git protocol version.
    Returns:
      ``(new_shallow, new_unshallow)``: the sets read from the server when a
      non-zero depth was requested and ``can_read`` is given; ``(None, None)``
      when a depth was requested but ``can_read`` is None; otherwise one
      empty set bound to both names.
    Raises:
      GitProtocolError: if shallow support is needed but not among
        ``capabilities``
    """
    assert isinstance(wants, list) and isinstance(wants[0], bytes)
    wantcmd = COMMAND_WANT + b" " + wants[0]
    # NOTE(review): this falls back to the *send* default although this is
    # the upload-pack (fetch) path — confirm that is intended.
    if protocol_version is None:
        protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
    # Outside protocol v2 the capabilities ride along on the first want line.
    if protocol_version != 2:
        wantcmd += b" " + b" ".join(sorted(capabilities))
    wantcmd += b"\n"
    proto.write_pkt_line(wantcmd)
    for want in wants[1:]:
        proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
    if depth not in (0, None) or getattr(graph_walker, "shallow", None):
        if protocol_version == 2:
            if not find_capability(capabilities, CAPABILITY_FETCH, CAPABILITY_SHALLOW):
                raise GitProtocolError(
                    "server does not support shallow capability required for " "depth"
                )
        elif CAPABILITY_SHALLOW not in capabilities:
            raise GitProtocolError(
                "server does not support shallow capability required for " "depth"
            )
        # NOTE(review): the condition above tolerates a walker without a
        # ``shallow`` attribute, but this loop accesses it unconditionally.
        for sha in graph_walker.shallow:
            proto.write_pkt_line(COMMAND_SHALLOW + b" " + sha + b"\n")
        if depth is not None:
            proto.write_pkt_line(
                COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
            )
        # NOTE(review): nesting reconstructed from a listing that lost its
        # indentation; this v2 flush is assumed to belong to the shallow
        # block — confirm against the upstream file.
        if protocol_version == 2:
            proto.write_pkt_line(None)
    if protocol_version != 2:
        proto.write_pkt_line(None)

    if depth not in (0, None):
        if can_read is not None:
            (new_shallow, new_unshallow) = _read_shallow_updates(proto.read_pkt_seq())
        else:
            new_shallow = new_unshallow = None
    else:
        new_shallow = new_unshallow = set()

    # Offer haves until the walker runs dry or the server says it is ready.
    have = next(graph_walker)
    while have:
        proto.write_pkt_line(COMMAND_HAVE + b" " + have + b"\n")
        if can_read is not None and can_read():
            pkt = proto.read_pkt_line()
            parts = pkt.rstrip(b"\n").split(b" ")
            if parts[0] == b"ACK":
                graph_walker.ack(parts[1])
                if parts[2] in (b"continue", b"common"):
                    pass
                elif parts[2] == b"ready":
                    break
                else:
                    raise AssertionError(
                        f"{parts[2]} not in ('continue', 'ready', 'common)"
                    )
        have = next(graph_walker)
    proto.write_pkt_line(COMMAND_DONE + b"\n")
    if protocol_version == 2:
        proto.write_pkt_line(None)
    return (new_shallow, new_unshallow)
  557. def _handle_upload_pack_tail(
  558. proto,
  559. capabilities: Set[bytes],
  560. graph_walker,
  561. pack_data: Callable[[bytes], None],
  562. progress: Optional[Callable[[bytes], None]] = None,
  563. rbufsize=_RBUFSIZE,
  564. protocol_version=0,
  565. ):
  566. """Handle the tail of a 'git-upload-pack' request.
  567. Args:
  568. proto: Protocol object to read from
  569. capabilities: List of negotiated capabilities
  570. graph_walker: GraphWalker instance to call .ack() on
  571. pack_data: Function to call with pack data
  572. progress: Optional progress reporting function
  573. rbufsize: Read buffer size
  574. protocol_version: Neogiated Git protocol version.
  575. """
  576. pkt = proto.read_pkt_line()
  577. while pkt:
  578. parts = pkt.rstrip(b"\n").split(b" ")
  579. if protocol_version == 2 and parts[0] != "packfile":
  580. break
  581. else:
  582. if parts[0] == b"ACK":
  583. graph_walker.ack(parts[1])
  584. if parts[0] == b"NAK":
  585. graph_walker.nak()
  586. if len(parts) < 3 or parts[2] not in (
  587. b"ready",
  588. b"continue",
  589. b"common",
  590. ):
  591. break
  592. pkt = proto.read_pkt_line()
  593. if CAPABILITY_SIDE_BAND_64K in capabilities or protocol_version == 2:
  594. if progress is None:
  595. # Just ignore progress data
  596. def progress(x):
  597. pass
  598. for chan, data in _read_side_band64k_data(proto.read_pkt_seq()):
  599. if chan == SIDE_BAND_CHANNEL_DATA:
  600. pack_data(data)
  601. elif chan == SIDE_BAND_CHANNEL_PROGRESS:
  602. progress(data)
  603. else:
  604. raise AssertionError("Invalid sideband channel %d" % chan)
  605. else:
  606. while True:
  607. data = proto.read(rbufsize)
  608. if data == b"":
  609. break
  610. pack_data(data)
  611. def _extract_symrefs_and_agent(capabilities):
  612. """Extract symrefs and agent from capabilities.
  613. Args:
  614. capabilities: List of capabilities
  615. Returns:
  616. (symrefs, agent) tuple
  617. """
  618. symrefs = {}
  619. agent = None
  620. for capability in capabilities:
  621. k, v = parse_capability(capability)
  622. if k == CAPABILITY_SYMREF:
  623. (src, dst) = v.split(b":", 1)
  624. symrefs[src] = dst
  625. if k == CAPABILITY_AGENT:
  626. agent = v
  627. return (symrefs, agent)
  628. # TODO(durin42): this doesn't correctly degrade if the server doesn't
  629. # support some capabilities. This should work properly with servers
  630. # that don't support multi_ack.
  631. class GitClient:
  632. """Git smart server client."""
  633. def __init__(
  634. self,
  635. thin_packs=True,
  636. report_activity=None,
  637. quiet=False,
  638. include_tags=False,
  639. **kwargs,
  640. ) -> None:
  641. """Create a new GitClient instance.
  642. Args:
  643. thin_packs: Whether or not thin packs should be retrieved
  644. report_activity: Optional callback for reporting transport
  645. activity.
  646. include_tags: send annotated tags when sending the objects they point
  647. to
  648. """
  649. self._report_activity = report_activity
  650. self._report_status_parser: Optional[ReportStatusParser] = None
  651. self._fetch_capabilities = set(UPLOAD_CAPABILITIES)
  652. self._fetch_capabilities.add(capability_agent())
  653. self._send_capabilities = set(RECEIVE_CAPABILITIES)
  654. self._send_capabilities.add(capability_agent())
  655. if quiet:
  656. self._send_capabilities.add(CAPABILITY_QUIET)
  657. if not thin_packs:
  658. self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
  659. if include_tags:
  660. self._fetch_capabilities.add(CAPABILITY_INCLUDE_TAG)
  661. self.protocol_version = 0 # will be overridden later
    def get_url(self, path):
        """Retrieves full url to given path.

        This base implementation always raises NotImplementedError.

        Args:
          path: Repository path (as string)

        Returns:
          Url to path (as string)
        """
        raise NotImplementedError(self.get_url)
    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Create an instance of this client from a urlparse.parsed object.

        This base implementation always raises NotImplementedError.

        Args:
          parsedurl: Result of urlparse()
          **kwargs: Not used by this base implementation

        Returns:
          A `GitClient` object
        """
        raise NotImplementedError(cls.from_parsedurl)
    def send_pack(
        self,
        path,
        update_refs,
        generate_pack_data: Callable[
            [Set[bytes], Set[bytes], bool], Tuple[int, Iterator[UnpackedObject]]
        ],
        progress=None,
    ):
        """Upload a pack to a remote repository.

        This base implementation always raises NotImplementedError.

        Args:
          path: Repository path (as bytestring)
          update_refs: Function to determine changes to remote refs. Receive
            dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
          generate_pack_data: Function that can return a tuple
            with number of objects and list of pack data to include
          progress: Optional progress function

        Returns:
          SendPackResult object

        Raises:
          SendPackError: if server rejects the pack data
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(self.send_pack)
    def clone(
        self,
        path,
        target_path,
        mkdir: bool = True,
        bare: bool = False,
        origin: Optional[str] = "origin",
        checkout=None,
        branch=None,
        progress=None,
        depth=None,
        ref_prefix=[],
        filter_spec=None,
        protocol_version: Optional[int] = None,
    ) -> Repo:
        """Clone a repository.

        Initialises a repository at ``target_path``, fetches into it with
        :meth:`fetch`, imports the remote refs and sets HEAD.

        Args:
          path: Remote path; passed to :meth:`fetch` and, encoded, recorded as
            the remote's url
          target_path: Directory of the new repository
          mkdir: Create ``target_path`` with ``os.mkdir`` first; it is removed
            again if the clone fails
          bare: Initialise a bare repository; incompatible with ``checkout``
          origin: Name of the remote to configure. With None no remote is
            configured, no remote refs are imported and HEAD is set detached
          checkout: Whether to call ``reset_index()`` on the new repository;
            defaults to True for non-bare clones
          branch: Passed to ``_set_default_branch``
          progress: Forwarded to :meth:`fetch`
          depth: Forwarded to :meth:`fetch`
          ref_prefix: Forwarded to :meth:`fetch`
          filter_spec: Forwarded to :meth:`fetch`
          protocol_version: Forwarded to :meth:`fetch`

        Returns:
          The newly created Repo

        Raises:
          ValueError: if both ``bare`` and ``checkout`` are requested
        """
        if mkdir:
            os.mkdir(target_path)

        try:
            target = None
            if not bare:
                target = Repo.init(target_path)
                if checkout is None:
                    checkout = True
            else:
                if checkout:
                    raise ValueError("checkout and bare are incompatible")
                target = Repo.init_bare(target_path)

            # TODO(jelmer): abstract method for get_location?
            if isinstance(self, (LocalGitClient, SubprocessGitClient)):
                encoded_path = path.encode("utf-8")
            else:
                encoded_path = self.get_url(path).encode("utf-8")

            assert target is not None
            if origin is not None:
                # Record the remote's url and fetch refspec in the config.
                target_config = target.get_config()
                target_config.set(
                    (b"remote", origin.encode("utf-8")), b"url", encoded_path
                )
                target_config.set(
                    (b"remote", origin.encode("utf-8")),
                    b"fetch",
                    b"+refs/heads/*:refs/remotes/" + origin.encode("utf-8") + b"/*",
                )
                target_config.write_to_path()

            ref_message = b"clone: from " + encoded_path
            result = self.fetch(
                path,
                target,
                progress=progress,
                depth=depth,
                ref_prefix=ref_prefix,
                filter_spec=filter_spec,
                protocol_version=protocol_version,
            )
            if origin is not None:
                _import_remote_refs(
                    target.refs, origin, result.refs, message=ref_message
                )

            origin_head = result.symrefs.get(b"HEAD")
            origin_sha = result.refs.get(b"HEAD")
            if origin is None or (origin_sha and not origin_head):
                # set detached HEAD
                target.refs[b"HEAD"] = origin_sha
                head = origin_sha
            else:
                _set_origin_head(target.refs, origin.encode("utf-8"), origin_head)
                head_ref = _set_default_branch(
                    target.refs,
                    origin.encode("utf-8"),
                    origin_head,
                    branch,
                    ref_message,
                )

                # Update target head
                if head_ref:
                    head = _set_head(target.refs, head_ref, ref_message)
                else:
                    head = None

            if checkout and head is not None:
                target.reset_index()
        except BaseException:
            # Undo everything created so far before re-raising.
            if target is not None:
                target.close()
            if mkdir:
                import shutil

                shutil.rmtree(target_path)
            raise
        return target
  792. def fetch(
  793. self,
  794. path: str,
  795. target: Repo,
  796. determine_wants: Optional[
  797. Callable[[Dict[bytes, bytes], Optional[int]], List[bytes]]
  798. ] = None,
  799. progress: Optional[Callable[[bytes], None]] = None,
  800. depth: Optional[int] = None,
  801. ref_prefix: Optional[List[bytes]] = [],
  802. filter_spec: Optional[bytes] = None,
  803. protocol_version: Optional[int] = None,
  804. ) -> FetchPackResult:
  805. """Fetch into a target repository.
  806. Args:
  807. path: Path to fetch from (as bytestring)
  808. target: Target repository to fetch into
  809. determine_wants: Optional function to determine what refs to fetch.
  810. Receives dictionary of name->sha, should return
  811. list of shas to fetch. Defaults to all shas.
  812. progress: Optional progress function
  813. depth: Depth to fetch at
  814. ref_prefix: Prefix of desired references, as a list of bytestrings.
  815. The server will limit the list of references sent to this prefix,
  816. provided this feature is supported and sufficient server-side
  817. resources are available to match all references against the prefix.
  818. Clients must be prepared to filter out any non-requested references
  819. themselves. This feature is an entirely optional optimization.
  820. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  821. Only used if the server supports the Git protocol-v2 'filter'
  822. feature, and ignored otherwise.
  823. protocol_version: Desired Git protocol version. By default the highest
  824. mutually supported protocol version will be used.
  825. Returns:
  826. Dictionary with all remote refs (not just those fetched)
  827. """
  828. if determine_wants is None:
  829. determine_wants = target.object_store.determine_wants_all
  830. if CAPABILITY_THIN_PACK in self._fetch_capabilities:
  831. from tempfile import SpooledTemporaryFile
  832. f: IO[bytes] = SpooledTemporaryFile(
  833. max_size=PACK_SPOOL_FILE_MAX_SIZE,
  834. prefix="incoming-",
  835. dir=getattr(target.object_store, "path", None),
  836. )
  837. def commit():
  838. if f.tell():
  839. f.seek(0)
  840. target.object_store.add_thin_pack(f.read, None, progress=progress)
  841. f.close()
  842. def abort():
  843. f.close()
  844. else:
  845. f, commit, abort = target.object_store.add_pack()
  846. try:
  847. result = self.fetch_pack(
  848. path,
  849. determine_wants,
  850. target.get_graph_walker(),
  851. f.write,
  852. progress=progress,
  853. depth=depth,
  854. ref_prefix=ref_prefix,
  855. filter_spec=filter_spec,
  856. protocol_version=protocol_version,
  857. )
  858. except BaseException:
  859. abort()
  860. raise
  861. else:
  862. commit()
  863. target.update_shallow(result.new_shallow, result.new_unshallow)
  864. return result
    def fetch_pack(
        self,
        path: str,
        determine_wants,
        graph_walker,
        pack_data,
        *,
        progress: Optional[Callable[[bytes], None]] = None,
        depth: Optional[int] = None,
        ref_prefix=[],
        filter_spec=None,
        protocol_version: Optional[int] = None,
    ):
        """Retrieve a pack from a git smart server.

        This base implementation always raises NotImplementedError.

        Args:
          path: Remote path to fetch from
          determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
          graph_walker: Object with next() and ack().
          pack_data: Callback called for each bit of data in the pack
          progress: Callback for progress reports (strings)
          depth: Shallow fetch depth
          ref_prefix: Prefix of desired references, as a list of bytestrings.
            The server will limit the list of references sent to this prefix,
            provided this feature is supported and sufficient server-side
            resources are available to match all references against the prefix.
            Clients must be prepared to filter out any non-requested references
            themselves. This feature is an entirely optional optimization.
          filter_spec: A git-rev-list-style object filter spec, as bytestring.
            Only used if the server supports the Git protocol-v2 'filter'
            feature, and ignored otherwise.
          protocol_version: Desired Git protocol version. By default the highest
            mutually supported protocol version will be used.

        Returns:
          FetchPackResult object

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(self.fetch_pack)
    def get_refs(self, path):
        """Retrieve the current refs from a git smart server.

        This base implementation always raises NotImplementedError.

        Args:
          path: Path to the repo to fetch from. (as bytestring)
        """
        raise NotImplementedError(self.get_refs)
  909. @staticmethod
  910. def _should_send_pack(new_refs):
  911. # The packfile MUST NOT be sent if the only command used is delete.
  912. return any(sha != ZERO_SHA for sha in new_refs.values())
  913. def _negotiate_receive_pack_capabilities(self, server_capabilities):
  914. negotiated_capabilities = self._send_capabilities & server_capabilities
  915. (agent, _symrefs) = _extract_symrefs_and_agent(server_capabilities)
  916. (extract_capability_names(server_capabilities) - KNOWN_RECEIVE_CAPABILITIES)
  917. # TODO(jelmer): warn about unknown capabilities
  918. return negotiated_capabilities, agent
  919. def _handle_receive_pack_tail(
  920. self,
  921. proto: Protocol,
  922. capabilities: Set[bytes],
  923. progress: Optional[Callable[[bytes], None]] = None,
  924. ) -> Optional[Dict[bytes, Optional[str]]]:
  925. """Handle the tail of a 'git-receive-pack' request.
  926. Args:
  927. proto: Protocol object to read from
  928. capabilities: List of negotiated capabilities
  929. progress: Optional progress reporting function
  930. Returns:
  931. dict mapping ref name to:
  932. error message if the ref failed to update
  933. None if it was updated successfully
  934. """
  935. if CAPABILITY_SIDE_BAND_64K in capabilities or self.protocol_version == 2:
  936. if progress is None:
  937. def progress(x):
  938. pass
  939. if CAPABILITY_REPORT_STATUS in capabilities:
  940. assert self._report_status_parser is not None
  941. pktline_parser = PktLineParser(self._report_status_parser.handle_packet)
  942. for chan, data in _read_side_band64k_data(proto.read_pkt_seq()):
  943. if chan == SIDE_BAND_CHANNEL_DATA:
  944. if CAPABILITY_REPORT_STATUS in capabilities:
  945. pktline_parser.parse(data)
  946. elif chan == SIDE_BAND_CHANNEL_PROGRESS:
  947. progress(data)
  948. else:
  949. raise AssertionError("Invalid sideband channel %d" % chan)
  950. else:
  951. if CAPABILITY_REPORT_STATUS in capabilities:
  952. assert self._report_status_parser
  953. for pkt in proto.read_pkt_seq():
  954. self._report_status_parser.handle_packet(pkt)
  955. if self._report_status_parser is not None:
  956. return dict(self._report_status_parser.check())
  957. return None
  958. def _negotiate_upload_pack_capabilities(self, server_capabilities):
  959. (extract_capability_names(server_capabilities) - KNOWN_UPLOAD_CAPABILITIES)
  960. # TODO(jelmer): warn about unknown capabilities
  961. fetch_capa = None
  962. for capability in server_capabilities:
  963. k, v = parse_capability(capability)
  964. if self.protocol_version == 2 and k == CAPABILITY_FETCH:
  965. fetch_capa = CAPABILITY_FETCH
  966. fetch_features = []
  967. v = v.strip().split(b" ")
  968. if b"shallow" in v:
  969. fetch_features.append(CAPABILITY_SHALLOW)
  970. if b"filter" in v:
  971. fetch_features.append(CAPABILITY_FILTER)
  972. for i in range(len(fetch_features)):
  973. if i == 0:
  974. fetch_capa += b"="
  975. else:
  976. fetch_capa += b" "
  977. fetch_capa += fetch_features[i]
  978. (symrefs, agent) = _extract_symrefs_and_agent(server_capabilities)
  979. negotiated_capabilities = self._fetch_capabilities & server_capabilities
  980. if fetch_capa:
  981. negotiated_capabilities.add(fetch_capa)
  982. return (negotiated_capabilities, symrefs, agent)
  983. def archive(
  984. self,
  985. path,
  986. committish,
  987. write_data,
  988. progress=None,
  989. write_error=None,
  990. format=None,
  991. subdirs=None,
  992. prefix=None,
  993. ):
  994. """Retrieve an archive of the specified tree."""
  995. raise NotImplementedError(self.archive)
  996. @staticmethod
  997. def _warn_filter_objects():
  998. import warnings
  999. warnings.warn(
  1000. "object filtering not recognized by server, ignoring",
  1001. UserWarning,
  1002. )
  1003. def check_wants(wants, refs):
  1004. """Check that a set of wants is valid.
  1005. Args:
  1006. wants: Set of object SHAs to fetch
  1007. refs: Refs dictionary to check against
  1008. """
  1009. missing = set(wants) - {
  1010. v for (k, v) in refs.items() if not k.endswith(PEELED_TAG_SUFFIX)
  1011. }
  1012. if missing:
  1013. raise InvalidWants(missing)
  1014. def _remote_error_from_stderr(stderr):
  1015. if stderr is None:
  1016. return HangupException()
  1017. lines = [line.rstrip(b"\n") for line in stderr.readlines()]
  1018. for line in lines:
  1019. if line.startswith(b"ERROR: "):
  1020. return GitProtocolError(line[len(b"ERROR: ") :].decode("utf-8", "replace"))
  1021. return HangupException(lines)
  1022. class TraditionalGitClient(GitClient):
  1023. """Traditional Git client."""
  1024. DEFAULT_ENCODING = "utf-8"
  1025. def __init__(self, path_encoding=DEFAULT_ENCODING, **kwargs) -> None:
  1026. self._remote_path_encoding = path_encoding
  1027. super().__init__(**kwargs)
  1028. async def _connect(self, cmd, path, protocol_version=None):
  1029. """Create a connection to the server.
  1030. This method is abstract - concrete implementations should
  1031. implement their own variant which connects to the server and
  1032. returns an initialized Protocol object with the service ready
  1033. for use and a can_read function which may be used to see if
  1034. reads would block.
  1035. Args:
  1036. cmd: The git service name to which we should connect.
  1037. path: The path we should pass to the service. (as bytestirng)
  1038. protocol_version: Desired Git protocol version. By default the highest
  1039. mutually supported protocol version will be used.
  1040. """
  1041. raise NotImplementedError
  1042. def send_pack(self, path, update_refs, generate_pack_data, progress=None):
  1043. """Upload a pack to a remote repository.
  1044. Args:
  1045. path: Repository path (as bytestring)
  1046. update_refs: Function to determine changes to remote refs.
  1047. Receive dict with existing remote refs, returns dict with
  1048. changed refs (name -> sha, where sha=ZERO_SHA for deletions)
  1049. generate_pack_data: Function that can return a tuple with
  1050. number of objects and pack data to upload.
  1051. progress: Optional callback called with progress updates
  1052. Returns:
  1053. SendPackResult
  1054. Raises:
  1055. SendPackError: if server rejects the pack data
  1056. """
  1057. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
  1058. proto, unused_can_read, stderr = self._connect(b"receive-pack", path)
  1059. with proto:
  1060. try:
  1061. old_refs, server_capabilities = read_pkt_refs_v1(proto.read_pkt_seq())
  1062. except HangupException as exc:
  1063. raise _remote_error_from_stderr(stderr) from exc
  1064. (
  1065. negotiated_capabilities,
  1066. agent,
  1067. ) = self._negotiate_receive_pack_capabilities(server_capabilities)
  1068. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  1069. self._report_status_parser = ReportStatusParser()
  1070. report_status_parser = self._report_status_parser
  1071. try:
  1072. new_refs = orig_new_refs = update_refs(dict(old_refs))
  1073. except BaseException:
  1074. proto.write_pkt_line(None)
  1075. raise
  1076. if set(new_refs.items()).issubset(set(old_refs.items())):
  1077. proto.write_pkt_line(None)
  1078. return SendPackResult(new_refs, agent=agent, ref_status={})
  1079. if CAPABILITY_DELETE_REFS not in server_capabilities:
  1080. # Server does not support deletions. Fail later.
  1081. new_refs = dict(orig_new_refs)
  1082. for ref, sha in orig_new_refs.items():
  1083. if sha == ZERO_SHA:
  1084. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  1085. report_status_parser._ref_statuses.append(
  1086. b"ng " + ref + b" remote does not support deleting refs"
  1087. )
  1088. report_status_parser._ref_status_ok = False
  1089. del new_refs[ref]
  1090. if new_refs is None:
  1091. proto.write_pkt_line(None)
  1092. return SendPackResult(old_refs, agent=agent, ref_status={})
  1093. if len(new_refs) == 0 and orig_new_refs:
  1094. # NOOP - Original new refs filtered out by policy
  1095. proto.write_pkt_line(None)
  1096. if report_status_parser is not None:
  1097. ref_status = dict(report_status_parser.check())
  1098. else:
  1099. ref_status = None
  1100. return SendPackResult(old_refs, agent=agent, ref_status=ref_status)
  1101. header_handler = _v1ReceivePackHeader(
  1102. negotiated_capabilities, old_refs, new_refs
  1103. )
  1104. for pkt in header_handler:
  1105. proto.write_pkt_line(pkt)
  1106. pack_data_count, pack_data = generate_pack_data(
  1107. header_handler.have,
  1108. header_handler.want,
  1109. ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities),
  1110. progress=progress,
  1111. )
  1112. if self._should_send_pack(new_refs):
  1113. for chunk in PackChunkGenerator(
  1114. pack_data_count, pack_data, progress=progress
  1115. ):
  1116. proto.write(chunk)
  1117. ref_status = self._handle_receive_pack_tail(
  1118. proto, negotiated_capabilities, progress
  1119. )
  1120. return SendPackResult(new_refs, agent=agent, ref_status=ref_status)
  1121. def fetch_pack(
  1122. self,
  1123. path,
  1124. determine_wants,
  1125. graph_walker,
  1126. pack_data,
  1127. progress=None,
  1128. depth=None,
  1129. ref_prefix=[],
  1130. filter_spec=None,
  1131. protocol_version: Optional[int] = None,
  1132. ):
  1133. """Retrieve a pack from a git smart server.
  1134. Args:
  1135. path: Remote path to fetch from
  1136. determine_wants: Function determine what refs
  1137. to fetch. Receives dictionary of name->sha, should return
  1138. list of shas to fetch.
  1139. graph_walker: Object with next() and ack().
  1140. pack_data: Callback called for each bit of data in the pack
  1141. progress: Callback for progress reports (strings)
  1142. depth: Shallow fetch depth
  1143. ref_prefix: Prefix of desired references, as a list of bytestrings.
  1144. The server will limit the list of references sent to this prefix,
  1145. provided this feature is supported and sufficient server-side
  1146. resources are available to match all references against the prefix.
  1147. Clients must be prepared to filter out any non-requested references
  1148. themselves. This feature is an entirely optional optimization.
  1149. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  1150. Only used if the server supports the Git protocol-v2 'filter'
  1151. feature, and ignored otherwise.
  1152. protocol_version: Desired Git protocol version. By default the highest
  1153. mutually supported protocol version will be used.
  1154. Returns:
  1155. FetchPackResult object
  1156. """
  1157. if (
  1158. protocol_version is not None
  1159. and protocol_version not in GIT_PROTOCOL_VERSIONS
  1160. ):
  1161. raise ValueError("unknown Git protocol version %d" % protocol_version)
  1162. proto, can_read, stderr = self._connect(b"upload-pack", path, protocol_version)
  1163. server_protocol_version = negotiate_protocol_version(proto)
  1164. if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
  1165. raise ValueError(
  1166. "unknown Git protocol version %d used by server"
  1167. % server_protocol_version
  1168. )
  1169. if protocol_version and server_protocol_version > protocol_version:
  1170. raise ValueError(
  1171. "bad Git protocol version %d used by server" % server_protocol_version
  1172. )
  1173. self.protocol_version = server_protocol_version
  1174. with proto:
  1175. try:
  1176. if self.protocol_version == 2:
  1177. server_capabilities = read_server_capabilities(proto.read_pkt_seq())
  1178. refs = None
  1179. else:
  1180. refs, server_capabilities = read_pkt_refs_v1(proto.read_pkt_seq())
  1181. except HangupException as exc:
  1182. raise _remote_error_from_stderr(stderr) from exc
  1183. (
  1184. negotiated_capabilities,
  1185. symrefs,
  1186. agent,
  1187. ) = self._negotiate_upload_pack_capabilities(server_capabilities)
  1188. if self.protocol_version == 2:
  1189. proto.write_pkt_line(b"command=ls-refs\n")
  1190. proto.write(b"0001") # delim-pkt
  1191. proto.write_pkt_line(b"symrefs")
  1192. for prefix in ref_prefix:
  1193. proto.write_pkt_line(b"ref-prefix " + prefix)
  1194. proto.write_pkt_line(None)
  1195. refs, symrefs, _peeled = read_pkt_refs_v2(proto.read_pkt_seq())
  1196. if refs is None:
  1197. proto.write_pkt_line(None)
  1198. return FetchPackResult(refs, symrefs, agent)
  1199. try:
  1200. if depth is not None:
  1201. wants = determine_wants(refs, depth=depth)
  1202. else:
  1203. wants = determine_wants(refs)
  1204. except BaseException:
  1205. proto.write_pkt_line(None)
  1206. raise
  1207. if wants is not None:
  1208. wants = [cid for cid in wants if cid != ZERO_SHA]
  1209. if not wants:
  1210. proto.write_pkt_line(None)
  1211. return FetchPackResult(refs, symrefs, agent)
  1212. if self.protocol_version == 2:
  1213. proto.write_pkt_line(b"command=fetch\n")
  1214. proto.write(b"0001") # delim-pkt
  1215. if (
  1216. find_capability(
  1217. negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
  1218. )
  1219. and filter_spec
  1220. ):
  1221. proto.write(pkt_line(b"filter %s\n" % filter_spec))
  1222. elif filter_spec:
  1223. self._warn_filter_objects()
  1224. elif filter_spec:
  1225. self._warn_filter_objects()
  1226. (new_shallow, new_unshallow) = _handle_upload_pack_head(
  1227. proto,
  1228. negotiated_capabilities,
  1229. graph_walker,
  1230. wants,
  1231. can_read,
  1232. depth=depth,
  1233. protocol_version=self.protocol_version,
  1234. )
  1235. _handle_upload_pack_tail(
  1236. proto,
  1237. negotiated_capabilities,
  1238. graph_walker,
  1239. pack_data,
  1240. progress,
  1241. protocol_version=self.protocol_version,
  1242. )
  1243. return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
  1244. def get_refs(self, path, protocol_version=None):
  1245. """Retrieve the current refs from a git smart server."""
  1246. # stock `git ls-remote` uses upload-pack
  1247. if (
  1248. protocol_version is not None
  1249. and protocol_version not in GIT_PROTOCOL_VERSIONS
  1250. ):
  1251. raise ValueError("unknown Git protocol version %d" % protocol_version)
  1252. proto, _, stderr = self._connect(b"upload-pack", path, protocol_version)
  1253. server_protocol_version = negotiate_protocol_version(proto)
  1254. if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
  1255. raise ValueError(
  1256. "unknown Git protocol version %d used by server"
  1257. % server_protocol_version
  1258. )
  1259. if protocol_version and server_protocol_version > protocol_version:
  1260. raise ValueError(
  1261. "bad Git protocol version %d used by server" % server_protocol_version
  1262. )
  1263. self.protocol_version = server_protocol_version
  1264. if self.protocol_version == 2:
  1265. server_capabilities = read_server_capabilities(proto.read_pkt_seq())
  1266. proto.write_pkt_line(b"command=ls-refs\n")
  1267. proto.write(b"0001") # delim-pkt
  1268. proto.write_pkt_line(b"symrefs")
  1269. proto.write_pkt_line(None)
  1270. with proto:
  1271. try:
  1272. refs, _symrefs, _peeled = read_pkt_refs_v2(proto.read_pkt_seq())
  1273. except HangupException as exc:
  1274. raise _remote_error_from_stderr(stderr) from exc
  1275. proto.write_pkt_line(None)
  1276. return refs
  1277. else:
  1278. with proto:
  1279. try:
  1280. refs, server_capabilities = read_pkt_refs_v1(proto.read_pkt_seq())
  1281. except HangupException as exc:
  1282. raise _remote_error_from_stderr(stderr) from exc
  1283. proto.write_pkt_line(None)
  1284. (_symrefs, _agent) = _extract_symrefs_and_agent(server_capabilities)
  1285. return refs
  1286. def archive(
  1287. self,
  1288. path,
  1289. committish,
  1290. write_data,
  1291. progress=None,
  1292. write_error=None,
  1293. format=None,
  1294. subdirs=None,
  1295. prefix=None,
  1296. ):
  1297. proto, can_read, stderr = self._connect(b"upload-archive", path)
  1298. with proto:
  1299. if format is not None:
  1300. proto.write_pkt_line(b"argument --format=" + format)
  1301. proto.write_pkt_line(b"argument " + committish)
  1302. if subdirs is not None:
  1303. for subdir in subdirs:
  1304. proto.write_pkt_line(b"argument " + subdir)
  1305. if prefix is not None:
  1306. proto.write_pkt_line(b"argument --prefix=" + prefix)
  1307. proto.write_pkt_line(None)
  1308. try:
  1309. pkt = proto.read_pkt_line()
  1310. except HangupException as exc:
  1311. raise _remote_error_from_stderr(stderr) from exc
  1312. if pkt == b"NACK\n" or pkt == b"NACK":
  1313. return
  1314. elif pkt == b"ACK\n" or pkt == b"ACK":
  1315. pass
  1316. elif pkt.startswith(b"ERR "):
  1317. raise GitProtocolError(pkt[4:].rstrip(b"\n").decode("utf-8", "replace"))
  1318. else:
  1319. raise AssertionError(f"invalid response {pkt!r}")
  1320. ret = proto.read_pkt_line()
  1321. if ret is not None:
  1322. raise AssertionError("expected pkt tail")
  1323. for chan, data in _read_side_band64k_data(proto.read_pkt_seq()):
  1324. if chan == SIDE_BAND_CHANNEL_DATA:
  1325. write_data(data)
  1326. elif chan == SIDE_BAND_CHANNEL_PROGRESS:
  1327. progress(data)
  1328. elif chan == SIDE_BAND_CHANNEL_FATAL:
  1329. write_error(data)
  1330. else:
  1331. raise AssertionError("Invalid sideband channel %d" % chan)
  1332. class TCPGitClient(TraditionalGitClient):
  1333. """A Git Client that works over TCP directly (i.e. git://)."""
  1334. def __init__(self, host, port=None, **kwargs) -> None:
  1335. if port is None:
  1336. port = TCP_GIT_PORT
  1337. self._host = host
  1338. self._port = port
  1339. super().__init__(**kwargs)
  1340. @classmethod
  1341. def from_parsedurl(cls, parsedurl, **kwargs):
  1342. return cls(parsedurl.hostname, port=parsedurl.port, **kwargs)
  1343. def get_url(self, path):
  1344. netloc = self._host
  1345. if self._port is not None and self._port != TCP_GIT_PORT:
  1346. netloc += ":%d" % self._port
  1347. return urlunsplit(("git", netloc, path, "", ""))
  1348. def _connect(self, cmd, path, protocol_version=None):
  1349. if not isinstance(cmd, bytes):
  1350. raise TypeError(cmd)
  1351. if not isinstance(path, bytes):
  1352. path = path.encode(self._remote_path_encoding)
  1353. sockaddrs = socket.getaddrinfo(
  1354. self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM
  1355. )
  1356. s = None
  1357. err = OSError(f"no address found for {self._host}")
  1358. for family, socktype, proto, canonname, sockaddr in sockaddrs:
  1359. s = socket.socket(family, socktype, proto)
  1360. s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  1361. try:
  1362. s.connect(sockaddr)
  1363. break
  1364. except OSError as e:
  1365. err = e
  1366. if s is not None:
  1367. s.close()
  1368. s = None
  1369. if s is None:
  1370. raise err
  1371. # -1 means system default buffering
  1372. rfile = s.makefile("rb", -1)
  1373. # 0 means unbuffered
  1374. wfile = s.makefile("wb", 0)
  1375. def close():
  1376. rfile.close()
  1377. wfile.close()
  1378. s.close()
  1379. proto = Protocol(
  1380. rfile.read,
  1381. wfile.write,
  1382. close,
  1383. report_activity=self._report_activity,
  1384. )
  1385. if path.startswith(b"/~"):
  1386. path = path[1:]
  1387. if cmd == b"upload-pack":
  1388. if protocol_version is None:
  1389. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  1390. else:
  1391. self.protocol_version = protocol_version
  1392. else:
  1393. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
  1394. if cmd == b"upload-pack" and self.protocol_version == 2:
  1395. # Git protocol version advertisement is hidden behind two NUL bytes
  1396. # for compatibility with older Git server implementations, which
  1397. # would crash if something other than a "host=" header was found
  1398. # after the first NUL byte.
  1399. version_str = b"\0\0version=%d\0" % self.protocol_version
  1400. else:
  1401. version_str = b""
  1402. # TODO(jelmer): Alternative to ascii?
  1403. proto.send_cmd(
  1404. b"git-" + cmd, path, b"host=" + self._host.encode("ascii") + version_str
  1405. )
  1406. return proto, lambda: _fileno_can_read(s), None
  1407. class SubprocessWrapper:
  1408. """A socket-like object that talks to a subprocess via pipes."""
  1409. def __init__(self, proc) -> None:
  1410. self.proc = proc
  1411. self.read = BufferedReader(proc.stdout).read
  1412. self.write = proc.stdin.write
  1413. @property
  1414. def stderr(self):
  1415. return self.proc.stderr
  1416. def can_read(self):
  1417. if sys.platform == "win32":
  1418. from msvcrt import get_osfhandle
  1419. handle = get_osfhandle(self.proc.stdout.fileno())
  1420. return _win32_peek_avail(handle) != 0
  1421. else:
  1422. return _fileno_can_read(self.proc.stdout.fileno())
  1423. def close(self):
  1424. self.proc.stdin.close()
  1425. self.proc.stdout.close()
  1426. if self.proc.stderr:
  1427. self.proc.stderr.close()
  1428. self.proc.wait()
  1429. def find_git_command() -> List[str]:
  1430. """Find command to run for system Git (usually C Git)."""
  1431. if sys.platform == "win32": # support .exe, .bat and .cmd
  1432. try: # to avoid overhead
  1433. import pywintypes
  1434. import win32api
  1435. except ImportError: # run through cmd.exe with some overhead
  1436. return ["cmd", "/c", "git"]
  1437. else:
  1438. try:
  1439. status, git = win32api.FindExecutable("git")
  1440. return [git]
  1441. except pywintypes.error:
  1442. return ["cmd", "/c", "git"]
  1443. else:
  1444. return ["git"]
  1445. class SubprocessGitClient(TraditionalGitClient):
  1446. """Git client that talks to a server using a subprocess."""
  1447. @classmethod
  1448. def from_parsedurl(cls, parsedurl, **kwargs):
  1449. return cls(**kwargs)
  1450. git_command = None
  1451. def _connect(self, service, path, protocol_version=None):
  1452. if not isinstance(service, bytes):
  1453. raise TypeError(service)
  1454. if isinstance(path, bytes):
  1455. path = path.decode(self._remote_path_encoding)
  1456. if self.git_command is None:
  1457. git_command = find_git_command()
  1458. argv = [*git_command, service.decode("ascii"), path]
  1459. p = subprocess.Popen(
  1460. argv,
  1461. bufsize=0,
  1462. stdin=subprocess.PIPE,
  1463. stdout=subprocess.PIPE,
  1464. stderr=subprocess.PIPE,
  1465. )
  1466. pw = SubprocessWrapper(p)
  1467. return (
  1468. Protocol(
  1469. pw.read,
  1470. pw.write,
  1471. pw.close,
  1472. report_activity=self._report_activity,
  1473. ),
  1474. pw.can_read,
  1475. p.stderr,
  1476. )
  1477. class LocalGitClient(GitClient):
  1478. """Git Client that just uses a local on-disk repository."""
  1479. def __init__(
  1480. self,
  1481. thin_packs: bool = True,
  1482. report_activity=None,
  1483. config: Optional[Config] = None,
  1484. ) -> None:
  1485. """Create a new LocalGitClient instance.
  1486. Args:
  1487. thin_packs: Whether or not thin packs should be retrieved
  1488. report_activity: Optional callback for reporting transport
  1489. activity.
  1490. """
  1491. self._report_activity = report_activity
  1492. # Ignore the thin_packs argument
  1493. def get_url(self, path):
  1494. return urlunsplit(("file", "", path, "", ""))
  1495. @classmethod
  1496. def from_parsedurl(cls, parsedurl, **kwargs):
  1497. return cls(**kwargs)
  1498. @classmethod
  1499. def _open_repo(cls, path):
  1500. if not isinstance(path, str):
  1501. path = os.fsdecode(path)
  1502. return closing(Repo(path))
  1503. def send_pack(self, path, update_refs, generate_pack_data, progress=None):
  1504. """Upload a pack to a local on-disk repository.
  1505. Args:
  1506. path: Repository path (as bytestring)
  1507. update_refs: Function to determine changes to remote refs.
  1508. Receive dict with existing remote refs, returns dict with
  1509. changed refs (name -> sha, where sha=ZERO_SHA for deletions)
  1510. with number of items and pack data to upload.
  1511. progress: Optional progress function
  1512. Returns:
  1513. SendPackResult
  1514. Raises:
  1515. SendPackError: if server rejects the pack data
  1516. """
  1517. if not progress:
  1518. def progress(x):
  1519. pass
  1520. with self._open_repo(path) as target:
  1521. old_refs = target.get_refs()
  1522. new_refs = update_refs(dict(old_refs))
  1523. have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
  1524. want = []
  1525. for refname, new_sha1 in new_refs.items():
  1526. if (
  1527. new_sha1 not in have
  1528. and new_sha1 not in want
  1529. and new_sha1 != ZERO_SHA
  1530. ):
  1531. want.append(new_sha1)
  1532. if not want and set(new_refs.items()).issubset(set(old_refs.items())):
  1533. return SendPackResult(new_refs, ref_status={})
  1534. target.object_store.add_pack_data(
  1535. *generate_pack_data(have, want, ofs_delta=True)
  1536. )
  1537. ref_status = {}
  1538. for refname, new_sha1 in new_refs.items():
  1539. old_sha1 = old_refs.get(refname, ZERO_SHA)
  1540. if new_sha1 != ZERO_SHA:
  1541. if not target.refs.set_if_equals(refname, old_sha1, new_sha1):
  1542. msg = f"unable to set {refname} to {new_sha1}"
  1543. progress(msg)
  1544. ref_status[refname] = msg
  1545. else:
  1546. if not target.refs.remove_if_equals(refname, old_sha1):
  1547. progress(f"unable to remove {refname}")
  1548. ref_status[refname] = "unable to remove"
  1549. return SendPackResult(new_refs, ref_status=ref_status)
  1550. def fetch(
  1551. self,
  1552. path,
  1553. target,
  1554. determine_wants=None,
  1555. progress=None,
  1556. depth=None,
  1557. ref_prefix=[],
  1558. filter_spec=None,
  1559. **kwargs,
  1560. ):
  1561. """Fetch into a target repository.
  1562. Args:
  1563. path: Path to fetch from (as bytestring)
  1564. target: Target repository to fetch into
  1565. determine_wants: Optional function determine what refs
  1566. to fetch. Receives dictionary of name->sha, should return
  1567. list of shas to fetch. Defaults to all shas.
  1568. progress: Optional progress function
  1569. depth: Shallow fetch depth
  1570. ref_prefix: Prefix of desired references, as a list of bytestrings.
  1571. The server will limit the list of references sent to this prefix,
  1572. provided this feature is supported and sufficient server-side
  1573. resources are available to match all references against the prefix.
  1574. Clients must be prepared to filter out any non-requested references
  1575. themselves. This feature is an entirely optional optimization.
  1576. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  1577. Only used if the server supports the Git protocol-v2 'filter'
  1578. feature, and ignored otherwise.
  1579. Returns:
  1580. FetchPackResult object
  1581. """
  1582. with self._open_repo(path) as r:
  1583. refs = r.fetch(
  1584. target,
  1585. determine_wants=determine_wants,
  1586. progress=progress,
  1587. depth=depth,
  1588. )
  1589. return FetchPackResult(refs, r.refs.get_symrefs(), agent_string())
  1590. def fetch_pack(
  1591. self,
  1592. path,
  1593. determine_wants,
  1594. graph_walker,
  1595. pack_data,
  1596. progress=None,
  1597. depth=None,
  1598. ref_prefix: Optional[List[bytes]] = [],
  1599. filter_spec: Optional[bytes] = None,
  1600. protocol_version: Optional[int] = None,
  1601. ) -> FetchPackResult:
  1602. """Retrieve a pack from a local on-disk repository.
  1603. Args:
  1604. path: Remote path to fetch from
  1605. determine_wants: Function determine what refs
  1606. to fetch. Receives dictionary of name->sha, should return
  1607. list of shas to fetch.
  1608. graph_walker: Object with next() and ack().
  1609. pack_data: Callback called for each bit of data in the pack
  1610. progress: Callback for progress reports (strings)
  1611. depth: Shallow fetch depth
  1612. ref_prefix: Prefix of desired references, as a list of bytestrings.
  1613. The server will limit the list of references sent to this prefix,
  1614. provided this feature is supported and sufficient server-side
  1615. resources are available to match all references against the prefix.
  1616. Clients must be prepared to filter out any non-requested references
  1617. themselves. This feature is an entirely optional optimization.
  1618. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  1619. Only used if the server supports the Git protocol-v2 'filter'
  1620. feature, and ignored otherwise.
  1621. Returns:
  1622. FetchPackResult object
  1623. """
  1624. with self._open_repo(path) as r:
  1625. missing_objects = r.find_missing_objects(
  1626. determine_wants, graph_walker, progress=progress, depth=depth
  1627. )
  1628. other_haves = missing_objects.get_remote_has()
  1629. object_ids = list(missing_objects)
  1630. symrefs = r.refs.get_symrefs()
  1631. agent = agent_string()
  1632. # Did the process short-circuit (e.g. in a stateless RPC call)?
  1633. # Note that the client still expects a 0-object pack in most cases.
  1634. if object_ids is None:
  1635. return FetchPackResult(None, symrefs, agent)
  1636. write_pack_from_container(
  1637. pack_data, r.object_store, object_ids, other_haves=other_haves
  1638. )
  1639. return FetchPackResult(r.get_refs(), symrefs, agent)
  1640. def get_refs(self, path):
  1641. """Retrieve the current refs from a local on-disk repository."""
  1642. with self._open_repo(path) as target:
  1643. return target.get_refs()
# What Git client class to use for local access (LocalGitClient by default)
default_local_git_client_cls = LocalGitClient
  1646. class SSHVendor:
  1647. """A client side SSH implementation."""
  1648. def run_command(
  1649. self,
  1650. host,
  1651. command,
  1652. username=None,
  1653. port=None,
  1654. password=None,
  1655. key_filename=None,
  1656. ssh_command=None,
  1657. protocol_version: Optional[int] = None,
  1658. ):
  1659. """Connect to an SSH server.
  1660. Run a command remotely and return a file-like object for interaction
  1661. with the remote command.
  1662. Args:
  1663. host: Host name
  1664. command: Command to run (as argv array)
  1665. username: Optional ame of user to log in as
  1666. port: Optional SSH port to use
  1667. password: Optional ssh password for login or private key
  1668. key_filename: Optional path to private keyfile
  1669. ssh_command: Optional SSH command
  1670. protocol_version: Desired Git protocol version. By default the highest
  1671. mutually supported protocol version will be used.
  1672. """
  1673. raise NotImplementedError(self.run_command)
  1674. class StrangeHostname(Exception):
  1675. """Refusing to connect to strange SSH hostname."""
  1676. def __init__(self, hostname) -> None:
  1677. super().__init__(hostname)
  1678. class SubprocessSSHVendor(SSHVendor):
  1679. """SSH vendor that shells out to the local 'ssh' command."""
  1680. def run_command(
  1681. self,
  1682. host,
  1683. command,
  1684. username=None,
  1685. port=None,
  1686. password=None,
  1687. key_filename=None,
  1688. ssh_command=None,
  1689. protocol_version=None,
  1690. ):
  1691. if password is not None:
  1692. raise NotImplementedError(
  1693. "Setting password not supported by SubprocessSSHVendor."
  1694. )
  1695. if ssh_command:
  1696. import shlex
  1697. args = [*shlex.split(ssh_command, posix=sys.platform != "win32"), "-x"]
  1698. else:
  1699. args = ["ssh", "-x"]
  1700. if port:
  1701. args.extend(["-p", str(port)])
  1702. if key_filename:
  1703. args.extend(["-i", str(key_filename)])
  1704. if protocol_version is None:
  1705. protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  1706. if protocol_version > 0:
  1707. args.extend(["-o", f"SetEnv GIT_PROTOCOL=version={protocol_version}"])
  1708. if username:
  1709. host = f"{username}@{host}"
  1710. if host.startswith("-"):
  1711. raise StrangeHostname(hostname=host)
  1712. args.append(host)
  1713. proc = subprocess.Popen(
  1714. [*args, command],
  1715. bufsize=0,
  1716. stdin=subprocess.PIPE,
  1717. stdout=subprocess.PIPE,
  1718. stderr=subprocess.PIPE,
  1719. )
  1720. return SubprocessWrapper(proc)
  1721. class PLinkSSHVendor(SSHVendor):
  1722. """SSH vendor that shells out to the local 'plink' command."""
  1723. def run_command(
  1724. self,
  1725. host,
  1726. command,
  1727. username=None,
  1728. port=None,
  1729. password=None,
  1730. key_filename=None,
  1731. ssh_command=None,
  1732. protocol_version: Optional[int] = None,
  1733. ):
  1734. if ssh_command:
  1735. import shlex
  1736. args = [*shlex.split(ssh_command, posix=sys.platform != "win32"), "-ssh"]
  1737. elif sys.platform == "win32":
  1738. args = ["plink.exe", "-ssh"]
  1739. else:
  1740. args = ["plink", "-ssh"]
  1741. if password is not None:
  1742. import warnings
  1743. warnings.warn(
  1744. "Invoking PLink with a password exposes the password in the "
  1745. "process list."
  1746. )
  1747. args.extend(["-pw", str(password)])
  1748. if port:
  1749. args.extend(["-P", str(port)])
  1750. if key_filename:
  1751. args.extend(["-i", str(key_filename)])
  1752. if username:
  1753. host = f"{username}@{host}"
  1754. if host.startswith("-"):
  1755. raise StrangeHostname(hostname=host)
  1756. args.append(host)
  1757. # plink.exe does not provide a way to pass environment variables
  1758. # via the command line. The best we can do is set an environment
  1759. # variable and hope that plink will pass it to the server. If this
  1760. # does not work then the server should behave as if we had requested
  1761. # protocol version 0.
  1762. env = copy.deepcopy(os.environ)
  1763. if protocol_version is None:
  1764. protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  1765. if protocol_version > 0:
  1766. env["GIT_PROTOCOL"] = f"version={protocol_version}"
  1767. proc = subprocess.Popen(
  1768. [*args, command],
  1769. bufsize=0,
  1770. stdin=subprocess.PIPE,
  1771. stdout=subprocess.PIPE,
  1772. stderr=subprocess.PIPE,
  1773. env=env,
  1774. )
  1775. return SubprocessWrapper(proc)
  1776. def ParamikoSSHVendor(**kwargs):
  1777. import warnings
  1778. warnings.warn(
  1779. "ParamikoSSHVendor has been moved to dulwich.contrib.paramiko_vendor.",
  1780. DeprecationWarning,
  1781. )
  1782. from .contrib.paramiko_vendor import ParamikoSSHVendor
  1783. return ParamikoSSHVendor(**kwargs)
# Factory used by SSHGitClient when no explicit ``vendor`` is passed; it is
# called without arguments. Can be overridden by users.
get_ssh_vendor = SubprocessSSHVendor
  1786. class SSHGitClient(TraditionalGitClient):
  1787. def __init__(
  1788. self,
  1789. host,
  1790. port=None,
  1791. username=None,
  1792. vendor=None,
  1793. config=None,
  1794. password=None,
  1795. key_filename=None,
  1796. ssh_command=None,
  1797. **kwargs,
  1798. ) -> None:
  1799. self.host = host
  1800. self.port = port
  1801. self.username = username
  1802. self.password = password
  1803. self.key_filename = key_filename
  1804. self.ssh_command = ssh_command or os.environ.get(
  1805. "GIT_SSH_COMMAND", os.environ.get("GIT_SSH")
  1806. )
  1807. super().__init__(**kwargs)
  1808. self.alternative_paths: Dict[bytes, bytes] = {}
  1809. if vendor is not None:
  1810. self.ssh_vendor = vendor
  1811. else:
  1812. self.ssh_vendor = get_ssh_vendor()
  1813. def get_url(self, path):
  1814. netloc = self.host
  1815. if self.port is not None:
  1816. netloc += ":%d" % self.port
  1817. if self.username is not None:
  1818. netloc = urlquote(self.username, "@/:") + "@" + netloc
  1819. return urlunsplit(("ssh", netloc, path, "", ""))
  1820. @classmethod
  1821. def from_parsedurl(cls, parsedurl, **kwargs):
  1822. return cls(
  1823. host=parsedurl.hostname,
  1824. port=parsedurl.port,
  1825. username=parsedurl.username,
  1826. **kwargs,
  1827. )
  1828. def _get_cmd_path(self, cmd):
  1829. cmd = self.alternative_paths.get(cmd, b"git-" + cmd)
  1830. assert isinstance(cmd, bytes)
  1831. return cmd
  1832. def _connect(self, cmd, path, protocol_version=None):
  1833. if not isinstance(cmd, bytes):
  1834. raise TypeError(cmd)
  1835. if isinstance(path, bytes):
  1836. path = path.decode(self._remote_path_encoding)
  1837. if path.startswith("/~"):
  1838. path = path[1:]
  1839. argv = (
  1840. self._get_cmd_path(cmd).decode(self._remote_path_encoding)
  1841. + " '"
  1842. + path
  1843. + "'"
  1844. )
  1845. kwargs = {}
  1846. if self.password is not None:
  1847. kwargs["password"] = self.password
  1848. if self.key_filename is not None:
  1849. kwargs["key_filename"] = self.key_filename
  1850. # GIT_SSH_COMMAND takes precedence over GIT_SSH
  1851. if self.ssh_command is not None:
  1852. kwargs["ssh_command"] = self.ssh_command
  1853. con = self.ssh_vendor.run_command(
  1854. self.host,
  1855. argv,
  1856. port=self.port,
  1857. username=self.username,
  1858. protocol_version=protocol_version,
  1859. **kwargs,
  1860. )
  1861. return (
  1862. Protocol(
  1863. con.read,
  1864. con.write,
  1865. con.close,
  1866. report_activity=self._report_activity,
  1867. ),
  1868. con.can_read,
  1869. getattr(con, "stderr", None),
  1870. )
  1871. def default_user_agent_string():
  1872. # Start user agent with "git/", because GitHub requires this. :-( See
  1873. # https://github.com/jelmer/dulwich/issues/562 for details.
  1874. return "git/dulwich/{}".format(".".join([str(x) for x in dulwich.__version__]))
  1875. def default_urllib3_manager(
  1876. config,
  1877. pool_manager_cls=None,
  1878. proxy_manager_cls=None,
  1879. base_url=None,
  1880. **override_kwargs,
  1881. ) -> Union["urllib3.ProxyManager", "urllib3.PoolManager"]:
  1882. """Return urllib3 connection pool manager.
  1883. Honour detected proxy configurations.
  1884. Args:
  1885. config: `dulwich.config.ConfigDict` instance with Git configuration.
  1886. override_kwargs: Additional arguments for `urllib3.ProxyManager`
  1887. Returns:
  1888. Either pool_manager_cls (defaults to `urllib3.ProxyManager`) instance for
  1889. proxy configurations, proxy_manager_cls
  1890. (defaults to `urllib3.PoolManager`) instance otherwise
  1891. """
  1892. proxy_server = user_agent = None
  1893. ca_certs = ssl_verify = None
  1894. if proxy_server is None:
  1895. for proxyname in ("https_proxy", "http_proxy", "all_proxy"):
  1896. proxy_server = os.environ.get(proxyname)
  1897. if proxy_server:
  1898. break
  1899. if proxy_server:
  1900. if check_for_proxy_bypass(base_url):
  1901. proxy_server = None
  1902. if config is not None:
  1903. if proxy_server is None:
  1904. try:
  1905. proxy_server = config.get(b"http", b"proxy")
  1906. except KeyError:
  1907. pass
  1908. try:
  1909. user_agent = config.get(b"http", b"useragent")
  1910. except KeyError:
  1911. pass
  1912. # TODO(jelmer): Support per-host settings
  1913. try:
  1914. ssl_verify = config.get_boolean(b"http", b"sslVerify")
  1915. except KeyError:
  1916. ssl_verify = True
  1917. try:
  1918. ca_certs = config.get(b"http", b"sslCAInfo")
  1919. except KeyError:
  1920. ca_certs = None
  1921. if user_agent is None:
  1922. user_agent = default_user_agent_string()
  1923. headers = {"User-agent": user_agent}
  1924. kwargs = {
  1925. "ca_certs": ca_certs,
  1926. }
  1927. if ssl_verify is True:
  1928. kwargs["cert_reqs"] = "CERT_REQUIRED"
  1929. elif ssl_verify is False:
  1930. kwargs["cert_reqs"] = "CERT_NONE"
  1931. else:
  1932. # Default to SSL verification
  1933. kwargs["cert_reqs"] = "CERT_REQUIRED"
  1934. kwargs.update(override_kwargs)
  1935. import urllib3
  1936. if proxy_server is not None:
  1937. if proxy_manager_cls is None:
  1938. proxy_manager_cls = urllib3.ProxyManager
  1939. if not isinstance(proxy_server, str):
  1940. proxy_server = proxy_server.decode()
  1941. proxy_server_url = urlparse(proxy_server)
  1942. if proxy_server_url.username is not None:
  1943. proxy_headers = urllib3.make_headers(
  1944. proxy_basic_auth=f"{proxy_server_url.username}:{proxy_server_url.password or ''}" # type: ignore
  1945. )
  1946. else:
  1947. proxy_headers = {}
  1948. manager = proxy_manager_cls(
  1949. proxy_server, proxy_headers=proxy_headers, headers=headers, **kwargs
  1950. )
  1951. else:
  1952. if pool_manager_cls is None:
  1953. pool_manager_cls = urllib3.PoolManager
  1954. manager = pool_manager_cls(headers=headers, **kwargs)
  1955. return manager
  1956. def check_for_proxy_bypass(base_url):
  1957. # Check if a proxy bypass is defined with the no_proxy environment variable
  1958. if base_url: # only check if base_url is provided
  1959. no_proxy_str = os.environ.get("no_proxy")
  1960. if no_proxy_str:
  1961. # implementation based on curl behavior: https://curl.se/libcurl/c/CURLOPT_NOPROXY.html
  1962. # get hostname of provided parsed url
  1963. parsed_url = urlparse(base_url)
  1964. hostname = parsed_url.hostname
  1965. if hostname:
  1966. import ipaddress
  1967. # check if hostname is an ip address
  1968. try:
  1969. hostname_ip = ipaddress.ip_address(hostname)
  1970. except ValueError:
  1971. hostname_ip = None
  1972. no_proxy_values = no_proxy_str.split(",")
  1973. for no_proxy_value in no_proxy_values:
  1974. no_proxy_value = no_proxy_value.strip()
  1975. if no_proxy_value:
  1976. no_proxy_value = no_proxy_value.lower()
  1977. no_proxy_value = no_proxy_value.lstrip(
  1978. "."
  1979. ) # ignore leading dots
  1980. if hostname_ip:
  1981. # check if no_proxy_value is a ip network
  1982. try:
  1983. no_proxy_value_network = ipaddress.ip_network(
  1984. no_proxy_value, strict=False
  1985. )
  1986. except ValueError:
  1987. no_proxy_value_network = None
  1988. if no_proxy_value_network:
  1989. # if hostname is a ip address and no_proxy_value is a ip network -> check if ip address is part of network
  1990. if hostname_ip in no_proxy_value_network:
  1991. return True
  1992. if no_proxy_value == "*":
  1993. # '*' is special case for always bypass proxy
  1994. return True
  1995. if hostname == no_proxy_value:
  1996. return True
  1997. no_proxy_value = (
  1998. "." + no_proxy_value
  1999. ) # add a dot to only match complete domains
  2000. if hostname.endswith(no_proxy_value):
  2001. return True
  2002. return False
  2003. class AbstractHttpGitClient(GitClient):
  2004. """Abstract base class for HTTP Git Clients.
  2005. This is agonistic of the actual HTTP implementation.
  2006. Subclasses should provide an implementation of the
  2007. _http_request method.
  2008. """
  2009. def __init__(self, base_url, dumb=False, **kwargs) -> None:
  2010. self._base_url = base_url.rstrip("/") + "/"
  2011. self.dumb = dumb
  2012. GitClient.__init__(self, **kwargs)
  2013. def _http_request(self, url, headers=None, data=None):
  2014. """Perform HTTP request.
  2015. Args:
  2016. url: Request URL.
  2017. headers: Optional custom headers to override defaults.
  2018. data: Request data.
  2019. Returns:
  2020. Tuple (response, read), where response is an urllib3
  2021. response object with additional content_type and
  2022. redirect_location properties, and read is a consumable read
  2023. method for the response data.
  2024. Raises:
  2025. GitProtocolError
  2026. """
  2027. raise NotImplementedError(self._http_request)
  2028. def _discover_references(
  2029. self, service, base_url, protocol_version=None
  2030. ) -> Tuple[
  2031. Dict[Ref, ObjectID], Set[bytes], str, Dict[Ref, Ref], Dict[Ref, ObjectID]
  2032. ]:
  2033. if (
  2034. protocol_version is not None
  2035. and protocol_version not in GIT_PROTOCOL_VERSIONS
  2036. ):
  2037. raise ValueError("unknown Git protocol version %d" % protocol_version)
  2038. assert base_url[-1] == "/"
  2039. tail = "info/refs"
  2040. headers = {"Accept": "*/*"}
  2041. if self.dumb is not True:
  2042. tail += "?service={}".format(service.decode("ascii"))
  2043. # Enable protocol v2 only when fetching, not when pushing.
  2044. # Git does not yet implement push over protocol v2, and as of
  2045. # git version 2.37.3 git-http-backend's behaviour is erratic if
  2046. # we try: It responds with a Git-protocol-v1-style ref listing
  2047. # which lacks the "001f# service=git-receive-pack" marker.
  2048. if service == b"git-upload-pack":
  2049. if protocol_version is None:
  2050. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_FETCH
  2051. else:
  2052. self.protocol_version = protocol_version
  2053. if self.protocol_version == 2:
  2054. headers["Git-Protocol"] = "version=2"
  2055. else:
  2056. self.protocol_version = DEFAULT_GIT_PROTOCOL_VERSION_SEND
  2057. url = urljoin(base_url, tail)
  2058. resp, read = self._http_request(url, headers)
  2059. if resp.redirect_location:
  2060. # Something changed (redirect!), so let's update the base URL
  2061. if not resp.redirect_location.endswith(tail):
  2062. raise GitProtocolError(
  2063. f"Redirected from URL {url} to URL {resp.redirect_location} without {tail}"
  2064. )
  2065. base_url = urljoin(url, resp.redirect_location[: -len(tail)])
  2066. try:
  2067. self.dumb = resp.content_type is None or not resp.content_type.startswith(
  2068. "application/x-git-"
  2069. )
  2070. if not self.dumb:
  2071. def begin_protocol_v2(proto):
  2072. server_capabilities = read_server_capabilities(proto.read_pkt_seq())
  2073. resp, read = self._smart_request(
  2074. service.decode("ascii"),
  2075. base_url,
  2076. pkt_line(b"command=ls-refs\n")
  2077. + b"0001"
  2078. + pkt_line(b"symrefs")
  2079. + b"0000",
  2080. )
  2081. proto = Protocol(read, None)
  2082. return server_capabilities, resp, read, proto
  2083. proto = Protocol(read, None)
  2084. server_protocol_version = negotiate_protocol_version(proto)
  2085. if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
  2086. raise ValueError(
  2087. "unknown Git protocol version %d used by server"
  2088. % server_protocol_version
  2089. )
  2090. if protocol_version and server_protocol_version > protocol_version:
  2091. raise ValueError(
  2092. "bad Git protocol version %d used by server"
  2093. % server_protocol_version
  2094. )
  2095. self.protocol_version = server_protocol_version
  2096. if self.protocol_version == 2:
  2097. server_capabilities, resp, read, proto = begin_protocol_v2(proto)
  2098. (refs, symrefs, peeled) = read_pkt_refs_v2(proto.read_pkt_seq())
  2099. return refs, server_capabilities, base_url, symrefs, peeled
  2100. else:
  2101. try:
  2102. [pkt] = list(proto.read_pkt_seq())
  2103. except ValueError as exc:
  2104. raise GitProtocolError(
  2105. "unexpected number of packets received"
  2106. ) from exc
  2107. if pkt.rstrip(b"\n") != (b"# service=" + service):
  2108. raise GitProtocolError(
  2109. f"unexpected first line {pkt!r} from smart server"
  2110. )
  2111. # Github sends "version 2" after sending the service name.
  2112. # Try to negotiate protocol version 2 again.
  2113. server_protocol_version = negotiate_protocol_version(proto)
  2114. if server_protocol_version not in GIT_PROTOCOL_VERSIONS:
  2115. raise ValueError(
  2116. "unknown Git protocol version %d used by server"
  2117. % server_protocol_version
  2118. )
  2119. if protocol_version and server_protocol_version > protocol_version:
  2120. raise ValueError(
  2121. "bad Git protocol version %d used by server"
  2122. % server_protocol_version
  2123. )
  2124. self.protocol_version = server_protocol_version
  2125. if self.protocol_version == 2:
  2126. server_capabilities, resp, read, proto = begin_protocol_v2(
  2127. proto
  2128. )
  2129. (refs, symrefs, peeled) = read_pkt_refs_v2(proto.read_pkt_seq())
  2130. else:
  2131. (
  2132. refs,
  2133. server_capabilities,
  2134. ) = read_pkt_refs_v1(proto.read_pkt_seq())
  2135. (refs, peeled) = split_peeled_refs(refs)
  2136. (symrefs, agent) = _extract_symrefs_and_agent(
  2137. server_capabilities
  2138. )
  2139. return refs, server_capabilities, base_url, symrefs, peeled
  2140. else:
  2141. self.protocol_version = 0 # dumb servers only support protocol v0
  2142. (refs, peeled) = split_peeled_refs(read_info_refs(resp))
  2143. return refs, set(), base_url, {}, peeled
  2144. finally:
  2145. resp.close()
  2146. def _smart_request(self, service, url, data):
  2147. """Send a 'smart' HTTP request.
  2148. This is a simple wrapper around _http_request that sets
  2149. a couple of extra headers.
  2150. """
  2151. assert url[-1] == "/"
  2152. url = urljoin(url, service)
  2153. result_content_type = f"application/x-{service}-result"
  2154. headers = {
  2155. "Content-Type": f"application/x-{service}-request",
  2156. "Accept": result_content_type,
  2157. }
  2158. if self.protocol_version == 2:
  2159. headers["Git-Protocol"] = "version=2"
  2160. if isinstance(data, bytes):
  2161. headers["Content-Length"] = str(len(data))
  2162. resp, read = self._http_request(url, headers, data)
  2163. if resp.content_type.split(";")[0] != result_content_type:
  2164. raise GitProtocolError(
  2165. f"Invalid content-type from server: {resp.content_type}"
  2166. )
  2167. return resp, read
  2168. def send_pack(self, path, update_refs, generate_pack_data, progress=None):
  2169. """Upload a pack to a remote repository.
  2170. Args:
  2171. path: Repository path (as bytestring)
  2172. update_refs: Function to determine changes to remote refs.
  2173. Receives dict with existing remote refs, returns dict with
  2174. changed refs (name -> sha, where sha=ZERO_SHA for deletions)
  2175. generate_pack_data: Function that can return a tuple
  2176. with number of elements and pack data to upload.
  2177. progress: Optional progress function
  2178. Returns:
  2179. SendPackResult
  2180. Raises:
  2181. SendPackError: if server rejects the pack data
  2182. """
  2183. url = self._get_url(path)
  2184. old_refs, server_capabilities, url, symrefs, peeled = self._discover_references(
  2185. b"git-receive-pack", url
  2186. )
  2187. (
  2188. negotiated_capabilities,
  2189. agent,
  2190. ) = self._negotiate_receive_pack_capabilities(server_capabilities)
  2191. negotiated_capabilities.add(capability_agent())
  2192. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  2193. self._report_status_parser = ReportStatusParser()
  2194. new_refs = update_refs(dict(old_refs))
  2195. if new_refs is None:
  2196. # Determine wants function is aborting the push.
  2197. return SendPackResult(old_refs, agent=agent, ref_status={})
  2198. if set(new_refs.items()).issubset(set(old_refs.items())):
  2199. return SendPackResult(new_refs, agent=agent, ref_status={})
  2200. if self.dumb:
  2201. raise NotImplementedError(self.fetch_pack)
  2202. def body_generator():
  2203. header_handler = _v1ReceivePackHeader(
  2204. negotiated_capabilities, old_refs, new_refs
  2205. )
  2206. for pkt in header_handler:
  2207. yield pkt_line(pkt)
  2208. pack_data_count, pack_data = generate_pack_data(
  2209. header_handler.have,
  2210. header_handler.want,
  2211. ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities),
  2212. )
  2213. if self._should_send_pack(new_refs):
  2214. yield from PackChunkGenerator(pack_data_count, pack_data)
  2215. resp, read = self._smart_request("git-receive-pack", url, data=body_generator())
  2216. try:
  2217. resp_proto = Protocol(read, None)
  2218. ref_status = self._handle_receive_pack_tail(
  2219. resp_proto, negotiated_capabilities, progress
  2220. )
  2221. return SendPackResult(new_refs, agent=agent, ref_status=ref_status)
  2222. finally:
  2223. resp.close()
  2224. def fetch_pack(
  2225. self,
  2226. path,
  2227. determine_wants,
  2228. graph_walker,
  2229. pack_data,
  2230. progress=None,
  2231. depth=None,
  2232. ref_prefix=[],
  2233. filter_spec=None,
  2234. protocol_version: Optional[int] = None,
  2235. ):
  2236. """Retrieve a pack from a git smart server.
  2237. Args:
  2238. path: Path to fetch from
  2239. determine_wants: Callback that returns list of commits to fetch
  2240. graph_walker: Object with next() and ack().
  2241. pack_data: Callback called for each bit of data in the pack
  2242. progress: Callback for progress reports (strings)
  2243. depth: Depth for request
  2244. ref_prefix: Prefix of desired references, as a list of bytestrings.
  2245. The server will limit the list of references sent to this prefix,
  2246. provided this feature is supported and sufficient server-side
  2247. resources are available to match all references against the prefix.
  2248. Clients must be prepared to filter out any non-requested references
  2249. themselves. This feature is an entirely optional optimization.
  2250. filter_spec: A git-rev-list-style object filter spec, as bytestring.
  2251. Only used if the server supports the Git protocol-v2 'filter'
  2252. feature, and ignored otherwise.
  2253. protocol_version: Desired Git protocol version. By default the highest
  2254. mutually supported protocol version will be used.
  2255. Returns:
  2256. FetchPackResult object
  2257. """
  2258. url = self._get_url(path)
  2259. refs, server_capabilities, url, symrefs, peeled = self._discover_references(
  2260. b"git-upload-pack", url, protocol_version
  2261. )
  2262. (
  2263. negotiated_capabilities,
  2264. capa_symrefs,
  2265. agent,
  2266. ) = self._negotiate_upload_pack_capabilities(server_capabilities)
  2267. if not symrefs and capa_symrefs:
  2268. symrefs = capa_symrefs
  2269. if depth is not None:
  2270. wants = determine_wants(refs, depth=depth)
  2271. else:
  2272. wants = determine_wants(refs)
  2273. if wants is not None:
  2274. wants = [cid for cid in wants if cid != ZERO_SHA]
  2275. if not wants:
  2276. return FetchPackResult(refs, symrefs, agent)
  2277. if self.dumb:
  2278. raise NotImplementedError(self.fetch_pack)
  2279. req_data = BytesIO()
  2280. req_proto = Protocol(None, req_data.write)
  2281. (new_shallow, new_unshallow) = _handle_upload_pack_head(
  2282. req_proto,
  2283. negotiated_capabilities,
  2284. graph_walker,
  2285. wants,
  2286. can_read=None,
  2287. depth=depth,
  2288. protocol_version=self.protocol_version,
  2289. )
  2290. if self.protocol_version == 2:
  2291. data = pkt_line(b"command=fetch\n") + b"0001"
  2292. if (
  2293. find_capability(
  2294. negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
  2295. )
  2296. and filter_spec
  2297. ):
  2298. data += pkt_line(b"filter %s\n" % filter_spec)
  2299. elif filter_spec:
  2300. self._warn_filter_objects()
  2301. data += req_data.getvalue()
  2302. else:
  2303. if filter_spec:
  2304. self._warn_filter_objects()
  2305. data = req_data.getvalue()
  2306. resp, read = self._smart_request("git-upload-pack", url, data)
  2307. try:
  2308. resp_proto = Protocol(read, None)
  2309. if new_shallow is None and new_unshallow is None:
  2310. (new_shallow, new_unshallow) = _read_shallow_updates(
  2311. resp_proto.read_pkt_seq()
  2312. )
  2313. _handle_upload_pack_tail(
  2314. resp_proto,
  2315. negotiated_capabilities,
  2316. graph_walker,
  2317. pack_data,
  2318. progress,
  2319. protocol_version=self.protocol_version,
  2320. )
  2321. return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
  2322. finally:
  2323. resp.close()
  2324. def get_refs(self, path):
  2325. """Retrieve the current refs from a git smart server."""
  2326. url = self._get_url(path)
  2327. refs, _, _, _, _ = self._discover_references(b"git-upload-pack", url)
  2328. return refs
  2329. def get_url(self, path):
  2330. return self._get_url(path).rstrip("/")
  2331. def _get_url(self, path):
  2332. return urljoin(self._base_url, path).rstrip("/") + "/"
  2333. @classmethod
  2334. def from_parsedurl(cls, parsedurl, **kwargs):
  2335. password = parsedurl.password
  2336. if password is not None:
  2337. kwargs["password"] = urlunquote(password)
  2338. username = parsedurl.username
  2339. if username is not None:
  2340. kwargs["username"] = urlunquote(username)
  2341. return cls(urlunparse(parsedurl), **kwargs)
  2342. def __repr__(self) -> str:
  2343. return f"{type(self).__name__}({self._base_url!r}, dumb={self.dumb!r})"
  2344. def _wrap_urllib3_exceptions(func):
  2345. from urllib3.exceptions import ProtocolError
  2346. def wrapper(*args, **kwargs):
  2347. try:
  2348. return func(*args, **kwargs)
  2349. except ProtocolError as error:
  2350. raise GitProtocolError(str(error)) from error
  2351. return wrapper
  2352. class Urllib3HttpGitClient(AbstractHttpGitClient):
  2353. def __init__(
  2354. self,
  2355. base_url,
  2356. dumb=None,
  2357. pool_manager=None,
  2358. config=None,
  2359. username=None,
  2360. password=None,
  2361. **kwargs,
  2362. ) -> None:
  2363. self._username = username
  2364. self._password = password
  2365. if pool_manager is None:
  2366. self.pool_manager = default_urllib3_manager(config, base_url=base_url)
  2367. else:
  2368. self.pool_manager = pool_manager
  2369. if username is not None:
  2370. # No escaping needed: ":" is not allowed in username:
  2371. # https://tools.ietf.org/html/rfc2617#section-2
  2372. credentials = f"{username}:{password or ''}"
  2373. import urllib3.util
  2374. basic_auth = urllib3.util.make_headers(basic_auth=credentials)
  2375. self.pool_manager.headers.update(basic_auth) # type: ignore
  2376. self.config = config
  2377. super().__init__(base_url=base_url, dumb=dumb, **kwargs)
  2378. def _get_url(self, path):
  2379. if not isinstance(path, str):
  2380. # urllib3.util.url._encode_invalid_chars() converts the path back
  2381. # to bytes using the utf-8 codec.
  2382. path = path.decode("utf-8")
  2383. return urljoin(self._base_url, path).rstrip("/") + "/"
  2384. def _http_request(self, url, headers=None, data=None):
  2385. import urllib3.exceptions
  2386. req_headers = self.pool_manager.headers.copy()
  2387. if headers is not None:
  2388. req_headers.update(headers)
  2389. req_headers["Pragma"] = "no-cache"
  2390. try:
  2391. if data is None:
  2392. resp = self.pool_manager.request(
  2393. "GET", url, headers=req_headers, preload_content=False
  2394. )
  2395. else:
  2396. resp = self.pool_manager.request(
  2397. "POST", url, headers=req_headers, body=data, preload_content=False
  2398. )
  2399. except urllib3.exceptions.HTTPError as e:
  2400. raise GitProtocolError(str(e)) from e
  2401. if resp.status == 404:
  2402. raise NotGitRepository
  2403. if resp.status == 401:
  2404. raise HTTPUnauthorized(resp.headers.get("WWW-Authenticate"), url)
  2405. if resp.status == 407:
  2406. raise HTTPProxyUnauthorized(resp.headers.get("Proxy-Authenticate"), url)
  2407. if resp.status != 200:
  2408. raise GitProtocolError(
  2409. "unexpected http resp %d for %s" % (resp.status, url)
  2410. )
  2411. resp.content_type = resp.headers.get("Content-Type")
  2412. # Check if geturl() is available (urllib3 version >= 1.23)
  2413. try:
  2414. resp_url = resp.geturl()
  2415. except AttributeError:
  2416. # get_redirect_location() is available for urllib3 >= 1.1
  2417. resp.redirect_location = resp.get_redirect_location()
  2418. else:
  2419. resp.redirect_location = resp_url if resp_url != url else ""
  2420. return resp, _wrap_urllib3_exceptions(resp.read)
  2421. HttpGitClient = Urllib3HttpGitClient
  2422. def _win32_url_to_path(parsed) -> str:
  2423. """Convert a file: URL to a path.
  2424. https://datatracker.ietf.org/doc/html/rfc8089
  2425. """
  2426. assert sys.platform == "win32" or os.name == "nt"
  2427. assert parsed.scheme == "file"
  2428. _, netloc, path, _, _, _ = parsed
  2429. if netloc == "localhost" or not netloc:
  2430. netloc = ""
  2431. elif (
  2432. netloc
  2433. and len(netloc) >= 2
  2434. and netloc[0].isalpha()
  2435. and netloc[1:2] in (":", ":/")
  2436. ):
  2437. # file://C:/foo.bar/baz or file://C://foo.bar//baz
  2438. netloc = netloc[:2]
  2439. else:
  2440. raise NotImplementedError("Non-local file URLs are not supported")
  2441. global url2pathname
  2442. if url2pathname is None:
  2443. from urllib.request import url2pathname # type: ignore
  2444. return url2pathname(netloc + path) # type: ignore
  2445. def get_transport_and_path_from_url(
  2446. url: str, config: Optional[Config] = None, operation: Optional[str] = None, **kwargs
  2447. ) -> Tuple[GitClient, str]:
  2448. """Obtain a git client from a URL.
  2449. Args:
  2450. url: URL to open (a unicode string)
  2451. config: Optional config object
  2452. operation: Kind of operation that'll be performed; "pull" or "push"
  2453. thin_packs: Whether or not thin packs should be retrieved
  2454. report_activity: Optional callback for reporting transport
  2455. activity.
  2456. Returns:
  2457. Tuple with client instance and relative path.
  2458. """
  2459. if config is not None:
  2460. url = apply_instead_of(config, url, push=(operation == "push"))
  2461. return _get_transport_and_path_from_url(
  2462. url, config=config, operation=operation, **kwargs
  2463. )
  2464. def _get_transport_and_path_from_url(url, config, operation, **kwargs):
  2465. parsed = urlparse(url)
  2466. if parsed.scheme == "git":
  2467. return (TCPGitClient.from_parsedurl(parsed, **kwargs), parsed.path)
  2468. elif parsed.scheme in ("git+ssh", "ssh"):
  2469. return SSHGitClient.from_parsedurl(parsed, **kwargs), parsed.path
  2470. elif parsed.scheme in ("http", "https"):
  2471. return (
  2472. HttpGitClient.from_parsedurl(parsed, config=config, **kwargs),
  2473. parsed.path,
  2474. )
  2475. elif parsed.scheme == "file":
  2476. if sys.platform == "win32" or os.name == "nt":
  2477. return default_local_git_client_cls(**kwargs), _win32_url_to_path(parsed)
  2478. return (
  2479. default_local_git_client_cls.from_parsedurl(parsed, **kwargs),
  2480. parsed.path,
  2481. )
  2482. raise ValueError(f"unknown scheme '{parsed.scheme}'")
  2483. def parse_rsync_url(location: str) -> Tuple[Optional[str], str, str]:
  2484. """Parse a rsync-style URL."""
  2485. if ":" in location and "@" not in location:
  2486. # SSH with no user@, zero or one leading slash.
  2487. (host, path) = location.split(":", 1)
  2488. user = None
  2489. elif ":" in location:
  2490. # SSH with user@host:foo.
  2491. user_host, path = location.split(":", 1)
  2492. if "@" in user_host:
  2493. user, host = user_host.rsplit("@", 1)
  2494. else:
  2495. user = None
  2496. host = user_host
  2497. else:
  2498. raise ValueError("not a valid rsync-style URL")
  2499. return (user, host, path)
  2500. def get_transport_and_path(
  2501. location: str,
  2502. config: Optional[Config] = None,
  2503. operation: Optional[str] = None,
  2504. **kwargs,
  2505. ) -> Tuple[GitClient, str]:
  2506. """Obtain a git client from a URL.
  2507. Args:
  2508. location: URL or path (a string)
  2509. config: Optional config object
  2510. operation: Kind of operation that'll be performed; "pull" or "push"
  2511. thin_packs: Whether or not thin packs should be retrieved
  2512. report_activity: Optional callback for reporting transport
  2513. activity.
  2514. Returns:
  2515. Tuple with client instance and relative path.
  2516. """
  2517. if config is not None:
  2518. location = apply_instead_of(config, location, push=(operation == "push"))
  2519. # First, try to parse it as a URL
  2520. try:
  2521. return _get_transport_and_path_from_url(
  2522. location, config=config, operation=operation, **kwargs
  2523. )
  2524. except ValueError:
  2525. pass
  2526. if sys.platform == "win32" and location[0].isalpha() and location[1:3] == ":\\":
  2527. # Windows local path
  2528. return default_local_git_client_cls(**kwargs), location
  2529. try:
  2530. (username, hostname, path) = parse_rsync_url(location)
  2531. except ValueError:
  2532. # Otherwise, assume it's a local path.
  2533. return default_local_git_client_cls(**kwargs), location
  2534. else:
  2535. return SSHGitClient(hostname, username=username, **kwargs), path
# Credential store files used by default by get_credentials_from_store(),
# in lookup order: ~/.git-credentials first, then the "git/credentials"
# file located via get_xdg_config_home_path().
DEFAULT_GIT_CREDENTIALS_PATHS = [
    os.path.expanduser("~/.git-credentials"),
    get_xdg_config_home_path("git", "credentials"),
]
  2540. def get_credentials_from_store(
  2541. scheme, hostname, username=None, fnames=DEFAULT_GIT_CREDENTIALS_PATHS
  2542. ):
  2543. for fname in fnames:
  2544. try:
  2545. with open(fname, "rb") as f:
  2546. for line in f:
  2547. parsed_line = urlparse(line.strip())
  2548. if (
  2549. parsed_line.scheme == scheme
  2550. and parsed_line.hostname == hostname
  2551. and (username is None or parsed_line.username == username)
  2552. ):
  2553. return parsed_line.username, parsed_line.password
  2554. except FileNotFoundError:
  2555. # If the file doesn't exist, try the next one.
  2556. continue