client.py 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230
  1. # client.py -- Implementation of the client side git protocols
  2. # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Client side support for the Git protocol.
  19. The Dulwich client supports the following capabilities:
  20. * thin-pack
  21. * multi_ack_detailed
  22. * multi_ack
  23. * side-band-64k
  24. * ofs-delta
  25. * quiet
  26. * report-status
  27. * delete-refs
  28. Known capabilities that are not supported:
  29. * shallow
  30. * no-progress
  31. * include-tag
  32. """
  33. __docformat__ = 'restructuredText'
  34. from contextlib import closing
  35. from io import BytesIO, BufferedReader
  36. import dulwich
  37. import select
  38. import socket
  39. import subprocess
  40. import sys
  41. try:
  42. import urllib2
  43. import urlparse
  44. except ImportError:
  45. import urllib.request as urllib2
  46. import urllib.parse as urlparse
  47. from dulwich.errors import (
  48. GitProtocolError,
  49. NotGitRepository,
  50. SendPackError,
  51. UpdateRefsError,
  52. )
  53. from dulwich.protocol import (
  54. _RBUFSIZE,
  55. capability_agent,
  56. CAPABILITY_DELETE_REFS,
  57. CAPABILITY_MULTI_ACK,
  58. CAPABILITY_MULTI_ACK_DETAILED,
  59. CAPABILITY_OFS_DELTA,
  60. CAPABILITY_QUIET,
  61. CAPABILITY_REPORT_STATUS,
  62. CAPABILITY_SIDE_BAND_64K,
  63. CAPABILITY_THIN_PACK,
  64. CAPABILITIES_REF,
  65. COMMAND_DONE,
  66. COMMAND_HAVE,
  67. COMMAND_WANT,
  68. SIDE_BAND_CHANNEL_DATA,
  69. SIDE_BAND_CHANNEL_PROGRESS,
  70. SIDE_BAND_CHANNEL_FATAL,
  71. PktLineParser,
  72. Protocol,
  73. ProtocolFile,
  74. TCP_GIT_PORT,
  75. ZERO_SHA,
  76. extract_capabilities,
  77. )
  78. from dulwich.pack import (
  79. write_pack_objects,
  80. )
  81. from dulwich.refs import (
  82. read_info_refs,
  83. )
  84. def _fileno_can_read(fileno):
  85. """Check if a file descriptor is readable."""
  86. return len(select.select([fileno], [], [], 0)[0]) > 0
  87. COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]
  88. FETCH_CAPABILITIES = ([CAPABILITY_THIN_PACK, CAPABILITY_MULTI_ACK,
  89. CAPABILITY_MULTI_ACK_DETAILED] +
  90. COMMON_CAPABILITIES)
  91. SEND_CAPABILITIES = [CAPABILITY_REPORT_STATUS] + COMMON_CAPABILITIES
  92. class ReportStatusParser(object):
  93. """Handle status as reported by servers with 'report-status' capability.
  94. """
  95. def __init__(self):
  96. self._done = False
  97. self._pack_status = None
  98. self._ref_status_ok = True
  99. self._ref_statuses = []
  100. def check(self):
  101. """Check if there were any errors and, if so, raise exceptions.
  102. :raise SendPackError: Raised when the server could not unpack
  103. :raise UpdateRefsError: Raised when refs could not be updated
  104. """
  105. if self._pack_status not in (b'unpack ok', None):
  106. raise SendPackError(self._pack_status)
  107. if not self._ref_status_ok:
  108. ref_status = {}
  109. ok = set()
  110. for status in self._ref_statuses:
  111. if b' ' not in status:
  112. # malformed response, move on to the next one
  113. continue
  114. status, ref = status.split(b' ', 1)
  115. if status == b'ng':
  116. if b' ' in ref:
  117. ref, status = ref.split(b' ', 1)
  118. else:
  119. ok.add(ref)
  120. ref_status[ref] = status
  121. # TODO(jelmer): don't assume encoding of refs is ascii.
  122. raise UpdateRefsError(', '.join([
  123. ref.decode('ascii') for ref in ref_status if ref not in ok]) +
  124. ' failed to update', ref_status=ref_status)
  125. def handle_packet(self, pkt):
  126. """Handle a packet.
  127. :raise GitProtocolError: Raised when packets are received after a
  128. flush packet.
  129. """
  130. if self._done:
  131. raise GitProtocolError("received more data after status report")
  132. if pkt is None:
  133. self._done = True
  134. return
  135. if self._pack_status is None:
  136. self._pack_status = pkt.strip()
  137. else:
  138. ref_status = pkt.strip()
  139. self._ref_statuses.append(ref_status)
  140. if not ref_status.startswith(b'ok '):
  141. self._ref_status_ok = False
  142. def read_pkt_refs(proto):
  143. server_capabilities = None
  144. refs = {}
  145. # Receive refs from server
  146. for pkt in proto.read_pkt_seq():
  147. (sha, ref) = pkt.rstrip(b'\n').split(None, 1)
  148. if sha == b'ERR':
  149. raise GitProtocolError(ref)
  150. if server_capabilities is None:
  151. (ref, server_capabilities) = extract_capabilities(ref)
  152. refs[ref] = sha
  153. if len(refs) == 0:
  154. return None, set([])
  155. if refs == {CAPABILITIES_REF: ZERO_SHA}:
  156. refs = {}
  157. return refs, set(server_capabilities)
  158. # TODO(durin42): this doesn't correctly degrade if the server doesn't
  159. # support some capabilities. This should work properly with servers
  160. # that don't support multi_ack.
  161. class GitClient(object):
  162. """Git smart server client.
  163. """
  164. def __init__(self, thin_packs=True, report_activity=None, quiet=False):
  165. """Create a new GitClient instance.
  166. :param thin_packs: Whether or not thin packs should be retrieved
  167. :param report_activity: Optional callback for reporting transport
  168. activity.
  169. """
  170. self._report_activity = report_activity
  171. self._report_status_parser = None
  172. self._fetch_capabilities = set(FETCH_CAPABILITIES)
  173. self._fetch_capabilities.add(capability_agent())
  174. self._send_capabilities = set(SEND_CAPABILITIES)
  175. self._send_capabilities.add(capability_agent())
  176. if quiet:
  177. self._send_capabilities.add(CAPABILITY_QUIET)
  178. if not thin_packs:
  179. self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
  180. def get_url(self, path):
  181. """Retrieves full url to given path.
  182. :param path: Repository path (as string)
  183. :return: Url to path (as string)
  184. """
  185. raise NotImplementedError(self.get_url)
  186. def send_pack(self, path, determine_wants, generate_pack_contents,
  187. progress=None, write_pack=write_pack_objects):
  188. """Upload a pack to a remote repository.
  189. :param path: Repository path (as bytestring)
  190. :param generate_pack_contents: Function that can return a sequence of
  191. the shas of the objects to upload.
  192. :param progress: Optional progress function
  193. :param write_pack: Function called with (file, iterable of objects) to
  194. write the objects returned by generate_pack_contents to the server.
  195. :raises SendPackError: if server rejects the pack data
  196. :raises UpdateRefsError: if the server supports report-status
  197. and rejects ref updates
  198. :return: new_refs dictionary containing the changes that were made
  199. {refname: new_ref}, including deleted refs.
  200. """
  201. raise NotImplementedError(self.send_pack)
  202. def fetch(self, path, target, determine_wants=None, progress=None):
  203. """Fetch into a target repository.
  204. :param path: Path to fetch from (as bytestring)
  205. :param target: Target repository to fetch into
  206. :param determine_wants: Optional function to determine what refs
  207. to fetch
  208. :param progress: Optional progress function
  209. :return: Dictionary with all remote refs (not just those fetched)
  210. """
  211. if determine_wants is None:
  212. determine_wants = target.object_store.determine_wants_all
  213. if CAPABILITY_THIN_PACK in self._fetch_capabilities:
  214. # TODO(jelmer): Avoid reading entire file into memory and
  215. # only processing it after the whole file has been fetched.
  216. f = BytesIO()
  217. def commit():
  218. if f.tell():
  219. f.seek(0)
  220. target.object_store.add_thin_pack(f.read, None)
  221. def abort():
  222. pass
  223. else:
  224. f, commit, abort = target.object_store.add_pack()
  225. try:
  226. result = self.fetch_pack(
  227. path, determine_wants, target.get_graph_walker(), f.write,
  228. progress)
  229. except:
  230. abort()
  231. raise
  232. else:
  233. commit()
  234. return result
  235. def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
  236. progress=None):
  237. """Retrieve a pack from a git smart server.
  238. :param determine_wants: Callback that returns list of commits to fetch
  239. :param graph_walker: Object with next() and ack().
  240. :param pack_data: Callback called for each bit of data in the pack
  241. :param progress: Callback for progress reports (strings)
  242. :return: Dictionary with all remote refs (not just those fetched)
  243. """
  244. raise NotImplementedError(self.fetch_pack)
  245. def get_refs(self, path):
  246. """Retrieve the current refs from a git smart server.
  247. :param path: Path to the repo to fetch from. (as bytestring)
  248. """
  249. raise NotImplementedError(self.get_refs)
  250. def _parse_status_report(self, proto):
  251. unpack = proto.read_pkt_line().strip()
  252. if unpack != b'unpack ok':
  253. st = True
  254. # flush remaining error data
  255. while st is not None:
  256. st = proto.read_pkt_line()
  257. raise SendPackError(unpack)
  258. statuses = []
  259. errs = False
  260. ref_status = proto.read_pkt_line()
  261. while ref_status:
  262. ref_status = ref_status.strip()
  263. statuses.append(ref_status)
  264. if not ref_status.startswith(b'ok '):
  265. errs = True
  266. ref_status = proto.read_pkt_line()
  267. if errs:
  268. ref_status = {}
  269. ok = set()
  270. for status in statuses:
  271. if b' ' not in status:
  272. # malformed response, move on to the next one
  273. continue
  274. status, ref = status.split(b' ', 1)
  275. if status == b'ng':
  276. if b' ' in ref:
  277. ref, status = ref.split(b' ', 1)
  278. else:
  279. ok.add(ref)
  280. ref_status[ref] = status
  281. raise UpdateRefsError(', '.join([ref for ref in ref_status
  282. if ref not in ok]) +
  283. b' failed to update',
  284. ref_status=ref_status)
  285. def _read_side_band64k_data(self, proto, channel_callbacks):
  286. """Read per-channel data.
  287. This requires the side-band-64k capability.
  288. :param proto: Protocol object to read from
  289. :param channel_callbacks: Dictionary mapping channels to packet
  290. handlers to use. None for a callback discards channel data.
  291. """
  292. for pkt in proto.read_pkt_seq():
  293. channel = ord(pkt[:1])
  294. pkt = pkt[1:]
  295. try:
  296. cb = channel_callbacks[channel]
  297. except KeyError:
  298. raise AssertionError('Invalid sideband channel %d' % channel)
  299. else:
  300. if cb is not None:
  301. cb(pkt)
  302. def _handle_receive_pack_head(self, proto, capabilities, old_refs,
  303. new_refs):
  304. """Handle the head of a 'git-receive-pack' request.
  305. :param proto: Protocol object to read from
  306. :param capabilities: List of negotiated capabilities
  307. :param old_refs: Old refs, as received from the server
  308. :param new_refs: Refs to change
  309. :return: (have, want) tuple
  310. """
  311. want = []
  312. have = [x for x in old_refs.values() if not x == ZERO_SHA]
  313. sent_capabilities = False
  314. for refname in new_refs:
  315. if not isinstance(refname, bytes):
  316. raise TypeError('refname is not a bytestring: %r' % refname)
  317. old_sha1 = old_refs.get(refname, ZERO_SHA)
  318. if not isinstance(old_sha1, bytes):
  319. raise TypeError('old sha1 for %s is not a bytestring: %r' %
  320. (refname, old_sha1))
  321. new_sha1 = new_refs.get(refname, ZERO_SHA)
  322. if not isinstance(new_sha1, bytes):
  323. raise TypeError('old sha1 for %s is not a bytestring %r' %
  324. (refname, new_sha1))
  325. if old_sha1 != new_sha1:
  326. if sent_capabilities:
  327. proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' + refname)
  328. else:
  329. proto.write_pkt_line(
  330. old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
  331. b' '.join(capabilities))
  332. sent_capabilities = True
  333. if new_sha1 not in have and new_sha1 != ZERO_SHA:
  334. want.append(new_sha1)
  335. proto.write_pkt_line(None)
  336. return (have, want)
  337. def _handle_receive_pack_tail(self, proto, capabilities, progress=None):
  338. """Handle the tail of a 'git-receive-pack' request.
  339. :param proto: Protocol object to read from
  340. :param capabilities: List of negotiated capabilities
  341. :param progress: Optional progress reporting function
  342. """
  343. if b"side-band-64k" in capabilities:
  344. if progress is None:
  345. progress = lambda x: None
  346. channel_callbacks = {2: progress}
  347. if CAPABILITY_REPORT_STATUS in capabilities:
  348. channel_callbacks[1] = PktLineParser(
  349. self._report_status_parser.handle_packet).parse
  350. self._read_side_band64k_data(proto, channel_callbacks)
  351. else:
  352. if CAPABILITY_REPORT_STATUS in capabilities:
  353. for pkt in proto.read_pkt_seq():
  354. self._report_status_parser.handle_packet(pkt)
  355. if self._report_status_parser is not None:
  356. self._report_status_parser.check()
  357. def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
  358. wants, can_read):
  359. """Handle the head of a 'git-upload-pack' request.
  360. :param proto: Protocol object to read from
  361. :param capabilities: List of negotiated capabilities
  362. :param graph_walker: GraphWalker instance to call .ack() on
  363. :param wants: List of commits to fetch
  364. :param can_read: function that returns a boolean that indicates
  365. whether there is extra graph data to read on proto
  366. """
  367. assert isinstance(wants, list) and isinstance(wants[0], bytes)
  368. proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' + b' '.join(capabilities) + b'\n')
  369. for want in wants[1:]:
  370. proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
  371. proto.write_pkt_line(None)
  372. have = next(graph_walker)
  373. while have:
  374. proto.write_pkt_line(COMMAND_HAVE + b' ' + have + b'\n')
  375. if can_read():
  376. pkt = proto.read_pkt_line()
  377. parts = pkt.rstrip(b'\n').split(b' ')
  378. if parts[0] == b'ACK':
  379. graph_walker.ack(parts[1])
  380. if parts[2] in (b'continue', b'common'):
  381. pass
  382. elif parts[2] == b'ready':
  383. break
  384. else:
  385. raise AssertionError(
  386. "%s not in ('continue', 'ready', 'common)" %
  387. parts[2])
  388. have = next(graph_walker)
  389. proto.write_pkt_line(COMMAND_DONE + b'\n')
  390. def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
  391. pack_data, progress=None, rbufsize=_RBUFSIZE):
  392. """Handle the tail of a 'git-upload-pack' request.
  393. :param proto: Protocol object to read from
  394. :param capabilities: List of negotiated capabilities
  395. :param graph_walker: GraphWalker instance to call .ack() on
  396. :param pack_data: Function to call with pack data
  397. :param progress: Optional progress reporting function
  398. :param rbufsize: Read buffer size
  399. """
  400. pkt = proto.read_pkt_line()
  401. while pkt:
  402. parts = pkt.rstrip(b'\n').split(b' ')
  403. if parts[0] == b'ACK':
  404. graph_walker.ack(parts[1])
  405. if len(parts) < 3 or parts[2] not in (
  406. b'ready', b'continue', b'common'):
  407. break
  408. pkt = proto.read_pkt_line()
  409. if CAPABILITY_SIDE_BAND_64K in capabilities:
  410. if progress is None:
  411. # Just ignore progress data
  412. progress = lambda x: None
  413. self._read_side_band64k_data(proto, {
  414. SIDE_BAND_CHANNEL_DATA: pack_data,
  415. SIDE_BAND_CHANNEL_PROGRESS: progress}
  416. )
  417. else:
  418. while True:
  419. data = proto.read(rbufsize)
  420. if data == b"":
  421. break
  422. pack_data(data)
  423. class TraditionalGitClient(GitClient):
  424. """Traditional Git client."""
  425. def _connect(self, cmd, path):
  426. """Create a connection to the server.
  427. This method is abstract - concrete implementations should
  428. implement their own variant which connects to the server and
  429. returns an initialized Protocol object with the service ready
  430. for use and a can_read function which may be used to see if
  431. reads would block.
  432. :param cmd: The git service name to which we should connect.
  433. :param path: The path we should pass to the service. (as bytestirng)
  434. """
  435. raise NotImplementedError()
  436. def send_pack(self, path, determine_wants, generate_pack_contents,
  437. progress=None, write_pack=write_pack_objects):
  438. """Upload a pack to a remote repository.
  439. :param path: Repository path (as bytestring)
  440. :param generate_pack_contents: Function that can return a sequence of
  441. the shas of the objects to upload.
  442. :param progress: Optional callback called with progress updates
  443. :param write_pack: Function called with (file, iterable of objects) to
  444. write the objects returned by generate_pack_contents to the server.
  445. :raises SendPackError: if server rejects the pack data
  446. :raises UpdateRefsError: if the server supports report-status
  447. and rejects ref updates
  448. :return: new_refs dictionary containing the changes that were made
  449. {refname: new_ref}, including deleted refs.
  450. """
  451. proto, unused_can_read = self._connect(b'receive-pack', path)
  452. with proto:
  453. old_refs, server_capabilities = read_pkt_refs(proto)
  454. negotiated_capabilities = self._send_capabilities & server_capabilities
  455. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  456. self._report_status_parser = ReportStatusParser()
  457. report_status_parser = self._report_status_parser
  458. try:
  459. new_refs = orig_new_refs = determine_wants(dict(old_refs))
  460. except:
  461. proto.write_pkt_line(None)
  462. raise
  463. if not CAPABILITY_DELETE_REFS in server_capabilities:
  464. # Server does not support deletions. Fail later.
  465. new_refs = dict(orig_new_refs)
  466. for ref, sha in orig_new_refs.items():
  467. if sha == ZERO_SHA:
  468. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  469. report_status_parser._ref_statuses.append(
  470. b'ng ' + sha + b' remote does not support deleting refs')
  471. report_status_parser._ref_status_ok = False
  472. del new_refs[ref]
  473. if new_refs is None:
  474. proto.write_pkt_line(None)
  475. return old_refs
  476. if len(new_refs) == 0 and len(orig_new_refs):
  477. # NOOP - Original new refs filtered out by policy
  478. proto.write_pkt_line(None)
  479. if report_status_parser is not None:
  480. report_status_parser.check()
  481. return old_refs
  482. (have, want) = self._handle_receive_pack_head(
  483. proto, negotiated_capabilities, old_refs, new_refs)
  484. if not want and set(new_refs.items()).issubset(set(old_refs.items())):
  485. return new_refs
  486. objects = generate_pack_contents(have, want)
  487. dowrite = len(objects) > 0
  488. dowrite = dowrite or any(old_refs.get(ref) != sha
  489. for (ref, sha) in new_refs.items()
  490. if sha != ZERO_SHA)
  491. if dowrite:
  492. write_pack(proto.write_file(), objects)
  493. self._handle_receive_pack_tail(
  494. proto, negotiated_capabilities, progress)
  495. return new_refs
  496. def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
  497. progress=None):
  498. """Retrieve a pack from a git smart server.
  499. :param determine_wants: Callback that returns list of commits to fetch
  500. :param graph_walker: Object with next() and ack().
  501. :param pack_data: Callback called for each bit of data in the pack
  502. :param progress: Callback for progress reports (strings)
  503. :return: Dictionary with all remote refs (not just those fetched)
  504. """
  505. proto, can_read = self._connect(b'upload-pack', path)
  506. with proto:
  507. refs, server_capabilities = read_pkt_refs(proto)
  508. negotiated_capabilities = (
  509. self._fetch_capabilities & server_capabilities)
  510. if refs is None:
  511. proto.write_pkt_line(None)
  512. return refs
  513. try:
  514. wants = determine_wants(refs)
  515. except:
  516. proto.write_pkt_line(None)
  517. raise
  518. if wants is not None:
  519. wants = [cid for cid in wants if cid != ZERO_SHA]
  520. if not wants:
  521. proto.write_pkt_line(None)
  522. return refs
  523. self._handle_upload_pack_head(
  524. proto, negotiated_capabilities, graph_walker, wants, can_read)
  525. self._handle_upload_pack_tail(
  526. proto, negotiated_capabilities, graph_walker, pack_data, progress)
  527. return refs
  528. def get_refs(self, path):
  529. """Retrieve the current refs from a git smart server."""
  530. # stock `git ls-remote` uses upload-pack
  531. proto, _ = self._connect(b'upload-pack', path)
  532. with proto:
  533. refs, _ = read_pkt_refs(proto)
  534. return refs
  535. def archive(self, path, committish, write_data, progress=None,
  536. write_error=None):
  537. proto, can_read = self._connect(b'upload-archive', path)
  538. with proto:
  539. proto.write_pkt_line(b"argument " + committish)
  540. proto.write_pkt_line(None)
  541. pkt = proto.read_pkt_line()
  542. if pkt == b"NACK\n":
  543. return
  544. elif pkt == b"ACK\n":
  545. pass
  546. elif pkt.startswith(b"ERR "):
  547. raise GitProtocolError(pkt[4:].rstrip(b"\n"))
  548. else:
  549. raise AssertionError("invalid response %r" % pkt)
  550. ret = proto.read_pkt_line()
  551. if ret is not None:
  552. raise AssertionError("expected pkt tail")
  553. self._read_side_band64k_data(proto, {
  554. SIDE_BAND_CHANNEL_DATA: write_data,
  555. SIDE_BAND_CHANNEL_PROGRESS: progress,
  556. SIDE_BAND_CHANNEL_FATAL: write_error})
  557. class TCPGitClient(TraditionalGitClient):
  558. """A Git Client that works over TCP directly (i.e. git://)."""
  559. def __init__(self, host, port=None, **kwargs):
  560. if port is None:
  561. port = TCP_GIT_PORT
  562. self._host = host
  563. self._port = port
  564. TraditionalGitClient.__init__(self, **kwargs)
  565. def get_url(self, path):
  566. netloc = self._host
  567. if self._port is not None and self._port != TCP_GIT_PORT:
  568. netloc += ":%d" % self._port
  569. return urlparse.urlunsplit(("git", netloc, path, '', ''))
  570. def _connect(self, cmd, path):
  571. if type(cmd) is not bytes:
  572. raise TypeError(path)
  573. if type(path) is not bytes:
  574. raise TypeError(path)
  575. sockaddrs = socket.getaddrinfo(
  576. self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM)
  577. s = None
  578. err = socket.error("no address found for %s" % self._host)
  579. for (family, socktype, proto, canonname, sockaddr) in sockaddrs:
  580. s = socket.socket(family, socktype, proto)
  581. s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  582. try:
  583. s.connect(sockaddr)
  584. break
  585. except socket.error as err:
  586. if s is not None:
  587. s.close()
  588. s = None
  589. if s is None:
  590. raise err
  591. # -1 means system default buffering
  592. rfile = s.makefile('rb', -1)
  593. # 0 means unbuffered
  594. wfile = s.makefile('wb', 0)
  595. def close():
  596. rfile.close()
  597. wfile.close()
  598. s.close()
  599. proto = Protocol(rfile.read, wfile.write, close,
  600. report_activity=self._report_activity)
  601. if path.startswith(b"/~"):
  602. path = path[1:]
  603. # TODO(jelmer): Alternative to ascii?
  604. proto.send_cmd(b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
  605. return proto, lambda: _fileno_can_read(s)
  606. class SubprocessWrapper(object):
  607. """A socket-like object that talks to a subprocess via pipes."""
  608. def __init__(self, proc):
  609. self.proc = proc
  610. if sys.version_info[0] == 2:
  611. self.read = proc.stdout.read
  612. else:
  613. self.read = BufferedReader(proc.stdout).read
  614. self.write = proc.stdin.write
  615. def can_read(self):
  616. if sys.platform == 'win32':
  617. from msvcrt import get_osfhandle
  618. from win32pipe import PeekNamedPipe
  619. handle = get_osfhandle(self.proc.stdout.fileno())
  620. data, total_bytes_avail, msg_bytes_left = PeekNamedPipe(handle, 0)
  621. return total_bytes_avail != 0
  622. else:
  623. return _fileno_can_read(self.proc.stdout.fileno())
  624. def close(self):
  625. self.proc.stdin.close()
  626. self.proc.stdout.close()
  627. if self.proc.stderr:
  628. self.proc.stderr.close()
  629. self.proc.wait()
  630. def find_git_command():
  631. """Find command to run for system Git (usually C Git).
  632. """
  633. if sys.platform == 'win32': # support .exe, .bat and .cmd
  634. try: # to avoid overhead
  635. import win32api
  636. except ImportError: # run through cmd.exe with some overhead
  637. return ['cmd', '/c', 'git']
  638. else:
  639. status, git = win32api.FindExecutable('git')
  640. return [git]
  641. else:
  642. return ['git']
  643. class SubprocessGitClient(TraditionalGitClient):
  644. """Git client that talks to a server using a subprocess."""
  645. def __init__(self, **kwargs):
  646. self._connection = None
  647. self._stderr = None
  648. self._stderr = kwargs.get('stderr')
  649. if 'stderr' in kwargs:
  650. del kwargs['stderr']
  651. TraditionalGitClient.__init__(self, **kwargs)
  652. git_command = None
  653. def _connect(self, service, path):
  654. if type(service) is not bytes:
  655. raise TypeError(path)
  656. if type(path) is not bytes:
  657. raise TypeError(path)
  658. if self.git_command is None:
  659. git_command = find_git_command()
  660. argv = git_command + [service.decode('ascii'), path]
  661. p = SubprocessWrapper(
  662. subprocess.Popen(argv, bufsize=0, stdin=subprocess.PIPE,
  663. stdout=subprocess.PIPE,
  664. stderr=self._stderr))
  665. return Protocol(p.read, p.write, p.close,
  666. report_activity=self._report_activity), p.can_read
  667. class LocalGitClient(GitClient):
  668. """Git Client that just uses a local Repo."""
  669. def __init__(self, thin_packs=True, report_activity=None):
  670. """Create a new LocalGitClient instance.
  671. :param thin_packs: Whether or not thin packs should be retrieved
  672. :param report_activity: Optional callback for reporting transport
  673. activity.
  674. """
  675. self._report_activity = report_activity
  676. # Ignore the thin_packs argument
  677. def get_url(self, path):
  678. return urlparse.urlunsplit(('file', '', path, '', ''))
  679. def send_pack(self, path, determine_wants, generate_pack_contents,
  680. progress=None, write_pack=write_pack_objects):
  681. """Upload a pack to a remote repository.
  682. :param path: Repository path (as bytestring)
  683. :param generate_pack_contents: Function that can return a sequence of
  684. the shas of the objects to upload.
  685. :param progress: Optional progress function
  686. :param write_pack: Function called with (file, iterable of objects) to
  687. write the objects returned by generate_pack_contents to the server.
  688. :raises SendPackError: if server rejects the pack data
  689. :raises UpdateRefsError: if the server supports report-status
  690. and rejects ref updates
  691. :return: new_refs dictionary containing the changes that were made
  692. {refname: new_ref}, including deleted refs.
  693. """
  694. if not progress:
  695. progress = lambda x: None
  696. from dulwich.repo import Repo
  697. with closing(Repo(path)) as target:
  698. old_refs = target.get_refs()
  699. new_refs = determine_wants(dict(old_refs))
  700. have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
  701. want = []
  702. for refname, new_sha1 in new_refs.items():
  703. if new_sha1 not in have and not new_sha1 in want and new_sha1 != ZERO_SHA:
  704. want.append(new_sha1)
  705. if not want and set(new_refs.items()).issubset(set(old_refs.items())):
  706. return new_refs
  707. target.object_store.add_objects(generate_pack_contents(have, want))
  708. for refname, new_sha1 in new_refs.items():
  709. old_sha1 = old_refs.get(refname, ZERO_SHA)
  710. if new_sha1 != ZERO_SHA:
  711. if not target.refs.set_if_equals(refname, old_sha1, new_sha1):
  712. progress('unable to set %s to %s' % (refname, new_sha1))
  713. else:
  714. if not target.refs.remove_if_equals(refname, old_sha1):
  715. progress('unable to remove %s' % refname)
  716. return new_refs
  717. def fetch(self, path, target, determine_wants=None, progress=None):
  718. """Fetch into a target repository.
  719. :param path: Path to fetch from (as bytestring)
  720. :param target: Target repository to fetch into
  721. :param determine_wants: Optional function to determine what refs
  722. to fetch
  723. :param progress: Optional progress function
  724. :return: Dictionary with all remote refs (not just those fetched)
  725. """
  726. from dulwich.repo import Repo
  727. with closing(Repo(path)) as r:
  728. return r.fetch(target, determine_wants=determine_wants,
  729. progress=progress)
  730. def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
  731. progress=None):
  732. """Retrieve a pack from a git smart server.
  733. :param determine_wants: Callback that returns list of commits to fetch
  734. :param graph_walker: Object with next() and ack().
  735. :param pack_data: Callback called for each bit of data in the pack
  736. :param progress: Callback for progress reports (strings)
  737. :return: Dictionary with all remote refs (not just those fetched)
  738. """
  739. from dulwich.repo import Repo
  740. with closing(Repo(path)) as r:
  741. objects_iter = r.fetch_objects(determine_wants, graph_walker, progress)
  742. # Did the process short-circuit (e.g. in a stateless RPC call)? Note
  743. # that the client still expects a 0-object pack in most cases.
  744. if objects_iter is None:
  745. return
  746. write_pack_objects(ProtocolFile(None, pack_data), objects_iter)
  747. def get_refs(self, path):
  748. """Retrieve the current refs from a git smart server."""
  749. from dulwich.repo import Repo
  750. with closing(Repo(path)) as target:
  751. return target.get_refs()
# What Git client to use for local access. get_transport_and_path_from_url
# (for 'file' URLs) and get_transport_and_path (for local paths) look this
# name up when called, so rebinding it changes the client they construct.
default_local_git_client_cls = LocalGitClient
  754. class SSHVendor(object):
  755. """A client side SSH implementation."""
  756. def connect_ssh(self, host, command, username=None, port=None):
  757. import warnings
  758. warnings.warn(
  759. "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command",
  760. DeprecationWarning)
  761. return self.run_command(host, command, username=username, port=port)
  762. def run_command(self, host, command, username=None, port=None):
  763. """Connect to an SSH server.
  764. Run a command remotely and return a file-like object for interaction
  765. with the remote command.
  766. :param host: Host name
  767. :param command: Command to run (as argv array)
  768. :param username: Optional ame of user to log in as
  769. :param port: Optional SSH port to use
  770. """
  771. raise NotImplementedError(self.run_command)
  772. class SubprocessSSHVendor(SSHVendor):
  773. """SSH vendor that shells out to the local 'ssh' command."""
  774. def run_command(self, host, command, username=None, port=None):
  775. if not isinstance(command, bytes):
  776. raise TypeError(command)
  777. #FIXME: This has no way to deal with passwords..
  778. args = ['ssh', '-x']
  779. if port is not None:
  780. args.extend(['-p', str(port)])
  781. if username is not None:
  782. host = '%s@%s' % (username, host)
  783. args.append(host)
  784. proc = subprocess.Popen(args + [command],
  785. stdin=subprocess.PIPE,
  786. stdout=subprocess.PIPE)
  787. return SubprocessWrapper(proc)
  788. def ParamikoSSHVendor(**kwargs):
  789. import warnings
  790. warnings.warn(
  791. "ParamikoSSHVendor has been moved to dulwich.contrib.paramiko_vendor.",
  792. DeprecationWarning)
  793. from dulwich.contrib.paramiko_vendor import ParamikoSSHVendor
  794. return ParamikoSSHVendor(**kwargs)
# Can be overridden by users: SSHGitClient.__init__ calls this factory to
# create its SSH vendor whenever no explicit ``vendor`` argument is passed.
get_ssh_vendor = SubprocessSSHVendor
  797. class SSHGitClient(TraditionalGitClient):
  798. def __init__(self, host, port=None, username=None, vendor=None, **kwargs):
  799. self.host = host
  800. self.port = port
  801. self.username = username
  802. TraditionalGitClient.__init__(self, **kwargs)
  803. self.alternative_paths = {}
  804. if vendor is not None:
  805. self.ssh_vendor = vendor
  806. else:
  807. self.ssh_vendor = get_ssh_vendor()
  808. def get_url(self, path):
  809. netloc = self.host
  810. if self.port is not None:
  811. netloc += ":%d" % self.port
  812. if self.username is not None:
  813. netloc = self.username + "@" + netloc
  814. return urlparse.urlunsplit(('ssh', netloc, path, '', ''))
  815. def _get_cmd_path(self, cmd):
  816. cmd = self.alternative_paths.get(cmd, b'git-' + cmd)
  817. assert isinstance(cmd, bytes)
  818. return cmd
  819. def _connect(self, cmd, path):
  820. if type(cmd) is not bytes:
  821. raise TypeError(path)
  822. if type(path) is not bytes:
  823. raise TypeError(path)
  824. if path.startswith(b"/~"):
  825. path = path[1:]
  826. argv = self._get_cmd_path(cmd) + b" '" + path + b"'"
  827. con = self.ssh_vendor.run_command(
  828. self.host, argv, port=self.port, username=self.username)
  829. return (Protocol(con.read, con.write, con.close,
  830. report_activity=self._report_activity),
  831. con.can_read)
  832. def default_user_agent_string():
  833. return "dulwich/%s" % ".".join([str(x) for x in dulwich.__version__])
  834. def default_urllib2_opener(config):
  835. if config is not None:
  836. proxy_server = config.get("http", "proxy")
  837. else:
  838. proxy_server = None
  839. handlers = []
  840. if proxy_server is not None:
  841. handlers.append(urllib2.ProxyHandler({"http": proxy_server}))
  842. opener = urllib2.build_opener(*handlers)
  843. if config is not None:
  844. user_agent = config.get("http", "useragent")
  845. else:
  846. user_agent = None
  847. if user_agent is None:
  848. user_agent = default_user_agent_string()
  849. opener.addheaders = [('User-agent', user_agent)]
  850. return opener
  851. class HttpGitClient(GitClient):
  852. def __init__(self, base_url, dumb=None, opener=None, config=None, **kwargs):
  853. self._base_url = base_url.rstrip("/") + "/"
  854. self.dumb = dumb
  855. if opener is None:
  856. self.opener = default_urllib2_opener(config)
  857. else:
  858. self.opener = opener
  859. GitClient.__init__(self, **kwargs)
  860. def get_url(self, path):
  861. return self._get_url(path).rstrip("/")
  862. def __repr__(self):
  863. return "%s(%r, dumb=%r)" % (type(self).__name__, self._base_url, self.dumb)
  864. def _get_url(self, path):
  865. return urlparse.urljoin(self._base_url, path).rstrip("/") + "/"
  866. def _http_request(self, url, headers={}, data=None):
  867. req = urllib2.Request(url, headers=headers, data=data)
  868. try:
  869. resp = self.opener.open(req)
  870. except urllib2.HTTPError as e:
  871. if e.code == 404:
  872. raise NotGitRepository()
  873. if e.code != 200:
  874. raise GitProtocolError("unexpected http response %d" % e.code)
  875. return resp
  876. def _discover_references(self, service, url):
  877. assert url[-1] == "/"
  878. url = urlparse.urljoin(url, "info/refs")
  879. headers = {}
  880. if self.dumb is not False:
  881. url += "?service=%s" % service.decode('ascii')
  882. headers["Content-Type"] = "application/x-%s-request" % (
  883. service.decode('ascii'))
  884. resp = self._http_request(url, headers)
  885. try:
  886. content_type = resp.info().gettype()
  887. except AttributeError:
  888. content_type = resp.info().get_content_type()
  889. try:
  890. self.dumb = (not content_type.startswith("application/x-git-"))
  891. if not self.dumb:
  892. proto = Protocol(resp.read, None)
  893. # The first line should mention the service
  894. pkts = list(proto.read_pkt_seq())
  895. if pkts != [b'# service=' + service + b'\n']:
  896. raise GitProtocolError(
  897. "unexpected first line %r from smart server" % pkts)
  898. return read_pkt_refs(proto)
  899. else:
  900. return read_info_refs(resp), set()
  901. finally:
  902. resp.close()
  903. def _smart_request(self, service, url, data):
  904. assert url[-1] == "/"
  905. url = urlparse.urljoin(url, service)
  906. headers = {
  907. "Content-Type": "application/x-%s-request" % service
  908. }
  909. resp = self._http_request(url, headers, data)
  910. try:
  911. content_type = resp.info().gettype()
  912. except AttributeError:
  913. content_type = resp.info().get_content_type()
  914. if content_type != (
  915. "application/x-%s-result" % service):
  916. raise GitProtocolError("Invalid content-type from server: %s"
  917. % content_type)
  918. return resp
  919. def send_pack(self, path, determine_wants, generate_pack_contents,
  920. progress=None, write_pack=write_pack_objects):
  921. """Upload a pack to a remote repository.
  922. :param path: Repository path (as bytestring)
  923. :param generate_pack_contents: Function that can return a sequence of
  924. the shas of the objects to upload.
  925. :param progress: Optional progress function
  926. :param write_pack: Function called with (file, iterable of objects) to
  927. write the objects returned by generate_pack_contents to the server.
  928. :raises SendPackError: if server rejects the pack data
  929. :raises UpdateRefsError: if the server supports report-status
  930. and rejects ref updates
  931. :return: new_refs dictionary containing the changes that were made
  932. {refname: new_ref}, including deleted refs.
  933. """
  934. url = self._get_url(path)
  935. old_refs, server_capabilities = self._discover_references(
  936. b"git-receive-pack", url)
  937. negotiated_capabilities = self._send_capabilities & server_capabilities
  938. if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
  939. self._report_status_parser = ReportStatusParser()
  940. new_refs = determine_wants(dict(old_refs))
  941. if new_refs is None:
  942. # Determine wants function is aborting the push.
  943. return old_refs
  944. if self.dumb:
  945. raise NotImplementedError(self.fetch_pack)
  946. req_data = BytesIO()
  947. req_proto = Protocol(None, req_data.write)
  948. (have, want) = self._handle_receive_pack_head(
  949. req_proto, negotiated_capabilities, old_refs, new_refs)
  950. if not want and set(new_refs.items()).issubset(set(old_refs.items())):
  951. return new_refs
  952. objects = generate_pack_contents(have, want)
  953. if len(objects) > 0:
  954. write_pack(req_proto.write_file(), objects)
  955. resp = self._smart_request("git-receive-pack", url,
  956. data=req_data.getvalue())
  957. try:
  958. resp_proto = Protocol(resp.read, None)
  959. self._handle_receive_pack_tail(resp_proto, negotiated_capabilities,
  960. progress)
  961. return new_refs
  962. finally:
  963. resp.close()
  964. def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
  965. progress=None):
  966. """Retrieve a pack from a git smart server.
  967. :param determine_wants: Callback that returns list of commits to fetch
  968. :param graph_walker: Object with next() and ack().
  969. :param pack_data: Callback called for each bit of data in the pack
  970. :param progress: Callback for progress reports (strings)
  971. :return: Dictionary with all remote refs (not just those fetched)
  972. """
  973. url = self._get_url(path)
  974. refs, server_capabilities = self._discover_references(
  975. b"git-upload-pack", url)
  976. negotiated_capabilities = self._fetch_capabilities & server_capabilities
  977. wants = determine_wants(refs)
  978. if wants is not None:
  979. wants = [cid for cid in wants if cid != ZERO_SHA]
  980. if not wants:
  981. return refs
  982. if self.dumb:
  983. raise NotImplementedError(self.send_pack)
  984. req_data = BytesIO()
  985. req_proto = Protocol(None, req_data.write)
  986. self._handle_upload_pack_head(
  987. req_proto, negotiated_capabilities, graph_walker, wants,
  988. lambda: False)
  989. resp = self._smart_request(
  990. "git-upload-pack", url, data=req_data.getvalue())
  991. try:
  992. resp_proto = Protocol(resp.read, None)
  993. self._handle_upload_pack_tail(resp_proto, negotiated_capabilities,
  994. graph_walker, pack_data, progress)
  995. return refs
  996. finally:
  997. resp.close()
  998. def get_refs(self, path):
  999. """Retrieve the current refs from a git smart server."""
  1000. url = self._get_url(path)
  1001. refs, _ = self._discover_references(
  1002. b"git-upload-pack", url)
  1003. return refs
  1004. def get_transport_and_path_from_url(url, config=None, **kwargs):
  1005. """Obtain a git client from a URL.
  1006. :param url: URL to open (a unicode string)
  1007. :param config: Optional config object
  1008. :param thin_packs: Whether or not thin packs should be retrieved
  1009. :param report_activity: Optional callback for reporting transport
  1010. activity.
  1011. :return: Tuple with client instance and relative path.
  1012. """
  1013. parsed = urlparse.urlparse(url)
  1014. if parsed.scheme == 'git':
  1015. return (TCPGitClient(parsed.hostname, port=parsed.port, **kwargs),
  1016. parsed.path)
  1017. elif parsed.scheme in ('git+ssh', 'ssh'):
  1018. path = parsed.path
  1019. if path.startswith('/'):
  1020. path = parsed.path[1:]
  1021. return SSHGitClient(parsed.hostname, port=parsed.port,
  1022. username=parsed.username, **kwargs), path
  1023. elif parsed.scheme in ('http', 'https'):
  1024. return HttpGitClient(urlparse.urlunparse(parsed), config=config,
  1025. **kwargs), parsed.path
  1026. elif parsed.scheme == 'file':
  1027. return default_local_git_client_cls(**kwargs), parsed.path
  1028. raise ValueError("unknown scheme '%s'" % parsed.scheme)
  1029. def get_transport_and_path(location, **kwargs):
  1030. """Obtain a git client from a URL.
  1031. :param location: URL or path (a string)
  1032. :param config: Optional config object
  1033. :param thin_packs: Whether or not thin packs should be retrieved
  1034. :param report_activity: Optional callback for reporting transport
  1035. activity.
  1036. :return: Tuple with client instance and relative path.
  1037. """
  1038. # First, try to parse it as a URL
  1039. try:
  1040. return get_transport_and_path_from_url(location, **kwargs)
  1041. except ValueError:
  1042. pass
  1043. if (sys.platform == 'win32' and
  1044. location[0].isalpha() and location[1:3] == ':\\'):
  1045. # Windows local path
  1046. return default_local_git_client_cls(**kwargs), location
  1047. if ':' in location and not '@' in location:
  1048. # SSH with no user@, zero or one leading slash.
  1049. (hostname, path) = location.split(':')
  1050. return SSHGitClient(hostname, **kwargs), path
  1051. elif '@' in location and ':' in location:
  1052. # SSH with user@host:foo.
  1053. user_host, path = location.split(':')
  1054. user, host = user_host.rsplit('@')
  1055. return SSHGitClient(host, username=user, **kwargs), path
  1056. # Otherwise, assume it's a local path.
  1057. return default_local_git_client_cls(**kwargs), location