server.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745
  1. # server.py -- Implementation of the server side git protocols
  2. # Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
  3. # Copyright(C) 2011-2012 Jelmer Vernooij <jelmer@jelmer.uk>
  4. #
  5. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  6. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  7. # General Public License as published by the Free Software Foundation; version 2.0
  8. # or (at your option) any later version. You can redistribute it and/or
  9. # modify it under the terms of either of these two licenses.
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. # You should have received a copy of the licenses; if not, see
  18. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  19. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  20. # License, Version 2.0.
  21. #
  22. """Git smart network protocol server implementation.
  23. For more details on the network protocol, see the
  24. Documentation/technical directory in the cgit distribution, and in particular:
  25. * Documentation/technical/protocol-capabilities.txt
  26. * Documentation/technical/pack-protocol.txt
  27. Currently supported capabilities:
  28. * include-tag
  29. * thin-pack
  30. * multi_ack_detailed
  31. * multi_ack
  32. * side-band-64k
  33. * ofs-delta
  34. * no-progress
  35. * report-status
  36. * delete-refs
  37. * shallow
  38. * symref
  39. """
  40. import collections
  41. import os
  42. import socket
  43. import socketserver
  44. import sys
  45. import time
  46. import zlib
  47. from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
  48. from collections.abc import Set as AbstractSet
  49. from functools import partial
  50. from typing import IO, TYPE_CHECKING, Optional
  51. from typing import Protocol as TypingProtocol
  52. if sys.version_info >= (3, 12):
  53. from collections.abc import Buffer
  54. else:
  55. Buffer = bytes | bytearray | memoryview
  56. if TYPE_CHECKING:
  57. from .object_store import BaseObjectStore
  58. from .repo import BaseRepo
  59. from dulwich import log_utils
  60. from .archive import tar_stream
  61. from .errors import (
  62. ApplyDeltaError,
  63. ChecksumMismatch,
  64. GitProtocolError,
  65. HookError,
  66. NotGitRepository,
  67. ObjectFormatException,
  68. UnexpectedCommandError,
  69. )
  70. from .object_store import MissingObjectFinder, PackBasedObjectStore, find_shallow
  71. from .objects import Commit, ObjectID, Tree, valid_hexsha
  72. from .pack import ObjectContainer, write_pack_from_container
  73. from .protocol import (
  74. CAPABILITIES_REF,
  75. CAPABILITY_AGENT,
  76. CAPABILITY_DELETE_REFS,
  77. CAPABILITY_INCLUDE_TAG,
  78. CAPABILITY_MULTI_ACK,
  79. CAPABILITY_MULTI_ACK_DETAILED,
  80. CAPABILITY_NO_DONE,
  81. CAPABILITY_NO_PROGRESS,
  82. CAPABILITY_OFS_DELTA,
  83. CAPABILITY_QUIET,
  84. CAPABILITY_REPORT_STATUS,
  85. CAPABILITY_SHALLOW,
  86. CAPABILITY_SIDE_BAND_64K,
  87. CAPABILITY_THIN_PACK,
  88. COMMAND_DEEPEN,
  89. COMMAND_DONE,
  90. COMMAND_HAVE,
  91. COMMAND_SHALLOW,
  92. COMMAND_UNSHALLOW,
  93. COMMAND_WANT,
  94. MULTI_ACK,
  95. MULTI_ACK_DETAILED,
  96. NAK_LINE,
  97. SIDE_BAND_CHANNEL_DATA,
  98. SIDE_BAND_CHANNEL_FATAL,
  99. SIDE_BAND_CHANNEL_PROGRESS,
  100. SINGLE_ACK,
  101. TCP_GIT_PORT,
  102. ZERO_SHA,
  103. BufferedPktLineWriter,
  104. Protocol,
  105. ReceivableProtocol,
  106. ack_type,
  107. capability_agent,
  108. extract_capabilities,
  109. extract_want_line_capabilities,
  110. format_ack_line,
  111. format_ref_line,
  112. format_shallow_line,
  113. format_unshallow_line,
  114. symref_capabilities,
  115. )
  116. from .refs import PEELED_TAG_SUFFIX, Ref, RefsContainer, write_info_refs
  117. from .repo import Repo
  118. logger = log_utils.getLogger(__name__)
  119. class Backend:
  120. """A backend for the Git smart server implementation."""
  121. def open_repository(self, path: str) -> "BackendRepo":
  122. """Open the repository at a path.
  123. Args:
  124. path: Path to the repository
  125. Raises:
  126. NotGitRepository: no git repository was found at path
  127. Returns: Instance of BackendRepo
  128. """
  129. raise NotImplementedError(self.open_repository)
  130. class BackendRepo(TypingProtocol):
  131. """Repository abstraction used by the Git server.
  132. The methods required here are a subset of those provided by
  133. dulwich.repo.Repo.
  134. """
  135. object_store: PackBasedObjectStore
  136. refs: RefsContainer
  137. def get_refs(self) -> dict[bytes, bytes]:
  138. """Get all the refs in the repository.
  139. Returns: dict of name -> sha
  140. """
  141. raise NotImplementedError
  142. def get_peeled(self, name: bytes) -> bytes | None:
  143. """Return the cached peeled value of a ref, if available.
  144. Args:
  145. name: Name of the ref to peel
  146. Returns: The peeled value of the ref. If the ref is known not point to
  147. a tag, this will be the SHA the ref refers to. If no cached
  148. information about a tag is available, this method may return None,
  149. but it should attempt to peel the tag if possible.
  150. """
  151. return None
  152. def find_missing_objects(
  153. self,
  154. determine_wants: Callable[[Mapping[bytes, bytes], int | None], list[bytes]],
  155. graph_walker: "_ProtocolGraphWalker",
  156. progress: Callable[[bytes], None] | None,
  157. *,
  158. get_tagged: Callable[[], dict[bytes, bytes]] | None = None,
  159. depth: int | None = None,
  160. ) -> Optional["MissingObjectFinder"]:
  161. """Yield the objects required for a list of commits.
  162. Args:
  163. determine_wants: Function to determine which objects the client wants
  164. graph_walker: Object used to walk the commit graph
  165. progress: is a callback to send progress messages to the client
  166. get_tagged: Function that returns a dict of pointed-to sha ->
  167. tag sha for including tags.
  168. depth: Maximum depth of commits to fetch for shallow clones
  169. """
  170. raise NotImplementedError
  171. class DictBackend(Backend):
  172. """Trivial backend that looks up Git repositories in a dictionary."""
  173. def __init__(
  174. self, repos: dict[bytes, "BackendRepo"] | dict[str, "BackendRepo"]
  175. ) -> None:
  176. """Initialize a DictBackend.
  177. Args:
  178. repos: Dictionary mapping repository paths to BackendRepo instances
  179. """
  180. self.repos = repos
  181. def open_repository(self, path: str) -> BackendRepo:
  182. """Open repository at given path.
  183. Args:
  184. path: Path to the repository
  185. Returns:
  186. Repository object
  187. Raises:
  188. NotGitRepository: If no repository found at path
  189. """
  190. logger.debug("Opening repository at %s", path)
  191. # Handle both str and bytes keys for backward compatibility
  192. if path in self.repos:
  193. return self.repos[path] # type: ignore
  194. # Try converting between str and bytes
  195. if isinstance(path, bytes):
  196. try:
  197. alt_path = path.decode("utf-8")
  198. if alt_path in self.repos:
  199. return self.repos[alt_path]
  200. except UnicodeDecodeError:
  201. pass
  202. else:
  203. alt_path_bytes = path.encode("utf-8")
  204. if alt_path_bytes in self.repos:
  205. return self.repos[alt_path_bytes] # type: ignore
  206. raise NotGitRepository(
  207. "No git repository was found at {path}".format(**dict(path=path))
  208. )
  209. class FileSystemBackend(Backend):
  210. """Simple backend looking up Git repositories in the local file system."""
  211. def __init__(self, root: str = os.sep) -> None:
  212. """Initialize a FileSystemBackend.
  213. Args:
  214. root: Root directory to serve repositories from
  215. """
  216. super().__init__()
  217. self.root = (os.path.abspath(root) + os.sep).replace(os.sep * 2, os.sep)
  218. def open_repository(self, path: str) -> BackendRepo:
  219. """Open a repository from the filesystem.
  220. Args:
  221. path: Path to the repository relative to the root
  222. Returns: Repo instance
  223. Raises:
  224. NotGitRepository: If path is outside the root or not a git repository
  225. """
  226. logger.debug("opening repository at %s", path)
  227. # Ensure path is a string to avoid TypeError when joining with self.root
  228. path = os.fspath(path)
  229. if isinstance(path, bytes):
  230. path = os.fsdecode(path)
  231. abspath = os.path.abspath(os.path.join(self.root, path)) + os.sep
  232. normcase_abspath = os.path.normcase(abspath)
  233. normcase_root = os.path.normcase(self.root)
  234. if not normcase_abspath.startswith(normcase_root):
  235. raise NotGitRepository(f"Path {path!r} not inside root {self.root!r}")
  236. return Repo(abspath) # type: ignore[return-value]
  237. class Handler:
  238. """Smart protocol command handler base class."""
  239. def __init__(
  240. self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
  241. ) -> None:
  242. """Initialize a Handler.
  243. Args:
  244. backend: Backend instance for repository access
  245. proto: Protocol instance for communication
  246. stateless_rpc: Whether this is a stateless RPC session
  247. """
  248. self.backend = backend
  249. self.proto = proto
  250. self.stateless_rpc = stateless_rpc
  251. def handle(self) -> None:
  252. """Handle a request."""
  253. raise NotImplementedError(self.handle)
  254. class PackHandler(Handler):
  255. """Protocol handler for packs."""
  256. def __init__(
  257. self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
  258. ) -> None:
  259. """Initialize a PackHandler.
  260. Args:
  261. backend: Backend instance for repository access
  262. proto: Protocol instance for communication
  263. stateless_rpc: Whether this is a stateless RPC session
  264. """
  265. super().__init__(backend, proto, stateless_rpc)
  266. self._client_capabilities: set[bytes] | None = None
  267. # Flags needed for the no-done capability
  268. self._done_received = False
  269. self.advertise_refs = False
  270. @classmethod
  271. def capabilities(cls) -> Iterable[bytes]:
  272. """Return a list of capabilities supported by this handler."""
  273. raise NotImplementedError(cls.capabilities)
  274. @classmethod
  275. def innocuous_capabilities(cls) -> Iterable[bytes]:
  276. """Return capabilities that don't affect protocol behavior.
  277. Returns:
  278. List of innocuous capability names
  279. """
  280. return [
  281. CAPABILITY_INCLUDE_TAG,
  282. CAPABILITY_THIN_PACK,
  283. CAPABILITY_NO_PROGRESS,
  284. CAPABILITY_OFS_DELTA,
  285. capability_agent(),
  286. ]
  287. @classmethod
  288. def required_capabilities(cls) -> Iterable[bytes]:
  289. """Return a list of capabilities that we require the client to have."""
  290. return []
  291. def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
  292. """Set the client capabilities and validate them.
  293. Args:
  294. caps: List of capabilities requested by the client
  295. Raises:
  296. GitProtocolError: If client requests unsupported capability or lacks required ones
  297. """
  298. allowable_caps = set(self.innocuous_capabilities())
  299. allowable_caps.update(self.capabilities())
  300. for cap in caps:
  301. if cap.startswith(CAPABILITY_AGENT + b"="):
  302. continue
  303. if cap not in allowable_caps:
  304. raise GitProtocolError(
  305. f"Client asked for capability {cap!r} that was not advertised."
  306. )
  307. for cap in self.required_capabilities():
  308. if cap not in caps:
  309. raise GitProtocolError(
  310. f"Client does not support required capability {cap!r}."
  311. )
  312. self._client_capabilities = set(caps)
  313. logger.info("Client capabilities: %s", caps)
  314. def has_capability(self, cap: bytes) -> bool:
  315. """Check if the client supports a specific capability.
  316. Args:
  317. cap: Capability name to check
  318. Returns: True if the client supports the capability
  319. Raises:
  320. GitProtocolError: If called before client capabilities are set
  321. """
  322. if self._client_capabilities is None:
  323. raise GitProtocolError(
  324. f"Server attempted to access capability {cap!r} before asking client"
  325. )
  326. return cap in self._client_capabilities
  327. def notify_done(self) -> None:
  328. """Notify that the 'done' command has been received from the client."""
  329. self._done_received = True
  330. class UploadPackHandler(PackHandler):
  331. """Protocol handler for uploading a pack to the client."""
  332. def __init__(
  333. self,
  334. backend: Backend,
  335. args: Sequence[str],
  336. proto: Protocol,
  337. stateless_rpc: bool = False,
  338. advertise_refs: bool = False,
  339. ) -> None:
  340. """Initialize an UploadPackHandler.
  341. Args:
  342. backend: Backend instance for repository access
  343. args: Command arguments (first arg is repository path)
  344. proto: Protocol instance for communication
  345. stateless_rpc: Whether this is a stateless RPC session
  346. advertise_refs: Whether to advertise refs
  347. """
  348. super().__init__(backend, proto, stateless_rpc=stateless_rpc)
  349. self.repo = backend.open_repository(args[0])
  350. self._graph_walker = None
  351. self.advertise_refs = advertise_refs
  352. # A state variable for denoting that the have list is still
  353. # being processed, and the client is not accepting any other
  354. # data (such as side-band, see the progress method here).
  355. self._processing_have_lines = False
  356. @classmethod
  357. def capabilities(cls) -> list[bytes]:
  358. """Return the list of capabilities supported by upload-pack."""
  359. return [
  360. CAPABILITY_MULTI_ACK_DETAILED,
  361. CAPABILITY_MULTI_ACK,
  362. CAPABILITY_SIDE_BAND_64K,
  363. CAPABILITY_THIN_PACK,
  364. CAPABILITY_OFS_DELTA,
  365. CAPABILITY_NO_PROGRESS,
  366. CAPABILITY_INCLUDE_TAG,
  367. CAPABILITY_SHALLOW,
  368. CAPABILITY_NO_DONE,
  369. ]
  370. @classmethod
  371. def required_capabilities(cls) -> tuple[bytes, ...]:
  372. """Return the list of capabilities required for upload-pack."""
  373. return (
  374. CAPABILITY_SIDE_BAND_64K,
  375. CAPABILITY_THIN_PACK,
  376. CAPABILITY_OFS_DELTA,
  377. )
  378. def progress(self, message: bytes) -> None:
  379. """Send a progress message to the client.
  380. Args:
  381. message: Progress message to send
  382. """
  383. def _start_pack_send_phase(self) -> None:
  384. """Start the pack sending phase, setting up sideband if supported."""
  385. if self.has_capability(CAPABILITY_SIDE_BAND_64K):
  386. # The provided haves are processed, and it is safe to send side-
  387. # band data now.
  388. if not self.has_capability(CAPABILITY_NO_PROGRESS):
  389. self.progress = partial( # type: ignore
  390. self.proto.write_sideband, SIDE_BAND_CHANNEL_PROGRESS
  391. )
  392. self.write_pack_data: Callable[[bytes], None] = partial(
  393. self.proto.write_sideband, SIDE_BAND_CHANNEL_DATA
  394. )
  395. else:
  396. # proto.write returns Optional[int], but we need to treat it as returning None
  397. # for compatibility with write_pack_from_container
  398. def write_data(data: bytes) -> None:
  399. self.proto.write(data)
  400. self.write_pack_data = write_data
  401. def get_tagged(
  402. self,
  403. refs: Mapping[bytes, bytes] | None = None,
  404. repo: BackendRepo | None = None,
  405. ) -> dict[ObjectID, ObjectID]:
  406. """Get a dict of peeled values of tags to their original tag shas.
  407. Args:
  408. refs: dict of refname -> sha of possible tags; defaults to all
  409. of the backend's refs.
  410. repo: optional Repo instance for getting peeled refs; defaults
  411. to the backend's repo, if available
  412. Returns: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a
  413. tag whose peeled value is peeled_sha.
  414. """
  415. if not self.has_capability(CAPABILITY_INCLUDE_TAG):
  416. return {}
  417. if refs is None:
  418. refs = self.repo.get_refs()
  419. if repo is None:
  420. repo = getattr(self.repo, "repo", None)
  421. if repo is None:
  422. # Bail if we don't have a Repo available; this is ok since
  423. # clients must be able to handle if the server doesn't include
  424. # all relevant tags.
  425. # TODO: fix behavior when missing
  426. return {}
  427. # TODO(jelmer): Integrate this with the refs logic in
  428. # Repo.find_missing_objects
  429. tagged = {}
  430. for name, sha in refs.items():
  431. peeled_sha = repo.get_peeled(name)
  432. if peeled_sha is not None and peeled_sha != sha:
  433. tagged[peeled_sha] = sha
  434. return tagged
  435. def handle(self) -> None:
  436. """Handle an upload-pack request.
  437. This method processes the client's wants and haves, determines which
  438. objects to send, and writes the pack data to the client.
  439. """
  440. # Note the fact that client is only processing responses related
  441. # to the have lines it sent, and any other data (including side-
  442. # band) will be be considered a fatal error.
  443. self._processing_have_lines = True
  444. graph_walker = _ProtocolGraphWalker(
  445. self,
  446. self.repo.object_store,
  447. self.repo.get_peeled,
  448. self.repo.refs.get_symrefs,
  449. )
  450. wants = []
  451. def wants_wrapper(
  452. refs: Mapping[bytes, bytes], depth: int | None = None
  453. ) -> list[bytes]:
  454. wants.extend(graph_walker.determine_wants(refs, depth))
  455. return wants
  456. missing_objects = self.repo.find_missing_objects(
  457. wants_wrapper,
  458. graph_walker,
  459. self.progress,
  460. get_tagged=self.get_tagged,
  461. )
  462. # Did the process short-circuit (e.g. in a stateless RPC call)? Note
  463. # that the client still expects a 0-object pack in most cases.
  464. # Also, if it also happens that the object_iter is instantiated
  465. # with a graph walker with an implementation that talks over the
  466. # wire (which is this instance of this class) this will actually
  467. # iterate through everything and write things out to the wire.
  468. if len(wants) == 0:
  469. return
  470. # Handle shallow clone case where missing_objects can be None
  471. if missing_objects is None:
  472. return
  473. object_ids = list(missing_objects)
  474. if not graph_walker.handle_done(
  475. not self.has_capability(CAPABILITY_NO_DONE), self._done_received
  476. ):
  477. return
  478. self._start_pack_send_phase()
  479. self.progress((f"counting objects: {len(object_ids)}, done.\n").encode("ascii"))
  480. write_pack_from_container(
  481. self.write_pack_data,
  482. self.repo.object_store,
  483. object_ids,
  484. )
  485. # we are done
  486. self.proto.write_pkt_line(None)
  487. def _split_proto_line(
  488. line: bytes | None, allowed: Iterable[bytes | None] | None
  489. ) -> tuple[bytes | None, bytes | int | None]:
  490. """Split a line read from the wire.
  491. Args:
  492. line: The line read from the wire.
  493. allowed: An iterable of command names that should be allowed.
  494. Command names not listed below as possible return values will be
  495. ignored. If None, any commands from the possible return values are
  496. allowed.
  497. Returns: a tuple having one of the following forms:
  498. ('want', obj_id)
  499. ('have', obj_id)
  500. ('done', None)
  501. (None, None) (for a flush-pkt)
  502. Raises:
  503. UnexpectedCommandError: if the line cannot be parsed into one of the
  504. allowed return values.
  505. """
  506. if not line:
  507. fields: list[bytes | None] = [None]
  508. else:
  509. fields = list(line.rstrip(b"\n").split(b" ", 1))
  510. command = fields[0]
  511. if allowed is not None and command not in allowed:
  512. raise UnexpectedCommandError(command.decode("utf-8") if command else None)
  513. if len(fields) == 1 and command in (COMMAND_DONE, None):
  514. return (command, None)
  515. elif len(fields) == 2:
  516. if command in (
  517. COMMAND_WANT,
  518. COMMAND_HAVE,
  519. COMMAND_SHALLOW,
  520. COMMAND_UNSHALLOW,
  521. ):
  522. assert fields[1] is not None
  523. if not valid_hexsha(fields[1]):
  524. raise GitProtocolError("Invalid sha")
  525. return (command, fields[1])
  526. elif command == COMMAND_DEEPEN:
  527. assert fields[1] is not None
  528. return command, int(fields[1])
  529. raise GitProtocolError(f"Received invalid line from client: {line!r}")
  530. def _want_satisfied(
  531. store: ObjectContainer, haves: set[bytes], want: bytes, earliest: int
  532. ) -> bool:
  533. """Check if a specific want is satisfied by a set of haves.
  534. Args:
  535. store: Object store to retrieve objects from
  536. haves: Set of commit IDs the client has
  537. want: Commit ID the client wants
  538. earliest: Earliest commit time to consider
  539. Returns: True if the want is satisfied by the haves
  540. """
  541. o = store[want]
  542. pending = collections.deque([o])
  543. known = {want}
  544. while pending:
  545. commit = pending.popleft()
  546. if commit.id in haves:
  547. return True
  548. if not isinstance(commit, Commit):
  549. # non-commit wants are assumed to be satisfied
  550. continue
  551. for parent in commit.parents:
  552. if parent in known:
  553. continue
  554. known.add(parent)
  555. parent_obj = store[parent]
  556. assert isinstance(parent_obj, Commit)
  557. # TODO: handle parents with later commit times than children
  558. if parent_obj.commit_time >= earliest:
  559. pending.append(parent_obj)
  560. return False
  561. def _all_wants_satisfied(
  562. store: ObjectContainer, haves: AbstractSet[bytes], wants: set[bytes]
  563. ) -> bool:
  564. """Check whether all the current wants are satisfied by a set of haves.
  565. Args:
  566. store: Object store to retrieve objects from
  567. haves: A set of commits we know the client has.
  568. wants: A set of commits the client wants
  569. Note: Wants are specified with set_wants rather than passed in since
  570. in the current interface they are determined outside this class.
  571. """
  572. haves = set(haves)
  573. if haves:
  574. have_objs = [store[h] for h in haves]
  575. earliest = min([h.commit_time for h in have_objs if isinstance(h, Commit)])
  576. else:
  577. earliest = 0
  578. for want in wants:
  579. if not _want_satisfied(store, haves, want, earliest):
  580. return False
  581. return True
  582. class AckGraphWalkerImpl:
  583. """Base class for acknowledgment graph walker implementations."""
  584. def __init__(self, graph_walker: "_ProtocolGraphWalker") -> None:
  585. """Initialize acknowledgment graph walker.
  586. Args:
  587. graph_walker: Graph walker to wrap
  588. """
  589. raise NotImplementedError
  590. def ack(self, have_ref: ObjectID) -> None:
  591. """Acknowledge a have reference.
  592. Args:
  593. have_ref: Object ID to acknowledge
  594. """
  595. raise NotImplementedError
  596. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  597. """Handle 'done' packet from client."""
  598. raise NotImplementedError
  599. class _ProtocolGraphWalker:
  600. """A graph walker that knows the git protocol.
  601. As a graph walker, this class implements ack(), next(), and reset(). It
  602. also contains some base methods for interacting with the wire and walking
  603. the commit tree.
  604. The work of determining which acks to send is passed on to the
  605. implementation instance stored in _impl. The reason for this is that we do
  606. not know at object creation time what ack level the protocol requires. A
  607. call to set_ack_type() is required to set up the implementation, before
  608. any calls to next() or ack() are made.
  609. """
  610. def __init__(
  611. self,
  612. handler: PackHandler,
  613. object_store: ObjectContainer,
  614. get_peeled: Callable[[bytes], bytes | None],
  615. get_symrefs: Callable[[], dict[bytes, bytes]],
  616. ) -> None:
  617. """Initialize a ProtocolGraphWalker.
  618. Args:
  619. handler: Protocol handler instance
  620. object_store: Object store for retrieving objects
  621. get_peeled: Function to get peeled refs
  622. get_symrefs: Function to get symbolic refs
  623. """
  624. self.handler = handler
  625. self.store: ObjectContainer = object_store
  626. self.get_peeled = get_peeled
  627. self.get_symrefs = get_symrefs
  628. self.proto = handler.proto
  629. self.stateless_rpc = handler.stateless_rpc
  630. self.advertise_refs = handler.advertise_refs
  631. self._wants: list[bytes] = []
  632. self.shallow: set[bytes] = set()
  633. self.client_shallow: set[bytes] = set()
  634. self.unshallow: set[bytes] = set()
  635. self._cached = False
  636. self._cache: list[bytes] = []
  637. self._cache_index = 0
  638. self._impl: AckGraphWalkerImpl | None = None
  639. def determine_wants(
  640. self, heads: Mapping[bytes, bytes], depth: int | None = None
  641. ) -> list[bytes]:
  642. """Determine the wants for a set of heads.
  643. The given heads are advertised to the client, who then specifies which
  644. refs they want using 'want' lines. This portion of the protocol is the
  645. same regardless of ack type, and in fact is used to set the ack type of
  646. the ProtocolGraphWalker.
  647. If the client has the 'shallow' capability, this method also reads and
  648. responds to the 'shallow' and 'deepen' lines from the client. These are
  649. not part of the wants per se, but they set up necessary state for
  650. walking the graph. Additionally, later code depends on this method
  651. consuming everything up to the first 'have' line.
  652. Args:
  653. heads: a dict of refname->SHA1 to advertise
  654. depth: Maximum depth for shallow clones
  655. Returns: a list of SHA1s requested by the client
  656. """
  657. symrefs = self.get_symrefs()
  658. values = set(heads.values())
  659. if self.advertise_refs or not self.stateless_rpc:
  660. for i, (ref, sha) in enumerate(sorted(heads.items())):
  661. try:
  662. peeled_sha = self.get_peeled(ref)
  663. except KeyError:
  664. # Skip refs that are inaccessible
  665. # TODO(jelmer): Integrate with Repo.find_missing_objects refs
  666. # logic.
  667. continue
  668. if i == 0:
  669. logger.info("Sending capabilities: %s", self.handler.capabilities())
  670. line = format_ref_line(
  671. ref,
  672. sha,
  673. list(self.handler.capabilities())
  674. + symref_capabilities(symrefs.items()),
  675. )
  676. else:
  677. line = format_ref_line(ref, sha)
  678. self.proto.write_pkt_line(line)
  679. if peeled_sha is not None and peeled_sha != sha:
  680. self.proto.write_pkt_line(
  681. format_ref_line(ref + PEELED_TAG_SUFFIX, peeled_sha)
  682. )
  683. # i'm done..
  684. self.proto.write_pkt_line(None)
  685. if self.advertise_refs:
  686. return []
  687. # Now client will sending want want want commands
  688. want = self.proto.read_pkt_line()
  689. if not want:
  690. return []
  691. line, caps = extract_want_line_capabilities(want)
  692. self.handler.set_client_capabilities(caps)
  693. self.set_ack_type(ack_type(caps))
  694. allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, None)
  695. command, sha_result = _split_proto_line(line, allowed)
  696. want_revs = []
  697. while command == COMMAND_WANT:
  698. assert isinstance(sha_result, bytes)
  699. if sha_result not in values:
  700. raise GitProtocolError(f"Client wants invalid object {sha_result!r}")
  701. want_revs.append(sha_result)
  702. command, sha_result = self.read_proto_line(allowed)
  703. self.set_wants(want_revs)
  704. if command in (COMMAND_SHALLOW, COMMAND_DEEPEN):
  705. assert sha_result is not None
  706. self.unread_proto_line(command, sha_result)
  707. self._handle_shallow_request(want_revs)
  708. if self.stateless_rpc and self.proto.eof():
  709. # The client may close the socket at this point, expecting a
  710. # flush-pkt from the server. We might be ready to send a packfile
  711. # at this point, so we need to explicitly short-circuit in this
  712. # case.
  713. return []
  714. return want_revs
  715. def unread_proto_line(self, command: bytes, value: bytes | int) -> None:
  716. """Push a command back to be read again.
  717. Args:
  718. command: Command name
  719. value: Command value
  720. """
  721. if isinstance(value, int):
  722. value = str(value).encode("ascii")
  723. self.proto.unread_pkt_line(command + b" " + value)
    def nak(self) -> None:
        """Send a NAK response.

        NOTE(review): the body is empty, so calling this writes nothing to the
        wire; ``send_nak`` is the method that actually emits a NAK line.
        Presumably kept so the walker offers the same methods as other graph
        walkers -- confirm against callers.
        """
  726. def ack(self, have_ref: bytes) -> None:
  727. """Acknowledge a have reference.
  728. Args:
  729. have_ref: SHA to acknowledge (40 bytes hex)
  730. Raises:
  731. ValueError: If have_ref is not 40 bytes
  732. """
  733. if len(have_ref) != 40:
  734. raise ValueError(f"invalid sha {have_ref!r}")
  735. assert self._impl is not None
  736. return self._impl.ack(have_ref)
  737. def reset(self) -> None:
  738. """Reset the graph walker cache."""
  739. self._cached = True
  740. self._cache_index = 0
  741. def next(self) -> bytes | None:
  742. """Get the next SHA from the graph walker.
  743. Returns: Next SHA or None if done
  744. """
  745. if not self._cached:
  746. if not self._impl and self.stateless_rpc:
  747. return None
  748. assert self._impl is not None
  749. return next(self._impl) # type: ignore[call-overload, no-any-return]
  750. self._cache_index += 1
  751. if self._cache_index > len(self._cache):
  752. return None
  753. return self._cache[self._cache_index]
  754. __next__ = next
  755. def read_proto_line(
  756. self, allowed: Iterable[bytes | None] | None
  757. ) -> tuple[bytes | None, bytes | int | None]:
  758. """Read a line from the wire.
  759. Args:
  760. allowed: An iterable of command names that should be allowed.
  761. Returns: A tuple of (command, value); see _split_proto_line.
  762. Raises:
  763. UnexpectedCommandError: If an error occurred reading the line.
  764. """
  765. return _split_proto_line(self.proto.read_pkt_line(), allowed)
  766. def _handle_shallow_request(self, wants: Sequence[bytes]) -> None:
  767. """Handle shallow clone requests from the client.
  768. Args:
  769. wants: List of wanted object SHAs
  770. """
  771. while True:
  772. command, val = self.read_proto_line((COMMAND_DEEPEN, COMMAND_SHALLOW))
  773. if command == COMMAND_DEEPEN:
  774. assert isinstance(val, int)
  775. depth = val
  776. break
  777. assert isinstance(val, bytes)
  778. self.client_shallow.add(val)
  779. self.read_proto_line((None,)) # consume client's flush-pkt
  780. shallow, not_shallow = find_shallow(self.store, wants, depth)
  781. # Update self.shallow instead of reassigning it since we passed a
  782. # reference to it before this method was called.
  783. self.shallow.update(shallow - not_shallow)
  784. new_shallow = self.shallow - self.client_shallow
  785. unshallow = self.unshallow = not_shallow & self.client_shallow
  786. self.update_shallow(new_shallow, unshallow)
  787. def update_shallow(
  788. self, new_shallow: AbstractSet[bytes], unshallow: AbstractSet[bytes]
  789. ) -> None:
  790. """Update shallow/unshallow information to the client.
  791. Args:
  792. new_shallow: Set of newly shallow commits
  793. unshallow: Set of commits to unshallow
  794. """
  795. for sha in sorted(new_shallow):
  796. self.proto.write_pkt_line(format_shallow_line(sha))
  797. for sha in sorted(unshallow):
  798. self.proto.write_pkt_line(format_unshallow_line(sha))
  799. self.proto.write_pkt_line(None)
  800. def notify_done(self) -> None:
  801. """Notify that the client sent 'done'."""
  802. # relay the message down to the handler.
  803. self.handler.notify_done()
  804. def send_ack(self, sha: bytes, ack_type: bytes = b"") -> None:
  805. """Send an ACK to the client.
  806. Args:
  807. sha: SHA to acknowledge
  808. ack_type: Type of ACK (e.g., b'continue', b'ready')
  809. """
  810. self.proto.write_pkt_line(format_ack_line(sha, ack_type))
    def send_nak(self) -> None:
        """Send a NAK to the client.

        Writes the module's ``NAK_LINE`` as a single pkt-line.
        """
        self.proto.write_pkt_line(NAK_LINE)
  814. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  815. """Handle the 'done' command.
  816. Args:
  817. done_required: Whether done is required
  818. done_received: Whether done was received
  819. Returns: True if done handling succeeded
  820. """
  821. # Delegate this to the implementation.
  822. assert self._impl is not None
  823. return self._impl.handle_done(done_required, done_received)
    def set_wants(self, wants: list[bytes]) -> None:
        """Set the list of wanted objects.

        The list is stored as-is (not copied) and is what
        ``all_wants_satisfied`` later checks against.

        Args:
          wants: List of wanted object SHAs
        """
        self._wants = wants
  830. def all_wants_satisfied(self, haves: AbstractSet[bytes]) -> bool:
  831. """Check whether all the current wants are satisfied by a set of haves.
  832. Args:
  833. haves: A set of commits we know the client has.
  834. Note: Wants are specified with set_wants rather than passed in since
  835. in the current interface they are determined outside this class.
  836. """
  837. return _all_wants_satisfied(self.store, haves, set(self._wants))
  838. def set_ack_type(self, ack_type: int) -> None:
  839. """Set the acknowledgment type for the graph walker.
  840. Args:
  841. ack_type: One of SINGLE_ACK, MULTI_ACK, or MULTI_ACK_DETAILED
  842. """
  843. impl_classes: dict[int, type[AckGraphWalkerImpl]] = {
  844. MULTI_ACK: MultiAckGraphWalkerImpl,
  845. MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
  846. SINGLE_ACK: SingleAckGraphWalkerImpl,
  847. }
  848. self._impl = impl_classes[ack_type](self)
# Commands the ack implementations below accept while reading the client's
# negotiation lines. ``None`` is the parsed form of a flush-pkt (the
# implementations treat a ``None`` command that way).
_GRAPH_WALKER_COMMANDS = (COMMAND_HAVE, COMMAND_DONE, None)
  850. class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
  851. """Graph walker implementation that speaks the single-ack protocol."""
  852. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  853. """Initialize a SingleAckGraphWalkerImpl.
  854. Args:
  855. walker: Parent ProtocolGraphWalker instance
  856. """
  857. self.walker = walker
  858. self._common: list[bytes] = []
  859. def ack(self, have_ref: bytes) -> None:
  860. """Acknowledge a have reference.
  861. Args:
  862. have_ref: Object ID to acknowledge
  863. """
  864. if not self._common:
  865. self.walker.send_ack(have_ref)
  866. self._common.append(have_ref)
  867. def next(self) -> bytes | None:
  868. """Get next SHA from graph walker.
  869. Returns:
  870. SHA bytes or None if done
  871. """
  872. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  873. if command in (None, COMMAND_DONE):
  874. # defer the handling of done
  875. self.walker.notify_done()
  876. return None
  877. elif command == COMMAND_HAVE:
  878. assert isinstance(sha, bytes)
  879. return sha
  880. return None
  881. __next__ = next
  882. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  883. """Handle done command.
  884. Args:
  885. done_required: Whether done is required
  886. done_received: Whether done was received
  887. Returns:
  888. True if handling completed successfully
  889. """
  890. if not self._common:
  891. self.walker.send_nak()
  892. if done_required and not done_received:
  893. # we are not done, especially when done is required; skip
  894. # the pack for this request and especially do not handle
  895. # the done.
  896. return False
  897. if not done_received and not self._common:
  898. # Okay we are not actually done then since the walker picked
  899. # up no haves. This is usually triggered when client attempts
  900. # to pull from a source that has no common base_commit.
  901. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  902. # test_multi_ack_stateless_nodone
  903. return False
  904. return True
  905. class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
  906. """Graph walker implementation that speaks the multi-ack protocol."""
  907. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  908. """Initialize multi-ack graph walker.
  909. Args:
  910. walker: Parent ProtocolGraphWalker instance
  911. """
  912. self.walker = walker
  913. self._found_base = False
  914. self._common: list[bytes] = []
  915. def ack(self, have_ref: bytes) -> None:
  916. """Acknowledge a have reference.
  917. Args:
  918. have_ref: Object ID to acknowledge
  919. """
  920. self._common.append(have_ref)
  921. if not self._found_base:
  922. self.walker.send_ack(have_ref, b"continue")
  923. if self.walker.all_wants_satisfied(set(self._common)):
  924. self._found_base = True
  925. # else we blind ack within next
  926. def next(self) -> bytes | None:
  927. """Get next SHA from graph walker.
  928. Returns:
  929. SHA bytes or None if done
  930. """
  931. while True:
  932. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  933. if command is None:
  934. self.walker.send_nak()
  935. # in multi-ack mode, a flush-pkt indicates the client wants to
  936. # flush but more have lines are still coming
  937. continue
  938. elif command == COMMAND_DONE:
  939. self.walker.notify_done()
  940. return None
  941. elif command == COMMAND_HAVE:
  942. assert isinstance(sha, bytes)
  943. if self._found_base:
  944. # blind ack
  945. self.walker.send_ack(sha, b"continue")
  946. return sha
  947. __next__ = next
  948. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  949. """Handle done command.
  950. Args:
  951. done_required: Whether done is required
  952. done_received: Whether done was received
  953. Returns:
  954. True if handling completed successfully
  955. """
  956. if done_required and not done_received:
  957. # we are not done, especially when done is required; skip
  958. # the pack for this request and especially do not handle
  959. # the done.
  960. return False
  961. if not done_received and not self._common:
  962. # Okay we are not actually done then since the walker picked
  963. # up no haves. This is usually triggered when client attempts
  964. # to pull from a source that has no common base_commit.
  965. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  966. # test_multi_ack_stateless_nodone
  967. return False
  968. # don't nak unless no common commits were found, even if not
  969. # everything is satisfied
  970. if self._common:
  971. self.walker.send_ack(self._common[-1])
  972. else:
  973. self.walker.send_nak()
  974. return True
  975. class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
  976. """Graph walker implementation speaking the multi-ack-detailed protocol."""
  977. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  978. """Initialize multi-ack-detailed graph walker.
  979. Args:
  980. walker: Parent ProtocolGraphWalker instance
  981. """
  982. self.walker = walker
  983. self._common: list[bytes] = []
  984. def ack(self, have_ref: bytes) -> None:
  985. """Acknowledge a have reference.
  986. Args:
  987. have_ref: Object ID to acknowledge
  988. """
  989. # Should only be called iff have_ref is common
  990. self._common.append(have_ref)
  991. self.walker.send_ack(have_ref, b"common")
  992. def next(self) -> bytes | None:
  993. """Get next SHA from graph walker.
  994. Returns:
  995. SHA bytes or None if done
  996. """
  997. while True:
  998. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  999. if command is None:
  1000. if self.walker.all_wants_satisfied(set(self._common)):
  1001. self.walker.send_ack(self._common[-1], b"ready")
  1002. self.walker.send_nak()
  1003. if self.walker.stateless_rpc:
  1004. # The HTTP version of this request a flush-pkt always
  1005. # signifies an end of request, so we also return
  1006. # nothing here as if we are done (but not really, as
  1007. # it depends on whether no-done capability was
  1008. # specified and that's handled in handle_done which
  1009. # may or may not call post_nodone_check depending on
  1010. # that).
  1011. return None
  1012. elif command == COMMAND_DONE:
  1013. # Let the walker know that we got a done.
  1014. self.walker.notify_done()
  1015. break
  1016. elif command == COMMAND_HAVE:
  1017. # return the sha and let the caller ACK it with the
  1018. # above ack method.
  1019. assert isinstance(sha, bytes)
  1020. return sha
  1021. # don't nak unless no common commits were found, even if not
  1022. # everything is satisfied
  1023. return None
  1024. __next__ = next
  1025. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  1026. """Handle done command.
  1027. Args:
  1028. done_required: Whether done is required
  1029. done_received: Whether done was received
  1030. Returns:
  1031. True if handling completed successfully
  1032. """
  1033. if done_required and not done_received:
  1034. # we are not done, especially when done is required; skip
  1035. # the pack for this request and especially do not handle
  1036. # the done.
  1037. return False
  1038. if not done_received and not self._common:
  1039. # Okay we are not actually done then since the walker picked
  1040. # up no haves. This is usually triggered when client attempts
  1041. # to pull from a source that has no common base_commit.
  1042. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  1043. # test_multi_ack_stateless_nodone
  1044. return False
  1045. # don't nak unless no common commits were found, even if not
  1046. # everything is satisfied
  1047. if self._common:
  1048. self.walker.send_ack(self._common[-1])
  1049. else:
  1050. self.walker.send_nak()
  1051. return True
  1052. class ReceivePackHandler(PackHandler):
  1053. """Protocol handler for downloading a pack from the client."""
  1054. def __init__(
  1055. self,
  1056. backend: Backend,
  1057. args: Sequence[str],
  1058. proto: Protocol,
  1059. stateless_rpc: bool = False,
  1060. advertise_refs: bool = False,
  1061. ) -> None:
  1062. """Initialize receive-pack handler.
  1063. Args:
  1064. backend: Backend instance
  1065. args: Command arguments
  1066. proto: Protocol instance
  1067. stateless_rpc: Whether to use stateless RPC
  1068. advertise_refs: Whether to advertise refs
  1069. """
  1070. super().__init__(backend, proto, stateless_rpc=stateless_rpc)
  1071. self.repo = backend.open_repository(args[0])
  1072. self.advertise_refs = advertise_refs
  1073. @classmethod
  1074. def capabilities(cls) -> Iterable[bytes]:
  1075. """Return supported capabilities.
  1076. Returns:
  1077. List of capability names
  1078. """
  1079. return [
  1080. CAPABILITY_REPORT_STATUS,
  1081. CAPABILITY_DELETE_REFS,
  1082. CAPABILITY_QUIET,
  1083. CAPABILITY_OFS_DELTA,
  1084. CAPABILITY_SIDE_BAND_64K,
  1085. CAPABILITY_NO_DONE,
  1086. ]
  1087. def _apply_pack(
  1088. self, refs: list[tuple[ObjectID, ObjectID, Ref]]
  1089. ) -> Iterator[tuple[bytes, bytes]]:
  1090. """Apply received pack to repository.
  1091. Args:
  1092. refs: List of (old_sha, new_sha, ref_name) tuples
  1093. Yields:
  1094. Tuples of (ref_name, error_message) for any errors
  1095. """
  1096. all_exceptions = (
  1097. IOError,
  1098. OSError,
  1099. ChecksumMismatch,
  1100. ApplyDeltaError,
  1101. AssertionError,
  1102. socket.error,
  1103. zlib.error,
  1104. ObjectFormatException,
  1105. )
  1106. will_send_pack = False
  1107. for command in refs:
  1108. if command[1] != ZERO_SHA:
  1109. will_send_pack = True
  1110. if will_send_pack:
  1111. # TODO: more informative error messages than just the exception
  1112. # string
  1113. try:
  1114. recv = getattr(self.proto, "recv", None)
  1115. self.repo.object_store.add_thin_pack(self.proto.read, recv) # type: ignore[attr-defined]
  1116. yield (b"unpack", b"ok")
  1117. except all_exceptions as e:
  1118. yield (b"unpack", str(e).replace("\n", "").encode("utf-8"))
  1119. # The pack may still have been moved in, but it may contain
  1120. # broken objects. We trust a later GC to clean it up.
  1121. else:
  1122. # The git protocol want to find a status entry related to unpack
  1123. # process even if no pack data has been sent.
  1124. yield (b"unpack", b"ok")
  1125. for oldsha, sha, ref in refs:
  1126. ref_status = b"ok"
  1127. try:
  1128. if sha == ZERO_SHA:
  1129. if CAPABILITY_DELETE_REFS not in self.capabilities():
  1130. raise GitProtocolError(
  1131. "Attempted to delete refs without delete-refs capability."
  1132. )
  1133. try:
  1134. self.repo.refs.remove_if_equals(ref, oldsha)
  1135. except all_exceptions:
  1136. ref_status = b"failed to delete"
  1137. else:
  1138. try:
  1139. self.repo.refs.set_if_equals(ref, oldsha, sha)
  1140. except all_exceptions:
  1141. ref_status = b"failed to write"
  1142. except KeyError:
  1143. ref_status = b"bad ref"
  1144. yield (ref, ref_status)
  1145. def _report_status(self, status: Sequence[tuple[bytes, bytes]]) -> None:
  1146. """Report status to client.
  1147. Args:
  1148. status: List of (ref_name, status_message) tuples
  1149. """
  1150. if self.has_capability(CAPABILITY_SIDE_BAND_64K):
  1151. writer = BufferedPktLineWriter(
  1152. lambda d: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, d)
  1153. )
  1154. write = writer.write
  1155. def flush() -> None:
  1156. writer.flush()
  1157. self.proto.write_pkt_line(None)
  1158. else:
  1159. write = self.proto.write_pkt_line # type: ignore[assignment]
  1160. def flush() -> None:
  1161. pass
  1162. for name, msg in status:
  1163. if name == b"unpack":
  1164. write(b"unpack " + msg + b"\n")
  1165. elif msg == b"ok":
  1166. write(b"ok " + name + b"\n")
  1167. else:
  1168. write(b"ng " + name + b" " + msg + b"\n")
  1169. write(None) # type: ignore
  1170. flush()
  1171. def _on_post_receive(self, client_refs: dict[bytes, tuple[bytes, bytes]]) -> None:
  1172. """Run post-receive hook.
  1173. Args:
  1174. client_refs: Dictionary of ref changes from client
  1175. """
  1176. hook = self.repo.hooks.get("post-receive", None) # type: ignore[attr-defined]
  1177. if not hook:
  1178. return
  1179. try:
  1180. output = hook.execute(client_refs)
  1181. if output:
  1182. self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, output)
  1183. except HookError as err:
  1184. self.proto.write_sideband(SIDE_BAND_CHANNEL_FATAL, str(err).encode("utf-8"))
  1185. def handle(self) -> None:
  1186. """Handle receive-pack request."""
  1187. if self.advertise_refs or not self.stateless_rpc:
  1188. refs = sorted(self.repo.get_refs().items())
  1189. symrefs = sorted(self.repo.refs.get_symrefs().items())
  1190. if not refs:
  1191. refs = [(CAPABILITIES_REF, ZERO_SHA)]
  1192. logger.info("Sending capabilities: %s", self.capabilities())
  1193. self.proto.write_pkt_line(
  1194. format_ref_line(
  1195. refs[0][0],
  1196. refs[0][1],
  1197. list(self.capabilities()) + symref_capabilities(symrefs),
  1198. )
  1199. )
  1200. for i in range(1, len(refs)):
  1201. ref = refs[i]
  1202. self.proto.write_pkt_line(format_ref_line(ref[0], ref[1]))
  1203. self.proto.write_pkt_line(None)
  1204. if self.advertise_refs:
  1205. return
  1206. client_refs = []
  1207. ref_line = self.proto.read_pkt_line()
  1208. # if ref is none then client doesn't want to send us anything..
  1209. if ref_line is None:
  1210. return
  1211. ref_line, caps = extract_capabilities(ref_line)
  1212. self.set_client_capabilities(caps)
  1213. # client will now send us a list of (oldsha, newsha, ref)
  1214. while ref_line:
  1215. (oldsha, newsha, ref_name) = ref_line.split()
  1216. client_refs.append((oldsha, newsha, ref_name))
  1217. ref_line = self.proto.read_pkt_line()
  1218. # backend can now deal with this refs and read a pack using self.read
  1219. status = list(self._apply_pack(client_refs))
  1220. self._on_post_receive(client_refs) # type: ignore[arg-type]
  1221. # when we have read all the pack from the client, send a status report
  1222. # if the client asked for it
  1223. if self.has_capability(CAPABILITY_REPORT_STATUS):
  1224. self._report_status(status)
  1225. class UploadArchiveHandler(Handler):
  1226. """Handler for git-upload-archive requests."""
  1227. def __init__(
  1228. self,
  1229. backend: Backend,
  1230. args: Sequence[str],
  1231. proto: Protocol,
  1232. stateless_rpc: bool = False,
  1233. ) -> None:
  1234. """Initialize upload-archive handler.
  1235. Args:
  1236. backend: Backend instance
  1237. args: Command arguments
  1238. proto: Protocol instance
  1239. stateless_rpc: Whether to use stateless RPC
  1240. """
  1241. super().__init__(backend, proto, stateless_rpc)
  1242. self.repo = backend.open_repository(args[0])
  1243. def handle(self) -> None:
  1244. """Handle upload-archive request."""
  1245. def write(x: bytes) -> None:
  1246. self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
  1247. arguments = []
  1248. for pkt in self.proto.read_pkt_seq():
  1249. (key, value) = pkt.split(b" ", 1)
  1250. if key != b"argument":
  1251. raise GitProtocolError(f"unknown command {key!r}")
  1252. arguments.append(value.rstrip(b"\n"))
  1253. prefix = b""
  1254. format = "tar"
  1255. i = 0
  1256. store: BaseObjectStore = self.repo.object_store
  1257. while i < len(arguments):
  1258. argument = arguments[i]
  1259. if argument == b"--prefix":
  1260. i += 1
  1261. prefix = arguments[i]
  1262. elif argument == b"--format":
  1263. i += 1
  1264. format = arguments[i].decode("ascii")
  1265. else:
  1266. commit_sha = self.repo.refs[argument]
  1267. commit_obj = store[commit_sha]
  1268. assert isinstance(commit_obj, Commit)
  1269. tree_obj = store[commit_obj.tree]
  1270. assert isinstance(tree_obj, Tree)
  1271. tree = tree_obj
  1272. i += 1
  1273. self.proto.write_pkt_line(b"ACK")
  1274. self.proto.write_pkt_line(None)
  1275. for chunk in tar_stream(
  1276. store,
  1277. tree,
  1278. mtime=int(time.time()),
  1279. prefix=prefix,
  1280. format=format,
  1281. ):
  1282. write(chunk)
  1283. self.proto.write_pkt_line(None)
# Default handler classes for git services, keyed by the service name the
# client requests. TCPGitServer copies this mapping and layers any
# caller-supplied handlers on top of it.
DEFAULT_HANDLERS = {
    b"git-upload-pack": UploadPackHandler,
    b"git-receive-pack": ReceivePackHandler,
    b"git-upload-archive": UploadArchiveHandler,
}
  1290. class TCPGitRequestHandler(socketserver.StreamRequestHandler):
  1291. """TCP request handler for git protocol."""
  1292. def __init__(
  1293. self,
  1294. handlers: dict[bytes, type[Handler]],
  1295. request: socket.socket,
  1296. client_address: tuple[str, int],
  1297. server: socketserver.TCPServer,
  1298. ) -> None:
  1299. """Initialize TCP request handler.
  1300. Args:
  1301. handlers: Dictionary mapping commands to handler classes
  1302. request: Request socket
  1303. client_address: Client address tuple
  1304. server: Server instance
  1305. """
  1306. self.handlers = handlers
  1307. socketserver.StreamRequestHandler.__init__(
  1308. self, request, client_address, server
  1309. )
  1310. def handle(self) -> None:
  1311. """Handle TCP git request."""
  1312. proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
  1313. command, args = proto.read_cmd()
  1314. logger.info("Handling %s request, args=%s", command, args)
  1315. cls = self.handlers.get(command, None)
  1316. if not callable(cls):
  1317. raise GitProtocolError(f"Invalid service {command!r}")
  1318. h = cls(self.server.backend, args, proto) # type: ignore
  1319. h.handle()
  1320. class TCPGitServer(socketserver.TCPServer):
  1321. """TCP server for git protocol."""
  1322. allow_reuse_address = True
  1323. serve = socketserver.TCPServer.serve_forever
  1324. def _make_handler(
  1325. self,
  1326. request: socket.socket,
  1327. client_address: tuple[str, int],
  1328. server: socketserver.TCPServer,
  1329. ) -> TCPGitRequestHandler:
  1330. """Create request handler instance.
  1331. Args:
  1332. request: Request socket
  1333. client_address: Client address tuple
  1334. server: Server instance
  1335. Returns:
  1336. TCPGitRequestHandler instance
  1337. """
  1338. return TCPGitRequestHandler(self.handlers, request, client_address, server)
  1339. def __init__(
  1340. self,
  1341. backend: Backend,
  1342. listen_addr: str,
  1343. port: int = TCP_GIT_PORT,
  1344. handlers: dict[bytes, type[Handler]] | None = None,
  1345. ) -> None:
  1346. """Initialize TCP git server.
  1347. Args:
  1348. backend: Backend instance
  1349. listen_addr: Address to listen on
  1350. port: Port to listen on (default: TCP_GIT_PORT)
  1351. handlers: Optional dictionary of custom handlers
  1352. """
  1353. self.handlers = dict(DEFAULT_HANDLERS)
  1354. if handlers is not None:
  1355. self.handlers.update(handlers)
  1356. self.backend = backend
  1357. logger.info("Listening for TCP connections on %s:%d", listen_addr, port)
  1358. socketserver.TCPServer.__init__(self, (listen_addr, port), self._make_handler)
  1359. def verify_request(
  1360. self,
  1361. request: socket.socket | tuple[bytes, socket.socket],
  1362. client_address: tuple[str, int] | socket.socket,
  1363. ) -> bool:
  1364. """Verify incoming request.
  1365. Args:
  1366. request: Request socket
  1367. client_address: Client address tuple
  1368. Returns:
  1369. True to accept request
  1370. """
  1371. logger.info("Handling request from %s", client_address)
  1372. return True
  1373. def handle_error(
  1374. self,
  1375. request: socket.socket | tuple[bytes, socket.socket],
  1376. client_address: tuple[str, int] | socket.socket,
  1377. ) -> None:
  1378. """Handle request processing errors.
  1379. Args:
  1380. request: Request socket
  1381. client_address: Client address tuple
  1382. """
  1383. logger.exception(
  1384. "Exception happened during processing of request from %s",
  1385. client_address,
  1386. )
  1387. def main(argv: list[str] = sys.argv) -> None:
  1388. """Entry point for starting a TCP git server."""
  1389. import optparse
  1390. parser = optparse.OptionParser()
  1391. parser.add_option(
  1392. "-l",
  1393. "--listen_address",
  1394. dest="listen_address",
  1395. default="localhost",
  1396. help="Binding IP address.",
  1397. )
  1398. parser.add_option(
  1399. "-p",
  1400. "--port",
  1401. dest="port",
  1402. type=int,
  1403. default=TCP_GIT_PORT,
  1404. help="Binding TCP port.",
  1405. )
  1406. options, args = parser.parse_args(argv)
  1407. log_utils.default_logging_config()
  1408. if len(args) > 1:
  1409. gitdir = args[1]
  1410. else:
  1411. gitdir = "."
  1412. # TODO(jelmer): Support git-daemon-export-ok and --export-all.
  1413. backend = FileSystemBackend(gitdir)
  1414. server = TCPGitServer(backend, options.listen_address, options.port)
  1415. server.serve_forever()
  1416. def serve_command(
  1417. handler_cls: type[Handler],
  1418. argv: list[str] = sys.argv,
  1419. backend: Backend | None = None,
  1420. inf: IO[bytes] | None = None,
  1421. outf: IO[bytes] | None = None,
  1422. ) -> int:
  1423. """Serve a single command.
  1424. This is mostly useful for the implementation of commands used by e.g.
  1425. git+ssh.
  1426. Args:
  1427. handler_cls: `Handler` class to use for the request
  1428. argv: execv-style command-line arguments. Defaults to sys.argv.
  1429. backend: `Backend` to use
  1430. inf: File-like object to read from, defaults to standard input.
  1431. outf: File-like object to write to, defaults to standard output.
  1432. Returns: Exit code for use with sys.exit. 0 on success, 1 on failure.
  1433. """
  1434. if backend is None:
  1435. backend = FileSystemBackend()
  1436. if inf is None:
  1437. inf = sys.stdin.buffer
  1438. if outf is None:
  1439. outf = sys.stdout.buffer
  1440. def send_fn(data: bytes) -> None:
  1441. outf.write(data)
  1442. outf.flush()
  1443. proto = Protocol(inf.read, send_fn)
  1444. handler = handler_cls(backend, argv[1:], proto) # type: ignore[arg-type]
  1445. # FIXME: Catch exceptions and write a single-line summary to outf.
  1446. handler.handle()
  1447. return 0
  1448. def generate_info_refs(repo: "BaseRepo") -> Iterator[bytes]:
  1449. """Generate an info refs file."""
  1450. refs = repo.get_refs()
  1451. return write_info_refs(refs, repo.object_store)
  1452. def generate_objects_info_packs(repo: "BaseRepo") -> Iterator[bytes]:
  1453. """Generate an index for for packs."""
  1454. for pack in repo.object_store.packs:
  1455. yield (b"P " + os.fsencode(pack.data.filename) + b"\n")
  1456. def update_server_info(repo: "BaseRepo") -> None:
  1457. """Generate server info for dumb file access.
  1458. This generates info/refs and objects/info/packs,
  1459. similar to "git update-server-info".
  1460. """
  1461. repo._put_named_file(
  1462. os.path.join("info", "refs"), b"".join(generate_info_refs(repo))
  1463. )
  1464. repo._put_named_file(
  1465. os.path.join("objects", "info", "packs"),
  1466. b"".join(generate_objects_info_packs(repo)),
  1467. )
# Start the TCP git server when this module is run as a script.
if __name__ == "__main__":
    main()