server.py 58 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740
  1. # server.py -- Implementation of the server side git protocols
  2. # Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
  3. # Copyright(C) 2011-2012 Jelmer Vernooij <jelmer@jelmer.uk>
  4. #
  5. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  6. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  7. # General Public License as published by the Free Software Foundation; version 2.0
  8. # or (at your option) any later version. You can redistribute it and/or
  9. # modify it under the terms of either of these two licenses.
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. # You should have received a copy of the licenses; if not, see
  18. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  19. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  20. # License, Version 2.0.
  21. #
  22. """Git smart network protocol server implementation.
  23. For more detailed implementation on the network protocol, see the
  24. Documentation/technical directory in the cgit distribution, and in particular:
  25. * Documentation/technical/protocol-capabilities.txt
  26. * Documentation/technical/pack-protocol.txt
  27. Currently supported capabilities:
  28. * include-tag
  29. * thin-pack
  30. * multi_ack_detailed
  31. * multi_ack
  32. * side-band-64k
  33. * ofs-delta
  34. * no-progress
  35. * report-status
  36. * delete-refs
  37. * shallow
  38. * symref
  39. """
  40. import os
  41. import socket
  42. import socketserver
  43. import sys
  44. import time
  45. import zlib
  46. from collections import deque
  47. from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
  48. from collections.abc import Set as AbstractSet
  49. from functools import partial
  50. from typing import IO, TYPE_CHECKING
  51. from typing import Protocol as TypingProtocol
  52. if TYPE_CHECKING:
  53. from .object_store import BaseObjectStore
  54. from .repo import BaseRepo
  55. from dulwich import log_utils
  56. from .archive import tar_stream
  57. from .errors import (
  58. ApplyDeltaError,
  59. ChecksumMismatch,
  60. GitProtocolError,
  61. HookError,
  62. NotGitRepository,
  63. ObjectFormatException,
  64. UnexpectedCommandError,
  65. )
  66. from .object_store import MissingObjectFinder, PackBasedObjectStore, find_shallow
  67. from .objects import Commit, ObjectID, Tree, valid_hexsha
  68. from .pack import ObjectContainer, write_pack_from_container
  69. from .protocol import (
  70. CAPABILITIES_REF,
  71. CAPABILITY_AGENT,
  72. CAPABILITY_DELETE_REFS,
  73. CAPABILITY_INCLUDE_TAG,
  74. CAPABILITY_MULTI_ACK,
  75. CAPABILITY_MULTI_ACK_DETAILED,
  76. CAPABILITY_NO_DONE,
  77. CAPABILITY_NO_PROGRESS,
  78. CAPABILITY_OFS_DELTA,
  79. CAPABILITY_QUIET,
  80. CAPABILITY_REPORT_STATUS,
  81. CAPABILITY_SHALLOW,
  82. CAPABILITY_SIDE_BAND_64K,
  83. CAPABILITY_THIN_PACK,
  84. COMMAND_DEEPEN,
  85. COMMAND_DONE,
  86. COMMAND_HAVE,
  87. COMMAND_SHALLOW,
  88. COMMAND_UNSHALLOW,
  89. COMMAND_WANT,
  90. MULTI_ACK,
  91. MULTI_ACK_DETAILED,
  92. NAK_LINE,
  93. SIDE_BAND_CHANNEL_DATA,
  94. SIDE_BAND_CHANNEL_FATAL,
  95. SIDE_BAND_CHANNEL_PROGRESS,
  96. SINGLE_ACK,
  97. TCP_GIT_PORT,
  98. ZERO_SHA,
  99. BufferedPktLineWriter,
  100. Protocol,
  101. ReceivableProtocol,
  102. ack_type,
  103. capability_agent,
  104. extract_capabilities,
  105. extract_want_line_capabilities,
  106. format_ack_line,
  107. format_ref_line,
  108. format_shallow_line,
  109. format_unshallow_line,
  110. symref_capabilities,
  111. )
  112. from .refs import PEELED_TAG_SUFFIX, Ref, RefsContainer, write_info_refs
  113. from .repo import Repo
  114. logger = log_utils.getLogger(__name__)
  115. class Backend:
  116. """A backend for the Git smart server implementation."""
  117. def open_repository(self, path: str) -> "BackendRepo":
  118. """Open the repository at a path.
  119. Args:
  120. path: Path to the repository
  121. Raises:
  122. NotGitRepository: no git repository was found at path
  123. Returns: Instance of BackendRepo
  124. """
  125. raise NotImplementedError(self.open_repository)
  126. class BackendRepo(TypingProtocol):
  127. """Repository abstraction used by the Git server.
  128. The methods required here are a subset of those provided by
  129. dulwich.repo.Repo.
  130. """
  131. object_store: PackBasedObjectStore
  132. refs: RefsContainer
  133. def get_refs(self) -> dict[bytes, bytes]:
  134. """Get all the refs in the repository.
  135. Returns: dict of name -> sha
  136. """
  137. raise NotImplementedError
  138. def get_peeled(self, name: bytes) -> bytes | None:
  139. """Return the cached peeled value of a ref, if available.
  140. Args:
  141. name: Name of the ref to peel
  142. Returns: The peeled value of the ref. If the ref is known not point to
  143. a tag, this will be the SHA the ref refers to. If no cached
  144. information about a tag is available, this method may return None,
  145. but it should attempt to peel the tag if possible.
  146. """
  147. return None
  148. def find_missing_objects(
  149. self,
  150. determine_wants: Callable[[Mapping[bytes, bytes], int | None], list[bytes]],
  151. graph_walker: "_ProtocolGraphWalker",
  152. progress: Callable[[bytes], None] | None,
  153. *,
  154. get_tagged: Callable[[], dict[bytes, bytes]] | None = None,
  155. depth: int | None = None,
  156. ) -> "MissingObjectFinder | None":
  157. """Yield the objects required for a list of commits.
  158. Args:
  159. determine_wants: Function to determine which objects the client wants
  160. graph_walker: Object used to walk the commit graph
  161. progress: is a callback to send progress messages to the client
  162. get_tagged: Function that returns a dict of pointed-to sha ->
  163. tag sha for including tags.
  164. depth: Maximum depth of commits to fetch for shallow clones
  165. """
  166. raise NotImplementedError
  167. class DictBackend(Backend):
  168. """Trivial backend that looks up Git repositories in a dictionary."""
  169. def __init__(
  170. self, repos: dict[bytes, "BackendRepo"] | dict[str, "BackendRepo"]
  171. ) -> None:
  172. """Initialize a DictBackend.
  173. Args:
  174. repos: Dictionary mapping repository paths to BackendRepo instances
  175. """
  176. self.repos = repos
  177. def open_repository(self, path: str) -> BackendRepo:
  178. """Open repository at given path.
  179. Args:
  180. path: Path to the repository
  181. Returns:
  182. Repository object
  183. Raises:
  184. NotGitRepository: If no repository found at path
  185. """
  186. logger.debug("Opening repository at %s", path)
  187. # Handle both str and bytes keys for backward compatibility
  188. if path in self.repos:
  189. return self.repos[path] # type: ignore
  190. # Try converting between str and bytes
  191. if isinstance(path, bytes):
  192. try:
  193. alt_path = path.decode("utf-8")
  194. if alt_path in self.repos:
  195. return self.repos[alt_path]
  196. except UnicodeDecodeError:
  197. pass
  198. else:
  199. alt_path_bytes = path.encode("utf-8")
  200. if alt_path_bytes in self.repos:
  201. return self.repos[alt_path_bytes] # type: ignore
  202. raise NotGitRepository(
  203. "No git repository was found at {path}".format(**dict(path=path))
  204. )
  205. class FileSystemBackend(Backend):
  206. """Simple backend looking up Git repositories in the local file system."""
  207. def __init__(self, root: str = os.sep) -> None:
  208. """Initialize a FileSystemBackend.
  209. Args:
  210. root: Root directory to serve repositories from
  211. """
  212. super().__init__()
  213. self.root = (os.path.abspath(root) + os.sep).replace(os.sep * 2, os.sep)
  214. def open_repository(self, path: str) -> BackendRepo:
  215. """Open a repository from the filesystem.
  216. Args:
  217. path: Path to the repository relative to the root
  218. Returns: Repo instance
  219. Raises:
  220. NotGitRepository: If path is outside the root or not a git repository
  221. """
  222. logger.debug("opening repository at %s", path)
  223. # Ensure path is a string to avoid TypeError when joining with self.root
  224. path = os.fspath(path)
  225. if isinstance(path, bytes):
  226. path = os.fsdecode(path)
  227. abspath = os.path.abspath(os.path.join(self.root, path)) + os.sep
  228. normcase_abspath = os.path.normcase(abspath)
  229. normcase_root = os.path.normcase(self.root)
  230. if not normcase_abspath.startswith(normcase_root):
  231. raise NotGitRepository(f"Path {path!r} not inside root {self.root!r}")
  232. return Repo(abspath) # type: ignore[return-value]
  233. class Handler:
  234. """Smart protocol command handler base class."""
  235. def __init__(
  236. self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
  237. ) -> None:
  238. """Initialize a Handler.
  239. Args:
  240. backend: Backend instance for repository access
  241. proto: Protocol instance for communication
  242. stateless_rpc: Whether this is a stateless RPC session
  243. """
  244. self.backend = backend
  245. self.proto = proto
  246. self.stateless_rpc = stateless_rpc
  247. def handle(self) -> None:
  248. """Handle a request."""
  249. raise NotImplementedError(self.handle)
  250. class PackHandler(Handler):
  251. """Protocol handler for packs."""
  252. def __init__(
  253. self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
  254. ) -> None:
  255. """Initialize a PackHandler.
  256. Args:
  257. backend: Backend instance for repository access
  258. proto: Protocol instance for communication
  259. stateless_rpc: Whether this is a stateless RPC session
  260. """
  261. super().__init__(backend, proto, stateless_rpc)
  262. self._client_capabilities: set[bytes] | None = None
  263. # Flags needed for the no-done capability
  264. self._done_received = False
  265. self.advertise_refs = False
  266. @classmethod
  267. def capabilities(cls) -> Iterable[bytes]:
  268. """Return a list of capabilities supported by this handler."""
  269. raise NotImplementedError(cls.capabilities)
  270. @classmethod
  271. def innocuous_capabilities(cls) -> Iterable[bytes]:
  272. """Return capabilities that don't affect protocol behavior.
  273. Returns:
  274. List of innocuous capability names
  275. """
  276. return [
  277. CAPABILITY_INCLUDE_TAG,
  278. CAPABILITY_THIN_PACK,
  279. CAPABILITY_NO_PROGRESS,
  280. CAPABILITY_OFS_DELTA,
  281. capability_agent(),
  282. ]
  283. @classmethod
  284. def required_capabilities(cls) -> Iterable[bytes]:
  285. """Return a list of capabilities that we require the client to have."""
  286. return []
  287. def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
  288. """Set the client capabilities and validate them.
  289. Args:
  290. caps: List of capabilities requested by the client
  291. Raises:
  292. GitProtocolError: If client requests unsupported capability or lacks required ones
  293. """
  294. allowable_caps = set(self.innocuous_capabilities())
  295. allowable_caps.update(self.capabilities())
  296. for cap in caps:
  297. if cap.startswith(CAPABILITY_AGENT + b"="):
  298. continue
  299. if cap not in allowable_caps:
  300. raise GitProtocolError(
  301. f"Client asked for capability {cap!r} that was not advertised."
  302. )
  303. for cap in self.required_capabilities():
  304. if cap not in caps:
  305. raise GitProtocolError(
  306. f"Client does not support required capability {cap!r}."
  307. )
  308. self._client_capabilities = set(caps)
  309. logger.info("Client capabilities: %s", caps)
  310. def has_capability(self, cap: bytes) -> bool:
  311. """Check if the client supports a specific capability.
  312. Args:
  313. cap: Capability name to check
  314. Returns: True if the client supports the capability
  315. Raises:
  316. GitProtocolError: If called before client capabilities are set
  317. """
  318. if self._client_capabilities is None:
  319. raise GitProtocolError(
  320. f"Server attempted to access capability {cap!r} before asking client"
  321. )
  322. return cap in self._client_capabilities
  323. def notify_done(self) -> None:
  324. """Notify that the 'done' command has been received from the client."""
  325. self._done_received = True
  326. class UploadPackHandler(PackHandler):
  327. """Protocol handler for uploading a pack to the client."""
  328. def __init__(
  329. self,
  330. backend: Backend,
  331. args: Sequence[str],
  332. proto: Protocol,
  333. stateless_rpc: bool = False,
  334. advertise_refs: bool = False,
  335. ) -> None:
  336. """Initialize an UploadPackHandler.
  337. Args:
  338. backend: Backend instance for repository access
  339. args: Command arguments (first arg is repository path)
  340. proto: Protocol instance for communication
  341. stateless_rpc: Whether this is a stateless RPC session
  342. advertise_refs: Whether to advertise refs
  343. """
  344. super().__init__(backend, proto, stateless_rpc=stateless_rpc)
  345. self.repo = backend.open_repository(args[0])
  346. self._graph_walker = None
  347. self.advertise_refs = advertise_refs
  348. # A state variable for denoting that the have list is still
  349. # being processed, and the client is not accepting any other
  350. # data (such as side-band, see the progress method here).
  351. self._processing_have_lines = False
  352. @classmethod
  353. def capabilities(cls) -> list[bytes]:
  354. """Return the list of capabilities supported by upload-pack."""
  355. return [
  356. CAPABILITY_MULTI_ACK_DETAILED,
  357. CAPABILITY_MULTI_ACK,
  358. CAPABILITY_SIDE_BAND_64K,
  359. CAPABILITY_THIN_PACK,
  360. CAPABILITY_OFS_DELTA,
  361. CAPABILITY_NO_PROGRESS,
  362. CAPABILITY_INCLUDE_TAG,
  363. CAPABILITY_SHALLOW,
  364. CAPABILITY_NO_DONE,
  365. ]
  366. @classmethod
  367. def required_capabilities(cls) -> tuple[bytes, ...]:
  368. """Return the list of capabilities required for upload-pack."""
  369. return (
  370. CAPABILITY_SIDE_BAND_64K,
  371. CAPABILITY_THIN_PACK,
  372. CAPABILITY_OFS_DELTA,
  373. )
  374. def progress(self, message: bytes) -> None:
  375. """Send a progress message to the client.
  376. Args:
  377. message: Progress message to send
  378. """
  379. def _start_pack_send_phase(self) -> None:
  380. """Start the pack sending phase, setting up sideband if supported."""
  381. if self.has_capability(CAPABILITY_SIDE_BAND_64K):
  382. # The provided haves are processed, and it is safe to send side-
  383. # band data now.
  384. if not self.has_capability(CAPABILITY_NO_PROGRESS):
  385. self.progress = partial( # type: ignore
  386. self.proto.write_sideband, SIDE_BAND_CHANNEL_PROGRESS
  387. )
  388. self.write_pack_data: Callable[[bytes], None] = partial(
  389. self.proto.write_sideband, SIDE_BAND_CHANNEL_DATA
  390. )
  391. else:
  392. # proto.write returns Optional[int], but we need to treat it as returning None
  393. # for compatibility with write_pack_from_container
  394. def write_data(data: bytes) -> None:
  395. self.proto.write(data)
  396. self.write_pack_data = write_data
  397. def get_tagged(
  398. self,
  399. refs: Mapping[bytes, bytes] | None = None,
  400. repo: BackendRepo | None = None,
  401. ) -> dict[ObjectID, ObjectID]:
  402. """Get a dict of peeled values of tags to their original tag shas.
  403. Args:
  404. refs: dict of refname -> sha of possible tags; defaults to all
  405. of the backend's refs.
  406. repo: optional Repo instance for getting peeled refs; defaults
  407. to the backend's repo, if available
  408. Returns: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a
  409. tag whose peeled value is peeled_sha.
  410. """
  411. if not self.has_capability(CAPABILITY_INCLUDE_TAG):
  412. return {}
  413. if refs is None:
  414. refs = self.repo.get_refs()
  415. if repo is None:
  416. repo = getattr(self.repo, "repo", None)
  417. if repo is None:
  418. # Bail if we don't have a Repo available; this is ok since
  419. # clients must be able to handle if the server doesn't include
  420. # all relevant tags.
  421. # TODO: fix behavior when missing
  422. return {}
  423. # TODO(jelmer): Integrate this with the refs logic in
  424. # Repo.find_missing_objects
  425. tagged = {}
  426. for name, sha in refs.items():
  427. peeled_sha = repo.get_peeled(name)
  428. if peeled_sha is not None and peeled_sha != sha:
  429. tagged[peeled_sha] = sha
  430. return tagged
  431. def handle(self) -> None:
  432. """Handle an upload-pack request.
  433. This method processes the client's wants and haves, determines which
  434. objects to send, and writes the pack data to the client.
  435. """
  436. # Note the fact that client is only processing responses related
  437. # to the have lines it sent, and any other data (including side-
  438. # band) will be be considered a fatal error.
  439. self._processing_have_lines = True
  440. graph_walker = _ProtocolGraphWalker(
  441. self,
  442. self.repo.object_store,
  443. self.repo.get_peeled,
  444. self.repo.refs.get_symrefs,
  445. )
  446. wants = []
  447. def wants_wrapper(
  448. refs: Mapping[bytes, bytes], depth: int | None = None
  449. ) -> list[bytes]:
  450. wants.extend(graph_walker.determine_wants(refs, depth))
  451. return wants
  452. missing_objects = self.repo.find_missing_objects(
  453. wants_wrapper,
  454. graph_walker,
  455. self.progress,
  456. get_tagged=self.get_tagged,
  457. )
  458. # Did the process short-circuit (e.g. in a stateless RPC call)? Note
  459. # that the client still expects a 0-object pack in most cases.
  460. # Also, if it also happens that the object_iter is instantiated
  461. # with a graph walker with an implementation that talks over the
  462. # wire (which is this instance of this class) this will actually
  463. # iterate through everything and write things out to the wire.
  464. if len(wants) == 0:
  465. return
  466. # Handle shallow clone case where missing_objects can be None
  467. if missing_objects is None:
  468. return
  469. object_ids = list(missing_objects)
  470. if not graph_walker.handle_done(
  471. not self.has_capability(CAPABILITY_NO_DONE), self._done_received
  472. ):
  473. return
  474. self._start_pack_send_phase()
  475. self.progress((f"counting objects: {len(object_ids)}, done.\n").encode("ascii"))
  476. write_pack_from_container(
  477. self.write_pack_data,
  478. self.repo.object_store,
  479. object_ids,
  480. )
  481. # we are done
  482. self.proto.write_pkt_line(None)
  483. def _split_proto_line(
  484. line: bytes | None, allowed: Iterable[bytes | None] | None
  485. ) -> tuple[bytes | None, bytes | int | None]:
  486. """Split a line read from the wire.
  487. Args:
  488. line: The line read from the wire.
  489. allowed: An iterable of command names that should be allowed.
  490. Command names not listed below as possible return values will be
  491. ignored. If None, any commands from the possible return values are
  492. allowed.
  493. Returns: a tuple having one of the following forms:
  494. ('want', obj_id)
  495. ('have', obj_id)
  496. ('done', None)
  497. (None, None) (for a flush-pkt)
  498. Raises:
  499. UnexpectedCommandError: if the line cannot be parsed into one of the
  500. allowed return values.
  501. """
  502. if not line:
  503. fields: list[bytes | None] = [None]
  504. else:
  505. fields = list(line.rstrip(b"\n").split(b" ", 1))
  506. command = fields[0]
  507. if allowed is not None and command not in allowed:
  508. raise UnexpectedCommandError(command.decode("utf-8") if command else None)
  509. if len(fields) == 1 and command in (COMMAND_DONE, None):
  510. return (command, None)
  511. elif len(fields) == 2:
  512. if command in (
  513. COMMAND_WANT,
  514. COMMAND_HAVE,
  515. COMMAND_SHALLOW,
  516. COMMAND_UNSHALLOW,
  517. ):
  518. assert fields[1] is not None
  519. if not valid_hexsha(fields[1]):
  520. raise GitProtocolError("Invalid sha")
  521. return (command, fields[1])
  522. elif command == COMMAND_DEEPEN:
  523. assert fields[1] is not None
  524. return command, int(fields[1])
  525. raise GitProtocolError(f"Received invalid line from client: {line!r}")
  526. def _want_satisfied(
  527. store: ObjectContainer, haves: set[bytes], want: bytes, earliest: int
  528. ) -> bool:
  529. """Check if a specific want is satisfied by a set of haves.
  530. Args:
  531. store: Object store to retrieve objects from
  532. haves: Set of commit IDs the client has
  533. want: Commit ID the client wants
  534. earliest: Earliest commit time to consider
  535. Returns: True if the want is satisfied by the haves
  536. """
  537. o = store[want]
  538. pending = deque([o])
  539. known = {want}
  540. while pending:
  541. commit = pending.popleft()
  542. if commit.id in haves:
  543. return True
  544. if not isinstance(commit, Commit):
  545. # non-commit wants are assumed to be satisfied
  546. continue
  547. for parent in commit.parents:
  548. if parent in known:
  549. continue
  550. known.add(parent)
  551. parent_obj = store[parent]
  552. assert isinstance(parent_obj, Commit)
  553. # TODO: handle parents with later commit times than children
  554. if parent_obj.commit_time >= earliest:
  555. pending.append(parent_obj)
  556. return False
  557. def _all_wants_satisfied(
  558. store: ObjectContainer, haves: AbstractSet[bytes], wants: set[bytes]
  559. ) -> bool:
  560. """Check whether all the current wants are satisfied by a set of haves.
  561. Args:
  562. store: Object store to retrieve objects from
  563. haves: A set of commits we know the client has.
  564. wants: A set of commits the client wants
  565. Note: Wants are specified with set_wants rather than passed in since
  566. in the current interface they are determined outside this class.
  567. """
  568. haves = set(haves)
  569. if haves:
  570. have_objs = [store[h] for h in haves]
  571. earliest = min([h.commit_time for h in have_objs if isinstance(h, Commit)])
  572. else:
  573. earliest = 0
  574. for want in wants:
  575. if not _want_satisfied(store, haves, want, earliest):
  576. return False
  577. return True
  578. class AckGraphWalkerImpl:
  579. """Base class for acknowledgment graph walker implementations."""
  580. def __init__(self, graph_walker: "_ProtocolGraphWalker") -> None:
  581. """Initialize acknowledgment graph walker.
  582. Args:
  583. graph_walker: Graph walker to wrap
  584. """
  585. raise NotImplementedError
  586. def ack(self, have_ref: ObjectID) -> None:
  587. """Acknowledge a have reference.
  588. Args:
  589. have_ref: Object ID to acknowledge
  590. """
  591. raise NotImplementedError
  592. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  593. """Handle 'done' packet from client."""
  594. raise NotImplementedError
  595. class _ProtocolGraphWalker:
  596. """A graph walker that knows the git protocol.
  597. As a graph walker, this class implements ack(), next(), and reset(). It
  598. also contains some base methods for interacting with the wire and walking
  599. the commit tree.
  600. The work of determining which acks to send is passed on to the
  601. implementation instance stored in _impl. The reason for this is that we do
  602. not know at object creation time what ack level the protocol requires. A
  603. call to set_ack_type() is required to set up the implementation, before
  604. any calls to next() or ack() are made.
  605. """
  606. def __init__(
  607. self,
  608. handler: PackHandler,
  609. object_store: ObjectContainer,
  610. get_peeled: Callable[[bytes], bytes | None],
  611. get_symrefs: Callable[[], dict[bytes, bytes]],
  612. ) -> None:
  613. """Initialize a ProtocolGraphWalker.
  614. Args:
  615. handler: Protocol handler instance
  616. object_store: Object store for retrieving objects
  617. get_peeled: Function to get peeled refs
  618. get_symrefs: Function to get symbolic refs
  619. """
  620. self.handler = handler
  621. self.store: ObjectContainer = object_store
  622. self.get_peeled = get_peeled
  623. self.get_symrefs = get_symrefs
  624. self.proto = handler.proto
  625. self.stateless_rpc = handler.stateless_rpc
  626. self.advertise_refs = handler.advertise_refs
  627. self._wants: list[bytes] = []
  628. self.shallow: set[bytes] = set()
  629. self.client_shallow: set[bytes] = set()
  630. self.unshallow: set[bytes] = set()
  631. self._cached = False
  632. self._cache: list[bytes] = []
  633. self._cache_index = 0
  634. self._impl: AckGraphWalkerImpl | None = None
  635. def determine_wants(
  636. self, heads: Mapping[bytes, bytes], depth: int | None = None
  637. ) -> list[bytes]:
  638. """Determine the wants for a set of heads.
  639. The given heads are advertised to the client, who then specifies which
  640. refs they want using 'want' lines. This portion of the protocol is the
  641. same regardless of ack type, and in fact is used to set the ack type of
  642. the ProtocolGraphWalker.
  643. If the client has the 'shallow' capability, this method also reads and
  644. responds to the 'shallow' and 'deepen' lines from the client. These are
  645. not part of the wants per se, but they set up necessary state for
  646. walking the graph. Additionally, later code depends on this method
  647. consuming everything up to the first 'have' line.
  648. Args:
  649. heads: a dict of refname->SHA1 to advertise
  650. depth: Maximum depth for shallow clones
  651. Returns: a list of SHA1s requested by the client
  652. """
  653. symrefs = self.get_symrefs()
  654. values = set(heads.values())
  655. if self.advertise_refs or not self.stateless_rpc:
  656. for i, (ref, sha) in enumerate(sorted(heads.items())):
  657. try:
  658. peeled_sha = self.get_peeled(ref)
  659. except KeyError:
  660. # Skip refs that are inaccessible
  661. # TODO(jelmer): Integrate with Repo.find_missing_objects refs
  662. # logic.
  663. continue
  664. if i == 0:
  665. logger.info("Sending capabilities: %s", self.handler.capabilities())
  666. line = format_ref_line(
  667. ref,
  668. sha,
  669. list(self.handler.capabilities())
  670. + symref_capabilities(symrefs.items()),
  671. )
  672. else:
  673. line = format_ref_line(ref, sha)
  674. self.proto.write_pkt_line(line)
  675. if peeled_sha is not None and peeled_sha != sha:
  676. self.proto.write_pkt_line(
  677. format_ref_line(ref + PEELED_TAG_SUFFIX, peeled_sha)
  678. )
  679. # i'm done..
  680. self.proto.write_pkt_line(None)
  681. if self.advertise_refs:
  682. return []
  683. # Now client will sending want want want commands
  684. want = self.proto.read_pkt_line()
  685. if not want:
  686. return []
  687. line, caps = extract_want_line_capabilities(want)
  688. self.handler.set_client_capabilities(caps)
  689. self.set_ack_type(ack_type(caps))
  690. allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, None)
  691. command, sha_result = _split_proto_line(line, allowed)
  692. want_revs = []
  693. while command == COMMAND_WANT:
  694. assert isinstance(sha_result, bytes)
  695. if sha_result not in values:
  696. raise GitProtocolError(f"Client wants invalid object {sha_result!r}")
  697. want_revs.append(sha_result)
  698. command, sha_result = self.read_proto_line(allowed)
  699. self.set_wants(want_revs)
  700. if command in (COMMAND_SHALLOW, COMMAND_DEEPEN):
  701. assert sha_result is not None
  702. self.unread_proto_line(command, sha_result)
  703. self._handle_shallow_request(want_revs)
  704. if self.stateless_rpc and self.proto.eof():
  705. # The client may close the socket at this point, expecting a
  706. # flush-pkt from the server. We might be ready to send a packfile
  707. # at this point, so we need to explicitly short-circuit in this
  708. # case.
  709. return []
  710. return want_revs
  711. def unread_proto_line(self, command: bytes, value: bytes | int) -> None:
  712. """Push a command back to be read again.
  713. Args:
  714. command: Command name
  715. value: Command value
  716. """
  717. if isinstance(value, int):
  718. value = str(value).encode("ascii")
  719. self.proto.unread_pkt_line(command + b" " + value)
  720. def nak(self) -> None:
  721. """Send a NAK response."""
  722. def ack(self, have_ref: bytes) -> None:
  723. """Acknowledge a have reference.
  724. Args:
  725. have_ref: SHA to acknowledge (40 bytes hex)
  726. Raises:
  727. ValueError: If have_ref is not 40 bytes
  728. """
  729. if len(have_ref) != 40:
  730. raise ValueError(f"invalid sha {have_ref!r}")
  731. assert self._impl is not None
  732. return self._impl.ack(have_ref)
  733. def reset(self) -> None:
  734. """Reset the graph walker cache."""
  735. self._cached = True
  736. self._cache_index = 0
  737. def next(self) -> bytes | None:
  738. """Get the next SHA from the graph walker.
  739. Returns: Next SHA or None if done
  740. """
  741. if not self._cached:
  742. if not self._impl and self.stateless_rpc:
  743. return None
  744. assert self._impl is not None
  745. return next(self._impl) # type: ignore[call-overload, no-any-return]
  746. self._cache_index += 1
  747. if self._cache_index > len(self._cache):
  748. return None
  749. return self._cache[self._cache_index]
  750. __next__ = next
  751. def read_proto_line(
  752. self, allowed: Iterable[bytes | None] | None
  753. ) -> tuple[bytes | None, bytes | int | None]:
  754. """Read a line from the wire.
  755. Args:
  756. allowed: An iterable of command names that should be allowed.
  757. Returns: A tuple of (command, value); see _split_proto_line.
  758. Raises:
  759. UnexpectedCommandError: If an error occurred reading the line.
  760. """
  761. return _split_proto_line(self.proto.read_pkt_line(), allowed)
  762. def _handle_shallow_request(self, wants: Sequence[bytes]) -> None:
  763. """Handle shallow clone requests from the client.
  764. Args:
  765. wants: List of wanted object SHAs
  766. """
  767. while True:
  768. command, val = self.read_proto_line((COMMAND_DEEPEN, COMMAND_SHALLOW))
  769. if command == COMMAND_DEEPEN:
  770. assert isinstance(val, int)
  771. depth = val
  772. break
  773. assert isinstance(val, bytes)
  774. self.client_shallow.add(val)
  775. self.read_proto_line((None,)) # consume client's flush-pkt
  776. shallow, not_shallow = find_shallow(self.store, wants, depth)
  777. # Update self.shallow instead of reassigning it since we passed a
  778. # reference to it before this method was called.
  779. self.shallow.update(shallow - not_shallow)
  780. new_shallow = self.shallow - self.client_shallow
  781. unshallow = self.unshallow = not_shallow & self.client_shallow
  782. self.update_shallow(new_shallow, unshallow)
  783. def update_shallow(
  784. self, new_shallow: AbstractSet[bytes], unshallow: AbstractSet[bytes]
  785. ) -> None:
  786. """Update shallow/unshallow information to the client.
  787. Args:
  788. new_shallow: Set of newly shallow commits
  789. unshallow: Set of commits to unshallow
  790. """
  791. for sha in sorted(new_shallow):
  792. self.proto.write_pkt_line(format_shallow_line(sha))
  793. for sha in sorted(unshallow):
  794. self.proto.write_pkt_line(format_unshallow_line(sha))
  795. self.proto.write_pkt_line(None)
  796. def notify_done(self) -> None:
  797. """Notify that the client sent 'done'."""
  798. # relay the message down to the handler.
  799. self.handler.notify_done()
  800. def send_ack(self, sha: bytes, ack_type: bytes = b"") -> None:
  801. """Send an ACK to the client.
  802. Args:
  803. sha: SHA to acknowledge
  804. ack_type: Type of ACK (e.g., b'continue', b'ready')
  805. """
  806. self.proto.write_pkt_line(format_ack_line(sha, ack_type))
  807. def send_nak(self) -> None:
  808. """Send a NAK to the client."""
  809. self.proto.write_pkt_line(NAK_LINE)
  810. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  811. """Handle the 'done' command.
  812. Args:
  813. done_required: Whether done is required
  814. done_received: Whether done was received
  815. Returns: True if done handling succeeded
  816. """
  817. # Delegate this to the implementation.
  818. assert self._impl is not None
  819. return self._impl.handle_done(done_required, done_received)
  820. def set_wants(self, wants: list[bytes]) -> None:
  821. """Set the list of wanted objects.
  822. Args:
  823. wants: List of wanted object SHAs
  824. """
  825. self._wants = wants
  826. def all_wants_satisfied(self, haves: AbstractSet[bytes]) -> bool:
  827. """Check whether all the current wants are satisfied by a set of haves.
  828. Args:
  829. haves: A set of commits we know the client has.
  830. Note: Wants are specified with set_wants rather than passed in since
  831. in the current interface they are determined outside this class.
  832. """
  833. return _all_wants_satisfied(self.store, haves, set(self._wants))
  834. def set_ack_type(self, ack_type: int) -> None:
  835. """Set the acknowledgment type for the graph walker.
  836. Args:
  837. ack_type: One of SINGLE_ACK, MULTI_ACK, or MULTI_ACK_DETAILED
  838. """
  839. impl_classes: dict[int, type[AckGraphWalkerImpl]] = {
  840. MULTI_ACK: MultiAckGraphWalkerImpl,
  841. MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
  842. SINGLE_ACK: SingleAckGraphWalkerImpl,
  843. }
  844. self._impl = impl_classes[ack_type](self)
  845. _GRAPH_WALKER_COMMANDS = (COMMAND_HAVE, COMMAND_DONE, None)
  846. class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
  847. """Graph walker implementation that speaks the single-ack protocol."""
  848. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  849. """Initialize a SingleAckGraphWalkerImpl.
  850. Args:
  851. walker: Parent ProtocolGraphWalker instance
  852. """
  853. self.walker = walker
  854. self._common: list[bytes] = []
  855. def ack(self, have_ref: bytes) -> None:
  856. """Acknowledge a have reference.
  857. Args:
  858. have_ref: Object ID to acknowledge
  859. """
  860. if not self._common:
  861. self.walker.send_ack(have_ref)
  862. self._common.append(have_ref)
  863. def next(self) -> bytes | None:
  864. """Get next SHA from graph walker.
  865. Returns:
  866. SHA bytes or None if done
  867. """
  868. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  869. if command in (None, COMMAND_DONE):
  870. # defer the handling of done
  871. self.walker.notify_done()
  872. return None
  873. elif command == COMMAND_HAVE:
  874. assert isinstance(sha, bytes)
  875. return sha
  876. return None
  877. __next__ = next
  878. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  879. """Handle done command.
  880. Args:
  881. done_required: Whether done is required
  882. done_received: Whether done was received
  883. Returns:
  884. True if handling completed successfully
  885. """
  886. if not self._common:
  887. self.walker.send_nak()
  888. if done_required and not done_received:
  889. # we are not done, especially when done is required; skip
  890. # the pack for this request and especially do not handle
  891. # the done.
  892. return False
  893. if not done_received and not self._common:
  894. # Okay we are not actually done then since the walker picked
  895. # up no haves. This is usually triggered when client attempts
  896. # to pull from a source that has no common base_commit.
  897. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  898. # test_multi_ack_stateless_nodone
  899. return False
  900. return True
  901. class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
  902. """Graph walker implementation that speaks the multi-ack protocol."""
  903. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  904. """Initialize multi-ack graph walker.
  905. Args:
  906. walker: Parent ProtocolGraphWalker instance
  907. """
  908. self.walker = walker
  909. self._found_base = False
  910. self._common: list[bytes] = []
  911. def ack(self, have_ref: bytes) -> None:
  912. """Acknowledge a have reference.
  913. Args:
  914. have_ref: Object ID to acknowledge
  915. """
  916. self._common.append(have_ref)
  917. if not self._found_base:
  918. self.walker.send_ack(have_ref, b"continue")
  919. if self.walker.all_wants_satisfied(set(self._common)):
  920. self._found_base = True
  921. # else we blind ack within next
  922. def next(self) -> bytes | None:
  923. """Get next SHA from graph walker.
  924. Returns:
  925. SHA bytes or None if done
  926. """
  927. while True:
  928. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  929. if command is None:
  930. self.walker.send_nak()
  931. # in multi-ack mode, a flush-pkt indicates the client wants to
  932. # flush but more have lines are still coming
  933. continue
  934. elif command == COMMAND_DONE:
  935. self.walker.notify_done()
  936. return None
  937. elif command == COMMAND_HAVE:
  938. assert isinstance(sha, bytes)
  939. if self._found_base:
  940. # blind ack
  941. self.walker.send_ack(sha, b"continue")
  942. return sha
  943. __next__ = next
  944. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  945. """Handle done command.
  946. Args:
  947. done_required: Whether done is required
  948. done_received: Whether done was received
  949. Returns:
  950. True if handling completed successfully
  951. """
  952. if done_required and not done_received:
  953. # we are not done, especially when done is required; skip
  954. # the pack for this request and especially do not handle
  955. # the done.
  956. return False
  957. if not done_received and not self._common:
  958. # Okay we are not actually done then since the walker picked
  959. # up no haves. This is usually triggered when client attempts
  960. # to pull from a source that has no common base_commit.
  961. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  962. # test_multi_ack_stateless_nodone
  963. return False
  964. # don't nak unless no common commits were found, even if not
  965. # everything is satisfied
  966. if self._common:
  967. self.walker.send_ack(self._common[-1])
  968. else:
  969. self.walker.send_nak()
  970. return True
  971. class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
  972. """Graph walker implementation speaking the multi-ack-detailed protocol."""
  973. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  974. """Initialize multi-ack-detailed graph walker.
  975. Args:
  976. walker: Parent ProtocolGraphWalker instance
  977. """
  978. self.walker = walker
  979. self._common: list[bytes] = []
  980. def ack(self, have_ref: bytes) -> None:
  981. """Acknowledge a have reference.
  982. Args:
  983. have_ref: Object ID to acknowledge
  984. """
  985. # Should only be called iff have_ref is common
  986. self._common.append(have_ref)
  987. self.walker.send_ack(have_ref, b"common")
  988. def next(self) -> bytes | None:
  989. """Get next SHA from graph walker.
  990. Returns:
  991. SHA bytes or None if done
  992. """
  993. while True:
  994. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  995. if command is None:
  996. if self.walker.all_wants_satisfied(set(self._common)):
  997. self.walker.send_ack(self._common[-1], b"ready")
  998. self.walker.send_nak()
  999. if self.walker.stateless_rpc:
  1000. # The HTTP version of this request a flush-pkt always
  1001. # signifies an end of request, so we also return
  1002. # nothing here as if we are done (but not really, as
  1003. # it depends on whether no-done capability was
  1004. # specified and that's handled in handle_done which
  1005. # may or may not call post_nodone_check depending on
  1006. # that).
  1007. return None
  1008. elif command == COMMAND_DONE:
  1009. # Let the walker know that we got a done.
  1010. self.walker.notify_done()
  1011. break
  1012. elif command == COMMAND_HAVE:
  1013. # return the sha and let the caller ACK it with the
  1014. # above ack method.
  1015. assert isinstance(sha, bytes)
  1016. return sha
  1017. # don't nak unless no common commits were found, even if not
  1018. # everything is satisfied
  1019. return None
  1020. __next__ = next
  1021. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  1022. """Handle done command.
  1023. Args:
  1024. done_required: Whether done is required
  1025. done_received: Whether done was received
  1026. Returns:
  1027. True if handling completed successfully
  1028. """
  1029. if done_required and not done_received:
  1030. # we are not done, especially when done is required; skip
  1031. # the pack for this request and especially do not handle
  1032. # the done.
  1033. return False
  1034. if not done_received and not self._common:
  1035. # Okay we are not actually done then since the walker picked
  1036. # up no haves. This is usually triggered when client attempts
  1037. # to pull from a source that has no common base_commit.
  1038. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  1039. # test_multi_ack_stateless_nodone
  1040. return False
  1041. # don't nak unless no common commits were found, even if not
  1042. # everything is satisfied
  1043. if self._common:
  1044. self.walker.send_ack(self._common[-1])
  1045. else:
  1046. self.walker.send_nak()
  1047. return True
  1048. class ReceivePackHandler(PackHandler):
  1049. """Protocol handler for downloading a pack from the client."""
  1050. def __init__(
  1051. self,
  1052. backend: Backend,
  1053. args: Sequence[str],
  1054. proto: Protocol,
  1055. stateless_rpc: bool = False,
  1056. advertise_refs: bool = False,
  1057. ) -> None:
  1058. """Initialize receive-pack handler.
  1059. Args:
  1060. backend: Backend instance
  1061. args: Command arguments
  1062. proto: Protocol instance
  1063. stateless_rpc: Whether to use stateless RPC
  1064. advertise_refs: Whether to advertise refs
  1065. """
  1066. super().__init__(backend, proto, stateless_rpc=stateless_rpc)
  1067. self.repo = backend.open_repository(args[0])
  1068. self.advertise_refs = advertise_refs
  1069. @classmethod
  1070. def capabilities(cls) -> Iterable[bytes]:
  1071. """Return supported capabilities.
  1072. Returns:
  1073. List of capability names
  1074. """
  1075. return [
  1076. CAPABILITY_REPORT_STATUS,
  1077. CAPABILITY_DELETE_REFS,
  1078. CAPABILITY_QUIET,
  1079. CAPABILITY_OFS_DELTA,
  1080. CAPABILITY_SIDE_BAND_64K,
  1081. CAPABILITY_NO_DONE,
  1082. ]
  1083. def _apply_pack(
  1084. self, refs: list[tuple[ObjectID, ObjectID, Ref]]
  1085. ) -> Iterator[tuple[bytes, bytes]]:
  1086. """Apply received pack to repository.
  1087. Args:
  1088. refs: List of (old_sha, new_sha, ref_name) tuples
  1089. Yields:
  1090. Tuples of (ref_name, error_message) for any errors
  1091. """
  1092. all_exceptions = (
  1093. IOError,
  1094. OSError,
  1095. ChecksumMismatch,
  1096. ApplyDeltaError,
  1097. AssertionError,
  1098. socket.error,
  1099. zlib.error,
  1100. ObjectFormatException,
  1101. )
  1102. will_send_pack = False
  1103. for command in refs:
  1104. if command[1] != ZERO_SHA:
  1105. will_send_pack = True
  1106. if will_send_pack:
  1107. # TODO: more informative error messages than just the exception
  1108. # string
  1109. try:
  1110. recv = getattr(self.proto, "recv", None)
  1111. self.repo.object_store.add_thin_pack(self.proto.read, recv) # type: ignore[attr-defined]
  1112. yield (b"unpack", b"ok")
  1113. except all_exceptions as e:
  1114. yield (b"unpack", str(e).replace("\n", "").encode("utf-8"))
  1115. # The pack may still have been moved in, but it may contain
  1116. # broken objects. We trust a later GC to clean it up.
  1117. else:
  1118. # The git protocol want to find a status entry related to unpack
  1119. # process even if no pack data has been sent.
  1120. yield (b"unpack", b"ok")
  1121. for oldsha, sha, ref in refs:
  1122. ref_status = b"ok"
  1123. try:
  1124. if sha == ZERO_SHA:
  1125. if CAPABILITY_DELETE_REFS not in self.capabilities():
  1126. raise GitProtocolError(
  1127. "Attempted to delete refs without delete-refs capability."
  1128. )
  1129. try:
  1130. self.repo.refs.remove_if_equals(ref, oldsha)
  1131. except all_exceptions:
  1132. ref_status = b"failed to delete"
  1133. else:
  1134. try:
  1135. self.repo.refs.set_if_equals(ref, oldsha, sha)
  1136. except all_exceptions:
  1137. ref_status = b"failed to write"
  1138. except KeyError:
  1139. ref_status = b"bad ref"
  1140. yield (ref, ref_status)
  1141. def _report_status(self, status: Sequence[tuple[bytes, bytes]]) -> None:
  1142. """Report status to client.
  1143. Args:
  1144. status: List of (ref_name, status_message) tuples
  1145. """
  1146. if self.has_capability(CAPABILITY_SIDE_BAND_64K):
  1147. writer = BufferedPktLineWriter(
  1148. lambda d: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, d)
  1149. )
  1150. write = writer.write
  1151. def flush() -> None:
  1152. writer.flush()
  1153. self.proto.write_pkt_line(None)
  1154. else:
  1155. write = self.proto.write_pkt_line # type: ignore[assignment]
  1156. def flush() -> None:
  1157. pass
  1158. for name, msg in status:
  1159. if name == b"unpack":
  1160. write(b"unpack " + msg + b"\n")
  1161. elif msg == b"ok":
  1162. write(b"ok " + name + b"\n")
  1163. else:
  1164. write(b"ng " + name + b" " + msg + b"\n")
  1165. write(None) # type: ignore
  1166. flush()
  1167. def _on_post_receive(self, client_refs: dict[bytes, tuple[bytes, bytes]]) -> None:
  1168. """Run post-receive hook.
  1169. Args:
  1170. client_refs: Dictionary of ref changes from client
  1171. """
  1172. hook = self.repo.hooks.get("post-receive", None) # type: ignore[attr-defined]
  1173. if not hook:
  1174. return
  1175. try:
  1176. output = hook.execute(client_refs)
  1177. if output:
  1178. self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, output)
  1179. except HookError as err:
  1180. self.proto.write_sideband(SIDE_BAND_CHANNEL_FATAL, str(err).encode("utf-8"))
  1181. def handle(self) -> None:
  1182. """Handle receive-pack request."""
  1183. if self.advertise_refs or not self.stateless_rpc:
  1184. refs = sorted(self.repo.get_refs().items())
  1185. symrefs = sorted(self.repo.refs.get_symrefs().items())
  1186. if not refs:
  1187. refs = [(CAPABILITIES_REF, ZERO_SHA)]
  1188. logger.info("Sending capabilities: %s", self.capabilities())
  1189. self.proto.write_pkt_line(
  1190. format_ref_line(
  1191. refs[0][0],
  1192. refs[0][1],
  1193. list(self.capabilities()) + symref_capabilities(symrefs),
  1194. )
  1195. )
  1196. for i in range(1, len(refs)):
  1197. ref = refs[i]
  1198. self.proto.write_pkt_line(format_ref_line(ref[0], ref[1]))
  1199. self.proto.write_pkt_line(None)
  1200. if self.advertise_refs:
  1201. return
  1202. client_refs = []
  1203. ref_line = self.proto.read_pkt_line()
  1204. # if ref is none then client doesn't want to send us anything..
  1205. if ref_line is None:
  1206. return
  1207. ref_line, caps = extract_capabilities(ref_line)
  1208. self.set_client_capabilities(caps)
  1209. # client will now send us a list of (oldsha, newsha, ref)
  1210. while ref_line:
  1211. (oldsha, newsha, ref_name) = ref_line.split()
  1212. client_refs.append((oldsha, newsha, ref_name))
  1213. ref_line = self.proto.read_pkt_line()
  1214. # backend can now deal with this refs and read a pack using self.read
  1215. status = list(self._apply_pack(client_refs))
  1216. self._on_post_receive(client_refs) # type: ignore[arg-type]
  1217. # when we have read all the pack from the client, send a status report
  1218. # if the client asked for it
  1219. if self.has_capability(CAPABILITY_REPORT_STATUS):
  1220. self._report_status(status)
  1221. class UploadArchiveHandler(Handler):
  1222. """Handler for git-upload-archive requests."""
  1223. def __init__(
  1224. self,
  1225. backend: Backend,
  1226. args: Sequence[str],
  1227. proto: Protocol,
  1228. stateless_rpc: bool = False,
  1229. ) -> None:
  1230. """Initialize upload-archive handler.
  1231. Args:
  1232. backend: Backend instance
  1233. args: Command arguments
  1234. proto: Protocol instance
  1235. stateless_rpc: Whether to use stateless RPC
  1236. """
  1237. super().__init__(backend, proto, stateless_rpc)
  1238. self.repo = backend.open_repository(args[0])
  1239. def handle(self) -> None:
  1240. """Handle upload-archive request."""
  1241. def write(x: bytes) -> None:
  1242. self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
  1243. arguments = []
  1244. for pkt in self.proto.read_pkt_seq():
  1245. (key, value) = pkt.split(b" ", 1)
  1246. if key != b"argument":
  1247. raise GitProtocolError(f"unknown command {key!r}")
  1248. arguments.append(value.rstrip(b"\n"))
  1249. prefix = b""
  1250. format = "tar"
  1251. i = 0
  1252. store: BaseObjectStore = self.repo.object_store
  1253. while i < len(arguments):
  1254. argument = arguments[i]
  1255. if argument == b"--prefix":
  1256. i += 1
  1257. prefix = arguments[i]
  1258. elif argument == b"--format":
  1259. i += 1
  1260. format = arguments[i].decode("ascii")
  1261. else:
  1262. commit_sha = self.repo.refs[argument]
  1263. commit_obj = store[commit_sha]
  1264. assert isinstance(commit_obj, Commit)
  1265. tree_obj = store[commit_obj.tree]
  1266. assert isinstance(tree_obj, Tree)
  1267. tree = tree_obj
  1268. i += 1
  1269. self.proto.write_pkt_line(b"ACK")
  1270. self.proto.write_pkt_line(None)
  1271. for chunk in tar_stream(
  1272. store,
  1273. tree,
  1274. mtime=int(time.time()),
  1275. prefix=prefix,
  1276. format=format,
  1277. ):
  1278. write(chunk)
  1279. self.proto.write_pkt_line(None)
  1280. # Default handler classes for git services.
  1281. DEFAULT_HANDLERS = {
  1282. b"git-upload-pack": UploadPackHandler,
  1283. b"git-receive-pack": ReceivePackHandler,
  1284. b"git-upload-archive": UploadArchiveHandler,
  1285. }
  1286. class TCPGitRequestHandler(socketserver.StreamRequestHandler):
  1287. """TCP request handler for git protocol."""
  1288. def __init__(
  1289. self,
  1290. handlers: dict[bytes, type[Handler]],
  1291. request: socket.socket,
  1292. client_address: tuple[str, int],
  1293. server: socketserver.TCPServer,
  1294. ) -> None:
  1295. """Initialize TCP request handler.
  1296. Args:
  1297. handlers: Dictionary mapping commands to handler classes
  1298. request: Request socket
  1299. client_address: Client address tuple
  1300. server: Server instance
  1301. """
  1302. self.handlers = handlers
  1303. socketserver.StreamRequestHandler.__init__(
  1304. self, request, client_address, server
  1305. )
  1306. def handle(self) -> None:
  1307. """Handle TCP git request."""
  1308. proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
  1309. command, args = proto.read_cmd()
  1310. logger.info("Handling %s request, args=%s", command, args)
  1311. cls = self.handlers.get(command, None)
  1312. if not callable(cls):
  1313. raise GitProtocolError(f"Invalid service {command!r}")
  1314. h = cls(self.server.backend, args, proto) # type: ignore
  1315. h.handle()
  1316. class TCPGitServer(socketserver.TCPServer):
  1317. """TCP server for git protocol."""
  1318. allow_reuse_address = True
  1319. serve = socketserver.TCPServer.serve_forever
  1320. def _make_handler(
  1321. self,
  1322. request: socket.socket,
  1323. client_address: tuple[str, int],
  1324. server: socketserver.TCPServer,
  1325. ) -> TCPGitRequestHandler:
  1326. """Create request handler instance.
  1327. Args:
  1328. request: Request socket
  1329. client_address: Client address tuple
  1330. server: Server instance
  1331. Returns:
  1332. TCPGitRequestHandler instance
  1333. """
  1334. return TCPGitRequestHandler(self.handlers, request, client_address, server)
  1335. def __init__(
  1336. self,
  1337. backend: Backend,
  1338. listen_addr: str,
  1339. port: int = TCP_GIT_PORT,
  1340. handlers: dict[bytes, type[Handler]] | None = None,
  1341. ) -> None:
  1342. """Initialize TCP git server.
  1343. Args:
  1344. backend: Backend instance
  1345. listen_addr: Address to listen on
  1346. port: Port to listen on (default: TCP_GIT_PORT)
  1347. handlers: Optional dictionary of custom handlers
  1348. """
  1349. self.handlers = dict(DEFAULT_HANDLERS)
  1350. if handlers is not None:
  1351. self.handlers.update(handlers)
  1352. self.backend = backend
  1353. logger.info("Listening for TCP connections on %s:%d", listen_addr, port)
  1354. socketserver.TCPServer.__init__(self, (listen_addr, port), self._make_handler)
  1355. def verify_request(
  1356. self,
  1357. request: socket.socket | tuple[bytes, socket.socket],
  1358. client_address: tuple[str, int] | socket.socket,
  1359. ) -> bool:
  1360. """Verify incoming request.
  1361. Args:
  1362. request: Request socket
  1363. client_address: Client address tuple
  1364. Returns:
  1365. True to accept request
  1366. """
  1367. logger.info("Handling request from %s", client_address)
  1368. return True
  1369. def handle_error(
  1370. self,
  1371. request: socket.socket | tuple[bytes, socket.socket],
  1372. client_address: tuple[str, int] | socket.socket,
  1373. ) -> None:
  1374. """Handle request processing errors.
  1375. Args:
  1376. request: Request socket
  1377. client_address: Client address tuple
  1378. """
  1379. logger.exception(
  1380. "Exception happened during processing of request from %s",
  1381. client_address,
  1382. )
  1383. def main(argv: list[str] = sys.argv) -> None:
  1384. """Entry point for starting a TCP git server."""
  1385. import optparse
  1386. parser = optparse.OptionParser()
  1387. parser.add_option(
  1388. "-l",
  1389. "--listen_address",
  1390. dest="listen_address",
  1391. default="localhost",
  1392. help="Binding IP address.",
  1393. )
  1394. parser.add_option(
  1395. "-p",
  1396. "--port",
  1397. dest="port",
  1398. type=int,
  1399. default=TCP_GIT_PORT,
  1400. help="Binding TCP port.",
  1401. )
  1402. options, args = parser.parse_args(argv)
  1403. log_utils.default_logging_config()
  1404. if len(args) > 1:
  1405. gitdir = args[1]
  1406. else:
  1407. gitdir = "."
  1408. # TODO(jelmer): Support git-daemon-export-ok and --export-all.
  1409. backend = FileSystemBackend(gitdir)
  1410. server = TCPGitServer(backend, options.listen_address, options.port)
  1411. server.serve_forever()
  1412. def serve_command(
  1413. handler_cls: type[Handler],
  1414. argv: list[str] = sys.argv,
  1415. backend: Backend | None = None,
  1416. inf: IO[bytes] | None = None,
  1417. outf: IO[bytes] | None = None,
  1418. ) -> int:
  1419. """Serve a single command.
  1420. This is mostly useful for the implementation of commands used by e.g.
  1421. git+ssh.
  1422. Args:
  1423. handler_cls: `Handler` class to use for the request
  1424. argv: execv-style command-line arguments. Defaults to sys.argv.
  1425. backend: `Backend` to use
  1426. inf: File-like object to read from, defaults to standard input.
  1427. outf: File-like object to write to, defaults to standard output.
  1428. Returns: Exit code for use with sys.exit. 0 on success, 1 on failure.
  1429. """
  1430. if backend is None:
  1431. backend = FileSystemBackend()
  1432. if inf is None:
  1433. inf = sys.stdin.buffer
  1434. if outf is None:
  1435. outf = sys.stdout.buffer
  1436. def send_fn(data: bytes) -> None:
  1437. outf.write(data)
  1438. outf.flush()
  1439. proto = Protocol(inf.read, send_fn)
  1440. handler = handler_cls(backend, argv[1:], proto) # type: ignore[arg-type]
  1441. # FIXME: Catch exceptions and write a single-line summary to outf.
  1442. handler.handle()
  1443. return 0
  1444. def generate_info_refs(repo: "BaseRepo") -> Iterator[bytes]:
  1445. """Generate an info refs file."""
  1446. refs = repo.get_refs()
  1447. return write_info_refs(refs, repo.object_store)
  1448. def generate_objects_info_packs(repo: "BaseRepo") -> Iterator[bytes]:
  1449. """Generate an index for for packs."""
  1450. for pack in repo.object_store.packs:
  1451. yield (b"P " + os.fsencode(pack.data.filename) + b"\n")
  1452. def update_server_info(repo: "BaseRepo") -> None:
  1453. """Generate server info for dumb file access.
  1454. This generates info/refs and objects/info/packs,
  1455. similar to "git update-server-info".
  1456. """
  1457. repo._put_named_file(
  1458. os.path.join("info", "refs"), b"".join(generate_info_refs(repo))
  1459. )
  1460. repo._put_named_file(
  1461. os.path.join("objects", "info", "packs"),
  1462. b"".join(generate_objects_info_packs(repo)),
  1463. )
  1464. if __name__ == "__main__":
  1465. main()