server.py 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874
  1. # server.py -- Implementation of the server side git protocols
  2. # Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
  3. # Copyright(C) 2011-2012 Jelmer Vernooij <jelmer@jelmer.uk>
  4. #
  5. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  6. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  7. # General Public License as published by the Free Software Foundation; version 2.0
  8. # or (at your option) any later version. You can redistribute it and/or
  9. # modify it under the terms of either of these two licenses.
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. # You should have received a copy of the licenses; if not, see
  18. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  19. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  20. # License, Version 2.0.
  21. #
  22. """Git smart network protocol server implementation.
  23. For more detailed implementation on the network protocol, see the
  24. Documentation/technical directory in the cgit distribution, and in particular:
  25. * Documentation/technical/protocol-capabilities.txt
  26. * Documentation/technical/pack-protocol.txt
  27. Currently supported capabilities:
  28. * include-tag
  29. * thin-pack
  30. * multi_ack_detailed
  31. * multi_ack
  32. * side-band-64k
  33. * ofs-delta
  34. * no-progress
  35. * report-status
  36. * delete-refs
  37. * shallow
  38. * symref
  39. """
# Public names of this module; this list is what ``from <module> import *``
# exports.
__all__ = [
    "DEFAULT_HANDLERS",
    "AckGraphWalkerImpl",
    "Backend",
    "BackendRepo",
    "DictBackend",
    "FileSystemBackend",
    "Handler",
    "MultiAckDetailedGraphWalkerImpl",
    "MultiAckGraphWalkerImpl",
    "PackHandler",
    "ReceivePackHandler",
    "SingleAckGraphWalkerImpl",
    "TCPGitRequestHandler",
    "TCPGitServer",
    "UploadArchiveHandler",
    "UploadPackHandler",
    "generate_info_refs",
    "generate_objects_info_packs",
    "main",
    "serve_command",
    "update_server_info",
]
  63. import os
  64. import socket
  65. import socketserver
  66. import sys
  67. import time
  68. import zlib
  69. from collections import deque
  70. from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
  71. from collections.abc import Set as AbstractSet
  72. from functools import partial
  73. from typing import IO, TYPE_CHECKING
  74. from typing import Protocol as TypingProtocol
  75. if TYPE_CHECKING:
  76. from .object_format import ObjectFormat
  77. from .object_store import BaseObjectStore
  78. from .repo import BaseRepo
  79. from dulwich import log_utils
  80. from .archive import tar_stream
  81. from .errors import (
  82. ApplyDeltaError,
  83. ChecksumMismatch,
  84. GitProtocolError,
  85. HookError,
  86. NotGitRepository,
  87. ObjectFormatException,
  88. UnexpectedCommandError,
  89. )
  90. from .object_filters import (
  91. CombineFilter,
  92. FilterSpec,
  93. SparseOidFilter,
  94. TreeDepthFilter,
  95. filter_pack_objects,
  96. filter_pack_objects_with_paths,
  97. parse_filter_spec,
  98. )
  99. from .object_store import MissingObjectFinder, PackBasedObjectStore, find_shallow
  100. from .objects import Commit, ObjectID, Tree, valid_hexsha
  101. from .pack import ObjectContainer, write_pack_from_container
  102. from .protocol import (
  103. CAPABILITIES_REF,
  104. CAPABILITY_AGENT,
  105. CAPABILITY_DELETE_REFS,
  106. CAPABILITY_FILTER,
  107. CAPABILITY_INCLUDE_TAG,
  108. CAPABILITY_MULTI_ACK,
  109. CAPABILITY_MULTI_ACK_DETAILED,
  110. CAPABILITY_NO_DONE,
  111. CAPABILITY_NO_PROGRESS,
  112. CAPABILITY_OFS_DELTA,
  113. CAPABILITY_QUIET,
  114. CAPABILITY_REPORT_STATUS,
  115. CAPABILITY_SHALLOW,
  116. CAPABILITY_SIDE_BAND_64K,
  117. CAPABILITY_THIN_PACK,
  118. COMMAND_DEEPEN,
  119. COMMAND_DONE,
  120. COMMAND_FILTER,
  121. COMMAND_HAVE,
  122. COMMAND_SHALLOW,
  123. COMMAND_UNSHALLOW,
  124. COMMAND_WANT,
  125. MULTI_ACK,
  126. MULTI_ACK_DETAILED,
  127. NAK_LINE,
  128. PEELED_TAG_SUFFIX,
  129. SIDE_BAND_CHANNEL_DATA,
  130. SIDE_BAND_CHANNEL_FATAL,
  131. SIDE_BAND_CHANNEL_PROGRESS,
  132. SINGLE_ACK,
  133. TCP_GIT_PORT,
  134. BufferedPktLineWriter,
  135. Protocol,
  136. ReceivableProtocol,
  137. ack_type,
  138. capability_agent,
  139. capability_object_format,
  140. extract_capabilities,
  141. extract_want_line_capabilities,
  142. find_capability,
  143. format_ack_line,
  144. format_ref_line,
  145. format_shallow_line,
  146. format_unshallow_line,
  147. symref_capabilities,
  148. write_info_refs,
  149. )
  150. from .refs import Ref, RefsContainer
  151. from .repo import Repo
# Module-level logger, obtained from log_utils and named after this module.
logger = log_utils.getLogger(__name__)
  153. class Backend:
  154. """A backend for the Git smart server implementation."""
  155. def open_repository(self, path: str) -> "BackendRepo":
  156. """Open the repository at a path.
  157. Args:
  158. path: Path to the repository
  159. Raises:
  160. NotGitRepository: no git repository was found at path
  161. Returns: Instance of BackendRepo
  162. """
  163. raise NotImplementedError(self.open_repository)
  164. class BackendRepo(TypingProtocol):
  165. """Repository abstraction used by the Git server.
  166. The methods required here are a subset of those provided by
  167. dulwich.repo.Repo.
  168. """
  169. object_store: PackBasedObjectStore
  170. refs: RefsContainer
  171. object_format: "ObjectFormat"
  172. def get_refs(self) -> dict[bytes, bytes]:
  173. """Get all the refs in the repository.
  174. Returns: dict of name -> sha
  175. """
  176. raise NotImplementedError
  177. def get_peeled(self, name: bytes) -> ObjectID | None:
  178. """Return the cached peeled value of a ref, if available.
  179. Args:
  180. name: Name of the ref to peel
  181. Returns: The peeled value of the ref. If the ref is known not point to
  182. a tag, this will be the SHA the ref refers to. If no cached
  183. information about a tag is available, this method may return None,
  184. but it should attempt to peel the tag if possible.
  185. """
  186. return None
  187. def find_missing_objects(
  188. self,
  189. determine_wants: Callable[[Mapping[Ref, ObjectID], int | None], list[ObjectID]],
  190. graph_walker: "_ProtocolGraphWalker",
  191. progress: Callable[[bytes], None] | None,
  192. *,
  193. get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
  194. depth: int | None = None,
  195. ) -> "MissingObjectFinder | None":
  196. """Yield the objects required for a list of commits.
  197. Args:
  198. determine_wants: Function to determine which objects the client wants
  199. graph_walker: Object used to walk the commit graph
  200. progress: is a callback to send progress messages to the client
  201. get_tagged: Function that returns a dict of pointed-to sha ->
  202. tag sha for including tags.
  203. depth: Maximum depth of commits to fetch for shallow clones
  204. """
  205. raise NotImplementedError
  206. class DictBackend(Backend):
  207. """Trivial backend that looks up Git repositories in a dictionary."""
  208. def __init__(
  209. self, repos: dict[bytes, "BackendRepo"] | dict[str, "BackendRepo"]
  210. ) -> None:
  211. """Initialize a DictBackend.
  212. Args:
  213. repos: Dictionary mapping repository paths to BackendRepo instances
  214. """
  215. self.repos = repos
  216. def open_repository(self, path: str) -> BackendRepo:
  217. """Open repository at given path.
  218. Args:
  219. path: Path to the repository
  220. Returns:
  221. Repository object
  222. Raises:
  223. NotGitRepository: If no repository found at path
  224. """
  225. logger.debug("Opening repository at %s", path)
  226. # Handle both str and bytes keys for backward compatibility
  227. if path in self.repos:
  228. return self.repos[path] # type: ignore
  229. # Try converting between str and bytes
  230. if isinstance(path, bytes):
  231. try:
  232. alt_path = path.decode("utf-8")
  233. if alt_path in self.repos:
  234. return self.repos[alt_path]
  235. except UnicodeDecodeError:
  236. pass
  237. else:
  238. alt_path_bytes = path.encode("utf-8")
  239. if alt_path_bytes in self.repos:
  240. return self.repos[alt_path_bytes] # type: ignore
  241. raise NotGitRepository(
  242. "No git repository was found at {path}".format(**dict(path=path))
  243. )
  244. class FileSystemBackend(Backend):
  245. """Simple backend looking up Git repositories in the local file system."""
  246. def __init__(self, root: str = os.sep) -> None:
  247. """Initialize a FileSystemBackend.
  248. Args:
  249. root: Root directory to serve repositories from
  250. """
  251. super().__init__()
  252. self.root = (os.path.abspath(root) + os.sep).replace(os.sep * 2, os.sep)
  253. def open_repository(self, path: str) -> BackendRepo:
  254. """Open a repository from the filesystem.
  255. Args:
  256. path: Path to the repository relative to the root
  257. Returns: Repo instance
  258. Raises:
  259. NotGitRepository: If path is outside the root or not a git repository
  260. """
  261. logger.debug("opening repository at %s", path)
  262. # Ensure path is a string to avoid TypeError when joining with self.root
  263. path = os.fspath(path)
  264. if isinstance(path, bytes):
  265. path = os.fsdecode(path)
  266. abspath = os.path.abspath(os.path.join(self.root, path)) + os.sep
  267. normcase_abspath = os.path.normcase(abspath)
  268. normcase_root = os.path.normcase(self.root)
  269. if not normcase_abspath.startswith(normcase_root):
  270. raise NotGitRepository(f"Path {path!r} not inside root {self.root!r}")
  271. return Repo(abspath) # type: ignore[return-value]
  272. class Handler:
  273. """Smart protocol command handler base class."""
  274. def __init__(
  275. self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
  276. ) -> None:
  277. """Initialize a Handler.
  278. Args:
  279. backend: Backend instance for repository access
  280. proto: Protocol instance for communication
  281. stateless_rpc: Whether this is a stateless RPC session
  282. """
  283. self.backend = backend
  284. self.proto = proto
  285. self.stateless_rpc = stateless_rpc
  286. def handle(self) -> None:
  287. """Handle a request."""
  288. raise NotImplementedError(self.handle)
  289. class PackHandler(Handler):
  290. """Protocol handler for packs."""
  291. def __init__(
  292. self, backend: Backend, proto: Protocol, stateless_rpc: bool = False
  293. ) -> None:
  294. """Initialize a PackHandler.
  295. Args:
  296. backend: Backend instance for repository access
  297. proto: Protocol instance for communication
  298. stateless_rpc: Whether this is a stateless RPC session
  299. """
  300. super().__init__(backend, proto, stateless_rpc)
  301. self._client_capabilities: set[bytes] | None = None
  302. # Flags needed for the no-done capability
  303. self._done_received = False
  304. self.advertise_refs = False
  305. def capabilities(self) -> Iterable[bytes]:
  306. """Return a list of capabilities supported by this handler."""
  307. raise NotImplementedError(self.capabilities)
  308. @classmethod
  309. def innocuous_capabilities(cls) -> Iterable[bytes]:
  310. """Return capabilities that don't affect protocol behavior.
  311. Returns:
  312. List of innocuous capability names
  313. """
  314. return [
  315. CAPABILITY_INCLUDE_TAG,
  316. CAPABILITY_THIN_PACK,
  317. CAPABILITY_NO_PROGRESS,
  318. CAPABILITY_OFS_DELTA,
  319. capability_agent(),
  320. ]
  321. @classmethod
  322. def required_capabilities(cls) -> Iterable[bytes]:
  323. """Return a list of capabilities that we require the client to have."""
  324. return []
  325. def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
  326. """Set the client capabilities and validate them.
  327. Args:
  328. caps: List of capabilities requested by the client
  329. Raises:
  330. GitProtocolError: If client requests unsupported capability or lacks required ones
  331. """
  332. allowable_caps = set(self.innocuous_capabilities())
  333. allowable_caps.update(self.capabilities())
  334. for cap in caps:
  335. if cap.startswith(CAPABILITY_AGENT + b"="):
  336. continue
  337. if cap.startswith(CAPABILITY_FILTER + b"="):
  338. # Filter capability can have a value (e.g., filter=blob:none)
  339. if CAPABILITY_FILTER not in allowable_caps:
  340. raise GitProtocolError(
  341. f"Client asked for capability {cap!r} that was not advertised."
  342. )
  343. continue
  344. if cap not in allowable_caps:
  345. raise GitProtocolError(
  346. f"Client asked for capability {cap!r} that was not advertised."
  347. )
  348. for cap in self.required_capabilities():
  349. if cap not in caps:
  350. raise GitProtocolError(
  351. f"Client does not support required capability {cap!r}."
  352. )
  353. self._client_capabilities = set(caps)
  354. logger.info("Client capabilities: %s", caps)
  355. def has_capability(self, cap: bytes) -> bool:
  356. """Check if the client supports a specific capability.
  357. Args:
  358. cap: Capability name to check
  359. Returns: True if the client supports the capability
  360. Raises:
  361. GitProtocolError: If called before client capabilities are set
  362. """
  363. if self._client_capabilities is None:
  364. raise GitProtocolError(
  365. f"Server attempted to access capability {cap!r} before asking client"
  366. )
  367. return cap in self._client_capabilities
  368. def notify_done(self) -> None:
  369. """Notify that the 'done' command has been received from the client."""
  370. self._done_received = True
  371. class UploadPackHandler(PackHandler):
  372. """Protocol handler for uploading a pack to the client."""
  373. def __init__(
  374. self,
  375. backend: Backend,
  376. args: Sequence[str],
  377. proto: Protocol,
  378. stateless_rpc: bool = False,
  379. advertise_refs: bool = False,
  380. ) -> None:
  381. """Initialize an UploadPackHandler.
  382. Args:
  383. backend: Backend instance for repository access
  384. args: Command arguments (first arg is repository path)
  385. proto: Protocol instance for communication
  386. stateless_rpc: Whether this is a stateless RPC session
  387. advertise_refs: Whether to advertise refs
  388. """
  389. super().__init__(backend, proto, stateless_rpc=stateless_rpc)
  390. self.repo = backend.open_repository(args[0])
  391. self._graph_walker = None
  392. self.advertise_refs = advertise_refs
  393. # A state variable for denoting that the have list is still
  394. # being processed, and the client is not accepting any other
  395. # data (such as side-band, see the progress method here).
  396. self._processing_have_lines = False
  397. # Filter specification for partial clone support
  398. self.filter_spec: FilterSpec | None = None
  399. def capabilities(self) -> list[bytes]:
  400. """Return the list of capabilities supported by upload-pack.
  401. Returns:
  402. List of capabilities including object-format for the repository
  403. """
  404. return [
  405. CAPABILITY_MULTI_ACK_DETAILED,
  406. CAPABILITY_MULTI_ACK,
  407. CAPABILITY_SIDE_BAND_64K,
  408. CAPABILITY_THIN_PACK,
  409. CAPABILITY_OFS_DELTA,
  410. CAPABILITY_NO_PROGRESS,
  411. CAPABILITY_INCLUDE_TAG,
  412. CAPABILITY_SHALLOW,
  413. CAPABILITY_NO_DONE,
  414. CAPABILITY_FILTER,
  415. capability_object_format(self.repo.object_format.name),
  416. ]
  417. @classmethod
  418. def required_capabilities(cls) -> tuple[bytes, ...]:
  419. """Return the list of capabilities required for upload-pack."""
  420. return (
  421. CAPABILITY_SIDE_BAND_64K,
  422. CAPABILITY_THIN_PACK,
  423. CAPABILITY_OFS_DELTA,
  424. )
  425. def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
  426. """Set client capabilities and parse filter specification if present.
  427. Args:
  428. caps: List of capability strings from the client
  429. """
  430. super().set_client_capabilities(caps)
  431. # Parse filter specification if present in capabilities
  432. # In protocol v1, filter can be sent as "filter=<spec>" capability
  433. # In protocol v2, filter is sent as a separate "filter <spec>" command
  434. filter_spec_bytes = find_capability(caps, CAPABILITY_FILTER)
  435. if filter_spec_bytes and filter_spec_bytes != CAPABILITY_FILTER:
  436. # Only parse if there's an actual spec (not just the capability name)
  437. try:
  438. self.filter_spec = parse_filter_spec(
  439. filter_spec_bytes, object_store=self.repo.object_store
  440. )
  441. except ValueError as e:
  442. raise GitProtocolError(f"Invalid filter specification: {e}")
  443. def progress(self, message: bytes) -> None:
  444. """Send a progress message to the client.
  445. Args:
  446. message: Progress message to send
  447. """
  448. def _start_pack_send_phase(self) -> None:
  449. """Start the pack sending phase, setting up sideband if supported."""
  450. if self.has_capability(CAPABILITY_SIDE_BAND_64K):
  451. # The provided haves are processed, and it is safe to send side-
  452. # band data now.
  453. if not self.has_capability(CAPABILITY_NO_PROGRESS):
  454. self.progress = partial( # type: ignore
  455. self.proto.write_sideband, SIDE_BAND_CHANNEL_PROGRESS
  456. )
  457. self.write_pack_data: Callable[[bytes], None] = partial(
  458. self.proto.write_sideband, SIDE_BAND_CHANNEL_DATA
  459. )
  460. else:
  461. # proto.write returns Optional[int], but we need to treat it as returning None
  462. # for compatibility with write_pack_from_container
  463. def write_data(data: bytes) -> None:
  464. self.proto.write(data)
  465. self.write_pack_data = write_data
  466. def get_tagged(
  467. self,
  468. refs: Mapping[bytes, bytes] | None = None,
  469. repo: BackendRepo | None = None,
  470. ) -> dict[ObjectID, ObjectID]:
  471. """Get a dict of peeled values of tags to their original tag shas.
  472. Args:
  473. refs: dict of refname -> sha of possible tags; defaults to all
  474. of the backend's refs.
  475. repo: optional Repo instance for getting peeled refs; defaults
  476. to the backend's repo, if available
  477. Returns: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a
  478. tag whose peeled value is peeled_sha.
  479. """
  480. if not self.has_capability(CAPABILITY_INCLUDE_TAG):
  481. return {}
  482. if refs is None:
  483. refs = self.repo.get_refs()
  484. if repo is None:
  485. repo = getattr(self.repo, "repo", None)
  486. if repo is None:
  487. # Bail if we don't have a Repo available; this is ok since
  488. # clients must be able to handle if the server doesn't include
  489. # all relevant tags.
  490. # TODO: fix behavior when missing
  491. return {}
  492. # TODO(jelmer): Integrate this with the refs logic in
  493. # Repo.find_missing_objects
  494. tagged = {}
  495. for name, sha in refs.items():
  496. peeled_sha = repo.get_peeled(name)
  497. if peeled_sha is not None and peeled_sha != ObjectID(sha):
  498. tagged[peeled_sha] = ObjectID(sha)
  499. return tagged
  500. def handle(self) -> None:
  501. """Handle an upload-pack request.
  502. This method processes the client's wants and haves, determines which
  503. objects to send, and writes the pack data to the client.
  504. """
  505. # Note the fact that client is only processing responses related
  506. # to the have lines it sent, and any other data (including side-
  507. # band) will be be considered a fatal error.
  508. self._processing_have_lines = True
  509. graph_walker = _ProtocolGraphWalker(
  510. self,
  511. self.repo.object_store,
  512. self.repo.get_peeled,
  513. self.repo.refs.get_symrefs,
  514. )
  515. wants: list[ObjectID] = []
  516. def wants_wrapper(
  517. refs: Mapping[Ref, ObjectID], depth: int | None = None
  518. ) -> list[ObjectID]:
  519. wants.extend(graph_walker.determine_wants(refs, depth))
  520. return wants
  521. missing_objects = self.repo.find_missing_objects(
  522. wants_wrapper,
  523. graph_walker,
  524. self.progress,
  525. get_tagged=self.get_tagged,
  526. )
  527. # Did the process short-circuit (e.g. in a stateless RPC call)? Note
  528. # that the client still expects a 0-object pack in most cases.
  529. # Also, if it also happens that the object_iter is instantiated
  530. # with a graph walker with an implementation that talks over the
  531. # wire (which is this instance of this class) this will actually
  532. # iterate through everything and write things out to the wire.
  533. if len(wants) == 0:
  534. return
  535. # Handle shallow clone case where missing_objects can be None
  536. if missing_objects is None:
  537. return
  538. object_ids = list(missing_objects)
  539. if not graph_walker.handle_done(
  540. not self.has_capability(CAPABILITY_NO_DONE), self._done_received
  541. ):
  542. return
  543. self._start_pack_send_phase()
  544. # Apply filter if specified (partial clone support)
  545. if self.filter_spec is not None:
  546. original_count = len(object_ids)
  547. # Use path-aware filtering for tree depth and sparse:oid filters
  548. # Check if filter requires path tracking
  549. def needs_path_tracking(filter_spec: FilterSpec) -> bool:
  550. if isinstance(filter_spec, (TreeDepthFilter, SparseOidFilter)):
  551. return True
  552. if isinstance(filter_spec, CombineFilter):
  553. return any(needs_path_tracking(f) for f in filter_spec.filters)
  554. return False
  555. if needs_path_tracking(self.filter_spec):
  556. # Path-aware filtering returns list of OIDs, convert to tuples for pack generation
  557. filtered_oids = filter_pack_objects_with_paths(
  558. self.repo.object_store,
  559. wants,
  560. self.filter_spec,
  561. progress=self.progress,
  562. )
  563. object_ids = [(oid, None) for oid in filtered_oids]
  564. else:
  565. # Extract just the object IDs (filter_pack_objects expects list of OIDs)
  566. # object_ids is a list of tuples (oid, (depth, path))
  567. oid_list = [oid for oid, _hint in object_ids]
  568. filtered_oids = filter_pack_objects(
  569. self.repo.object_store, oid_list, self.filter_spec
  570. )
  571. # Reconstruct tuples with hints for pack generation
  572. filtered_oid_set = set(filtered_oids)
  573. object_ids = [
  574. (oid, hint) for oid, hint in object_ids if oid in filtered_oid_set
  575. ]
  576. filtered_count = original_count - len(object_ids)
  577. if filtered_count > 0:
  578. self.progress((f"filtered {filtered_count} objects.\n").encode("ascii"))
  579. self.progress((f"counting objects: {len(object_ids)}, done.\n").encode("ascii"))
  580. write_pack_from_container(
  581. self.write_pack_data,
  582. self.repo.object_store,
  583. object_ids,
  584. object_format=self.repo.object_format,
  585. )
  586. # we are done
  587. self.proto.write_pkt_line(None)
  588. def _split_proto_line(
  589. line: bytes | None, allowed: Iterable[bytes | None] | None
  590. ) -> tuple[bytes | None, bytes | int | None]:
  591. """Split a line read from the wire.
  592. Args:
  593. line: The line read from the wire.
  594. allowed: An iterable of command names that should be allowed.
  595. Command names not listed below as possible return values will be
  596. ignored. If None, any commands from the possible return values are
  597. allowed.
  598. Returns: a tuple having one of the following forms:
  599. ('want', obj_id)
  600. ('have', obj_id)
  601. ('done', None)
  602. (None, None) (for a flush-pkt)
  603. Raises:
  604. UnexpectedCommandError: if the line cannot be parsed into one of the
  605. allowed return values.
  606. """
  607. if not line:
  608. fields: list[bytes | None] = [None]
  609. else:
  610. fields = list(line.rstrip(b"\n").split(b" ", 1))
  611. command = fields[0]
  612. if allowed is not None and command not in allowed:
  613. raise UnexpectedCommandError(command.decode("utf-8") if command else None)
  614. if len(fields) == 1 and command in (COMMAND_DONE, None):
  615. return (command, None)
  616. elif len(fields) == 2:
  617. if command in (
  618. COMMAND_WANT,
  619. COMMAND_HAVE,
  620. COMMAND_SHALLOW,
  621. COMMAND_UNSHALLOW,
  622. ):
  623. assert fields[1] is not None
  624. if not valid_hexsha(fields[1]):
  625. raise GitProtocolError("Invalid sha")
  626. return (command, fields[1])
  627. elif command == COMMAND_DEEPEN:
  628. assert fields[1] is not None
  629. return command, int(fields[1])
  630. elif command == COMMAND_FILTER:
  631. # Filter specification (e.g., "filter blob:none")
  632. assert fields[1] is not None
  633. return (command, fields[1])
  634. raise GitProtocolError(f"Received invalid line from client: {line!r}")
  635. def _want_satisfied(
  636. store: ObjectContainer, haves: set[ObjectID], want: ObjectID, earliest: int
  637. ) -> bool:
  638. """Check if a specific want is satisfied by a set of haves.
  639. Args:
  640. store: Object store to retrieve objects from
  641. haves: Set of commit IDs the client has
  642. want: Commit ID the client wants
  643. earliest: Earliest commit time to consider
  644. Returns: True if the want is satisfied by the haves
  645. """
  646. o = store[want]
  647. pending = deque([o])
  648. known = {want}
  649. while pending:
  650. commit = pending.popleft()
  651. if commit.id in haves:
  652. return True
  653. if not isinstance(commit, Commit):
  654. # non-commit wants are assumed to be satisfied
  655. continue
  656. for parent in commit.parents:
  657. if parent in known:
  658. continue
  659. known.add(parent)
  660. parent_obj = store[parent]
  661. assert isinstance(parent_obj, Commit)
  662. # TODO: handle parents with later commit times than children
  663. if parent_obj.commit_time >= earliest:
  664. pending.append(parent_obj)
  665. return False
  666. def _all_wants_satisfied(
  667. store: ObjectContainer, haves: AbstractSet[ObjectID], wants: set[ObjectID]
  668. ) -> bool:
  669. """Check whether all the current wants are satisfied by a set of haves.
  670. Args:
  671. store: Object store to retrieve objects from
  672. haves: A set of commits we know the client has.
  673. wants: A set of commits the client wants
  674. Note: Wants are specified with set_wants rather than passed in since
  675. in the current interface they are determined outside this class.
  676. """
  677. haves = set(haves)
  678. if haves:
  679. have_objs = [store[h] for h in haves]
  680. earliest = min([h.commit_time for h in have_objs if isinstance(h, Commit)])
  681. else:
  682. earliest = 0
  683. for want in wants:
  684. if not _want_satisfied(store, haves, want, earliest):
  685. return False
  686. return True
  687. class AckGraphWalkerImpl:
  688. """Base class for acknowledgment graph walker implementations."""
  689. def __init__(self, graph_walker: "_ProtocolGraphWalker") -> None:
  690. """Initialize acknowledgment graph walker.
  691. Args:
  692. graph_walker: Graph walker to wrap
  693. """
  694. raise NotImplementedError
  695. def ack(self, have_ref: ObjectID) -> None:
  696. """Acknowledge a have reference.
  697. Args:
  698. have_ref: Object ID to acknowledge
  699. """
  700. raise NotImplementedError
  701. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  702. """Handle 'done' packet from client."""
  703. raise NotImplementedError
  704. class _ProtocolGraphWalker:
  705. """A graph walker that knows the git protocol.
  706. As a graph walker, this class implements ack(), next(), and reset(). It
  707. also contains some base methods for interacting with the wire and walking
  708. the commit tree.
  709. The work of determining which acks to send is passed on to the
  710. implementation instance stored in _impl. The reason for this is that we do
  711. not know at object creation time what ack level the protocol requires. A
  712. call to set_ack_type() is required to set up the implementation, before
  713. any calls to next() or ack() are made.
  714. """
  715. def __init__(
  716. self,
  717. handler: "UploadPackHandler",
  718. object_store: ObjectContainer,
  719. get_peeled: Callable[[bytes], ObjectID | None],
  720. get_symrefs: Callable[[], dict[Ref, Ref]],
  721. ) -> None:
  722. """Initialize a ProtocolGraphWalker.
  723. Args:
  724. handler: Protocol handler instance
  725. object_store: Object store for retrieving objects
  726. get_peeled: Function to get peeled refs
  727. get_symrefs: Function to get symbolic refs
  728. """
  729. self.handler = handler
  730. self.store: ObjectContainer = object_store
  731. self.get_peeled = get_peeled
  732. self.get_symrefs = get_symrefs
  733. self.proto = handler.proto
  734. self.stateless_rpc = handler.stateless_rpc
  735. self.advertise_refs = handler.advertise_refs
  736. self._wants: list[ObjectID] = []
  737. self.shallow: set[ObjectID] = set()
  738. self.client_shallow: set[ObjectID] = set()
  739. self.unshallow: set[ObjectID] = set()
  740. self._cached = False
  741. self._cache: list[ObjectID] = []
  742. self._cache_index = 0
  743. self._impl: AckGraphWalkerImpl | None = None
  744. def determine_wants(
  745. self, heads: Mapping[Ref, ObjectID], depth: int | None = None
  746. ) -> list[ObjectID]:
  747. """Determine the wants for a set of heads.
  748. The given heads are advertised to the client, who then specifies which
  749. refs they want using 'want' lines. This portion of the protocol is the
  750. same regardless of ack type, and in fact is used to set the ack type of
  751. the ProtocolGraphWalker.
  752. If the client has the 'shallow' capability, this method also reads and
  753. responds to the 'shallow' and 'deepen' lines from the client. These are
  754. not part of the wants per se, but they set up necessary state for
  755. walking the graph. Additionally, later code depends on this method
  756. consuming everything up to the first 'have' line.
  757. Args:
  758. heads: a dict of refname->SHA1 to advertise
  759. depth: Maximum depth for shallow clones
  760. Returns: a list of SHA1s requested by the client
  761. """
  762. symrefs = self.get_symrefs()
  763. values = set(heads.values())
  764. if self.advertise_refs or not self.stateless_rpc:
  765. for i, (ref, sha) in enumerate(sorted(heads.items())):
  766. try:
  767. peeled_sha = self.get_peeled(ref)
  768. except KeyError:
  769. # Skip refs that are inaccessible
  770. # TODO(jelmer): Integrate with Repo.find_missing_objects refs
  771. # logic.
  772. continue
  773. if i == 0:
  774. logger.info("Sending capabilities: %s", self.handler.capabilities())
  775. line = format_ref_line(
  776. ref,
  777. sha,
  778. list(self.handler.capabilities())
  779. + symref_capabilities(symrefs.items()),
  780. )
  781. else:
  782. line = format_ref_line(ref, sha)
  783. self.proto.write_pkt_line(line)
  784. if peeled_sha is not None and peeled_sha != sha:
  785. self.proto.write_pkt_line(
  786. format_ref_line(ref + PEELED_TAG_SUFFIX, peeled_sha)
  787. )
  788. # i'm done..
  789. self.proto.write_pkt_line(None)
  790. if self.advertise_refs:
  791. return []
  792. # Now client will sending want want want commands
  793. want = self.proto.read_pkt_line()
  794. if not want:
  795. return []
  796. line, caps = extract_want_line_capabilities(want)
  797. self.handler.set_client_capabilities(caps)
  798. self.set_ack_type(ack_type(caps))
  799. allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, COMMAND_FILTER, None)
  800. command, sha_result = _split_proto_line(line, allowed)
  801. want_revs: list[ObjectID] = []
  802. while command == COMMAND_WANT:
  803. assert isinstance(sha_result, bytes)
  804. if sha_result not in values:
  805. raise GitProtocolError(f"Client wants invalid object {sha_result!r}")
  806. want_revs.append(ObjectID(sha_result))
  807. command, sha_result = self.read_proto_line(allowed)
  808. self.set_wants(want_revs)
  809. # Handle filter command if present (protocol v2)
  810. if command == COMMAND_FILTER:
  811. assert isinstance(sha_result, bytes)
  812. try:
  813. self.handler.filter_spec = parse_filter_spec(
  814. sha_result, object_store=self.handler.repo.object_store
  815. )
  816. except ValueError as e:
  817. raise GitProtocolError(f"Invalid filter specification: {e}")
  818. # Read next command after processing filter
  819. command, sha_result = self.read_proto_line(allowed)
  820. if command in (COMMAND_SHALLOW, COMMAND_DEEPEN):
  821. assert sha_result is not None
  822. self.unread_proto_line(command, sha_result)
  823. self._handle_shallow_request(want_revs)
  824. if self.stateless_rpc and self.proto.eof():
  825. # The client may close the socket at this point, expecting a
  826. # flush-pkt from the server. We might be ready to send a packfile
  827. # at this point, so we need to explicitly short-circuit in this
  828. # case.
  829. return []
  830. return want_revs
  831. def unread_proto_line(self, command: bytes, value: bytes | int) -> None:
  832. """Push a command back to be read again.
  833. Args:
  834. command: Command name
  835. value: Command value
  836. """
  837. if isinstance(value, int):
  838. value = str(value).encode("ascii")
  839. self.proto.unread_pkt_line(command + b" " + value)
  840. def nak(self) -> None:
  841. """Send a NAK response."""
  842. def ack(self, have_ref: ObjectID) -> None:
  843. """Acknowledge a have reference.
  844. Args:
  845. have_ref: SHA to acknowledge (40 bytes hex for SHA-1, 64 bytes for SHA-256)
  846. Raises:
  847. ValueError: If have_ref is not a valid length
  848. """
  849. if len(have_ref) not in (40, 64):
  850. raise ValueError(f"invalid sha {have_ref!r}")
  851. assert self._impl is not None
  852. return self._impl.ack(have_ref)
  853. def reset(self) -> None:
  854. """Reset the graph walker cache."""
  855. self._cached = True
  856. self._cache_index = 0
  857. def next(self) -> ObjectID | None:
  858. """Get the next SHA from the graph walker.
  859. Returns: Next SHA or None if done
  860. """
  861. if not self._cached:
  862. if not self._impl and self.stateless_rpc:
  863. return None
  864. assert self._impl is not None
  865. return next(self._impl) # type: ignore[call-overload, no-any-return]
  866. self._cache_index += 1
  867. if self._cache_index > len(self._cache):
  868. return None
  869. return self._cache[self._cache_index]
  870. __next__ = next
  871. def read_proto_line(
  872. self, allowed: Iterable[bytes | None] | None
  873. ) -> tuple[bytes | None, bytes | int | None]:
  874. """Read a line from the wire.
  875. Args:
  876. allowed: An iterable of command names that should be allowed.
  877. Returns: A tuple of (command, value); see _split_proto_line.
  878. Raises:
  879. UnexpectedCommandError: If an error occurred reading the line.
  880. """
  881. return _split_proto_line(self.proto.read_pkt_line(), allowed)
  882. def _handle_shallow_request(self, wants: Sequence[ObjectID]) -> None:
  883. """Handle shallow clone requests from the client.
  884. Args:
  885. wants: List of wanted object SHAs
  886. """
  887. while True:
  888. command, val = self.read_proto_line((COMMAND_DEEPEN, COMMAND_SHALLOW))
  889. if command == COMMAND_DEEPEN:
  890. assert isinstance(val, int)
  891. depth = val
  892. break
  893. assert isinstance(val, bytes)
  894. self.client_shallow.add(ObjectID(val))
  895. self.read_proto_line((None,)) # consume client's flush-pkt
  896. shallow, not_shallow = find_shallow(self.store, wants, depth)
  897. # Update self.shallow instead of reassigning it since we passed a
  898. # reference to it before this method was called.
  899. self.shallow.update(shallow - not_shallow)
  900. new_shallow = self.shallow - self.client_shallow
  901. unshallow = self.unshallow = not_shallow & self.client_shallow
  902. self.update_shallow(new_shallow, unshallow)
  903. def update_shallow(
  904. self, new_shallow: AbstractSet[ObjectID], unshallow: AbstractSet[ObjectID]
  905. ) -> None:
  906. """Update shallow/unshallow information to the client.
  907. Args:
  908. new_shallow: Set of newly shallow commits
  909. unshallow: Set of commits to unshallow
  910. """
  911. for sha in sorted(new_shallow):
  912. self.proto.write_pkt_line(format_shallow_line(sha))
  913. for sha in sorted(unshallow):
  914. self.proto.write_pkt_line(format_unshallow_line(sha))
  915. self.proto.write_pkt_line(None)
  916. def notify_done(self) -> None:
  917. """Notify that the client sent 'done'."""
  918. # relay the message down to the handler.
  919. self.handler.notify_done()
  920. def send_ack(self, sha: ObjectID, ack_type: bytes = b"") -> None:
  921. """Send an ACK to the client.
  922. Args:
  923. sha: SHA to acknowledge
  924. ack_type: Type of ACK (e.g., b'continue', b'ready')
  925. """
  926. self.proto.write_pkt_line(format_ack_line(sha, ack_type))
  927. def send_nak(self) -> None:
  928. """Send a NAK to the client."""
  929. self.proto.write_pkt_line(NAK_LINE)
  930. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  931. """Handle the 'done' command.
  932. Args:
  933. done_required: Whether done is required
  934. done_received: Whether done was received
  935. Returns: True if done handling succeeded
  936. """
  937. # Delegate this to the implementation.
  938. assert self._impl is not None
  939. return self._impl.handle_done(done_required, done_received)
  940. def set_wants(self, wants: list[ObjectID]) -> None:
  941. """Set the list of wanted objects.
  942. Args:
  943. wants: List of wanted object SHAs
  944. """
  945. self._wants = wants
  946. def all_wants_satisfied(self, haves: AbstractSet[ObjectID]) -> bool:
  947. """Check whether all the current wants are satisfied by a set of haves.
  948. Args:
  949. haves: A set of commits we know the client has.
  950. Note: Wants are specified with set_wants rather than passed in since
  951. in the current interface they are determined outside this class.
  952. """
  953. return _all_wants_satisfied(self.store, haves, set(self._wants))
  954. def set_ack_type(self, ack_type: int) -> None:
  955. """Set the acknowledgment type for the graph walker.
  956. Args:
  957. ack_type: One of SINGLE_ACK, MULTI_ACK, or MULTI_ACK_DETAILED
  958. """
  959. impl_classes: dict[int, type[AckGraphWalkerImpl]] = {
  960. MULTI_ACK: MultiAckGraphWalkerImpl,
  961. MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
  962. SINGLE_ACK: SingleAckGraphWalkerImpl,
  963. }
  964. self._impl = impl_classes[ack_type](self)
# Commands the ack implementations accept while reading lines during graph
# walking: 'have', 'done', or None (a flush-pkt).
_GRAPH_WALKER_COMMANDS = (COMMAND_HAVE, COMMAND_DONE, None)
  966. class SingleAckGraphWalkerImpl(AckGraphWalkerImpl):
  967. """Graph walker implementation that speaks the single-ack protocol."""
  968. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  969. """Initialize a SingleAckGraphWalkerImpl.
  970. Args:
  971. walker: Parent ProtocolGraphWalker instance
  972. """
  973. self.walker = walker
  974. self._common: list[ObjectID] = []
  975. def ack(self, have_ref: ObjectID) -> None:
  976. """Acknowledge a have reference.
  977. Args:
  978. have_ref: Object ID to acknowledge
  979. """
  980. if not self._common:
  981. self.walker.send_ack(have_ref)
  982. self._common.append(have_ref)
  983. def next(self) -> ObjectID | None:
  984. """Get next SHA from graph walker.
  985. Returns:
  986. SHA bytes or None if done
  987. """
  988. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  989. if command in (None, COMMAND_DONE):
  990. # defer the handling of done
  991. self.walker.notify_done()
  992. return None
  993. elif command == COMMAND_HAVE:
  994. assert isinstance(sha, bytes)
  995. return ObjectID(sha)
  996. return None
  997. __next__ = next
  998. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  999. """Handle done command.
  1000. Args:
  1001. done_required: Whether done is required
  1002. done_received: Whether done was received
  1003. Returns:
  1004. True if handling completed successfully
  1005. """
  1006. if not self._common:
  1007. self.walker.send_nak()
  1008. if done_required and not done_received:
  1009. # we are not done, especially when done is required; skip
  1010. # the pack for this request and especially do not handle
  1011. # the done.
  1012. return False
  1013. if not done_received and not self._common:
  1014. # Okay we are not actually done then since the walker picked
  1015. # up no haves. This is usually triggered when client attempts
  1016. # to pull from a source that has no common base_commit.
  1017. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  1018. # test_multi_ack_stateless_nodone
  1019. return False
  1020. return True
  1021. class MultiAckGraphWalkerImpl(AckGraphWalkerImpl):
  1022. """Graph walker implementation that speaks the multi-ack protocol."""
  1023. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  1024. """Initialize multi-ack graph walker.
  1025. Args:
  1026. walker: Parent ProtocolGraphWalker instance
  1027. """
  1028. self.walker = walker
  1029. self._found_base = False
  1030. self._common: list[ObjectID] = []
  1031. def ack(self, have_ref: ObjectID) -> None:
  1032. """Acknowledge a have reference.
  1033. Args:
  1034. have_ref: Object ID to acknowledge
  1035. """
  1036. self._common.append(have_ref)
  1037. if not self._found_base:
  1038. self.walker.send_ack(have_ref, b"continue")
  1039. if self.walker.all_wants_satisfied(set(self._common)):
  1040. self._found_base = True
  1041. # else we blind ack within next
  1042. def next(self) -> ObjectID | None:
  1043. """Get next SHA from graph walker.
  1044. Returns:
  1045. SHA bytes or None if done
  1046. """
  1047. while True:
  1048. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  1049. if command is None:
  1050. self.walker.send_nak()
  1051. # in multi-ack mode, a flush-pkt indicates the client wants to
  1052. # flush but more have lines are still coming
  1053. continue
  1054. elif command == COMMAND_DONE:
  1055. self.walker.notify_done()
  1056. return None
  1057. elif command == COMMAND_HAVE:
  1058. assert isinstance(sha, bytes)
  1059. sha_id = ObjectID(sha)
  1060. if self._found_base:
  1061. # blind ack
  1062. self.walker.send_ack(sha_id, b"continue")
  1063. return sha_id
  1064. __next__ = next
  1065. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  1066. """Handle done command.
  1067. Args:
  1068. done_required: Whether done is required
  1069. done_received: Whether done was received
  1070. Returns:
  1071. True if handling completed successfully
  1072. """
  1073. if done_required and not done_received:
  1074. # we are not done, especially when done is required; skip
  1075. # the pack for this request and especially do not handle
  1076. # the done.
  1077. return False
  1078. if not done_received and not self._common:
  1079. # Okay we are not actually done then since the walker picked
  1080. # up no haves. This is usually triggered when client attempts
  1081. # to pull from a source that has no common base_commit.
  1082. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  1083. # test_multi_ack_stateless_nodone
  1084. return False
  1085. # don't nak unless no common commits were found, even if not
  1086. # everything is satisfied
  1087. if self._common:
  1088. self.walker.send_ack(self._common[-1])
  1089. else:
  1090. self.walker.send_nak()
  1091. return True
  1092. class MultiAckDetailedGraphWalkerImpl(AckGraphWalkerImpl):
  1093. """Graph walker implementation speaking the multi-ack-detailed protocol."""
  1094. def __init__(self, walker: "_ProtocolGraphWalker") -> None:
  1095. """Initialize multi-ack-detailed graph walker.
  1096. Args:
  1097. walker: Parent ProtocolGraphWalker instance
  1098. """
  1099. self.walker = walker
  1100. self._common: list[ObjectID] = []
  1101. def ack(self, have_ref: ObjectID) -> None:
  1102. """Acknowledge a have reference.
  1103. Args:
  1104. have_ref: Object ID to acknowledge
  1105. """
  1106. # Should only be called iff have_ref is common
  1107. self._common.append(have_ref)
  1108. self.walker.send_ack(have_ref, b"common")
  1109. def next(self) -> ObjectID | None:
  1110. """Get next SHA from graph walker.
  1111. Returns:
  1112. SHA bytes or None if done
  1113. """
  1114. while True:
  1115. command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
  1116. if command is None:
  1117. if self.walker.all_wants_satisfied(set(self._common)):
  1118. self.walker.send_ack(self._common[-1], b"ready")
  1119. self.walker.send_nak()
  1120. if self.walker.stateless_rpc:
  1121. # The HTTP version of this request a flush-pkt always
  1122. # signifies an end of request, so we also return
  1123. # nothing here as if we are done (but not really, as
  1124. # it depends on whether no-done capability was
  1125. # specified and that's handled in handle_done which
  1126. # may or may not call post_nodone_check depending on
  1127. # that).
  1128. return None
  1129. elif command == COMMAND_DONE:
  1130. # Let the walker know that we got a done.
  1131. self.walker.notify_done()
  1132. break
  1133. elif command == COMMAND_HAVE:
  1134. # return the sha and let the caller ACK it with the
  1135. # above ack method.
  1136. assert isinstance(sha, bytes)
  1137. return ObjectID(sha)
  1138. # don't nak unless no common commits were found, even if not
  1139. # everything is satisfied
  1140. return None
  1141. __next__ = next
  1142. def handle_done(self, done_required: bool, done_received: bool) -> bool:
  1143. """Handle done command.
  1144. Args:
  1145. done_required: Whether done is required
  1146. done_received: Whether done was received
  1147. Returns:
  1148. True if handling completed successfully
  1149. """
  1150. if done_required and not done_received:
  1151. # we are not done, especially when done is required; skip
  1152. # the pack for this request and especially do not handle
  1153. # the done.
  1154. return False
  1155. if not done_received and not self._common:
  1156. # Okay we are not actually done then since the walker picked
  1157. # up no haves. This is usually triggered when client attempts
  1158. # to pull from a source that has no common base_commit.
  1159. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
  1160. # test_multi_ack_stateless_nodone
  1161. return False
  1162. # don't nak unless no common commits were found, even if not
  1163. # everything is satisfied
  1164. if self._common:
  1165. self.walker.send_ack(self._common[-1])
  1166. else:
  1167. self.walker.send_nak()
  1168. return True
  1169. class ReceivePackHandler(PackHandler):
  1170. """Protocol handler for downloading a pack from the client."""
  1171. def __init__(
  1172. self,
  1173. backend: Backend,
  1174. args: Sequence[str],
  1175. proto: Protocol,
  1176. stateless_rpc: bool = False,
  1177. advertise_refs: bool = False,
  1178. ) -> None:
  1179. """Initialize receive-pack handler.
  1180. Args:
  1181. backend: Backend instance
  1182. args: Command arguments
  1183. proto: Protocol instance
  1184. stateless_rpc: Whether to use stateless RPC
  1185. advertise_refs: Whether to advertise refs
  1186. """
  1187. super().__init__(backend, proto, stateless_rpc=stateless_rpc)
  1188. self.repo = backend.open_repository(args[0])
  1189. self.advertise_refs = advertise_refs
  1190. def capabilities(self) -> Iterable[bytes]:
  1191. """Return supported capabilities.
  1192. Returns:
  1193. List of capability names including object-format for the repository
  1194. """
  1195. return [
  1196. CAPABILITY_REPORT_STATUS,
  1197. CAPABILITY_DELETE_REFS,
  1198. CAPABILITY_QUIET,
  1199. CAPABILITY_OFS_DELTA,
  1200. CAPABILITY_SIDE_BAND_64K,
  1201. CAPABILITY_NO_DONE,
  1202. capability_object_format(self.repo.object_format.name),
  1203. ]
  1204. def _apply_pack(
  1205. self, refs: list[tuple[ObjectID, ObjectID, Ref]]
  1206. ) -> Iterator[tuple[bytes, bytes]]:
  1207. """Apply received pack to repository.
  1208. Args:
  1209. refs: List of (old_sha, new_sha, ref_name) tuples
  1210. Yields:
  1211. Tuples of (ref_name, error_message) for any errors
  1212. """
  1213. all_exceptions = (
  1214. IOError,
  1215. OSError,
  1216. ChecksumMismatch,
  1217. ApplyDeltaError,
  1218. AssertionError,
  1219. socket.error,
  1220. zlib.error,
  1221. ObjectFormatException,
  1222. )
  1223. will_send_pack = False
  1224. zero_sha = ObjectID(b"0" * self.repo.object_format.hex_length)
  1225. for command in refs:
  1226. if command[1] != zero_sha:
  1227. will_send_pack = True
  1228. if will_send_pack:
  1229. # TODO: more informative error messages than just the exception
  1230. # string
  1231. try:
  1232. recv = getattr(self.proto, "recv", None)
  1233. self.repo.object_store.add_thin_pack(self.proto.read, recv) # type: ignore[attr-defined]
  1234. yield (b"unpack", b"ok")
  1235. except all_exceptions as e:
  1236. yield (b"unpack", str(e).replace("\n", "").encode("utf-8"))
  1237. # The pack may still have been moved in, but it may contain
  1238. # broken objects. We trust a later GC to clean it up.
  1239. else:
  1240. # The git protocol want to find a status entry related to unpack
  1241. # process even if no pack data has been sent.
  1242. yield (b"unpack", b"ok")
  1243. for oldsha, sha, ref in refs:
  1244. ref_status = b"ok"
  1245. try:
  1246. if sha == zero_sha:
  1247. if CAPABILITY_DELETE_REFS not in self.capabilities():
  1248. raise GitProtocolError(
  1249. "Attempted to delete refs without delete-refs capability."
  1250. )
  1251. try:
  1252. self.repo.refs.remove_if_equals(ref, oldsha)
  1253. except all_exceptions:
  1254. ref_status = b"failed to delete"
  1255. else:
  1256. try:
  1257. self.repo.refs.set_if_equals(ref, oldsha, sha)
  1258. except all_exceptions:
  1259. ref_status = b"failed to write"
  1260. except KeyError:
  1261. ref_status = b"bad ref"
  1262. yield (ref, ref_status)
  1263. def _report_status(self, status: Sequence[tuple[bytes, bytes]]) -> None:
  1264. """Report status to client.
  1265. Args:
  1266. status: List of (ref_name, status_message) tuples
  1267. """
  1268. if self.has_capability(CAPABILITY_SIDE_BAND_64K):
  1269. writer = BufferedPktLineWriter(
  1270. lambda d: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, d)
  1271. )
  1272. write = writer.write
  1273. def flush() -> None:
  1274. writer.flush()
  1275. self.proto.write_pkt_line(None)
  1276. else:
  1277. write = self.proto.write_pkt_line # type: ignore[assignment]
  1278. def flush() -> None:
  1279. pass
  1280. for name, msg in status:
  1281. if name == b"unpack":
  1282. write(b"unpack " + msg + b"\n")
  1283. elif msg == b"ok":
  1284. write(b"ok " + name + b"\n")
  1285. else:
  1286. write(b"ng " + name + b" " + msg + b"\n")
  1287. write(None) # type: ignore
  1288. flush()
  1289. def _on_post_receive(self, client_refs: dict[bytes, tuple[bytes, bytes]]) -> None:
  1290. """Run post-receive hook.
  1291. Args:
  1292. client_refs: Dictionary of ref changes from client
  1293. """
  1294. hook = self.repo.hooks.get("post-receive", None) # type: ignore[attr-defined]
  1295. if not hook:
  1296. return
  1297. try:
  1298. output = hook.execute(client_refs)
  1299. if output:
  1300. self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, output)
  1301. except HookError as err:
  1302. self.proto.write_sideband(SIDE_BAND_CHANNEL_FATAL, str(err).encode("utf-8"))
  1303. def handle(self) -> None:
  1304. """Handle receive-pack request."""
  1305. if self.advertise_refs or not self.stateless_rpc:
  1306. refs = sorted(self.repo.get_refs().items())
  1307. symrefs = sorted(self.repo.refs.get_symrefs().items())
  1308. if not refs:
  1309. zero_sha = ObjectID(b"0" * self.repo.object_format.hex_length)
  1310. refs = [(CAPABILITIES_REF, zero_sha)]
  1311. logger.info("Sending capabilities: %s", self.capabilities())
  1312. self.proto.write_pkt_line(
  1313. format_ref_line(
  1314. refs[0][0],
  1315. refs[0][1],
  1316. list(self.capabilities()) + symref_capabilities(symrefs),
  1317. )
  1318. )
  1319. for i in range(1, len(refs)):
  1320. ref = refs[i]
  1321. self.proto.write_pkt_line(format_ref_line(ref[0], ref[1]))
  1322. self.proto.write_pkt_line(None)
  1323. if self.advertise_refs:
  1324. return
  1325. client_refs = []
  1326. ref_line = self.proto.read_pkt_line()
  1327. # if ref is none then client doesn't want to send us anything..
  1328. if ref_line is None:
  1329. return
  1330. ref_line, caps = extract_capabilities(ref_line)
  1331. self.set_client_capabilities(caps)
  1332. # client will now send us a list of (oldsha, newsha, ref)
  1333. while ref_line:
  1334. (oldsha, newsha, ref_name) = ref_line.split()
  1335. client_refs.append((ObjectID(oldsha), ObjectID(newsha), Ref(ref_name)))
  1336. ref_line = self.proto.read_pkt_line()
  1337. # backend can now deal with this refs and read a pack using self.read
  1338. status = list(self._apply_pack(client_refs))
  1339. self._on_post_receive(client_refs) # type: ignore[arg-type]
  1340. # when we have read all the pack from the client, send a status report
  1341. # if the client asked for it
  1342. if self.has_capability(CAPABILITY_REPORT_STATUS):
  1343. self._report_status(status)
  1344. class UploadArchiveHandler(Handler):
  1345. """Handler for git-upload-archive requests."""
  1346. def __init__(
  1347. self,
  1348. backend: Backend,
  1349. args: Sequence[str],
  1350. proto: Protocol,
  1351. stateless_rpc: bool = False,
  1352. ) -> None:
  1353. """Initialize upload-archive handler.
  1354. Args:
  1355. backend: Backend instance
  1356. args: Command arguments
  1357. proto: Protocol instance
  1358. stateless_rpc: Whether to use stateless RPC
  1359. """
  1360. super().__init__(backend, proto, stateless_rpc)
  1361. self.repo = backend.open_repository(args[0])
  1362. def handle(self) -> None:
  1363. """Handle upload-archive request."""
  1364. def write(x: bytes) -> None:
  1365. self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
  1366. arguments = []
  1367. for pkt in self.proto.read_pkt_seq():
  1368. (key, value) = pkt.split(b" ", 1)
  1369. if key != b"argument":
  1370. raise GitProtocolError(f"unknown command {key!r}")
  1371. arguments.append(value.rstrip(b"\n"))
  1372. prefix = b""
  1373. format = "tar"
  1374. i = 0
  1375. store: BaseObjectStore = self.repo.object_store
  1376. while i < len(arguments):
  1377. argument = arguments[i]
  1378. if argument == b"--prefix":
  1379. i += 1
  1380. prefix = arguments[i]
  1381. elif argument == b"--format":
  1382. i += 1
  1383. format = arguments[i].decode("ascii")
  1384. else:
  1385. commit_sha = self.repo.refs[Ref(argument)]
  1386. commit_obj = store[commit_sha]
  1387. assert isinstance(commit_obj, Commit)
  1388. tree_obj = store[commit_obj.tree]
  1389. assert isinstance(tree_obj, Tree)
  1390. tree = tree_obj
  1391. i += 1
  1392. self.proto.write_pkt_line(b"ACK")
  1393. self.proto.write_pkt_line(None)
  1394. for chunk in tar_stream(
  1395. store,
  1396. tree,
  1397. mtime=int(time.time()),
  1398. prefix=prefix,
  1399. format=format,
  1400. ):
  1401. write(chunk)
  1402. self.proto.write_pkt_line(None)
# Default handler classes for git services, keyed by the service command
# name. TCPGitServer copies this mapping and lets callers add to or override
# individual entries.
DEFAULT_HANDLERS = {
    b"git-upload-pack": UploadPackHandler,
    b"git-receive-pack": ReceivePackHandler,
    b"git-upload-archive": UploadArchiveHandler,
}
  1409. class TCPGitRequestHandler(socketserver.StreamRequestHandler):
  1410. """TCP request handler for git protocol."""
  1411. def __init__(
  1412. self,
  1413. handlers: dict[bytes, type[Handler]],
  1414. request: socket.socket,
  1415. client_address: tuple[str, int],
  1416. server: socketserver.TCPServer,
  1417. ) -> None:
  1418. """Initialize TCP request handler.
  1419. Args:
  1420. handlers: Dictionary mapping commands to handler classes
  1421. request: Request socket
  1422. client_address: Client address tuple
  1423. server: Server instance
  1424. """
  1425. self.handlers = handlers
  1426. socketserver.StreamRequestHandler.__init__(
  1427. self, request, client_address, server
  1428. )
  1429. def handle(self) -> None:
  1430. """Handle TCP git request."""
  1431. proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
  1432. command, args = proto.read_cmd()
  1433. logger.info("Handling %s request, args=%s", command, args)
  1434. cls = self.handlers.get(command, None)
  1435. if not callable(cls):
  1436. raise GitProtocolError(f"Invalid service {command!r}")
  1437. h = cls(self.server.backend, args, proto) # type: ignore
  1438. h.handle()
  1439. class TCPGitServer(socketserver.TCPServer):
  1440. """TCP server for git protocol."""
  1441. allow_reuse_address = True
  1442. serve = socketserver.TCPServer.serve_forever
  1443. def _make_handler(
  1444. self,
  1445. request: socket.socket,
  1446. client_address: tuple[str, int],
  1447. server: socketserver.TCPServer,
  1448. ) -> TCPGitRequestHandler:
  1449. """Create request handler instance.
  1450. Args:
  1451. request: Request socket
  1452. client_address: Client address tuple
  1453. server: Server instance
  1454. Returns:
  1455. TCPGitRequestHandler instance
  1456. """
  1457. return TCPGitRequestHandler(self.handlers, request, client_address, server)
  1458. def __init__(
  1459. self,
  1460. backend: Backend,
  1461. listen_addr: str,
  1462. port: int = TCP_GIT_PORT,
  1463. handlers: dict[bytes, type[Handler]] | None = None,
  1464. ) -> None:
  1465. """Initialize TCP git server.
  1466. Args:
  1467. backend: Backend instance
  1468. listen_addr: Address to listen on
  1469. port: Port to listen on (default: TCP_GIT_PORT)
  1470. handlers: Optional dictionary of custom handlers
  1471. """
  1472. self.handlers = dict(DEFAULT_HANDLERS)
  1473. if handlers is not None:
  1474. self.handlers.update(handlers)
  1475. self.backend = backend
  1476. logger.info("Listening for TCP connections on %s:%d", listen_addr, port)
  1477. socketserver.TCPServer.__init__(self, (listen_addr, port), self._make_handler)
  1478. def verify_request(
  1479. self,
  1480. request: socket.socket | tuple[bytes, socket.socket],
  1481. client_address: tuple[str, int] | socket.socket,
  1482. ) -> bool:
  1483. """Verify incoming request.
  1484. Args:
  1485. request: Request socket
  1486. client_address: Client address tuple
  1487. Returns:
  1488. True to accept request
  1489. """
  1490. logger.info("Handling request from %s", client_address)
  1491. return True
  1492. def handle_error(
  1493. self,
  1494. request: socket.socket | tuple[bytes, socket.socket],
  1495. client_address: tuple[str, int] | socket.socket,
  1496. ) -> None:
  1497. """Handle request processing errors.
  1498. Args:
  1499. request: Request socket
  1500. client_address: Client address tuple
  1501. """
  1502. logger.exception(
  1503. "Exception happened during processing of request from %s",
  1504. client_address,
  1505. )
  1506. def main(argv: list[str] = sys.argv) -> None:
  1507. """Entry point for starting a TCP git server."""
  1508. import optparse
  1509. parser = optparse.OptionParser()
  1510. parser.add_option(
  1511. "-l",
  1512. "--listen_address",
  1513. dest="listen_address",
  1514. default="localhost",
  1515. help="Binding IP address.",
  1516. )
  1517. parser.add_option(
  1518. "-p",
  1519. "--port",
  1520. dest="port",
  1521. type=int,
  1522. default=TCP_GIT_PORT,
  1523. help="Binding TCP port.",
  1524. )
  1525. options, args = parser.parse_args(argv)
  1526. log_utils.default_logging_config()
  1527. if len(args) > 1:
  1528. gitdir = args[1]
  1529. else:
  1530. gitdir = "."
  1531. # TODO(jelmer): Support git-daemon-export-ok and --export-all.
  1532. backend = FileSystemBackend(gitdir)
  1533. server = TCPGitServer(backend, options.listen_address, options.port)
  1534. server.serve_forever()
  1535. def serve_command(
  1536. handler_cls: type[Handler],
  1537. argv: list[str] = sys.argv,
  1538. backend: Backend | None = None,
  1539. inf: IO[bytes] | None = None,
  1540. outf: IO[bytes] | None = None,
  1541. ) -> int:
  1542. """Serve a single command.
  1543. This is mostly useful for the implementation of commands used by e.g.
  1544. git+ssh.
  1545. Args:
  1546. handler_cls: `Handler` class to use for the request
  1547. argv: execv-style command-line arguments. Defaults to sys.argv.
  1548. backend: `Backend` to use
  1549. inf: File-like object to read from, defaults to standard input.
  1550. outf: File-like object to write to, defaults to standard output.
  1551. Returns: Exit code for use with sys.exit. 0 on success, 1 on failure.
  1552. """
  1553. if backend is None:
  1554. backend = FileSystemBackend()
  1555. if inf is None:
  1556. inf = sys.stdin.buffer
  1557. if outf is None:
  1558. outf = sys.stdout.buffer
  1559. def send_fn(data: bytes) -> None:
  1560. outf.write(data)
  1561. outf.flush()
  1562. proto = Protocol(inf.read, send_fn)
  1563. handler = handler_cls(backend, argv[1:], proto) # type: ignore[arg-type]
  1564. # FIXME: Catch exceptions and write a single-line summary to outf.
  1565. handler.handle()
  1566. return 0
  1567. def generate_info_refs(repo: "BaseRepo") -> Iterator[bytes]:
  1568. """Generate an info refs file."""
  1569. refs = repo.get_refs()
  1570. return write_info_refs(refs, repo.object_store)
  1571. def generate_objects_info_packs(repo: "BaseRepo") -> Iterator[bytes]:
  1572. """Generate an index for for packs."""
  1573. for pack in repo.object_store.packs:
  1574. yield (b"P " + os.fsencode(pack.data.filename) + b"\n")
  1575. def update_server_info(repo: "BaseRepo") -> None:
  1576. """Generate server info for dumb file access.
  1577. This generates info/refs and objects/info/packs,
  1578. similar to "git update-server-info".
  1579. """
  1580. repo._put_named_file(
  1581. os.path.join("info", "refs"), b"".join(generate_info_refs(repo))
  1582. )
  1583. repo._put_named_file(
  1584. os.path.join("objects", "info", "packs"),
  1585. b"".join(generate_objects_info_packs(repo)),
  1586. )
# Start the TCP git server when this module is executed as a script.
if __name__ == "__main__":
    main()