web.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930
  1. # web.py -- WSGI smart-http server
  2. # Copyright (C) 2010 Google, Inc.
  3. # Copyright (C) 2012 Jelmer Vernooij <jelmer@jelmer.uk>
  4. #
  5. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  6. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  7. # General Public License as published by the Free Software Foundation; version 2.0
  8. # or (at your option) any later version. You can redistribute it and/or
  9. # modify it under the terms of either of these two licenses.
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. # You should have received a copy of the licenses; if not, see
  18. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  19. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  20. # License, Version 2.0.
  21. #
  22. """HTTP server for dulwich that implements the git smart HTTP protocol."""
# Public names exported by ``from <this module> import *``.  ``make_server`` is
# re-exported from ``wsgiref.simple_server``; ``generate_info_refs`` and
# ``generate_objects_info_packs`` are re-exported from ``.server``.
__all__ = [
    "HTTP_FORBIDDEN",
    "HTTP_NOT_FOUND",
    "HTTP_OK",
    "GunzipFilter",
    "HTTPGitApplication",
    "HTTPGitRequest",
    "LimitedInputFilter",
    "WSGIRequestHandlerLogger",
    "WSGIServerLogger",
    "date_time_string",
    "generate_info_refs",
    "generate_objects_info_packs",
    "get_info_packs",
    "get_info_refs",
    "get_loose_object",
    "get_pack_file",
    "get_text_file",
    "handle_service_request",
    "main",
    "make_server",
    "make_wsgi_chain",
    "send_file",
]
  47. import os
  48. import re
  49. import sys
  50. import time
  51. from collections.abc import Callable, Iterable, Iterator, Sequence
  52. from io import BytesIO
  53. from types import TracebackType
  54. from typing import (
  55. TYPE_CHECKING,
  56. Any,
  57. BinaryIO,
  58. ClassVar,
  59. cast,
  60. )
  61. from urllib.parse import parse_qs
  62. from wsgiref.simple_server import (
  63. ServerHandler,
  64. WSGIRequestHandler,
  65. WSGIServer,
  66. make_server,
  67. )
# wsgiref.types was added in Python 3.11
if sys.version_info >= (3, 11):
    from wsgiref.types import StartResponse, WSGIApplication, WSGIEnvironment
else:
    # Fallback type definitions for Python < 3.11
    if TYPE_CHECKING:
        # For type checking, use the _typeshed types if available
        try:
            from _typeshed.wsgi import StartResponse, WSGIApplication, WSGIEnvironment
        except ImportError:
            # Define our own protocol types for type checking
            from typing import Protocol as TypingProtocol

            class StartResponse(TypingProtocol):  # type: ignore[no-redef]
                """WSGI start_response callable protocol."""

                def __call__(
                    self,
                    status: str,
                    response_headers: list[tuple[str, str]],
                    exc_info: tuple[type, BaseException, TracebackType] | None = None,
                ) -> Callable[[bytes], None]:
                    """Start the response with status and headers.

                    Args:
                        status: HTTP status line, e.g. ``"200 OK"``
                        response_headers: List of (name, value) header tuples
                        exc_info: Optional exception triple

                    Returns:
                        A callable that accepts a chunk of response bytes
                    """
                    ...

            # Structural stand-ins for the two remaining wsgiref.types names.
            WSGIEnvironment = dict[str, Any]  # type: ignore[misc]
            WSGIApplication = Callable[  # type: ignore[misc]
                [WSGIEnvironment, StartResponse], Iterable[bytes]
            ]
    else:
        # At runtime, just use type aliases since these are only for type hints
        StartResponse = Any
        WSGIEnvironment = dict[str, Any]
        WSGIApplication = Callable
  99. from dulwich import log_utils
  100. from .errors import NotGitRepository
  101. from .objects import ObjectID
  102. from .protocol import ReceivableProtocol
  103. from .repo import BaseRepo, Repo
  104. from .server import (
  105. DEFAULT_HANDLERS,
  106. Backend,
  107. DictBackend,
  108. Handler,
  109. generate_info_refs,
  110. generate_objects_info_packs,
  111. )
# Everything in this branch exists only for static type checkers; it is not
# executed at runtime, which is why annotations elsewhere in the file refer to
# ``HandlerConstructor`` through string (forward-reference) annotations.
if TYPE_CHECKING:
    from typing import Protocol as TypingProtocol

    from .protocol import Protocol

    class HandlerConstructor(TypingProtocol):
        """Protocol for handler constructors."""

        def __call__(
            self,
            backend: Backend,
            args: list[bytes],
            proto: Protocol,
            stateless_rpc: bool = False,
            advertise_refs: bool = False,
        ) -> Handler:
            """Create a handler instance.

            Args:
                backend: The backend to use for the handler
                args: Arguments for the handler
                proto: Protocol object for communication
                stateless_rpc: Whether to use stateless RPC mode
                advertise_refs: Whether to advertise references

            Returns:
                A Handler instance
            """
            ...
# Module-level logger shared by every handler and server class in this file.
logger = log_utils.getLogger(__name__)

# HTTP status lines handed to the WSGI ``start_response`` callable.
HTTP_OK = "200 OK"
HTTP_NOT_FOUND = "404 Not Found"
HTTP_FORBIDDEN = "403 Forbidden"
HTTP_ERROR = "500 Internal Server Error"

# Response headers installed by ``HTTPGitRequest.nocache()`` so that the
# response is never cached by the client.
NO_CACHE_HEADERS = [
    ("Expires", "Fri, 01 Jan 1980 00:00:00 GMT"),
    ("Pragma", "no-cache"),
    ("Cache-Control", "no-cache, max-age=0, must-revalidate"),
]
  147. def cache_forever_headers(now: float | None = None) -> list[tuple[str, str]]:
  148. """Generate headers for caching forever.
  149. Args:
  150. now: Timestamp to use as base (defaults to current time)
  151. Returns:
  152. List of (header_name, header_value) tuples for caching forever
  153. """
  154. if now is None:
  155. now = time.time()
  156. return [
  157. ("Date", date_time_string(now)),
  158. ("Expires", date_time_string(now + 31536000)),
  159. ("Cache-Control", "public, max-age=31536000"),
  160. ]
  161. def date_time_string(timestamp: float | None = None) -> str:
  162. """Convert a timestamp to an HTTP date string.
  163. Args:
  164. timestamp: Unix timestamp to convert (defaults to current time)
  165. Returns:
  166. HTTP date string in RFC 1123 format
  167. """
  168. # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
  169. # Python 2.6.5 standard library, following modifications:
  170. # - Made a global rather than an instance method.
  171. # - weekdayname and monthname are renamed and locals rather than class
  172. # variables.
  173. # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
  174. weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
  175. months = [
  176. None,
  177. "Jan",
  178. "Feb",
  179. "Mar",
  180. "Apr",
  181. "May",
  182. "Jun",
  183. "Jul",
  184. "Aug",
  185. "Sep",
  186. "Oct",
  187. "Nov",
  188. "Dec",
  189. ]
  190. if timestamp is None:
  191. timestamp = time.time()
  192. year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
  193. return "%s, %02d %3s %4d %02d:%02d:%02d GMD" % ( # noqa: UP031
  194. weekdays[wd],
  195. day,
  196. months[month],
  197. year,
  198. hh,
  199. mm,
  200. ss,
  201. )
  202. def url_prefix(mat: re.Match[str]) -> str:
  203. """Extract the URL prefix from a regex match.
  204. Args:
  205. mat: A regex match object.
  206. Returns: The URL prefix, defined as the text before the match in the
  207. original string. Normalized to start with one leading slash and end
  208. with zero.
  209. """
  210. return "/" + mat.string[: mat.start()].strip("/")
  211. def get_repo(backend: "Backend", mat: re.Match[str]) -> BaseRepo:
  212. """Get a Repo instance for the given backend and URL regex match."""
  213. return cast(BaseRepo, backend.open_repository(url_prefix(mat)))
  214. def send_file(
  215. req: "HTTPGitRequest", f: BinaryIO | None, content_type: str
  216. ) -> Iterator[bytes]:
  217. """Send a file-like object to the request output.
  218. Args:
  219. req: The HTTPGitRequest object to send output to.
  220. f: An open file-like object to send; will be closed.
  221. content_type: The MIME type for the file.
  222. Returns: Iterator over the contents of the file, as chunks.
  223. """
  224. if f is None:
  225. yield req.not_found("File not found")
  226. return
  227. try:
  228. req.respond(HTTP_OK, content_type)
  229. while True:
  230. data = f.read(10240)
  231. if not data:
  232. break
  233. yield data
  234. except OSError:
  235. yield req.error("Error reading file")
  236. finally:
  237. f.close()
  238. def _url_to_path(url: str) -> str:
  239. return url.replace("/", os.path.sep)
  240. def get_text_file(
  241. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  242. ) -> Iterator[bytes]:
  243. """Send a plain text file from the repository.
  244. Args:
  245. req: The HTTP request object
  246. backend: The git backend
  247. mat: The regex match for the requested path
  248. Returns:
  249. Iterator yielding file contents as bytes
  250. """
  251. req.nocache()
  252. path = _url_to_path(mat.group())
  253. logger.info("Sending plain text file %s", path)
  254. return send_file(req, get_repo(backend, mat).get_named_file(path), "text/plain")
  255. def get_loose_object(
  256. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  257. ) -> Iterator[bytes]:
  258. """Send a loose git object.
  259. Args:
  260. req: The HTTP request object
  261. backend: The git backend
  262. mat: The regex match containing object path segments
  263. Returns:
  264. Iterator yielding object contents as bytes
  265. """
  266. sha = cast(ObjectID, (mat.group(1) + mat.group(2)).encode("ascii"))
  267. logger.info("Sending loose object %s", sha)
  268. object_store = get_repo(backend, mat).object_store
  269. if not object_store.contains_loose(sha):
  270. yield req.not_found("Object not found")
  271. return
  272. try:
  273. data = object_store[sha].as_legacy_object()
  274. except OSError:
  275. yield req.error("Error reading object")
  276. return
  277. req.cache_forever()
  278. req.respond(HTTP_OK, "application/x-git-loose-object")
  279. yield data
  280. def get_pack_file(
  281. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  282. ) -> Iterator[bytes]:
  283. """Send a git pack file.
  284. Args:
  285. req: The HTTP request object
  286. backend: The git backend
  287. mat: The regex match for the requested pack file
  288. Returns:
  289. Iterator yielding pack file contents as bytes
  290. """
  291. req.cache_forever()
  292. path = _url_to_path(mat.group())
  293. logger.info("Sending pack file %s", path)
  294. return send_file(
  295. req,
  296. get_repo(backend, mat).get_named_file(path),
  297. "application/x-git-packed-objects",
  298. )
  299. def get_idx_file(
  300. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  301. ) -> Iterator[bytes]:
  302. """Send a git pack index file.
  303. Args:
  304. req: The HTTP request object
  305. backend: The git backend
  306. mat: The regex match for the requested index file
  307. Returns:
  308. Iterator yielding index file contents as bytes
  309. """
  310. req.cache_forever()
  311. path = _url_to_path(mat.group())
  312. logger.info("Sending pack file %s", path)
  313. return send_file(
  314. req,
  315. get_repo(backend, mat).get_named_file(path),
  316. "application/x-git-packed-objects-toc",
  317. )
  318. def get_info_refs(
  319. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  320. ) -> Iterator[bytes]:
  321. """Send git info/refs for discovery.
  322. Args:
  323. req: The HTTP request object
  324. backend: The git backend
  325. mat: The regex match for the info/refs request
  326. Returns:
  327. Iterator yielding refs advertisement or info/refs contents
  328. """
  329. params = parse_qs(req.environ["QUERY_STRING"])
  330. service = params.get("service", [None])[0]
  331. try:
  332. repo = get_repo(backend, mat)
  333. except NotGitRepository as e:
  334. yield req.not_found(str(e))
  335. return
  336. if service and not req.dumb:
  337. if req.handlers is None:
  338. yield req.forbidden("No handlers configured")
  339. return
  340. handler_cls = req.handlers.get(service.encode("ascii"), None)
  341. if handler_cls is None:
  342. yield req.forbidden("Unsupported service")
  343. return
  344. req.nocache()
  345. write = req.respond(HTTP_OK, f"application/x-{service}-advertisement")
  346. def write_fn(data: bytes) -> int | None:
  347. result = write(data)
  348. return len(data) if result is not None else None
  349. proto = ReceivableProtocol(BytesIO().read, write_fn)
  350. from typing import Any, cast
  351. handler = handler_cls(
  352. backend,
  353. cast(Any, [url_prefix(mat)]), # handler_cls could expect bytes or str
  354. proto,
  355. stateless_rpc=True,
  356. advertise_refs=True,
  357. )
  358. assert handler is not None
  359. handler.proto.write_pkt_line(b"# service=" + service.encode("ascii") + b"\n")
  360. handler.proto.write_pkt_line(None)
  361. handler.handle()
  362. else:
  363. # non-smart fallback
  364. # TODO: select_getanyfile() (see http-backend.c)
  365. req.nocache()
  366. req.respond(HTTP_OK, "text/plain")
  367. logger.info("Emulating dumb info/refs")
  368. yield from generate_info_refs(repo)
  369. def get_info_packs(
  370. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  371. ) -> Iterator[bytes]:
  372. """Send git info/packs file listing available packs.
  373. Args:
  374. req: The HTTP request object
  375. backend: The git backend
  376. mat: The regex match for the info/packs request
  377. Returns:
  378. Iterator yielding pack listing as bytes
  379. """
  380. req.nocache()
  381. req.respond(HTTP_OK, "text/plain")
  382. logger.info("Emulating dumb info/packs")
  383. return generate_objects_info_packs(get_repo(backend, mat))
  384. def _chunk_iter(f: BinaryIO) -> Iterator[bytes]:
  385. while True:
  386. line = f.readline()
  387. length = int(line.rstrip(), 16)
  388. chunk = f.read(length + 2)
  389. if length == 0:
  390. break
  391. yield chunk[:-2]
  392. class ChunkReader:
  393. """Reader for chunked transfer encoding streams."""
  394. def __init__(self, f: BinaryIO) -> None:
  395. """Initialize ChunkReader.
  396. Args:
  397. f: Binary file-like object to read from
  398. """
  399. self._iter = _chunk_iter(f)
  400. self._buffer: list[bytes] = []
  401. def read(self, n: int) -> bytes:
  402. """Read n bytes from the chunked stream.
  403. Args:
  404. n: Number of bytes to read
  405. Returns:
  406. Up to n bytes of data
  407. """
  408. while sum(map(len, self._buffer)) < n:
  409. try:
  410. self._buffer.append(next(self._iter))
  411. except StopIteration:
  412. break
  413. f = b"".join(self._buffer)
  414. ret = f[:n]
  415. self._buffer = [f[n:]]
  416. return ret
  417. class _LengthLimitedFile:
  418. """Wrapper class to limit the length of reads from a file-like object.
  419. This is used to ensure EOF is read from the wsgi.input object once
  420. Content-Length bytes are read. This behavior is required by the WSGI spec
  421. but not implemented in wsgiref as of 2.5.
  422. """
  423. def __init__(self, input: BinaryIO, max_bytes: int) -> None:
  424. self._input = input
  425. self._bytes_avail = max_bytes
  426. def read(self, size: int = -1) -> bytes:
  427. """Read up to size bytes from the limited input.
  428. Args:
  429. size: Maximum number of bytes to read, or -1 for all available
  430. Returns:
  431. Up to size bytes of data
  432. """
  433. if self._bytes_avail <= 0:
  434. return b""
  435. if size == -1 or size > self._bytes_avail:
  436. size = self._bytes_avail
  437. self._bytes_avail -= size
  438. return self._input.read(size)
  439. # TODO: support more methods as necessary
  440. def handle_service_request(
  441. req: "HTTPGitRequest", backend: "Backend", mat: re.Match[str]
  442. ) -> Iterator[bytes]:
  443. """Handle a git service request (upload-pack or receive-pack).
  444. Args:
  445. req: The HTTP request object
  446. backend: The git backend
  447. mat: The regex match for the service request
  448. Returns:
  449. Iterator yielding service response as bytes
  450. """
  451. service = mat.group().lstrip("/")
  452. logger.info("Handling service request for %s", service)
  453. if req.handlers is None:
  454. yield req.forbidden("No handlers configured")
  455. return
  456. handler_cls = req.handlers.get(service.encode("ascii"), None)
  457. if handler_cls is None:
  458. yield req.forbidden("Unsupported service")
  459. return
  460. try:
  461. get_repo(backend, mat)
  462. except NotGitRepository as e:
  463. yield req.not_found(str(e))
  464. return
  465. req.nocache()
  466. write = req.respond(HTTP_OK, f"application/x-{service}-result")
  467. def write_fn(data: bytes) -> int | None:
  468. result = write(data)
  469. return len(data) if result is not None else None
  470. if req.environ.get("HTTP_TRANSFER_ENCODING") == "chunked":
  471. read = ChunkReader(req.environ["wsgi.input"]).read
  472. else:
  473. read = req.environ["wsgi.input"].read
  474. proto = ReceivableProtocol(read, write_fn)
  475. # TODO(jelmer): Find a way to pass in repo, rather than having handler_cls
  476. # reopen.
  477. handler = handler_cls(
  478. backend, [url_prefix(mat).encode("utf-8")], proto, stateless_rpc=True
  479. )
  480. assert handler is not None
  481. handler.handle()
  482. class HTTPGitRequest:
  483. """Class encapsulating the state of a single git HTTP request.
  484. Attributes:
  485. environ: the WSGI environment for the request.
  486. """
  487. def __init__(
  488. self,
  489. environ: WSGIEnvironment,
  490. start_response: StartResponse,
  491. dumb: bool = False,
  492. handlers: dict[bytes, "HandlerConstructor | Callable[..., Any]"] | None = None,
  493. ) -> None:
  494. """Initialize HTTPGitRequest.
  495. Args:
  496. environ: WSGI environment dictionary
  497. start_response: WSGI start_response callable
  498. dumb: Whether to use dumb HTTP protocol
  499. handlers: Optional handler overrides
  500. """
  501. self.environ = environ
  502. self.dumb = dumb
  503. self.handlers = handlers
  504. self._start_response = start_response
  505. self._cache_headers: list[tuple[str, str]] = []
  506. self._headers: list[tuple[str, str]] = []
  507. def add_header(self, name: str, value: str) -> None:
  508. """Add a header to the response."""
  509. self._headers.append((name, value))
  510. def respond(
  511. self,
  512. status: str = HTTP_OK,
  513. content_type: str | None = None,
  514. headers: Sequence[tuple[str, str]] | None = None,
  515. ) -> Callable[[bytes], object]:
  516. """Begin a response with the given status and other headers."""
  517. if headers:
  518. self._headers.extend(headers)
  519. if content_type:
  520. self._headers.append(("Content-Type", content_type))
  521. self._headers.extend(self._cache_headers)
  522. return self._start_response(status, self._headers)
  523. def not_found(self, message: str) -> bytes:
  524. """Begin a HTTP 404 response and return the text of a message."""
  525. self._cache_headers = []
  526. logger.info("Not found: %s", message)
  527. self.respond(HTTP_NOT_FOUND, "text/plain")
  528. return message.encode("ascii")
  529. def forbidden(self, message: str) -> bytes:
  530. """Begin a HTTP 403 response and return the text of a message."""
  531. self._cache_headers = []
  532. logger.info("Forbidden: %s", message)
  533. self.respond(HTTP_FORBIDDEN, "text/plain")
  534. return message.encode("ascii")
  535. def error(self, message: str) -> bytes:
  536. """Begin a HTTP 500 response and return the text of a message."""
  537. self._cache_headers = []
  538. logger.error("Error: %s", message)
  539. self.respond(HTTP_ERROR, "text/plain")
  540. return message.encode("ascii")
  541. def nocache(self) -> None:
  542. """Set the response to never be cached by the client."""
  543. self._cache_headers = NO_CACHE_HEADERS
  544. def cache_forever(self) -> None:
  545. """Set the response to be cached forever by the client."""
  546. self._cache_headers = cache_forever_headers()
  547. class HTTPGitApplication:
  548. """Class encapsulating the state of a git WSGI application.
  549. Attributes:
  550. backend: the Backend object backing this application
  551. """
  552. services: ClassVar[
  553. dict[
  554. tuple[str, re.Pattern[str]],
  555. Callable[[HTTPGitRequest, Backend, re.Match[str]], Iterator[bytes]],
  556. ]
  557. ] = {
  558. ("GET", re.compile("/HEAD$")): get_text_file,
  559. ("GET", re.compile("/info/refs$")): get_info_refs,
  560. ("GET", re.compile("/objects/info/alternates$")): get_text_file,
  561. ("GET", re.compile("/objects/info/http-alternates$")): get_text_file,
  562. ("GET", re.compile("/objects/info/packs$")): get_info_packs,
  563. (
  564. "GET",
  565. re.compile("/objects/([0-9a-f]{2})/([0-9a-f]{38})$"),
  566. ): get_loose_object,
  567. (
  568. "GET",
  569. re.compile("/objects/pack/pack-([0-9a-f]{40})\\.pack$"),
  570. ): get_pack_file,
  571. (
  572. "GET",
  573. re.compile("/objects/pack/pack-([0-9a-f]{40})\\.idx$"),
  574. ): get_idx_file,
  575. ("POST", re.compile("/git-upload-pack$")): handle_service_request,
  576. ("POST", re.compile("/git-receive-pack$")): handle_service_request,
  577. }
  578. def __init__(
  579. self,
  580. backend: Backend,
  581. dumb: bool = False,
  582. handlers: dict[bytes, "HandlerConstructor | Callable[..., Any]"] | None = None,
  583. fallback_app: WSGIApplication | None = None,
  584. ) -> None:
  585. """Initialize HTTPGitApplication.
  586. Args:
  587. backend: Backend object for git operations
  588. dumb: Whether to use dumb HTTP protocol
  589. handlers: Optional handler overrides
  590. fallback_app: Optional fallback WSGI application
  591. """
  592. self.backend = backend
  593. self.dumb = dumb
  594. self.handlers: dict[bytes, HandlerConstructor | Callable[..., Any]] = dict(
  595. DEFAULT_HANDLERS
  596. )
  597. self.fallback_app = fallback_app
  598. if handlers is not None:
  599. self.handlers.update(handlers)
  600. def __call__(
  601. self,
  602. environ: WSGIEnvironment,
  603. start_response: StartResponse,
  604. ) -> Iterable[bytes]:
  605. """Handle WSGI request."""
  606. path = environ["PATH_INFO"]
  607. method = environ["REQUEST_METHOD"]
  608. req = HTTPGitRequest(
  609. environ, start_response, dumb=self.dumb, handlers=self.handlers
  610. )
  611. # environ['QUERY_STRING'] has qs args
  612. handler = None
  613. mat = None
  614. for smethod, spath in self.services.keys():
  615. if smethod != method:
  616. continue
  617. mat = spath.search(path)
  618. if mat:
  619. handler = self.services[smethod, spath]
  620. break
  621. if handler is None or mat is None:
  622. if self.fallback_app is not None:
  623. return self.fallback_app(environ, start_response)
  624. else:
  625. return [req.not_found("Sorry, that method is not supported")]
  626. return handler(req, self.backend, mat)
  627. class GunzipFilter:
  628. """WSGI middleware that unzips gzip-encoded requests before passing on to the underlying application."""
  629. def __init__(self, application: WSGIApplication) -> None:
  630. """Initialize GunzipFilter with WSGI application."""
  631. self.app = application
  632. def __call__(
  633. self,
  634. environ: WSGIEnvironment,
  635. start_response: StartResponse,
  636. ) -> Iterable[bytes]:
  637. """Handle WSGI request with gzip decompression."""
  638. import gzip
  639. if environ.get("HTTP_CONTENT_ENCODING", "") == "gzip":
  640. environ["wsgi.input"] = gzip.GzipFile(
  641. filename=None, fileobj=environ["wsgi.input"], mode="rb"
  642. )
  643. del environ["HTTP_CONTENT_ENCODING"]
  644. environ.pop("CONTENT_LENGTH", None)
  645. return self.app(environ, start_response)
  646. class LimitedInputFilter:
  647. """WSGI middleware that limits the input length of a request to that specified in Content-Length."""
  648. def __init__(self, application: WSGIApplication) -> None:
  649. """Initialize LimitedInputFilter with WSGI application."""
  650. self.app = application
  651. def __call__(
  652. self,
  653. environ: WSGIEnvironment,
  654. start_response: StartResponse,
  655. ) -> Iterable[bytes]:
  656. """Handle WSGI request with input length limiting."""
  657. # This is not necessary if this app is run from a conforming WSGI
  658. # server. Unfortunately, there's no way to tell that at this point.
  659. # TODO: git may used HTTP/1.1 chunked encoding instead of specifying
  660. # content-length
  661. content_length = environ.get("CONTENT_LENGTH", "")
  662. if content_length:
  663. environ["wsgi.input"] = _LengthLimitedFile(
  664. environ["wsgi.input"], int(content_length)
  665. )
  666. return self.app(environ, start_response)
  667. def make_wsgi_chain(
  668. backend: Backend,
  669. dumb: bool = False,
  670. handlers: dict[bytes, Callable[..., Any]] | None = None,
  671. fallback_app: WSGIApplication | None = None,
  672. ) -> WSGIApplication:
  673. """Factory function to create an instance of HTTPGitApplication.
  674. Correctly wrapped with needed middleware.
  675. """
  676. app = HTTPGitApplication(
  677. backend, dumb=dumb, handlers=handlers, fallback_app=fallback_app
  678. )
  679. wrapped_app = LimitedInputFilter(GunzipFilter(app))
  680. return wrapped_app
  681. class ServerHandlerLogger(ServerHandler):
  682. """ServerHandler that uses dulwich's logger for logging exceptions."""
  683. def log_exception(
  684. self,
  685. exc_info: tuple[type[BaseException], BaseException, TracebackType]
  686. | tuple[None, None, None]
  687. | None,
  688. ) -> None:
  689. """Log exception using dulwich logger."""
  690. logger.exception(
  691. "Exception happened during processing of request",
  692. exc_info=exc_info,
  693. )
  694. def log_message(self, format: str, *args: object) -> None:
  695. """Log message using dulwich logger."""
  696. logger.info(format, *args)
  697. def log_error(self, *args: object) -> None:
  698. """Log error using dulwich logger."""
  699. logger.error(*args)
  700. class WSGIRequestHandlerLogger(WSGIRequestHandler):
  701. """WSGIRequestHandler that uses dulwich's logger for logging exceptions."""
  702. def log_exception(
  703. self,
  704. exc_info: tuple[type[BaseException], BaseException, TracebackType]
  705. | tuple[None, None, None]
  706. | None,
  707. ) -> None:
  708. """Log exception using dulwich logger."""
  709. logger.exception(
  710. "Exception happened during processing of request",
  711. exc_info=exc_info,
  712. )
  713. def log_message(self, format: str, *args: object) -> None:
  714. """Log message using dulwich logger."""
  715. logger.info(format, *args)
  716. def log_error(self, *args: object) -> None:
  717. """Log error using dulwich logger."""
  718. logger.error(*args)
  719. def handle(self) -> None:
  720. """Handle a single HTTP request."""
  721. self.raw_requestline = self.rfile.readline()
  722. if not self.parse_request(): # An error code has been sent, just exit
  723. return
  724. handler = ServerHandlerLogger(
  725. self.rfile,
  726. self.wfile, # type: ignore
  727. self.get_stderr(),
  728. self.get_environ(),
  729. )
  730. handler.request_handler = self # type: ignore # backpointer for logging
  731. handler.run(self.server.get_app()) # type: ignore
  732. class WSGIServerLogger(WSGIServer):
  733. """WSGIServer that uses dulwich's logger for error handling."""
  734. def handle_error(self, request: object, client_address: tuple[str, int]) -> None:
  735. """Handle an error."""
  736. logger.exception(
  737. f"Exception happened during processing of request from {client_address!s}"
  738. )
  739. def main(argv: list[str] = sys.argv) -> None:
  740. """Entry point for starting an HTTP git server."""
  741. import optparse
  742. parser = optparse.OptionParser()
  743. parser.add_option(
  744. "-l",
  745. "--listen_address",
  746. dest="listen_address",
  747. default="localhost",
  748. help="Binding IP address.",
  749. )
  750. parser.add_option(
  751. "-p",
  752. "--port",
  753. dest="port",
  754. type=int,
  755. default=8000,
  756. help="Port to listen on.",
  757. )
  758. options, args = parser.parse_args(argv)
  759. if len(args) > 1:
  760. gitdir = args[1]
  761. else:
  762. gitdir = os.getcwd()
  763. log_utils.default_logging_config()
  764. from typing import cast
  765. from dulwich.server import BackendRepo
  766. backend = DictBackend({"/": cast(BackendRepo, Repo(gitdir))})
  767. app = make_wsgi_chain(backend)
  768. server = make_server(
  769. options.listen_address,
  770. options.port,
  771. app,
  772. handler_class=WSGIRequestHandlerLogger,
  773. server_class=WSGIServerLogger,
  774. )
  775. logger.info(
  776. "Listening for HTTP connections on %s:%d",
  777. options.listen_address,
  778. options.port,
  779. )
  780. server.serve_forever()
# Start the server when this file is executed as a script.
if __name__ == "__main__":
    main()