test_web.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. # test_web.py -- Tests for the git HTTP server
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as published by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Tests for the Git HTTP server."""
  21. from io import BytesIO
  22. import gzip
  23. import re
  24. import os
  25. from typing import Type
  26. from dulwich.object_store import (
  27. MemoryObjectStore,
  28. )
  29. from dulwich.objects import (
  30. Blob,
  31. )
  32. from dulwich.repo import (
  33. BaseRepo,
  34. MemoryRepo,
  35. )
  36. from dulwich.server import (
  37. DictBackend,
  38. )
  39. from dulwich.tests import (
  40. TestCase,
  41. )
  42. from dulwich.web import (
  43. HTTP_OK,
  44. HTTP_NOT_FOUND,
  45. HTTP_FORBIDDEN,
  46. HTTP_ERROR,
  47. GunzipFilter,
  48. send_file,
  49. get_text_file,
  50. get_loose_object,
  51. get_pack_file,
  52. get_idx_file,
  53. get_info_refs,
  54. get_info_packs,
  55. handle_service_request,
  56. _LengthLimitedFile,
  57. HTTPGitRequest,
  58. HTTPGitApplication,
  59. )
  60. from dulwich.tests.utils import (
  61. make_object,
  62. make_tag,
  63. )
  64. class MinimalistWSGIInputStream(object):
  65. """WSGI input stream with no 'seek()' and 'tell()' methods."""
  66. def __init__(self, data):
  67. self.data = data
  68. self.pos = 0
  69. def read(self, howmuch):
  70. start = self.pos
  71. end = self.pos + howmuch
  72. if start >= len(self.data):
  73. return ''
  74. self.pos = end
  75. return self.data[start:end]
  76. class MinimalistWSGIInputStream2(MinimalistWSGIInputStream):
  77. """WSGI input stream with no *working* 'seek()' and 'tell()' methods."""
  78. def seek(self, pos):
  79. raise NotImplementedError
  80. def tell(self):
  81. raise NotImplementedError
  82. class TestHTTPGitRequest(HTTPGitRequest):
  83. """HTTPGitRequest with overridden methods to help test caching."""
  84. def __init__(self, *args, **kwargs):
  85. HTTPGitRequest.__init__(self, *args, **kwargs)
  86. self.cached = None
  87. def nocache(self):
  88. self.cached = False
  89. def cache_forever(self):
  90. self.cached = True
  91. class WebTestCase(TestCase):
  92. """Base TestCase with useful instance vars and utility functions."""
  93. _req_class = TestHTTPGitRequest # type: Type[HTTPGitRequest]
  94. def setUp(self):
  95. super(WebTestCase, self).setUp()
  96. self._environ = {}
  97. self._req = self._req_class(self._environ, self._start_response,
  98. handlers=self._handlers())
  99. self._status = None
  100. self._headers = []
  101. self._output = BytesIO()
  102. def _start_response(self, status, headers):
  103. self._status = status
  104. self._headers = list(headers)
  105. return self._output.write
  106. def _handlers(self):
  107. return None
  108. def assertContentTypeEquals(self, expected):
  109. self.assertTrue(('Content-Type', expected) in self._headers)
  110. def _test_backend(objects, refs=None, named_files=None):
  111. if not refs:
  112. refs = {}
  113. if not named_files:
  114. named_files = {}
  115. repo = MemoryRepo.init_bare(objects, refs)
  116. for path, contents in named_files.items():
  117. repo._put_named_file(path, contents)
  118. return DictBackend({'/': repo})
  119. class DumbHandlersTestCase(WebTestCase):
  120. def test_send_file_not_found(self):
  121. list(send_file(self._req, None, 'text/plain'))
  122. self.assertEqual(HTTP_NOT_FOUND, self._status)
  123. def test_send_file(self):
  124. f = BytesIO(b'foobar')
  125. output = b''.join(send_file(self._req, f, 'some/thing'))
  126. self.assertEqual(b'foobar', output)
  127. self.assertEqual(HTTP_OK, self._status)
  128. self.assertContentTypeEquals('some/thing')
  129. self.assertTrue(f.closed)
  130. def test_send_file_buffered(self):
  131. bufsize = 10240
  132. xs = b'x' * bufsize
  133. f = BytesIO(2 * xs)
  134. self.assertEqual([xs, xs],
  135. list(send_file(self._req, f, 'some/thing')))
  136. self.assertEqual(HTTP_OK, self._status)
  137. self.assertContentTypeEquals('some/thing')
  138. self.assertTrue(f.closed)
  139. def test_send_file_error(self):
  140. class TestFile(object):
  141. def __init__(self, exc_class):
  142. self.closed = False
  143. self._exc_class = exc_class
  144. def read(self, size=-1):
  145. raise self._exc_class()
  146. def close(self):
  147. self.closed = True
  148. f = TestFile(IOError)
  149. list(send_file(self._req, f, 'some/thing'))
  150. self.assertEqual(HTTP_ERROR, self._status)
  151. self.assertTrue(f.closed)
  152. self.assertFalse(self._req.cached)
  153. # non-IOErrors are reraised
  154. f = TestFile(AttributeError)
  155. self.assertRaises(AttributeError, list,
  156. send_file(self._req, f, 'some/thing'))
  157. self.assertTrue(f.closed)
  158. self.assertFalse(self._req.cached)
  159. def test_get_text_file(self):
  160. backend = _test_backend([], named_files={'description': b'foo'})
  161. mat = re.search('.*', 'description')
  162. output = b''.join(get_text_file(self._req, backend, mat))
  163. self.assertEqual(b'foo', output)
  164. self.assertEqual(HTTP_OK, self._status)
  165. self.assertContentTypeEquals('text/plain')
  166. self.assertFalse(self._req.cached)
  167. def test_get_loose_object(self):
  168. blob = make_object(Blob, data=b'foo')
  169. backend = _test_backend([blob])
  170. mat = re.search('^(..)(.{38})$', blob.id.decode('ascii'))
  171. output = b''.join(get_loose_object(self._req, backend, mat))
  172. self.assertEqual(blob.as_legacy_object(), output)
  173. self.assertEqual(HTTP_OK, self._status)
  174. self.assertContentTypeEquals('application/x-git-loose-object')
  175. self.assertTrue(self._req.cached)
  176. def test_get_loose_object_missing(self):
  177. mat = re.search('^(..)(.{38})$', '1' * 40)
  178. list(get_loose_object(self._req, _test_backend([]), mat))
  179. self.assertEqual(HTTP_NOT_FOUND, self._status)
  180. def test_get_loose_object_error(self):
  181. blob = make_object(Blob, data=b'foo')
  182. backend = _test_backend([blob])
  183. mat = re.search('^(..)(.{38})$', blob.id.decode('ascii'))
  184. def as_legacy_object_error(self):
  185. raise IOError
  186. self.addCleanup(
  187. setattr, Blob, 'as_legacy_object', Blob.as_legacy_object)
  188. Blob.as_legacy_object = as_legacy_object_error
  189. list(get_loose_object(self._req, backend, mat))
  190. self.assertEqual(HTTP_ERROR, self._status)
  191. def test_get_pack_file(self):
  192. pack_name = os.path.join(
  193. 'objects', 'pack', 'pack-%s.pack' % ('1' * 40))
  194. backend = _test_backend([], named_files={pack_name: b'pack contents'})
  195. mat = re.search('.*', pack_name)
  196. output = b''.join(get_pack_file(self._req, backend, mat))
  197. self.assertEqual(b'pack contents', output)
  198. self.assertEqual(HTTP_OK, self._status)
  199. self.assertContentTypeEquals('application/x-git-packed-objects')
  200. self.assertTrue(self._req.cached)
  201. def test_get_idx_file(self):
  202. idx_name = os.path.join('objects', 'pack', 'pack-%s.idx' % ('1' * 40))
  203. backend = _test_backend([], named_files={idx_name: b'idx contents'})
  204. mat = re.search('.*', idx_name)
  205. output = b''.join(get_idx_file(self._req, backend, mat))
  206. self.assertEqual(b'idx contents', output)
  207. self.assertEqual(HTTP_OK, self._status)
  208. self.assertContentTypeEquals('application/x-git-packed-objects-toc')
  209. self.assertTrue(self._req.cached)
  210. def test_get_info_refs(self):
  211. self._environ['QUERY_STRING'] = ''
  212. blob1 = make_object(Blob, data=b'1')
  213. blob2 = make_object(Blob, data=b'2')
  214. blob3 = make_object(Blob, data=b'3')
  215. tag1 = make_tag(blob2, name=b'tag-tag')
  216. objects = [blob1, blob2, blob3, tag1]
  217. refs = {
  218. b'HEAD': b'000',
  219. b'refs/heads/master': blob1.id,
  220. b'refs/tags/tag-tag': tag1.id,
  221. b'refs/tags/blob-tag': blob3.id,
  222. }
  223. backend = _test_backend(objects, refs=refs)
  224. mat = re.search('.*', '//info/refs')
  225. self.assertEqual([blob1.id + b'\trefs/heads/master\n',
  226. blob3.id + b'\trefs/tags/blob-tag\n',
  227. tag1.id + b'\trefs/tags/tag-tag\n',
  228. blob2.id + b'\trefs/tags/tag-tag^{}\n'],
  229. list(get_info_refs(self._req, backend, mat)))
  230. self.assertEqual(HTTP_OK, self._status)
  231. self.assertContentTypeEquals('text/plain')
  232. self.assertFalse(self._req.cached)
  233. def test_get_info_refs_not_found(self):
  234. self._environ['QUERY_STRING'] = ''
  235. objects = []
  236. refs = {}
  237. backend = _test_backend(objects, refs=refs)
  238. mat = re.search('info/refs', '/foo/info/refs')
  239. self.assertEqual(
  240. [b'No git repository was found at /foo'],
  241. list(get_info_refs(self._req, backend, mat)))
  242. self.assertEqual(HTTP_NOT_FOUND, self._status)
  243. self.assertContentTypeEquals('text/plain')
  244. def test_get_info_packs(self):
  245. class TestPackData(object):
  246. def __init__(self, sha):
  247. self.filename = "pack-%s.pack" % sha
  248. class TestPack(object):
  249. def __init__(self, sha):
  250. self.data = TestPackData(sha)
  251. packs = [TestPack(str(i) * 40) for i in range(1, 4)]
  252. class TestObjectStore(MemoryObjectStore):
  253. # property must be overridden, can't be assigned
  254. @property
  255. def packs(self):
  256. return packs
  257. store = TestObjectStore()
  258. repo = BaseRepo(store, None)
  259. backend = DictBackend({'/': repo})
  260. mat = re.search('.*', '//info/packs')
  261. output = b''.join(get_info_packs(self._req, backend, mat))
  262. expected = b''.join(
  263. [(b'P pack-' + s + b'.pack\n')
  264. for s in [b'1' * 40, b'2' * 40, b'3' * 40]])
  265. self.assertEqual(expected, output)
  266. self.assertEqual(HTTP_OK, self._status)
  267. self.assertContentTypeEquals('text/plain')
  268. self.assertFalse(self._req.cached)
  269. class SmartHandlersTestCase(WebTestCase):
  270. class _TestUploadPackHandler(object):
  271. def __init__(self, backend, args, proto, stateless_rpc=None,
  272. advertise_refs=False):
  273. self.args = args
  274. self.proto = proto
  275. self.stateless_rpc = stateless_rpc
  276. self.advertise_refs = advertise_refs
  277. def handle(self):
  278. self.proto.write(b'handled input: ' + self.proto.recv(1024))
  279. def _make_handler(self, *args, **kwargs):
  280. self._handler = self._TestUploadPackHandler(*args, **kwargs)
  281. return self._handler
  282. def _handlers(self):
  283. return {b'git-upload-pack': self._make_handler}
  284. def test_handle_service_request_unknown(self):
  285. mat = re.search('.*', '/git-evil-handler')
  286. content = list(handle_service_request(self._req, 'backend', mat))
  287. self.assertEqual(HTTP_FORBIDDEN, self._status)
  288. self.assertFalse(b'git-evil-handler' in b"".join(content))
  289. self.assertFalse(self._req.cached)
  290. def _run_handle_service_request(self, content_length=None):
  291. self._environ['wsgi.input'] = BytesIO(b'foo')
  292. if content_length is not None:
  293. self._environ['CONTENT_LENGTH'] = content_length
  294. mat = re.search('.*', '/git-upload-pack')
  295. class Backend(object):
  296. def open_repository(self, path):
  297. return None
  298. handler_output = b''.join(
  299. handle_service_request(self._req, Backend(), mat))
  300. write_output = self._output.getvalue()
  301. # Ensure all output was written via the write callback.
  302. self.assertEqual(b'', handler_output)
  303. self.assertEqual(b'handled input: foo', write_output)
  304. self.assertContentTypeEquals('application/x-git-upload-pack-result')
  305. self.assertFalse(self._handler.advertise_refs)
  306. self.assertTrue(self._handler.stateless_rpc)
  307. self.assertFalse(self._req.cached)
  308. def test_handle_service_request(self):
  309. self._run_handle_service_request()
  310. def test_handle_service_request_with_length(self):
  311. self._run_handle_service_request(content_length='3')
  312. def test_handle_service_request_empty_length(self):
  313. self._run_handle_service_request(content_length='')
  314. def test_get_info_refs_unknown(self):
  315. self._environ['QUERY_STRING'] = 'service=git-evil-handler'
  316. class Backend(object):
  317. def open_repository(self, url):
  318. return None
  319. mat = re.search('.*', '/git-evil-pack')
  320. content = list(get_info_refs(self._req, Backend(), mat))
  321. self.assertFalse(b'git-evil-handler' in b"".join(content))
  322. self.assertEqual(HTTP_FORBIDDEN, self._status)
  323. self.assertFalse(self._req.cached)
  324. def test_get_info_refs(self):
  325. self._environ['wsgi.input'] = BytesIO(b'foo')
  326. self._environ['QUERY_STRING'] = 'service=git-upload-pack'
  327. class Backend(object):
  328. def open_repository(self, url):
  329. return None
  330. mat = re.search('.*', '/git-upload-pack')
  331. handler_output = b''.join(get_info_refs(self._req, Backend(), mat))
  332. write_output = self._output.getvalue()
  333. self.assertEqual((b'001e# service=git-upload-pack\n'
  334. b'0000'
  335. # input is ignored by the handler
  336. b'handled input: '), write_output)
  337. # Ensure all output was written via the write callback.
  338. self.assertEqual(b'', handler_output)
  339. self.assertTrue(self._handler.advertise_refs)
  340. self.assertTrue(self._handler.stateless_rpc)
  341. self.assertFalse(self._req.cached)
  342. class LengthLimitedFileTestCase(TestCase):
  343. def test_no_cutoff(self):
  344. f = _LengthLimitedFile(BytesIO(b'foobar'), 1024)
  345. self.assertEqual(b'foobar', f.read())
  346. def test_cutoff(self):
  347. f = _LengthLimitedFile(BytesIO(b'foobar'), 3)
  348. self.assertEqual(b'foo', f.read())
  349. self.assertEqual(b'', f.read())
  350. def test_multiple_reads(self):
  351. f = _LengthLimitedFile(BytesIO(b'foobar'), 3)
  352. self.assertEqual(b'fo', f.read(2))
  353. self.assertEqual(b'o', f.read(2))
  354. self.assertEqual(b'', f.read())
  355. class HTTPGitRequestTestCase(WebTestCase):
  356. # This class tests the contents of the actual cache headers
  357. _req_class = HTTPGitRequest
  358. def test_not_found(self):
  359. self._req.cache_forever() # cache headers should be discarded
  360. message = 'Something not found'
  361. self.assertEqual(message.encode('ascii'), self._req.not_found(message))
  362. self.assertEqual(HTTP_NOT_FOUND, self._status)
  363. self.assertEqual(set([('Content-Type', 'text/plain')]),
  364. set(self._headers))
  365. def test_forbidden(self):
  366. self._req.cache_forever() # cache headers should be discarded
  367. message = 'Something not found'
  368. self.assertEqual(message.encode('ascii'), self._req.forbidden(message))
  369. self.assertEqual(HTTP_FORBIDDEN, self._status)
  370. self.assertEqual(set([('Content-Type', 'text/plain')]),
  371. set(self._headers))
  372. def test_respond_ok(self):
  373. self._req.respond()
  374. self.assertEqual([], self._headers)
  375. self.assertEqual(HTTP_OK, self._status)
  376. def test_respond(self):
  377. self._req.nocache()
  378. self._req.respond(status=402, content_type='some/type',
  379. headers=[('X-Foo', 'foo'), ('X-Bar', 'bar')])
  380. self.assertEqual(set([
  381. ('X-Foo', 'foo'),
  382. ('X-Bar', 'bar'),
  383. ('Content-Type', 'some/type'),
  384. ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
  385. ('Pragma', 'no-cache'),
  386. ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
  387. ]), set(self._headers))
  388. self.assertEqual(402, self._status)
  389. class HTTPGitApplicationTestCase(TestCase):
  390. def setUp(self):
  391. super(HTTPGitApplicationTestCase, self).setUp()
  392. self._app = HTTPGitApplication('backend')
  393. self._environ = {
  394. 'PATH_INFO': '/foo',
  395. 'REQUEST_METHOD': 'GET',
  396. }
  397. def _test_handler(self, req, backend, mat):
  398. # tests interface used by all handlers
  399. self.assertEqual(self._environ, req.environ)
  400. self.assertEqual('backend', backend)
  401. self.assertEqual('/foo', mat.group(0))
  402. return 'output'
  403. def _add_handler(self, app):
  404. req = self._environ['REQUEST_METHOD']
  405. app.services = {
  406. (req, re.compile('/foo$')): self._test_handler,
  407. }
  408. def test_call(self):
  409. self._add_handler(self._app)
  410. self.assertEqual('output', self._app(self._environ, None))
  411. def test_fallback_app(self):
  412. def test_app(environ, start_response):
  413. return 'output'
  414. app = HTTPGitApplication('backend', fallback_app=test_app)
  415. self.assertEqual('output', app(self._environ, None))
  416. class GunzipTestCase(HTTPGitApplicationTestCase):
  417. __doc__ = """TestCase for testing the GunzipFilter, ensuring the wsgi.input
  418. is correctly decompressed and headers are corrected.
  419. """
  420. example_text = __doc__.encode('ascii')
  421. def setUp(self):
  422. super(GunzipTestCase, self).setUp()
  423. self._app = GunzipFilter(self._app)
  424. self._environ['HTTP_CONTENT_ENCODING'] = 'gzip'
  425. self._environ['REQUEST_METHOD'] = 'POST'
  426. def _get_zstream(self, text):
  427. zstream = BytesIO()
  428. zfile = gzip.GzipFile(fileobj=zstream, mode='w')
  429. zfile.write(text)
  430. zfile.close()
  431. zlength = zstream.tell()
  432. zstream.seek(0)
  433. return zstream, zlength
  434. def _test_call(self, orig, zstream, zlength):
  435. self._add_handler(self._app.app)
  436. self.assertLess(zlength, len(orig))
  437. self.assertEqual(self._environ['HTTP_CONTENT_ENCODING'], 'gzip')
  438. self._environ['CONTENT_LENGTH'] = zlength
  439. self._environ['wsgi.input'] = zstream
  440. self._app(self._environ, None)
  441. buf = self._environ['wsgi.input']
  442. self.assertIsNot(buf, zstream)
  443. buf.seek(0)
  444. self.assertEqual(orig, buf.read())
  445. self.assertIs(None, self._environ.get('CONTENT_LENGTH'))
  446. self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ)
  447. def test_call(self):
  448. self._test_call(
  449. self.example_text,
  450. *self._get_zstream(self.example_text)
  451. )
  452. def test_call_no_seek(self):
  453. """
  454. This ensures that the gunzipping code doesn't require any methods on
  455. 'wsgi.input' except for '.read()'. (In particular, it shouldn't
  456. require '.seek()'. See https://github.com/jelmer/dulwich/issues/140.)
  457. """
  458. zstream, zlength = self._get_zstream(self.example_text)
  459. self._test_call(
  460. self.example_text,
  461. MinimalistWSGIInputStream(zstream.read()), zlength)
  462. def test_call_no_working_seek(self):
  463. """
  464. Similar to 'test_call_no_seek', but this time the methods are available
  465. (but defunct). See https://github.com/jonashaag/klaus/issues/154.
  466. """
  467. zstream, zlength = self._get_zstream(self.example_text)
  468. self._test_call(
  469. self.example_text,
  470. MinimalistWSGIInputStream2(zstream.read()), zlength)