test_web.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. # test_web.py -- Tests for the git HTTP server
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; version 2
  7. # or (at your option) any later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Tests for the Git HTTP server."""
  19. from cStringIO import StringIO
  20. import gzip
  21. import re
  22. import os
  23. from dulwich.object_store import (
  24. MemoryObjectStore,
  25. )
  26. from dulwich.objects import (
  27. Blob,
  28. Tag,
  29. )
  30. from dulwich.repo import (
  31. BaseRepo,
  32. MemoryRepo,
  33. )
  34. from dulwich.server import (
  35. DictBackend,
  36. )
  37. from dulwich.tests import (
  38. TestCase,
  39. )
  40. from dulwich.web import (
  41. HTTP_OK,
  42. HTTP_NOT_FOUND,
  43. HTTP_FORBIDDEN,
  44. HTTP_ERROR,
  45. GunzipFilter,
  46. send_file,
  47. get_text_file,
  48. get_loose_object,
  49. get_pack_file,
  50. get_idx_file,
  51. get_info_refs,
  52. get_info_packs,
  53. handle_service_request,
  54. _LengthLimitedFile,
  55. HTTPGitRequest,
  56. HTTPGitApplication,
  57. )
  58. from dulwich.tests.utils import (
  59. make_object,
  60. )
  61. class TestHTTPGitRequest(HTTPGitRequest):
  62. """HTTPGitRequest with overridden methods to help test caching."""
  63. def __init__(self, *args, **kwargs):
  64. HTTPGitRequest.__init__(self, *args, **kwargs)
  65. self.cached = None
  66. def nocache(self):
  67. self.cached = False
  68. def cache_forever(self):
  69. self.cached = True
  70. class WebTestCase(TestCase):
  71. """Base TestCase with useful instance vars and utility functions."""
  72. _req_class = TestHTTPGitRequest
  73. def setUp(self):
  74. super(WebTestCase, self).setUp()
  75. self._environ = {}
  76. self._req = self._req_class(self._environ, self._start_response,
  77. handlers=self._handlers())
  78. self._status = None
  79. self._headers = []
  80. self._output = StringIO()
  81. def _start_response(self, status, headers):
  82. self._status = status
  83. self._headers = list(headers)
  84. return self._output.write
  85. def _handlers(self):
  86. return None
  87. def assertContentTypeEquals(self, expected):
  88. self.assertTrue(('Content-Type', expected) in self._headers)
  89. def _test_backend(objects, refs=None, named_files=None):
  90. if not refs:
  91. refs = {}
  92. if not named_files:
  93. named_files = {}
  94. repo = MemoryRepo.init_bare(objects, refs)
  95. for path, contents in named_files.iteritems():
  96. repo._put_named_file(path, contents)
  97. return DictBackend({'/': repo})
  98. class DumbHandlersTestCase(WebTestCase):
  99. def test_send_file_not_found(self):
  100. list(send_file(self._req, None, 'text/plain'))
  101. self.assertEqual(HTTP_NOT_FOUND, self._status)
  102. def test_send_file(self):
  103. f = StringIO('foobar')
  104. output = ''.join(send_file(self._req, f, 'some/thing'))
  105. self.assertEqual('foobar', output)
  106. self.assertEqual(HTTP_OK, self._status)
  107. self.assertContentTypeEquals('some/thing')
  108. self.assertTrue(f.closed)
  109. def test_send_file_buffered(self):
  110. bufsize = 10240
  111. xs = 'x' * bufsize
  112. f = StringIO(2 * xs)
  113. self.assertEqual([xs, xs],
  114. list(send_file(self._req, f, 'some/thing')))
  115. self.assertEqual(HTTP_OK, self._status)
  116. self.assertContentTypeEquals('some/thing')
  117. self.assertTrue(f.closed)
  118. def test_send_file_error(self):
  119. class TestFile(object):
  120. def __init__(self, exc_class):
  121. self.closed = False
  122. self._exc_class = exc_class
  123. def read(self, size=-1):
  124. raise self._exc_class()
  125. def close(self):
  126. self.closed = True
  127. f = TestFile(IOError)
  128. list(send_file(self._req, f, 'some/thing'))
  129. self.assertEqual(HTTP_ERROR, self._status)
  130. self.assertTrue(f.closed)
  131. self.assertFalse(self._req.cached)
  132. # non-IOErrors are reraised
  133. f = TestFile(AttributeError)
  134. self.assertRaises(AttributeError, list,
  135. send_file(self._req, f, 'some/thing'))
  136. self.assertTrue(f.closed)
  137. self.assertFalse(self._req.cached)
  138. def test_get_text_file(self):
  139. backend = _test_backend([], named_files={'description': 'foo'})
  140. mat = re.search('.*', 'description')
  141. output = ''.join(get_text_file(self._req, backend, mat))
  142. self.assertEqual('foo', output)
  143. self.assertEqual(HTTP_OK, self._status)
  144. self.assertContentTypeEquals('text/plain')
  145. self.assertFalse(self._req.cached)
  146. def test_get_loose_object(self):
  147. blob = make_object(Blob, data='foo')
  148. backend = _test_backend([blob])
  149. mat = re.search('^(..)(.{38})$', blob.id)
  150. output = ''.join(get_loose_object(self._req, backend, mat))
  151. self.assertEqual(blob.as_legacy_object(), output)
  152. self.assertEqual(HTTP_OK, self._status)
  153. self.assertContentTypeEquals('application/x-git-loose-object')
  154. self.assertTrue(self._req.cached)
  155. def test_get_loose_object_missing(self):
  156. mat = re.search('^(..)(.{38})$', '1' * 40)
  157. list(get_loose_object(self._req, _test_backend([]), mat))
  158. self.assertEqual(HTTP_NOT_FOUND, self._status)
  159. def test_get_loose_object_error(self):
  160. blob = make_object(Blob, data='foo')
  161. backend = _test_backend([blob])
  162. mat = re.search('^(..)(.{38})$', blob.id)
  163. def as_legacy_object_error():
  164. raise IOError
  165. blob.as_legacy_object = as_legacy_object_error
  166. list(get_loose_object(self._req, backend, mat))
  167. self.assertEqual(HTTP_ERROR, self._status)
  168. def test_get_pack_file(self):
  169. pack_name = os.path.join('objects', 'pack', 'pack-%s.pack' % ('1' * 40))
  170. backend = _test_backend([], named_files={pack_name: 'pack contents'})
  171. mat = re.search('.*', pack_name)
  172. output = ''.join(get_pack_file(self._req, backend, mat))
  173. self.assertEqual('pack contents', output)
  174. self.assertEqual(HTTP_OK, self._status)
  175. self.assertContentTypeEquals('application/x-git-packed-objects')
  176. self.assertTrue(self._req.cached)
  177. def test_get_idx_file(self):
  178. idx_name = os.path.join('objects', 'pack', 'pack-%s.idx' % ('1' * 40))
  179. backend = _test_backend([], named_files={idx_name: 'idx contents'})
  180. mat = re.search('.*', idx_name)
  181. output = ''.join(get_idx_file(self._req, backend, mat))
  182. self.assertEqual('idx contents', output)
  183. self.assertEqual(HTTP_OK, self._status)
  184. self.assertContentTypeEquals('application/x-git-packed-objects-toc')
  185. self.assertTrue(self._req.cached)
  186. def test_get_info_refs(self):
  187. self._environ['QUERY_STRING'] = ''
  188. blob1 = make_object(Blob, data='1')
  189. blob2 = make_object(Blob, data='2')
  190. blob3 = make_object(Blob, data='3')
  191. tag1 = make_object(Tag, name='tag-tag',
  192. tagger='Test <test@example.com>',
  193. tag_time=12345,
  194. tag_timezone=0,
  195. message='message',
  196. object=(Blob, blob2.id))
  197. objects = [blob1, blob2, blob3, tag1]
  198. refs = {
  199. 'HEAD': '000',
  200. 'refs/heads/master': blob1.id,
  201. 'refs/tags/tag-tag': tag1.id,
  202. 'refs/tags/blob-tag': blob3.id,
  203. }
  204. backend = _test_backend(objects, refs=refs)
  205. mat = re.search('.*', '//info/refs')
  206. self.assertEqual(['%s\trefs/heads/master\n' % blob1.id,
  207. '%s\trefs/tags/blob-tag\n' % blob3.id,
  208. '%s\trefs/tags/tag-tag\n' % tag1.id,
  209. '%s\trefs/tags/tag-tag^{}\n' % blob2.id],
  210. list(get_info_refs(self._req, backend, mat)))
  211. self.assertEqual(HTTP_OK, self._status)
  212. self.assertContentTypeEquals('text/plain')
  213. self.assertFalse(self._req.cached)
  214. def test_get_info_packs(self):
  215. class TestPackData(object):
  216. def __init__(self, sha):
  217. self.filename = "pack-%s.pack" % sha
  218. class TestPack(object):
  219. def __init__(self, sha):
  220. self.data = TestPackData(sha)
  221. packs = [TestPack(str(i) * 40) for i in range(1, 4)]
  222. class TestObjectStore(MemoryObjectStore):
  223. # property must be overridden, can't be assigned
  224. @property
  225. def packs(self):
  226. return packs
  227. store = TestObjectStore()
  228. repo = BaseRepo(store, None)
  229. backend = DictBackend({'/': repo})
  230. mat = re.search('.*', '//info/packs')
  231. output = ''.join(get_info_packs(self._req, backend, mat))
  232. expected = 'P pack-%s.pack\n' * 3
  233. expected %= ('1' * 40, '2' * 40, '3' * 40)
  234. self.assertEqual(expected, output)
  235. self.assertEqual(HTTP_OK, self._status)
  236. self.assertContentTypeEquals('text/plain')
  237. self.assertFalse(self._req.cached)
  238. class SmartHandlersTestCase(WebTestCase):
  239. class _TestUploadPackHandler(object):
  240. def __init__(self, backend, args, proto, http_req=None,
  241. advertise_refs=False):
  242. self.args = args
  243. self.proto = proto
  244. self.http_req = http_req
  245. self.advertise_refs = advertise_refs
  246. def handle(self):
  247. self.proto.write('handled input: %s' % self.proto.recv(1024))
  248. def _make_handler(self, *args, **kwargs):
  249. self._handler = self._TestUploadPackHandler(*args, **kwargs)
  250. return self._handler
  251. def _handlers(self):
  252. return {'git-upload-pack': self._make_handler}
  253. def test_handle_service_request_unknown(self):
  254. mat = re.search('.*', '/git-evil-handler')
  255. list(handle_service_request(self._req, 'backend', mat))
  256. self.assertEqual(HTTP_FORBIDDEN, self._status)
  257. self.assertFalse(self._req.cached)
  258. def _run_handle_service_request(self, content_length=None):
  259. self._environ['wsgi.input'] = StringIO('foo')
  260. if content_length is not None:
  261. self._environ['CONTENT_LENGTH'] = content_length
  262. mat = re.search('.*', '/git-upload-pack')
  263. handler_output = ''.join(
  264. handle_service_request(self._req, 'backend', mat))
  265. write_output = self._output.getvalue()
  266. # Ensure all output was written via the write callback.
  267. self.assertEqual('', handler_output)
  268. self.assertEqual('handled input: foo', write_output)
  269. self.assertContentTypeEquals('application/x-git-upload-pack-result')
  270. self.assertFalse(self._handler.advertise_refs)
  271. self.assertTrue(self._handler.http_req)
  272. self.assertFalse(self._req.cached)
  273. def test_handle_service_request(self):
  274. self._run_handle_service_request()
  275. def test_handle_service_request_with_length(self):
  276. self._run_handle_service_request(content_length='3')
  277. def test_handle_service_request_empty_length(self):
  278. self._run_handle_service_request(content_length='')
  279. def test_get_info_refs_unknown(self):
  280. self._environ['QUERY_STRING'] = 'service=git-evil-handler'
  281. list(get_info_refs(self._req, 'backend', None))
  282. self.assertEqual(HTTP_FORBIDDEN, self._status)
  283. self.assertFalse(self._req.cached)
  284. def test_get_info_refs(self):
  285. self._environ['wsgi.input'] = StringIO('foo')
  286. self._environ['QUERY_STRING'] = 'service=git-upload-pack'
  287. mat = re.search('.*', '/git-upload-pack')
  288. handler_output = ''.join(get_info_refs(self._req, 'backend', mat))
  289. write_output = self._output.getvalue()
  290. self.assertEqual(('001e# service=git-upload-pack\n'
  291. '0000'
  292. # input is ignored by the handler
  293. 'handled input: '), write_output)
  294. # Ensure all output was written via the write callback.
  295. self.assertEqual('', handler_output)
  296. self.assertTrue(self._handler.advertise_refs)
  297. self.assertTrue(self._handler.http_req)
  298. self.assertFalse(self._req.cached)
  299. class LengthLimitedFileTestCase(TestCase):
  300. def test_no_cutoff(self):
  301. f = _LengthLimitedFile(StringIO('foobar'), 1024)
  302. self.assertEqual('foobar', f.read())
  303. def test_cutoff(self):
  304. f = _LengthLimitedFile(StringIO('foobar'), 3)
  305. self.assertEqual('foo', f.read())
  306. self.assertEqual('', f.read())
  307. def test_multiple_reads(self):
  308. f = _LengthLimitedFile(StringIO('foobar'), 3)
  309. self.assertEqual('fo', f.read(2))
  310. self.assertEqual('o', f.read(2))
  311. self.assertEqual('', f.read())
  312. class HTTPGitRequestTestCase(WebTestCase):
  313. # This class tests the contents of the actual cache headers
  314. _req_class = HTTPGitRequest
  315. def test_not_found(self):
  316. self._req.cache_forever() # cache headers should be discarded
  317. message = 'Something not found'
  318. self.assertEqual(message, self._req.not_found(message))
  319. self.assertEqual(HTTP_NOT_FOUND, self._status)
  320. self.assertEqual(set([('Content-Type', 'text/plain')]),
  321. set(self._headers))
  322. def test_forbidden(self):
  323. self._req.cache_forever() # cache headers should be discarded
  324. message = 'Something not found'
  325. self.assertEqual(message, self._req.forbidden(message))
  326. self.assertEqual(HTTP_FORBIDDEN, self._status)
  327. self.assertEqual(set([('Content-Type', 'text/plain')]),
  328. set(self._headers))
  329. def test_respond_ok(self):
  330. self._req.respond()
  331. self.assertEqual([], self._headers)
  332. self.assertEqual(HTTP_OK, self._status)
  333. def test_respond(self):
  334. self._req.nocache()
  335. self._req.respond(status=402, content_type='some/type',
  336. headers=[('X-Foo', 'foo'), ('X-Bar', 'bar')])
  337. self.assertEqual(set([
  338. ('X-Foo', 'foo'),
  339. ('X-Bar', 'bar'),
  340. ('Content-Type', 'some/type'),
  341. ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
  342. ('Pragma', 'no-cache'),
  343. ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
  344. ]), set(self._headers))
  345. self.assertEqual(402, self._status)
  346. class HTTPGitApplicationTestCase(TestCase):
  347. def setUp(self):
  348. super(HTTPGitApplicationTestCase, self).setUp()
  349. self._app = HTTPGitApplication('backend')
  350. self._environ = {
  351. 'PATH_INFO': '/foo',
  352. 'REQUEST_METHOD': 'GET',
  353. }
  354. def _test_handler(self, req, backend, mat):
  355. # tests interface used by all handlers
  356. self.assertEqual(self._environ, req.environ)
  357. self.assertEqual('backend', backend)
  358. self.assertEqual('/foo', mat.group(0))
  359. return 'output'
  360. def _add_handler(self, app):
  361. req = self._environ['REQUEST_METHOD']
  362. app.services = {
  363. (req, re.compile('/foo$')): self._test_handler,
  364. }
  365. def test_call(self):
  366. self._add_handler(self._app)
  367. self.assertEqual('output', self._app(self._environ, None))
  368. def test_fallback_app(self):
  369. def test_app(environ, start_response):
  370. return 'output'
  371. app = HTTPGitApplication('backend', fallback_app=test_app)
  372. self.assertEqual('output', app(self._environ, None))
  373. class GunzipTestCase(HTTPGitApplicationTestCase):
  374. __doc__ = """TestCase for testing the GunzipFilter, ensuring the wsgi.input
  375. is correctly decompressed and headers are corrected.
  376. """
  377. example_text = __doc__
  378. def setUp(self):
  379. super(GunzipTestCase, self).setUp()
  380. self._app = GunzipFilter(self._app)
  381. self._environ['HTTP_CONTENT_ENCODING'] = 'gzip'
  382. self._environ['REQUEST_METHOD'] = 'POST'
  383. def _get_zstream(self, text):
  384. zstream = StringIO()
  385. zfile = gzip.GzipFile(fileobj=zstream, mode='w')
  386. zfile.write(text)
  387. zfile.close()
  388. zlength = zstream.tell()
  389. zstream.seek(0)
  390. return zstream, zlength
  391. def _test_call(self, orig, zstream, zlength):
  392. self._add_handler(self._app.app)
  393. self.assertLess(zlength, len(orig))
  394. self.assertEqual(self._environ['HTTP_CONTENT_ENCODING'], 'gzip')
  395. self._environ['CONTENT_LENGTH'] = zlength
  396. self._environ['wsgi.input'] = zstream
  397. app_output = self._app(self._environ, None)
  398. buf = self._environ['wsgi.input']
  399. self.assertIsNot(buf, zstream)
  400. buf.seek(0)
  401. self.assertEqual(orig, buf.read())
  402. self.assertIs(None, self._environ.get('CONTENT_LENGTH'))
  403. self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ)
  404. def test_call(self):
  405. self._test_call(
  406. self.example_text,
  407. *self._get_zstream(self.example_text)
  408. )
  409. def test_call_no_seek(self):
  410. """
  411. This ensures that the gunzipping code doesn't require any methods on
  412. 'wsgi.input' except for '.read()'. (In particular, it shouldn't
  413. require '.seek()'. See https://github.com/jelmer/dulwich/issues/140.)
  414. """
  415. class MinimalistWSGIInputStream(object):
  416. def __init__(self, data):
  417. self.data = data
  418. self.pos = 0
  419. def read(self, howmuch):
  420. start = self.pos
  421. end = self.pos + howmuch
  422. if start >= len(self.data):
  423. return ''
  424. self.pos = end
  425. return self.data[start:end]
  426. zstream, zlength = self._get_zstream(self.example_text)
  427. self._test_call(self.example_text,
  428. MinimalistWSGIInputStream(zstream.read()), zlength)