test_pack.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924
  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License, or (at your option) any later version of the license.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Tests for Dulwich packs."""
  20. from cStringIO import StringIO
  21. import os
  22. import shutil
  23. import tempfile
  24. import zlib
  25. from dulwich._compat import (
  26. make_sha,
  27. )
  28. from dulwich.errors import (
  29. ChecksumMismatch,
  30. )
  31. from dulwich.file import (
  32. GitFile,
  33. )
  34. from dulwich.object_store import (
  35. MemoryObjectStore,
  36. )
  37. from dulwich.objects import (
  38. Blob,
  39. hex_to_sha,
  40. sha_to_hex,
  41. Commit,
  42. Tree,
  43. Blob,
  44. )
  45. from dulwich.pack import (
  46. OFS_DELTA,
  47. REF_DELTA,
  48. DELTA_TYPES,
  49. MemoryPackIndex,
  50. Pack,
  51. PackData,
  52. apply_delta,
  53. create_delta,
  54. deltify_pack_objects,
  55. load_pack_index,
  56. UnpackedObject,
  57. read_zlib_chunks,
  58. write_pack_header,
  59. write_pack_index_v1,
  60. write_pack_index_v2,
  61. SHA1Writer,
  62. write_pack_object,
  63. write_pack,
  64. unpack_object,
  65. compute_file_sha,
  66. PackStreamReader,
  67. DeltaChainIterator,
  68. )
  69. from dulwich.tests import (
  70. TestCase,
  71. )
  72. from utils import (
  73. make_object,
  74. build_pack,
  75. )
  76. pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'
  77. a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  78. tree_sha = 'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
  79. commit_sha = 'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  80. class PackTests(TestCase):
  81. """Base class for testing packs"""
  82. def setUp(self):
  83. super(PackTests, self).setUp()
  84. self.tempdir = tempfile.mkdtemp()
  85. self.addCleanup(shutil.rmtree, self.tempdir)
  86. datadir = os.path.abspath(os.path.join(os.path.dirname(__file__),
  87. 'data/packs'))
  88. def get_pack_index(self, sha):
  89. """Returns a PackIndex from the datadir with the given sha"""
  90. return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha))
  91. def get_pack_data(self, sha):
  92. """Returns a PackData object from the datadir with the given sha"""
  93. return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha))
  94. def get_pack(self, sha):
  95. return Pack(os.path.join(self.datadir, 'pack-%s' % sha))
  96. def assertSucceeds(self, func, *args, **kwargs):
  97. try:
  98. func(*args, **kwargs)
  99. except ChecksumMismatch, e:
  100. self.fail(e)
  101. class PackIndexTests(PackTests):
  102. """Class that tests the index of packfiles"""
  103. def test_object_index(self):
  104. """Tests that the correct object offset is returned from the index."""
  105. p = self.get_pack_index(pack1_sha)
  106. self.assertRaises(KeyError, p.object_index, pack1_sha)
  107. self.assertEqual(p.object_index(a_sha), 178)
  108. self.assertEqual(p.object_index(tree_sha), 138)
  109. self.assertEqual(p.object_index(commit_sha), 12)
  110. def test_index_len(self):
  111. p = self.get_pack_index(pack1_sha)
  112. self.assertEquals(3, len(p))
  113. def test_get_stored_checksum(self):
  114. p = self.get_pack_index(pack1_sha)
  115. self.assertEquals('f2848e2ad16f329ae1c92e3b95e91888daa5bd01',
  116. sha_to_hex(p.get_stored_checksum()))
  117. self.assertEquals('721980e866af9a5f93ad674144e1459b8ba3e7b7',
  118. sha_to_hex(p.get_pack_checksum()))
  119. def test_index_check(self):
  120. p = self.get_pack_index(pack1_sha)
  121. self.assertSucceeds(p.check)
  122. def test_iterentries(self):
  123. p = self.get_pack_index(pack1_sha)
  124. entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
  125. self.assertEquals([
  126. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
  127. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
  128. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
  129. ], entries)
  130. def test_iter(self):
  131. p = self.get_pack_index(pack1_sha)
  132. self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
  133. class TestPackDeltas(TestCase):
  134. test_string1 = 'The answer was flailing in the wind'
  135. test_string2 = 'The answer was falling down the pipe'
  136. test_string3 = 'zzzzz'
  137. test_string_empty = ''
  138. test_string_big = 'Z' * 8192
  139. def _test_roundtrip(self, base, target):
  140. self.assertEquals(target,
  141. ''.join(apply_delta(base, create_delta(base, target))))
  142. def test_nochange(self):
  143. self._test_roundtrip(self.test_string1, self.test_string1)
  144. def test_change(self):
  145. self._test_roundtrip(self.test_string1, self.test_string2)
  146. def test_rewrite(self):
  147. self._test_roundtrip(self.test_string1, self.test_string3)
  148. def test_overflow(self):
  149. self._test_roundtrip(self.test_string_empty, self.test_string_big)
  150. class TestPackData(PackTests):
  151. """Tests getting the data from the packfile."""
  152. def test_create_pack(self):
  153. p = self.get_pack_data(pack1_sha)
  154. def test_from_file(self):
  155. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
  156. PackData.from_file(open(path), os.path.getsize(path))
  157. def test_pack_len(self):
  158. p = self.get_pack_data(pack1_sha)
  159. self.assertEquals(3, len(p))
  160. def test_index_check(self):
  161. p = self.get_pack_data(pack1_sha)
  162. self.assertSucceeds(p.check)
  163. def test_iterobjects(self):
  164. p = self.get_pack_data(pack1_sha)
  165. commit_data = ('tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
  166. 'author James Westby <jw+debian@jameswestby.net> '
  167. '1174945067 +0100\n'
  168. 'committer James Westby <jw+debian@jameswestby.net> '
  169. '1174945067 +0100\n'
  170. '\n'
  171. 'Test commit\n')
  172. blob_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  173. tree_data = '100644 a\0%s' % hex_to_sha(blob_sha)
  174. actual = []
  175. for offset, type_num, chunks, crc32 in p.iterobjects():
  176. actual.append((offset, type_num, ''.join(chunks), crc32))
  177. self.assertEquals([
  178. (12, 1, commit_data, 3775879613L),
  179. (138, 2, tree_data, 912998690L),
  180. (178, 3, 'test 1\n', 1373561701L)
  181. ], actual)
  182. def test_iterentries(self):
  183. p = self.get_pack_data(pack1_sha)
  184. entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
  185. self.assertEquals(set([
  186. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701L),
  187. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690L),
  188. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613L),
  189. ]), entries)
  190. def test_create_index_v1(self):
  191. p = self.get_pack_data(pack1_sha)
  192. filename = os.path.join(self.tempdir, 'v1test.idx')
  193. p.create_index_v1(filename)
  194. idx1 = load_pack_index(filename)
  195. idx2 = self.get_pack_index(pack1_sha)
  196. self.assertEquals(idx1, idx2)
  197. def test_create_index_v2(self):
  198. p = self.get_pack_data(pack1_sha)
  199. filename = os.path.join(self.tempdir, 'v2test.idx')
  200. p.create_index_v2(filename)
  201. idx1 = load_pack_index(filename)
  202. idx2 = self.get_pack_index(pack1_sha)
  203. self.assertEquals(idx1, idx2)
  204. def test_compute_file_sha(self):
  205. f = StringIO('abcd1234wxyz')
  206. self.assertEqual(make_sha('abcd1234wxyz').hexdigest(),
  207. compute_file_sha(f).hexdigest())
  208. self.assertEqual(make_sha('abcd1234wxyz').hexdigest(),
  209. compute_file_sha(f, buffer_size=5).hexdigest())
  210. self.assertEqual(make_sha('abcd1234').hexdigest(),
  211. compute_file_sha(f, end_ofs=-4).hexdigest())
  212. self.assertEqual(make_sha('1234wxyz').hexdigest(),
  213. compute_file_sha(f, start_ofs=4).hexdigest())
  214. self.assertEqual(
  215. make_sha('1234').hexdigest(),
  216. compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest())
  217. class TestPack(PackTests):
  218. def test_len(self):
  219. p = self.get_pack(pack1_sha)
  220. self.assertEquals(3, len(p))
  221. def test_contains(self):
  222. p = self.get_pack(pack1_sha)
  223. self.assertTrue(tree_sha in p)
  224. def test_get(self):
  225. p = self.get_pack(pack1_sha)
  226. self.assertEquals(type(p[tree_sha]), Tree)
  227. def test_iter(self):
  228. p = self.get_pack(pack1_sha)
  229. self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
  230. def test_iterobjects(self):
  231. p = self.get_pack(pack1_sha)
  232. expected = set([p[s] for s in [commit_sha, tree_sha, a_sha]])
  233. self.assertEquals(expected, set(list(p.iterobjects())))
  234. def test_pack_tuples(self):
  235. p = self.get_pack(pack1_sha)
  236. tuples = p.pack_tuples()
  237. expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
  238. self.assertEquals(expected, set(list(tuples)))
  239. self.assertEquals(expected, set(list(tuples)))
  240. self.assertEquals(3, len(tuples))
  241. def test_get_object_at(self):
  242. """Tests random access for non-delta objects"""
  243. p = self.get_pack(pack1_sha)
  244. obj = p[a_sha]
  245. self.assertEqual(obj.type_name, 'blob')
  246. self.assertEqual(obj.sha().hexdigest(), a_sha)
  247. obj = p[tree_sha]
  248. self.assertEqual(obj.type_name, 'tree')
  249. self.assertEqual(obj.sha().hexdigest(), tree_sha)
  250. obj = p[commit_sha]
  251. self.assertEqual(obj.type_name, 'commit')
  252. self.assertEqual(obj.sha().hexdigest(), commit_sha)
  253. def test_copy(self):
  254. origpack = self.get_pack(pack1_sha)
  255. try:
  256. self.assertSucceeds(origpack.index.check)
  257. basename = os.path.join(self.tempdir, 'Elch')
  258. write_pack(basename, origpack.pack_tuples())
  259. newpack = Pack(basename)
  260. try:
  261. self.assertEquals(origpack, newpack)
  262. self.assertSucceeds(newpack.index.check)
  263. self.assertEquals(origpack.name(), newpack.name())
  264. self.assertEquals(origpack.index.get_pack_checksum(),
  265. newpack.index.get_pack_checksum())
  266. wrong_version = origpack.index.version != newpack.index.version
  267. orig_checksum = origpack.index.get_stored_checksum()
  268. new_checksum = newpack.index.get_stored_checksum()
  269. self.assertTrue(wrong_version or orig_checksum == new_checksum)
  270. finally:
  271. newpack.close()
  272. finally:
  273. origpack.close()
  274. def test_commit_obj(self):
  275. p = self.get_pack(pack1_sha)
  276. commit = p[commit_sha]
  277. self.assertEquals('James Westby <jw+debian@jameswestby.net>',
  278. commit.author)
  279. self.assertEquals([], commit.parents)
  280. def _copy_pack(self, origpack):
  281. basename = os.path.join(self.tempdir, 'somepack')
  282. write_pack(basename, origpack.pack_tuples())
  283. return Pack(basename)
  284. def test_keep_no_message(self):
  285. p = self.get_pack(pack1_sha)
  286. p = self._copy_pack(p)
  287. keepfile_name = p.keep()
  288. # file should exist
  289. self.assertTrue(os.path.exists(keepfile_name))
  290. f = open(keepfile_name, 'r')
  291. try:
  292. buf = f.read()
  293. self.assertEqual('', buf)
  294. finally:
  295. f.close()
  296. def test_keep_message(self):
  297. p = self.get_pack(pack1_sha)
  298. p = self._copy_pack(p)
  299. msg = 'some message'
  300. keepfile_name = p.keep(msg)
  301. # file should exist
  302. self.assertTrue(os.path.exists(keepfile_name))
  303. # and contain the right message, with a linefeed
  304. f = open(keepfile_name, 'r')
  305. try:
  306. buf = f.read()
  307. self.assertEqual(msg + '\n', buf)
  308. finally:
  309. f.close()
  310. def test_name(self):
  311. p = self.get_pack(pack1_sha)
  312. self.assertEquals(pack1_sha, p.name())
  313. def test_length_mismatch(self):
  314. data = self.get_pack_data(pack1_sha)
  315. index = self.get_pack_index(pack1_sha)
  316. Pack.from_objects(data, index).check_length_and_checksum()
  317. data._file.seek(12)
  318. bad_file = StringIO()
  319. write_pack_header(bad_file, 9999)
  320. bad_file.write(data._file.read())
  321. bad_file = StringIO(bad_file.getvalue())
  322. bad_data = PackData('', file=bad_file)
  323. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  324. self.assertRaises(AssertionError, lambda: bad_pack.data)
  325. self.assertRaises(AssertionError,
  326. lambda: bad_pack.check_length_and_checksum())
  327. def test_checksum_mismatch(self):
  328. data = self.get_pack_data(pack1_sha)
  329. index = self.get_pack_index(pack1_sha)
  330. Pack.from_objects(data, index).check_length_and_checksum()
  331. data._file.seek(0)
  332. bad_file = StringIO(data._file.read()[:-20] + ('\xff' * 20))
  333. bad_data = PackData('', file=bad_file)
  334. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  335. self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
  336. self.assertRaises(ChecksumMismatch, lambda:
  337. bad_pack.check_length_and_checksum())
  338. def test_iterobjects(self):
  339. p = self.get_pack(pack1_sha)
  340. objs = dict((o.id, o) for o in p.iterobjects())
  341. self.assertEquals(3, len(objs))
  342. self.assertEquals(sorted(objs), sorted(p.index))
  343. self.assertTrue(isinstance(objs[a_sha], Blob))
  344. self.assertTrue(isinstance(objs[tree_sha], Tree))
  345. self.assertTrue(isinstance(objs[commit_sha], Commit))
  346. class WritePackTests(TestCase):
  347. def test_write_pack_header(self):
  348. f = StringIO()
  349. write_pack_header(f, 42)
  350. self.assertEquals('PACK\x00\x00\x00\x02\x00\x00\x00*',
  351. f.getvalue())
  352. def test_write_pack_object(self):
  353. f = StringIO()
  354. f.write('header')
  355. offset = f.tell()
  356. crc32 = write_pack_object(f, Blob.type_num, 'blob')
  357. self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xffffffff)
  358. f.write('x') # unpack_object needs extra trailing data.
  359. f.seek(offset)
  360. comp_len = len(f.getvalue()) - offset - 1
  361. unpacked, unused = unpack_object(f.read, compute_crc32=True)
  362. self.assertEqual(Blob.type_num, unpacked.pack_type_num)
  363. self.assertEqual(Blob.type_num, unpacked.obj_type_num)
  364. self.assertEqual(['blob'], unpacked.decomp_chunks)
  365. self.assertEqual(crc32, unpacked.crc32)
  366. self.assertEqual('x', unused)
  367. def test_write_pack_object_sha(self):
  368. f = StringIO()
  369. f.write('header')
  370. offset = f.tell()
  371. sha_a = make_sha('foo')
  372. sha_b = sha_a.copy()
  373. write_pack_object(f, Blob.type_num, 'blob', sha=sha_a)
  374. self.assertNotEqual(sha_a.digest(), sha_b.digest())
  375. sha_b.update(f.getvalue()[offset:])
  376. self.assertEqual(sha_a.digest(), sha_b.digest())
  377. pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  378. class BaseTestPackIndexWriting(object):
  379. def assertSucceeds(self, func, *args, **kwargs):
  380. try:
  381. func(*args, **kwargs)
  382. except ChecksumMismatch, e:
  383. self.fail(e)
  384. def index(self, filename, entries, pack_checksum):
  385. raise NotImplementedError(self.index)
  386. def test_empty(self):
  387. idx = self.index('empty.idx', [], pack_checksum)
  388. self.assertEquals(idx.get_pack_checksum(), pack_checksum)
  389. self.assertEquals(0, len(idx))
  390. def test_single(self):
  391. entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
  392. my_entries = [(entry_sha, 178, 42)]
  393. idx = self.index('single.idx', my_entries, pack_checksum)
  394. self.assertEquals(idx.get_pack_checksum(), pack_checksum)
  395. self.assertEquals(1, len(idx))
  396. actual_entries = list(idx.iterentries())
  397. self.assertEquals(len(my_entries), len(actual_entries))
  398. for mine, actual in zip(my_entries, actual_entries):
  399. my_sha, my_offset, my_crc = mine
  400. actual_sha, actual_offset, actual_crc = actual
  401. self.assertEquals(my_sha, actual_sha)
  402. self.assertEquals(my_offset, actual_offset)
  403. if self._has_crc32_checksum:
  404. self.assertEquals(my_crc, actual_crc)
  405. else:
  406. self.assertTrue(actual_crc is None)
  407. class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
  408. def setUp(self):
  409. self.tempdir = tempfile.mkdtemp()
  410. def tearDown(self):
  411. shutil.rmtree(self.tempdir)
  412. def index(self, filename, entries, pack_checksum):
  413. path = os.path.join(self.tempdir, filename)
  414. self.writeIndex(path, entries, pack_checksum)
  415. idx = load_pack_index(path)
  416. self.assertSucceeds(idx.check)
  417. self.assertEquals(idx.version, self._expected_version)
  418. return idx
  419. def writeIndex(self, filename, entries, pack_checksum):
  420. # FIXME: Write to StringIO instead rather than hitting disk ?
  421. f = GitFile(filename, "wb")
  422. try:
  423. self._write_fn(f, entries, pack_checksum)
  424. finally:
  425. f.close()
  426. class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
  427. def setUp(self):
  428. TestCase.setUp(self)
  429. self._has_crc32_checksum = True
  430. def index(self, filename, entries, pack_checksum):
  431. return MemoryPackIndex(entries, pack_checksum)
  432. def tearDown(self):
  433. TestCase.tearDown(self)
  434. class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
  435. def setUp(self):
  436. TestCase.setUp(self)
  437. BaseTestFilePackIndexWriting.setUp(self)
  438. self._has_crc32_checksum = False
  439. self._expected_version = 1
  440. self._write_fn = write_pack_index_v1
  441. def tearDown(self):
  442. TestCase.tearDown(self)
  443. BaseTestFilePackIndexWriting.tearDown(self)
  444. class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
  445. def setUp(self):
  446. TestCase.setUp(self)
  447. BaseTestFilePackIndexWriting.setUp(self)
  448. self._has_crc32_checksum = True
  449. self._expected_version = 2
  450. self._write_fn = write_pack_index_v2
  451. def tearDown(self):
  452. TestCase.tearDown(self)
  453. BaseTestFilePackIndexWriting.tearDown(self)
  454. class ReadZlibTests(TestCase):
  455. decomp = (
  456. 'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
  457. 'parent None\n'
  458. 'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  459. 'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  460. '\n'
  461. "Provide replacement for mmap()'s offset argument.")
  462. comp = zlib.compress(decomp)
  463. extra = 'nextobject'
  464. def setUp(self):
  465. super(ReadZlibTests, self).setUp()
  466. self.read = StringIO(self.comp + self.extra).read
  467. self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0)
  468. def test_decompress_size(self):
  469. good_decomp_len = len(self.decomp)
  470. self.unpacked.decomp_len = -1
  471. self.assertRaises(ValueError, read_zlib_chunks, self.read,
  472. self.unpacked)
  473. self.unpacked.decomp_len = good_decomp_len - 1
  474. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  475. self.unpacked)
  476. self.unpacked.decomp_len = good_decomp_len + 1
  477. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  478. self.unpacked)
  479. def test_decompress_truncated(self):
  480. read = StringIO(self.comp[:10]).read
  481. self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
  482. read = StringIO(self.comp).read
  483. self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
  484. def test_decompress_empty(self):
  485. unpacked = UnpackedObject(Tree.type_num, None, 0, None)
  486. comp = zlib.compress('')
  487. read = StringIO(comp + self.extra).read
  488. unused = read_zlib_chunks(read, unpacked)
  489. self.assertEqual('', ''.join(unpacked.decomp_chunks))
  490. self.assertNotEquals('', unused)
  491. self.assertEquals(self.extra, unused + read())
  492. def test_decompress_no_crc32(self):
  493. self.unpacked.crc32 = None
  494. read_zlib_chunks(self.read, self.unpacked)
  495. self.assertEquals(None, self.unpacked.crc32)
  496. def _do_decompress_test(self, buffer_size, **kwargs):
  497. unused = read_zlib_chunks(self.read, self.unpacked,
  498. buffer_size=buffer_size, **kwargs)
  499. self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
  500. self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
  501. self.assertNotEquals('', unused)
  502. self.assertEquals(self.extra, unused + self.read())
  503. def test_simple_decompress(self):
  504. self._do_decompress_test(4096)
  505. self.assertEqual(None, self.unpacked.comp_chunks)
  506. # These buffer sizes are not intended to be realistic, but rather simulate
  507. # larger buffer sizes that may end at various places.
  508. def test_decompress_buffer_size_1(self):
  509. self._do_decompress_test(1)
  510. def test_decompress_buffer_size_2(self):
  511. self._do_decompress_test(2)
  512. def test_decompress_buffer_size_3(self):
  513. self._do_decompress_test(3)
  514. def test_decompress_buffer_size_4(self):
  515. self._do_decompress_test(4)
  516. def test_decompress_include_comp(self):
  517. self._do_decompress_test(4096, include_comp=True)
  518. self.assertEqual(self.comp, ''.join(self.unpacked.comp_chunks))
  519. class DeltifyTests(TestCase):
  520. def test_empty(self):
  521. self.assertEquals([], list(deltify_pack_objects([])))
  522. def test_single(self):
  523. b = Blob.from_string("foo")
  524. self.assertEquals(
  525. [(b.type_num, b.sha().digest(), None, b.as_raw_string())],
  526. list(deltify_pack_objects([(b, "")])))
  527. def test_simple_delta(self):
  528. b1 = Blob.from_string("a" * 101)
  529. b2 = Blob.from_string("a" * 100)
  530. delta = create_delta(b1.as_raw_string(), b2.as_raw_string())
  531. self.assertEquals([
  532. (b1.type_num, b1.sha().digest(), None, b1.as_raw_string()),
  533. (b2.type_num, b2.sha().digest(), b1.sha().digest(), delta)
  534. ],
  535. list(deltify_pack_objects([(b1, ""), (b2, "")])))
  536. class TestPackStreamReader(TestCase):
  537. def test_read_objects_emtpy(self):
  538. f = StringIO()
  539. build_pack(f, [])
  540. reader = PackStreamReader(f.read)
  541. self.assertEqual(0, len(list(reader.read_objects())))
  542. def test_read_objects(self):
  543. f = StringIO()
  544. entries = build_pack(f, [
  545. (Blob.type_num, 'blob'),
  546. (OFS_DELTA, (0, 'blob1')),
  547. ])
  548. reader = PackStreamReader(f.read)
  549. objects = list(reader.read_objects(compute_crc32=True))
  550. self.assertEqual(2, len(objects))
  551. unpacked_blob, unpacked_delta = objects
  552. self.assertEqual(entries[0][0], unpacked_blob.offset)
  553. self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
  554. self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
  555. self.assertEqual(None, unpacked_blob.delta_base)
  556. self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
  557. self.assertEqual(entries[0][4], unpacked_blob.crc32)
  558. self.assertEqual(entries[1][0], unpacked_delta.offset)
  559. self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
  560. self.assertEqual(None, unpacked_delta.obj_type_num)
  561. self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
  562. unpacked_delta.delta_base)
  563. delta = create_delta('blob', 'blob1')
  564. self.assertEqual(delta, ''.join(unpacked_delta.decomp_chunks))
  565. self.assertEqual(entries[1][4], unpacked_delta.crc32)
  566. def test_read_objects_buffered(self):
  567. f = StringIO()
  568. build_pack(f, [
  569. (Blob.type_num, 'blob'),
  570. (OFS_DELTA, (0, 'blob1')),
  571. ])
  572. reader = PackStreamReader(f.read, zlib_bufsize=4)
  573. self.assertEqual(2, len(list(reader.read_objects())))
  574. def test_read_objects_empty(self):
  575. reader = PackStreamReader(StringIO().read)
  576. self.assertEqual([], list(reader.read_objects()))
  577. class TestPackIterator(DeltaChainIterator):
  578. _compute_crc32 = True
  579. def __init__(self, *args, **kwargs):
  580. super(TestPackIterator, self).__init__(*args, **kwargs)
  581. self._unpacked_offsets = set()
  582. def _result(self, unpacked):
  583. """Return entries in the same format as build_pack."""
  584. return (unpacked.offset, unpacked.obj_type_num,
  585. ''.join(unpacked.obj_chunks), unpacked.sha(), unpacked.crc32)
  586. def _resolve_object(self, offset, pack_type_num, base_chunks):
  587. assert offset not in self._unpacked_offsets, (
  588. 'Attempted to re-inflate offset %i' % offset)
  589. self._unpacked_offsets.add(offset)
  590. return super(TestPackIterator, self)._resolve_object(
  591. offset, pack_type_num, base_chunks)
  592. class DeltaChainIteratorTests(TestCase):
  593. def setUp(self):
  594. self.store = MemoryObjectStore()
  595. self.fetched = set()
  596. def store_blobs(self, blobs_data):
  597. blobs = []
  598. for data in blobs_data:
  599. blob = make_object(Blob, data=data)
  600. blobs.append(blob)
  601. self.store.add_object(blob)
  602. return blobs
  603. def get_raw_no_repeat(self, bin_sha):
  604. """Wrapper around store.get_raw that doesn't allow repeat lookups."""
  605. hex_sha = sha_to_hex(bin_sha)
  606. self.assertFalse(hex_sha in self.fetched,
  607. 'Attempted to re-fetch object %s' % hex_sha)
  608. self.fetched.add(hex_sha)
  609. return self.store.get_raw(hex_sha)
  610. def make_pack_iter(self, f, thin=None):
  611. if thin is None:
  612. thin = bool(list(self.store))
  613. resolve_ext_ref = thin and self.get_raw_no_repeat or None
  614. data = PackData('test.pack', file=f)
  615. return TestPackIterator.for_pack_data(
  616. data, resolve_ext_ref=resolve_ext_ref)
  617. def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
  618. expected = [entries[i] for i in expected_indexes]
  619. self.assertEqual(expected, list(pack_iter._walk_all_chains()))
  620. def test_no_deltas(self):
  621. f = StringIO()
  622. entries = build_pack(f, [
  623. (Commit.type_num, 'commit'),
  624. (Blob.type_num, 'blob'),
  625. (Tree.type_num, 'tree'),
  626. ])
  627. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  628. def test_ofs_deltas(self):
  629. f = StringIO()
  630. entries = build_pack(f, [
  631. (Blob.type_num, 'blob'),
  632. (OFS_DELTA, (0, 'blob1')),
  633. (OFS_DELTA, (0, 'blob2')),
  634. ])
  635. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  636. def test_ofs_deltas_chain(self):
  637. f = StringIO()
  638. entries = build_pack(f, [
  639. (Blob.type_num, 'blob'),
  640. (OFS_DELTA, (0, 'blob1')),
  641. (OFS_DELTA, (1, 'blob2')),
  642. ])
  643. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  644. def test_ref_deltas(self):
  645. f = StringIO()
  646. entries = build_pack(f, [
  647. (REF_DELTA, (1, 'blob1')),
  648. (Blob.type_num, ('blob')),
  649. (REF_DELTA, (1, 'blob2')),
  650. ])
  651. self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))
  652. def test_ref_deltas_chain(self):
  653. f = StringIO()
  654. entries = build_pack(f, [
  655. (REF_DELTA, (2, 'blob1')),
  656. (Blob.type_num, ('blob')),
  657. (REF_DELTA, (1, 'blob2')),
  658. ])
  659. self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
  660. def test_ofs_and_ref_deltas(self):
  661. # Deltas pending on this offset are popped before deltas depending on
  662. # this ref.
  663. f = StringIO()
  664. entries = build_pack(f, [
  665. (REF_DELTA, (1, 'blob1')),
  666. (Blob.type_num, ('blob')),
  667. (OFS_DELTA, (1, 'blob2')),
  668. ])
  669. self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
  670. def test_mixed_chain(self):
  671. f = StringIO()
  672. entries = build_pack(f, [
  673. (Blob.type_num, 'blob'),
  674. (REF_DELTA, (2, 'blob2')),
  675. (OFS_DELTA, (0, 'blob1')),
  676. (OFS_DELTA, (1, 'blob3')),
  677. (OFS_DELTA, (0, 'bob')),
  678. ])
  679. self.assertEntriesMatch([0, 2, 1, 3, 4], entries,
  680. self.make_pack_iter(f))
  681. def test_long_chain(self):
  682. n = 100
  683. objects_spec = [(Blob.type_num, 'blob')]
  684. for i in xrange(n):
  685. objects_spec.append((OFS_DELTA, (i, 'blob%i' % i)))
  686. f = StringIO()
  687. entries = build_pack(f, objects_spec)
  688. self.assertEntriesMatch(xrange(n + 1), entries, self.make_pack_iter(f))
  689. def test_branchy_chain(self):
  690. n = 100
  691. objects_spec = [(Blob.type_num, 'blob')]
  692. for i in xrange(n):
  693. objects_spec.append((OFS_DELTA, (0, 'blob%i' % i)))
  694. f = StringIO()
  695. entries = build_pack(f, objects_spec)
  696. self.assertEntriesMatch(xrange(n + 1), entries, self.make_pack_iter(f))
  697. def test_ext_ref(self):
  698. blob, = self.store_blobs(['blob'])
  699. f = StringIO()
  700. entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
  701. store=self.store)
  702. pack_iter = self.make_pack_iter(f)
  703. self.assertEntriesMatch([0], entries, pack_iter)
  704. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  705. def test_ext_ref_chain(self):
  706. blob, = self.store_blobs(['blob'])
  707. f = StringIO()
  708. entries = build_pack(f, [
  709. (REF_DELTA, (1, 'blob2')),
  710. (REF_DELTA, (blob.id, 'blob1')),
  711. ], store=self.store)
  712. pack_iter = self.make_pack_iter(f)
  713. self.assertEntriesMatch([1, 0], entries, pack_iter)
  714. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  715. def test_ext_ref_multiple_times(self):
  716. blob, = self.store_blobs(['blob'])
  717. f = StringIO()
  718. entries = build_pack(f, [
  719. (REF_DELTA, (blob.id, 'blob1')),
  720. (REF_DELTA, (blob.id, 'blob2')),
  721. ], store=self.store)
  722. pack_iter = self.make_pack_iter(f)
  723. self.assertEntriesMatch([0, 1], entries, pack_iter)
  724. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  725. def test_multiple_ext_refs(self):
  726. b1, b2 = self.store_blobs(['foo', 'bar'])
  727. f = StringIO()
  728. entries = build_pack(f, [
  729. (REF_DELTA, (b1.id, 'foo1')),
  730. (REF_DELTA, (b2.id, 'bar2')),
  731. ], store=self.store)
  732. pack_iter = self.make_pack_iter(f)
  733. self.assertEntriesMatch([0, 1], entries, pack_iter)
  734. self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)],
  735. pack_iter.ext_refs())
  736. def test_bad_ext_ref_non_thin_pack(self):
  737. blob, = self.store_blobs(['blob'])
  738. f = StringIO()
  739. entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
  740. store=self.store)
  741. pack_iter = self.make_pack_iter(f, thin=False)
  742. try:
  743. list(pack_iter._walk_all_chains())
  744. self.fail()
  745. except KeyError, e:
  746. self.assertEqual(([blob.id],), e.args)
  747. def test_bad_ext_ref_thin_pack(self):
  748. b1, b2, b3 = self.store_blobs(['foo', 'bar', 'baz'])
  749. f = StringIO()
  750. entries = build_pack(f, [
  751. (REF_DELTA, (1, 'foo99')),
  752. (REF_DELTA, (b1.id, 'foo1')),
  753. (REF_DELTA, (b2.id, 'bar2')),
  754. (REF_DELTA, (b3.id, 'baz3')),
  755. ], store=self.store)
  756. del self.store[b2.id]
  757. del self.store[b3.id]
  758. pack_iter = self.make_pack_iter(f)
  759. try:
  760. list(pack_iter._walk_all_chains())
  761. self.fail()
  762. except KeyError, e:
  763. self.assertEqual((sorted([b2.id, b3.id]),), e.args)