test_pack.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044
  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License, or (at your option) any later version of the license.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Tests for Dulwich packs."""
  20. from io import BytesIO
  21. from hashlib import sha1
  22. import os
  23. import shutil
  24. import tempfile
  25. import zlib
  26. from dulwich.errors import (
  27. ChecksumMismatch,
  28. )
  29. from dulwich.file import (
  30. GitFile,
  31. )
  32. from dulwich.object_store import (
  33. MemoryObjectStore,
  34. )
  35. from dulwich.objects import (
  36. hex_to_sha,
  37. sha_to_hex,
  38. Commit,
  39. Tree,
  40. Blob,
  41. )
  42. from dulwich.pack import (
  43. OFS_DELTA,
  44. REF_DELTA,
  45. MemoryPackIndex,
  46. Pack,
  47. PackData,
  48. apply_delta,
  49. create_delta,
  50. deltify_pack_objects,
  51. load_pack_index,
  52. UnpackedObject,
  53. read_zlib_chunks,
  54. write_pack_header,
  55. write_pack_index_v1,
  56. write_pack_index_v2,
  57. write_pack_object,
  58. write_pack,
  59. unpack_object,
  60. compute_file_sha,
  61. PackStreamReader,
  62. DeltaChainIterator,
  63. _delta_encode_size,
  64. _encode_copy_operation,
  65. )
  66. from dulwich.tests import (
  67. TestCase,
  68. )
  69. from dulwich.tests.utils import (
  70. make_object,
  71. build_pack,
  72. )
  73. pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'
  74. a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  75. tree_sha = 'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
  76. commit_sha = 'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  77. class PackTests(TestCase):
  78. """Base class for testing packs"""
  79. def setUp(self):
  80. super(PackTests, self).setUp()
  81. self.tempdir = tempfile.mkdtemp()
  82. self.addCleanup(shutil.rmtree, self.tempdir)
  83. datadir = os.path.abspath(os.path.join(os.path.dirname(__file__),
  84. 'data/packs'))
  85. def get_pack_index(self, sha):
  86. """Returns a PackIndex from the datadir with the given sha"""
  87. return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha))
  88. def get_pack_data(self, sha):
  89. """Returns a PackData object from the datadir with the given sha"""
  90. return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha))
  91. def get_pack(self, sha):
  92. return Pack(os.path.join(self.datadir, 'pack-%s' % sha))
  93. def assertSucceeds(self, func, *args, **kwargs):
  94. try:
  95. func(*args, **kwargs)
  96. except ChecksumMismatch as e:
  97. self.fail(e)
  98. class PackIndexTests(PackTests):
  99. """Class that tests the index of packfiles"""
  100. def test_object_index(self):
  101. """Tests that the correct object offset is returned from the index."""
  102. p = self.get_pack_index(pack1_sha)
  103. self.assertRaises(KeyError, p.object_index, pack1_sha)
  104. self.assertEqual(p.object_index(a_sha), 178)
  105. self.assertEqual(p.object_index(tree_sha), 138)
  106. self.assertEqual(p.object_index(commit_sha), 12)
  107. def test_index_len(self):
  108. p = self.get_pack_index(pack1_sha)
  109. self.assertEqual(3, len(p))
  110. def test_get_stored_checksum(self):
  111. p = self.get_pack_index(pack1_sha)
  112. self.assertEqual('f2848e2ad16f329ae1c92e3b95e91888daa5bd01',
  113. sha_to_hex(p.get_stored_checksum()))
  114. self.assertEqual('721980e866af9a5f93ad674144e1459b8ba3e7b7',
  115. sha_to_hex(p.get_pack_checksum()))
  116. def test_index_check(self):
  117. p = self.get_pack_index(pack1_sha)
  118. self.assertSucceeds(p.check)
  119. def test_iterentries(self):
  120. p = self.get_pack_index(pack1_sha)
  121. entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
  122. self.assertEqual([
  123. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
  124. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
  125. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
  126. ], entries)
  127. def test_iter(self):
  128. p = self.get_pack_index(pack1_sha)
  129. self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p))
  130. class TestPackDeltas(TestCase):
  131. test_string1 = 'The answer was flailing in the wind'
  132. test_string2 = 'The answer was falling down the pipe'
  133. test_string3 = 'zzzzz'
  134. test_string_empty = ''
  135. test_string_big = 'Z' * 8192
  136. test_string_huge = 'Z' * 100000
  137. def _test_roundtrip(self, base, target):
  138. self.assertEqual(target,
  139. ''.join(apply_delta(base, create_delta(base, target))))
  140. def test_nochange(self):
  141. self._test_roundtrip(self.test_string1, self.test_string1)
  142. def test_nochange_huge(self):
  143. self._test_roundtrip(self.test_string_huge, self.test_string_huge)
  144. def test_change(self):
  145. self._test_roundtrip(self.test_string1, self.test_string2)
  146. def test_rewrite(self):
  147. self._test_roundtrip(self.test_string1, self.test_string3)
  148. def test_empty_to_big(self):
  149. self._test_roundtrip(self.test_string_empty, self.test_string_big)
  150. def test_empty_to_huge(self):
  151. self._test_roundtrip(self.test_string_empty, self.test_string_huge)
  152. def test_huge_copy(self):
  153. self._test_roundtrip(self.test_string_huge + self.test_string1,
  154. self.test_string_huge + self.test_string2)
  155. class TestPackData(PackTests):
  156. """Tests getting the data from the packfile."""
  157. def test_create_pack(self):
  158. self.get_pack_data(pack1_sha).close()
  159. def test_from_file(self):
  160. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
  161. PackData.from_file(open(path), os.path.getsize(path))
  162. def test_pack_len(self):
  163. with self.get_pack_data(pack1_sha) as p:
  164. self.assertEqual(3, len(p))
  165. def test_index_check(self):
  166. with self.get_pack_data(pack1_sha) as p:
  167. self.assertSucceeds(p.check)
  168. def test_iterobjects(self):
  169. with self.get_pack_data(pack1_sha) as p:
  170. commit_data = ('tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
  171. 'author James Westby <jw+debian@jameswestby.net> '
  172. '1174945067 +0100\n'
  173. 'committer James Westby <jw+debian@jameswestby.net> '
  174. '1174945067 +0100\n'
  175. '\n'
  176. 'Test commit\n')
  177. blob_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  178. tree_data = '100644 a\0%s' % hex_to_sha(blob_sha)
  179. actual = []
  180. for offset, type_num, chunks, crc32 in p.iterobjects():
  181. actual.append((offset, type_num, ''.join(chunks), crc32))
  182. self.assertEqual([
  183. (12, 1, commit_data, 3775879613),
  184. (138, 2, tree_data, 912998690),
  185. (178, 3, 'test 1\n', 1373561701)
  186. ], actual)
  187. def test_iterentries(self):
  188. with self.get_pack_data(pack1_sha) as p:
  189. entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
  190. self.assertEqual(set([
  191. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701),
  192. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690),
  193. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613),
  194. ]), entries)
  195. def test_create_index_v1(self):
  196. with self.get_pack_data(pack1_sha) as p:
  197. filename = os.path.join(self.tempdir, 'v1test.idx')
  198. p.create_index_v1(filename)
  199. idx1 = load_pack_index(filename)
  200. idx2 = self.get_pack_index(pack1_sha)
  201. self.assertEqual(idx1, idx2)
  202. def test_create_index_v2(self):
  203. with self.get_pack_data(pack1_sha) as p:
  204. filename = os.path.join(self.tempdir, 'v2test.idx')
  205. p.create_index_v2(filename)
  206. idx1 = load_pack_index(filename)
  207. idx2 = self.get_pack_index(pack1_sha)
  208. self.assertEqual(idx1, idx2)
  209. def test_compute_file_sha(self):
  210. f = BytesIO('abcd1234wxyz')
  211. self.assertEqual(sha1('abcd1234wxyz').hexdigest(),
  212. compute_file_sha(f).hexdigest())
  213. self.assertEqual(sha1('abcd1234wxyz').hexdigest(),
  214. compute_file_sha(f, buffer_size=5).hexdigest())
  215. self.assertEqual(sha1('abcd1234').hexdigest(),
  216. compute_file_sha(f, end_ofs=-4).hexdigest())
  217. self.assertEqual(sha1('1234wxyz').hexdigest(),
  218. compute_file_sha(f, start_ofs=4).hexdigest())
  219. self.assertEqual(
  220. sha1('1234').hexdigest(),
  221. compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest())
  222. def test_compute_file_sha_short_file(self):
  223. f = BytesIO('abcd1234wxyz')
  224. self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20)
  225. self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20)
  226. self.assertRaises(AssertionError, compute_file_sha, f, start_ofs=10,
  227. end_ofs=-12)
  228. class TestPack(PackTests):
  229. def test_len(self):
  230. with self.get_pack(pack1_sha) as p:
  231. self.assertEqual(3, len(p))
  232. def test_contains(self):
  233. with self.get_pack(pack1_sha) as p:
  234. self.assertTrue(tree_sha in p)
  235. def test_get(self):
  236. with self.get_pack(pack1_sha) as p:
  237. self.assertEqual(type(p[tree_sha]), Tree)
  238. def test_iter(self):
  239. with self.get_pack(pack1_sha) as p:
  240. self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p))
  241. def test_iterobjects(self):
  242. with self.get_pack(pack1_sha) as p:
  243. expected = set([p[s] for s in [commit_sha, tree_sha, a_sha]])
  244. self.assertEqual(expected, set(list(p.iterobjects())))
  245. def test_pack_tuples(self):
  246. with self.get_pack(pack1_sha) as p:
  247. tuples = p.pack_tuples()
  248. expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
  249. self.assertEqual(expected, set(list(tuples)))
  250. self.assertEqual(expected, set(list(tuples)))
  251. self.assertEqual(3, len(tuples))
  252. def test_get_object_at(self):
  253. """Tests random access for non-delta objects"""
  254. with self.get_pack(pack1_sha) as p:
  255. obj = p[a_sha]
  256. self.assertEqual(obj.type_name, 'blob')
  257. self.assertEqual(obj.sha().hexdigest(), a_sha)
  258. obj = p[tree_sha]
  259. self.assertEqual(obj.type_name, 'tree')
  260. self.assertEqual(obj.sha().hexdigest(), tree_sha)
  261. obj = p[commit_sha]
  262. self.assertEqual(obj.type_name, 'commit')
  263. self.assertEqual(obj.sha().hexdigest(), commit_sha)
  264. def test_copy(self):
  265. with self.get_pack(pack1_sha) as origpack:
  266. self.assertSucceeds(origpack.index.check)
  267. basename = os.path.join(self.tempdir, 'Elch')
  268. write_pack(basename, origpack.pack_tuples())
  269. with Pack(basename) as newpack:
  270. self.assertEqual(origpack, newpack)
  271. self.assertSucceeds(newpack.index.check)
  272. self.assertEqual(origpack.name(), newpack.name())
  273. self.assertEqual(origpack.index.get_pack_checksum(),
  274. newpack.index.get_pack_checksum())
  275. wrong_version = origpack.index.version != newpack.index.version
  276. orig_checksum = origpack.index.get_stored_checksum()
  277. new_checksum = newpack.index.get_stored_checksum()
  278. self.assertTrue(wrong_version or orig_checksum == new_checksum)
  279. def test_commit_obj(self):
  280. with self.get_pack(pack1_sha) as p:
  281. commit = p[commit_sha]
  282. self.assertEqual('James Westby <jw+debian@jameswestby.net>',
  283. commit.author)
  284. self.assertEqual([], commit.parents)
  285. def _copy_pack(self, origpack):
  286. basename = os.path.join(self.tempdir, 'somepack')
  287. write_pack(basename, origpack.pack_tuples())
  288. return Pack(basename)
  289. def test_keep_no_message(self):
  290. with self.get_pack(pack1_sha) as p:
  291. p = self._copy_pack(p)
  292. with p:
  293. keepfile_name = p.keep()
  294. # file should exist
  295. self.assertTrue(os.path.exists(keepfile_name))
  296. with open(keepfile_name, 'r') as f:
  297. buf = f.read()
  298. self.assertEqual('', buf)
  299. def test_keep_message(self):
  300. with self.get_pack(pack1_sha) as p:
  301. p = self._copy_pack(p)
  302. msg = 'some message'
  303. with p:
  304. keepfile_name = p.keep(msg)
  305. # file should exist
  306. self.assertTrue(os.path.exists(keepfile_name))
  307. # and contain the right message, with a linefeed
  308. with open(keepfile_name, 'r') as f:
  309. buf = f.read()
  310. self.assertEqual(msg + '\n', buf)
  311. def test_name(self):
  312. with self.get_pack(pack1_sha) as p:
  313. self.assertEqual(pack1_sha, p.name())
  314. def test_length_mismatch(self):
  315. with self.get_pack_data(pack1_sha) as data:
  316. index = self.get_pack_index(pack1_sha)
  317. Pack.from_objects(data, index).check_length_and_checksum()
  318. data._file.seek(12)
  319. bad_file = BytesIO()
  320. write_pack_header(bad_file, 9999)
  321. bad_file.write(data._file.read())
  322. bad_file = BytesIO(bad_file.getvalue())
  323. bad_data = PackData('', file=bad_file)
  324. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  325. self.assertRaises(AssertionError, lambda: bad_pack.data)
  326. self.assertRaises(AssertionError,
  327. lambda: bad_pack.check_length_and_checksum())
  328. def test_checksum_mismatch(self):
  329. with self.get_pack_data(pack1_sha) as data:
  330. index = self.get_pack_index(pack1_sha)
  331. Pack.from_objects(data, index).check_length_and_checksum()
  332. data._file.seek(0)
  333. bad_file = BytesIO(data._file.read()[:-20] + ('\xff' * 20))
  334. bad_data = PackData('', file=bad_file)
  335. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  336. self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
  337. self.assertRaises(ChecksumMismatch, lambda:
  338. bad_pack.check_length_and_checksum())
  339. def test_iterobjects_2(self):
  340. with self.get_pack(pack1_sha) as p:
  341. objs = dict((o.id, o) for o in p.iterobjects())
  342. self.assertEqual(3, len(objs))
  343. self.assertEqual(sorted(objs), sorted(p.index))
  344. self.assertTrue(isinstance(objs[a_sha], Blob))
  345. self.assertTrue(isinstance(objs[tree_sha], Tree))
  346. self.assertTrue(isinstance(objs[commit_sha], Commit))
  347. class TestThinPack(PackTests):
  348. def setUp(self):
  349. super(TestThinPack, self).setUp()
  350. self.store = MemoryObjectStore()
  351. self.blobs = {}
  352. for blob in ('foo', 'bar', 'foo1234', 'bar2468'):
  353. self.blobs[blob] = make_object(Blob, data=blob)
  354. self.store.add_object(self.blobs['foo'])
  355. self.store.add_object(self.blobs['bar'])
  356. # Build a thin pack. 'foo' is as an external reference, 'bar' an
  357. # internal reference.
  358. self.pack_dir = tempfile.mkdtemp()
  359. self.addCleanup(shutil.rmtree, self.pack_dir)
  360. self.pack_prefix = os.path.join(self.pack_dir, 'pack')
  361. with open(self.pack_prefix + '.pack', 'wb') as f:
  362. build_pack(f, [
  363. (REF_DELTA, (self.blobs['foo'].id, 'foo1234')),
  364. (Blob.type_num, 'bar'),
  365. (REF_DELTA, (self.blobs['bar'].id, 'bar2468'))],
  366. store=self.store)
  367. # Index the new pack.
  368. with self.make_pack(True) as pack:
  369. with PackData(pack._data_path) as data:
  370. data.pack = pack
  371. data.create_index(self.pack_prefix + '.idx')
  372. del self.store[self.blobs['bar'].id]
  373. def make_pack(self, resolve_ext_ref):
  374. return Pack(
  375. self.pack_prefix,
  376. resolve_ext_ref=self.store.get_raw if resolve_ext_ref else None)
  377. def test_get_raw(self):
  378. with self.make_pack(False) as p:
  379. self.assertRaises(
  380. KeyError, p.get_raw, self.blobs['foo1234'].id)
  381. with self.make_pack(True) as p:
  382. self.assertEqual(
  383. (3, 'foo1234'),
  384. p.get_raw(self.blobs['foo1234'].id))
  385. def test_iterobjects(self):
  386. with self.make_pack(False) as p:
  387. self.assertRaises(KeyError, list, p.iterobjects())
  388. with self.make_pack(True) as p:
  389. self.assertEqual(
  390. sorted([self.blobs['foo1234'].id, self.blobs[b'bar'].id,
  391. self.blobs['bar2468'].id]),
  392. sorted(o.id for o in p.iterobjects()))
  393. class WritePackTests(TestCase):
  394. def test_write_pack_header(self):
  395. f = BytesIO()
  396. write_pack_header(f, 42)
  397. self.assertEqual('PACK\x00\x00\x00\x02\x00\x00\x00*',
  398. f.getvalue())
  399. def test_write_pack_object(self):
  400. f = BytesIO()
  401. f.write('header')
  402. offset = f.tell()
  403. crc32 = write_pack_object(f, Blob.type_num, 'blob')
  404. self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xffffffff)
  405. f.write('x') # unpack_object needs extra trailing data.
  406. f.seek(offset)
  407. unpacked, unused = unpack_object(f.read, compute_crc32=True)
  408. self.assertEqual(Blob.type_num, unpacked.pack_type_num)
  409. self.assertEqual(Blob.type_num, unpacked.obj_type_num)
  410. self.assertEqual(['blob'], unpacked.decomp_chunks)
  411. self.assertEqual(crc32, unpacked.crc32)
  412. self.assertEqual('x', unused)
  413. def test_write_pack_object_sha(self):
  414. f = BytesIO()
  415. f.write('header')
  416. offset = f.tell()
  417. sha_a = sha1('foo')
  418. sha_b = sha_a.copy()
  419. write_pack_object(f, Blob.type_num, 'blob', sha=sha_a)
  420. self.assertNotEqual(sha_a.digest(), sha_b.digest())
  421. sha_b.update(f.getvalue()[offset:])
  422. self.assertEqual(sha_a.digest(), sha_b.digest())
  423. pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  424. class BaseTestPackIndexWriting(object):
  425. def assertSucceeds(self, func, *args, **kwargs):
  426. try:
  427. func(*args, **kwargs)
  428. except ChecksumMismatch as e:
  429. self.fail(e)
  430. def index(self, filename, entries, pack_checksum):
  431. raise NotImplementedError(self.index)
  432. def test_empty(self):
  433. idx = self.index('empty.idx', [], pack_checksum)
  434. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  435. self.assertEqual(0, len(idx))
  436. def test_large(self):
  437. entry1_sha = hex_to_sha('4e6388232ec39792661e2e75db8fb117fc869ce6')
  438. entry2_sha = hex_to_sha('e98f071751bd77f59967bfa671cd2caebdccc9a2')
  439. entries = [(entry1_sha, 0xf2972d0830529b87, 24),
  440. (entry2_sha, (~0xf2972d0830529b87)&(2**64-1), 92)]
  441. if not self._supports_large:
  442. self.assertRaises(TypeError, self.index, 'single.idx',
  443. entries, pack_checksum)
  444. return
  445. idx = self.index('single.idx', entries, pack_checksum)
  446. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  447. self.assertEqual(2, len(idx))
  448. actual_entries = list(idx.iterentries())
  449. self.assertEqual(len(entries), len(actual_entries))
  450. for mine, actual in zip(entries, actual_entries):
  451. my_sha, my_offset, my_crc = mine
  452. actual_sha, actual_offset, actual_crc = actual
  453. self.assertEqual(my_sha, actual_sha)
  454. self.assertEqual(my_offset, actual_offset)
  455. if self._has_crc32_checksum:
  456. self.assertEqual(my_crc, actual_crc)
  457. else:
  458. self.assertTrue(actual_crc is None)
  459. def test_single(self):
  460. entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
  461. my_entries = [(entry_sha, 178, 42)]
  462. idx = self.index('single.idx', my_entries, pack_checksum)
  463. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  464. self.assertEqual(1, len(idx))
  465. actual_entries = list(idx.iterentries())
  466. self.assertEqual(len(my_entries), len(actual_entries))
  467. for mine, actual in zip(my_entries, actual_entries):
  468. my_sha, my_offset, my_crc = mine
  469. actual_sha, actual_offset, actual_crc = actual
  470. self.assertEqual(my_sha, actual_sha)
  471. self.assertEqual(my_offset, actual_offset)
  472. if self._has_crc32_checksum:
  473. self.assertEqual(my_crc, actual_crc)
  474. else:
  475. self.assertTrue(actual_crc is None)
  476. class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
  477. def setUp(self):
  478. self.tempdir = tempfile.mkdtemp()
  479. def tearDown(self):
  480. shutil.rmtree(self.tempdir)
  481. def index(self, filename, entries, pack_checksum):
  482. path = os.path.join(self.tempdir, filename)
  483. self.writeIndex(path, entries, pack_checksum)
  484. idx = load_pack_index(path)
  485. self.assertSucceeds(idx.check)
  486. self.assertEqual(idx.version, self._expected_version)
  487. return idx
  488. def writeIndex(self, filename, entries, pack_checksum):
  489. # FIXME: Write to BytesIO instead rather than hitting disk ?
  490. with GitFile(filename, "wb") as f:
  491. self._write_fn(f, entries, pack_checksum)
  492. class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
  493. def setUp(self):
  494. TestCase.setUp(self)
  495. self._has_crc32_checksum = True
  496. self._supports_large = True
  497. def index(self, filename, entries, pack_checksum):
  498. return MemoryPackIndex(entries, pack_checksum)
  499. def tearDown(self):
  500. TestCase.tearDown(self)
  501. class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
  502. def setUp(self):
  503. TestCase.setUp(self)
  504. BaseTestFilePackIndexWriting.setUp(self)
  505. self._has_crc32_checksum = False
  506. self._expected_version = 1
  507. self._supports_large = False
  508. self._write_fn = write_pack_index_v1
  509. def tearDown(self):
  510. TestCase.tearDown(self)
  511. BaseTestFilePackIndexWriting.tearDown(self)
  512. class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
  513. def setUp(self):
  514. TestCase.setUp(self)
  515. BaseTestFilePackIndexWriting.setUp(self)
  516. self._has_crc32_checksum = True
  517. self._supports_large = True
  518. self._expected_version = 2
  519. self._write_fn = write_pack_index_v2
  520. def tearDown(self):
  521. TestCase.tearDown(self)
  522. BaseTestFilePackIndexWriting.tearDown(self)
  523. class ReadZlibTests(TestCase):
  524. decomp = (
  525. b'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
  526. b'parent None\n'
  527. b'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  528. b'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  529. b'\n'
  530. b"Provide replacement for mmap()'s offset argument.")
  531. comp = zlib.compress(decomp)
  532. extra = 'nextobject'
  533. def setUp(self):
  534. super(ReadZlibTests, self).setUp()
  535. self.read = BytesIO(self.comp + self.extra).read
  536. self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0)
  537. def test_decompress_size(self):
  538. good_decomp_len = len(self.decomp)
  539. self.unpacked.decomp_len = -1
  540. self.assertRaises(ValueError, read_zlib_chunks, self.read,
  541. self.unpacked)
  542. self.unpacked.decomp_len = good_decomp_len - 1
  543. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  544. self.unpacked)
  545. self.unpacked.decomp_len = good_decomp_len + 1
  546. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  547. self.unpacked)
  548. def test_decompress_truncated(self):
  549. read = BytesIO(self.comp[:10]).read
  550. self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
  551. read = BytesIO(self.comp).read
  552. self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
  553. def test_decompress_empty(self):
  554. unpacked = UnpackedObject(Tree.type_num, None, 0, None)
  555. comp = zlib.compress('')
  556. read = BytesIO(comp + self.extra).read
  557. unused = read_zlib_chunks(read, unpacked)
  558. self.assertEqual('', ''.join(unpacked.decomp_chunks))
  559. self.assertNotEqual('', unused)
  560. self.assertEqual(self.extra, unused + read())
  561. def test_decompress_no_crc32(self):
  562. self.unpacked.crc32 = None
  563. read_zlib_chunks(self.read, self.unpacked)
  564. self.assertEqual(None, self.unpacked.crc32)
  565. def _do_decompress_test(self, buffer_size, **kwargs):
  566. unused = read_zlib_chunks(self.read, self.unpacked,
  567. buffer_size=buffer_size, **kwargs)
  568. self.assertEqual(self.decomp, ''.join(self.unpacked.decomp_chunks))
  569. self.assertEqual(zlib.crc32(self.comp), self.unpacked.crc32)
  570. self.assertNotEqual('', unused)
  571. self.assertEqual(self.extra, unused + self.read())
  572. def test_simple_decompress(self):
  573. self._do_decompress_test(4096)
  574. self.assertEqual(None, self.unpacked.comp_chunks)
  575. # These buffer sizes are not intended to be realistic, but rather simulate
  576. # larger buffer sizes that may end at various places.
  577. def test_decompress_buffer_size_1(self):
  578. self._do_decompress_test(1)
  579. def test_decompress_buffer_size_2(self):
  580. self._do_decompress_test(2)
  581. def test_decompress_buffer_size_3(self):
  582. self._do_decompress_test(3)
  583. def test_decompress_buffer_size_4(self):
  584. self._do_decompress_test(4)
  585. def test_decompress_include_comp(self):
  586. self._do_decompress_test(4096, include_comp=True)
  587. self.assertEqual(self.comp, ''.join(self.unpacked.comp_chunks))
  588. class DeltifyTests(TestCase):
  589. def test_empty(self):
  590. self.assertEqual([], list(deltify_pack_objects([])))
  591. def test_single(self):
  592. b = Blob.from_string("foo")
  593. self.assertEqual(
  594. [(b.type_num, b.sha().digest(), None, b.as_raw_string())],
  595. list(deltify_pack_objects([(b, "")])))
  596. def test_simple_delta(self):
  597. b1 = Blob.from_string("a" * 101)
  598. b2 = Blob.from_string("a" * 100)
  599. delta = create_delta(b1.as_raw_string(), b2.as_raw_string())
  600. self.assertEqual([
  601. (b1.type_num, b1.sha().digest(), None, b1.as_raw_string()),
  602. (b2.type_num, b2.sha().digest(), b1.sha().digest(), delta)
  603. ],
  604. list(deltify_pack_objects([(b1, ""), (b2, "")])))
  605. class TestPackStreamReader(TestCase):
  606. def test_read_objects_emtpy(self):
  607. f = BytesIO()
  608. build_pack(f, [])
  609. reader = PackStreamReader(f.read)
  610. self.assertEqual(0, len(list(reader.read_objects())))
  611. def test_read_objects(self):
  612. f = BytesIO()
  613. entries = build_pack(f, [
  614. (Blob.type_num, 'blob'),
  615. (OFS_DELTA, (0, 'blob1')),
  616. ])
  617. reader = PackStreamReader(f.read)
  618. objects = list(reader.read_objects(compute_crc32=True))
  619. self.assertEqual(2, len(objects))
  620. unpacked_blob, unpacked_delta = objects
  621. self.assertEqual(entries[0][0], unpacked_blob.offset)
  622. self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
  623. self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
  624. self.assertEqual(None, unpacked_blob.delta_base)
  625. self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
  626. self.assertEqual(entries[0][4], unpacked_blob.crc32)
  627. self.assertEqual(entries[1][0], unpacked_delta.offset)
  628. self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
  629. self.assertEqual(None, unpacked_delta.obj_type_num)
  630. self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
  631. unpacked_delta.delta_base)
  632. delta = create_delta('blob', 'blob1')
  633. self.assertEqual(delta, ''.join(unpacked_delta.decomp_chunks))
  634. self.assertEqual(entries[1][4], unpacked_delta.crc32)
  635. def test_read_objects_buffered(self):
  636. f = BytesIO()
  637. build_pack(f, [
  638. (Blob.type_num, 'blob'),
  639. (OFS_DELTA, (0, 'blob1')),
  640. ])
  641. reader = PackStreamReader(f.read, zlib_bufsize=4)
  642. self.assertEqual(2, len(list(reader.read_objects())))
  643. def test_read_objects_empty(self):
  644. reader = PackStreamReader(BytesIO().read)
  645. self.assertEqual([], list(reader.read_objects()))
  646. class TestPackIterator(DeltaChainIterator):
  647. _compute_crc32 = True
  648. def __init__(self, *args, **kwargs):
  649. super(TestPackIterator, self).__init__(*args, **kwargs)
  650. self._unpacked_offsets = set()
  651. def _result(self, unpacked):
  652. """Return entries in the same format as build_pack."""
  653. return (unpacked.offset, unpacked.obj_type_num,
  654. ''.join(unpacked.obj_chunks), unpacked.sha(), unpacked.crc32)
  655. def _resolve_object(self, offset, pack_type_num, base_chunks):
  656. assert offset not in self._unpacked_offsets, (
  657. 'Attempted to re-inflate offset %i' % offset)
  658. self._unpacked_offsets.add(offset)
  659. return super(TestPackIterator, self)._resolve_object(
  660. offset, pack_type_num, base_chunks)
  class DeltaChainIteratorTests(TestCase):
      """Tests for walking delta chains in a pack with TestPackIterator.

      Each test writes a pack with build_pack and then checks the order in
      which _walk_all_chains() yields the entries, expressed as indexes into
      the list returned by build_pack.
      """

      def setUp(self):
          super(DeltaChainIteratorTests, self).setUp()
          self.store = MemoryObjectStore()
          # Hex SHAs that get_raw_no_repeat has already looked up.
          self.fetched = set()

      def store_blobs(self, blobs_data):
          """Create a Blob for each item of blobs_data and add it to the store.

          :param blobs_data: Iterable of blob contents.
          :return: List of the created blobs, in input order.
          """
          blobs = []
          for data in blobs_data:
              blob = make_object(Blob, data=data)
              blobs.append(blob)
              self.store.add_object(blob)
          return blobs

      def get_raw_no_repeat(self, bin_sha):
          """Wrapper around store.get_raw that doesn't allow repeat lookups."""
          hex_sha = sha_to_hex(bin_sha)
          self.assertFalse(hex_sha in self.fetched,
                           'Attempted to re-fetch object %s' % hex_sha)
          self.fetched.add(hex_sha)
          return self.store.get_raw(hex_sha)

      def make_pack_iter(self, f, thin=None):
          """Return a TestPackIterator over the pack data in f.

          :param f: File-like object holding the pack.
          :param thin: Whether external refs may be resolved through the
              store; when None, this is enabled iff the store is non-empty.
          """
          if thin is None:
              thin = bool(list(self.store))
          # A conditional expression instead of "thin and X or None", which
          # silently depends on X being truthy.
          resolve_ext_ref = self.get_raw_no_repeat if thin else None
          data = PackData('test.pack', file=f)
          return TestPackIterator.for_pack_data(
              data, resolve_ext_ref=resolve_ext_ref)

      def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
          """Assert pack_iter yields entries in the order of expected_indexes."""
          expected = [entries[i] for i in expected_indexes]
          self.assertEqual(expected, list(pack_iter._walk_all_chains()))

      def test_no_deltas(self):
          f = BytesIO()
          entries = build_pack(f, [
              (Commit.type_num, 'commit'),
              (Blob.type_num, 'blob'),
              (Tree.type_num, 'tree'),
          ])
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ofs_deltas(self):
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, 'blob'),
              (OFS_DELTA, (0, 'blob1')),
              (OFS_DELTA, (0, 'blob2')),
          ])
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ofs_deltas_chain(self):
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, 'blob'),
              (OFS_DELTA, (0, 'blob1')),
              (OFS_DELTA, (1, 'blob2')),
          ])
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ref_deltas(self):
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (1, 'blob1')),
              (Blob.type_num, 'blob'),
              (REF_DELTA, (1, 'blob2')),
          ])
          self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))

      def test_ref_deltas_chain(self):
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (2, 'blob1')),
              (Blob.type_num, 'blob'),
              (REF_DELTA, (1, 'blob2')),
          ])
          self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))

      def test_ofs_and_ref_deltas(self):
          # Deltas pending on this offset are popped before deltas depending on
          # this ref.
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (1, 'blob1')),
              (Blob.type_num, 'blob'),
              (OFS_DELTA, (1, 'blob2')),
          ])
          self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))

      def test_mixed_chain(self):
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, 'blob'),
              (REF_DELTA, (2, 'blob2')),
              (OFS_DELTA, (0, 'blob1')),
              (OFS_DELTA, (1, 'blob3')),
              (OFS_DELTA, (0, 'bob')),
          ])
          self.assertEntriesMatch([0, 2, 1, 3, 4], entries,
                                  self.make_pack_iter(f))

      def test_long_chain(self):
          # Presumably the first element of each delta spec is the index of
          # its base, so every delta here builds on the previous entry.
          n = 100
          objects_spec = [(Blob.type_num, 'blob')]
          for i in range(n):
              objects_spec.append((OFS_DELTA, (i, 'blob%i' % i)))
          f = BytesIO()
          entries = build_pack(f, objects_spec)
          self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))

      def test_branchy_chain(self):
          # Presumably the first element of each delta spec is the index of
          # its base, so every delta here builds directly on entry 0.
          n = 100
          objects_spec = [(Blob.type_num, 'blob')]
          for i in range(n):
              objects_spec.append((OFS_DELTA, (0, 'blob%i' % i)))
          f = BytesIO()
          entries = build_pack(f, objects_spec)
          self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))

      def test_ext_ref(self):
          blob, = self.store_blobs(['blob'])
          f = BytesIO()
          entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
                               store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_chain(self):
          blob, = self.store_blobs(['blob'])
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (1, 'blob2')),
              (REF_DELTA, (blob.id, 'blob1')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([1, 0], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_chain_degenerate(self):
          # Test a degenerate case where the sender is sending a REF_DELTA
          # object that expands to an object already in the repository.
          blob, = self.store_blobs(['blob'])
          blob2, = self.store_blobs(['blob2'])
          assert blob.id < blob2.id

          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (blob.id, 'blob2')),
              (REF_DELTA, (0, 'blob3')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_multiple_times(self):
          blob, = self.store_blobs(['blob'])
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (blob.id, 'blob1')),
              (REF_DELTA, (blob.id, 'blob2')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_multiple_ext_refs(self):
          b1, b2 = self.store_blobs(['foo', 'bar'])
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (b1.id, 'foo1')),
              (REF_DELTA, (b2.id, 'bar2')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)],
                           pack_iter.ext_refs())

      def test_bad_ext_ref_non_thin_pack(self):
          blob, = self.store_blobs(['blob'])
          f = BytesIO()
          build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
                     store=self.store)
          pack_iter = self.make_pack_iter(f, thin=False)
          # Only the walk itself is expected to raise; keep it alone in the
          # try body and report a missing exception from the else clause.
          try:
              list(pack_iter._walk_all_chains())
          except KeyError as e:
              self.assertEqual(([blob.id],), e.args)
          else:
              self.fail('KeyError not raised')

      def test_bad_ext_ref_thin_pack(self):
          b1, b2, b3 = self.store_blobs(['foo', 'bar', 'baz'])
          f = BytesIO()
          build_pack(f, [
              (REF_DELTA, (1, 'foo99')),
              (REF_DELTA, (b1.id, 'foo1')),
              (REF_DELTA, (b2.id, 'bar2')),
              (REF_DELTA, (b3.id, 'baz3')),
          ], store=self.store)
          # Remove two of the bases so their deltas can no longer be resolved.
          del self.store[b2.id]
          del self.store[b3.id]
          pack_iter = self.make_pack_iter(f)
          try:
              list(pack_iter._walk_all_chains())
          except KeyError as e:
              self.assertEqual((sorted([b2.id, b3.id]),), (sorted(e.args[0]),))
          else:
              self.fail('KeyError not raised')
  class DeltaEncodeSizeTests(TestCase):
      """Checks the byte strings returned by _delta_encode_size."""

      def test_basic(self):
          # assertEqual rather than the deprecated assertEquals alias, which
          # newer unittest versions no longer provide.
          self.assertEqual('\x00', _delta_encode_size(0))
          self.assertEqual('\x01', _delta_encode_size(1))
          self.assertEqual('\xfa\x01', _delta_encode_size(250))
          self.assertEqual('\xe8\x07', _delta_encode_size(1000))
          self.assertEqual('\xa0\x8d\x06', _delta_encode_size(100000))
  class EncodeCopyOperationTests(TestCase):
      """Checks the byte strings returned by _encode_copy_operation."""

      def test_basic(self):
          # assertEqual rather than the deprecated assertEquals alias, which
          # newer unittest versions no longer provide.
          self.assertEqual('\x80', _encode_copy_operation(0, 0))
          self.assertEqual('\x91\x01\x0a', _encode_copy_operation(1, 10))
          self.assertEqual('\xb1\x64\xe8\x03',
                           _encode_copy_operation(100, 1000))
          self.assertEqual('\x93\xe8\x03\x01',
                           _encode_copy_operation(1000, 1))