test_pack.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010
  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License, or (at your option) any later version of the license.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Tests for Dulwich packs."""
  20. from cStringIO import StringIO
  21. from hashlib import sha1
  22. import os
  23. import shutil
  24. import tempfile
  25. import zlib
  26. from dulwich.errors import (
  27. ChecksumMismatch,
  28. )
  29. from dulwich.file import (
  30. GitFile,
  31. )
  32. from dulwich.object_store import (
  33. MemoryObjectStore,
  34. )
  35. from dulwich.objects import (
  36. Blob,
  37. hex_to_sha,
  38. sha_to_hex,
  39. Commit,
  40. Tree,
  41. Blob,
  42. )
  43. from dulwich.pack import (
  44. OFS_DELTA,
  45. REF_DELTA,
  46. DELTA_TYPES,
  47. MemoryPackIndex,
  48. Pack,
  49. PackData,
  50. apply_delta,
  51. create_delta,
  52. deltify_pack_objects,
  53. load_pack_index,
  54. UnpackedObject,
  55. read_zlib_chunks,
  56. write_pack_header,
  57. write_pack_index_v1,
  58. write_pack_index_v2,
  59. SHA1Writer,
  60. write_pack_object,
  61. write_pack,
  62. unpack_object,
  63. compute_file_sha,
  64. PackStreamReader,
  65. DeltaChainIterator,
  66. )
  67. from dulwich.tests import (
  68. TestCase,
  69. )
  70. from dulwich.tests.utils import (
  71. make_object,
  72. build_pack,
  73. )
  # Hex SHA-1 naming the fixture pack; the PackTests loaders build the
  # 'pack-<sha>.idx' / 'pack-<sha>.pack' file names from it.
  pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'

  # Hex ids of the three objects the tests look up in that pack: a blob,
  # a tree and a commit respectively.
  a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  tree_sha = 'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
  commit_sha = 'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  class PackTests(TestCase):
      """Shared fixture for the pack tests.

      Gives every test a scratch directory (``self.tempdir``) that is removed
      on cleanup, and loaders for the fixture packs stored under ``datadir``.
      """

      # Directory holding the fixture packs, resolved relative to this module.
      datadir = os.path.abspath(
          os.path.join(os.path.dirname(__file__), 'data/packs'))

      def setUp(self):
          super(PackTests, self).setUp()
          scratch_dir = tempfile.mkdtemp()
          self.tempdir = scratch_dir
          self.addCleanup(shutil.rmtree, scratch_dir)

      def get_pack_index(self, sha):
          """Returns a PackIndex from the datadir with the given sha"""
          index_path = os.path.join(self.datadir, 'pack-%s.idx' % sha)
          return load_pack_index(index_path)

      def get_pack_data(self, sha):
          """Returns a PackData object from the datadir with the given sha"""
          data_path = os.path.join(self.datadir, 'pack-%s.pack' % sha)
          return PackData(data_path)

      def get_pack(self, sha):
          """Returns a Pack built from the datadir basename for the given sha"""
          basename = os.path.join(self.datadir, 'pack-%s' % sha)
          return Pack(basename)

      def assertSucceeds(self, func, *args, **kwargs):
          """Call ``func`` and fail the test if it raises ChecksumMismatch."""
          try:
              func(*args, **kwargs)
          except ChecksumMismatch as mismatch:
              self.fail(mismatch)
  class PackIndexTests(PackTests):
      """Class that tests the index of packfiles"""

      def test_object_index(self):
          """Tests that the correct object offset is returned from the index."""
          idx = self.get_pack_index(pack1_sha)
          # The pack's own name is not an object stored in it.
          self.assertRaises(KeyError, idx.object_index, pack1_sha)
          expected_offsets = (
              (a_sha, 178),
              (tree_sha, 138),
              (commit_sha, 12),
          )
          for object_sha, offset in expected_offsets:
              self.assertEqual(idx.object_index(object_sha), offset)

      def test_index_len(self):
          idx = self.get_pack_index(pack1_sha)
          self.assertEqual(3, len(idx))

      def test_get_stored_checksum(self):
          idx = self.get_pack_index(pack1_sha)
          stored_hex = sha_to_hex(idx.get_stored_checksum())
          self.assertEqual('f2848e2ad16f329ae1c92e3b95e91888daa5bd01',
                           stored_hex)
          pack_hex = sha_to_hex(idx.get_pack_checksum())
          self.assertEqual('721980e866af9a5f93ad674144e1459b8ba3e7b7',
                           pack_hex)

      def test_index_check(self):
          idx = self.get_pack_index(pack1_sha)
          self.assertSucceeds(idx.check)

      def test_iterentries(self):
          idx = self.get_pack_index(pack1_sha)
          expected = [
              ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
              ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
              ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
          ]
          # Convert the binary shas to hex so they compare with the literals.
          actual = [(sha_to_hex(sha), offset, crc)
                    for sha, offset, crc in idx.iterentries()]
          self.assertEqual(expected, actual)

      def test_iter(self):
          idx = self.get_pack_index(pack1_sha)
          expected_shas = set([tree_sha, commit_sha, a_sha])
          self.assertEqual(expected_shas, set(idx))
  class TestPackDeltas(TestCase):
      """Round-trip tests: a delta created from base to target must rebuild target."""

      test_string1 = 'The answer was flailing in the wind'
      test_string2 = 'The answer was falling down the pipe'
      test_string3 = 'zzzzz'

      test_string_empty = ''
      test_string_big = 'Z' * 8192
      test_string_huge = 'Z' * 100000

      def _test_roundtrip(self, base, target):
          """Create a delta from ``base`` to ``target``, apply it, compare."""
          delta = create_delta(base, target)
          rebuilt = ''.join(apply_delta(base, delta))
          self.assertEqual(target, rebuilt)

      def test_nochange(self):
          text = self.test_string1
          self._test_roundtrip(text, text)

      def test_change(self):
          self._test_roundtrip(self.test_string1, self.test_string2)

      def test_rewrite(self):
          self._test_roundtrip(self.test_string1, self.test_string3)

      def test_overflow(self):
          self._test_roundtrip(self.test_string_empty, self.test_string_big)

      def test_overflow_64k(self):
          self.skipTest("big strings don't work yet")
          # Not reached while the skip above is in place; kept so the test is
          # ready once the skip is removed.
          self._test_roundtrip(self.test_string_huge, self.test_string_huge)
  class TestPackData(PackTests):
      """Tests getting the data from the packfile."""

      def test_create_pack(self):
          # Smoke test: opening the fixture pack data must not raise.
          self.get_pack_data(pack1_sha)

      def test_from_file(self):
          path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
          # A pack is binary data, so open it in 'rb'; the with-block makes
          # sure the handle is closed rather than leaked.
          with open(path, 'rb') as f:
              PackData.from_file(f, os.path.getsize(path))

      def test_pack_len(self):
          p = self.get_pack_data(pack1_sha)
          self.assertEqual(3, len(p))

      def test_index_check(self):
          p = self.get_pack_data(pack1_sha)
          self.assertSucceeds(p.check)

      def test_iterobjects(self):
          """iterobjects() yields (offset, type_num, chunks, crc32) per object."""
          p = self.get_pack_data(pack1_sha)
          commit_data = ('tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
                         'author James Westby <jw+debian@jameswestby.net> '
                         '1174945067 +0100\n'
                         'committer James Westby <jw+debian@jameswestby.net> '
                         '1174945067 +0100\n'
                         '\n'
                         'Test commit\n')
          blob_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
          tree_data = '100644 a\0%s' % hex_to_sha(blob_sha)
          # Join the chunk lists so each object compares as a single string.
          actual = [(offset, type_num, ''.join(chunks), crc32)
                    for offset, type_num, chunks, crc32 in p.iterobjects()]
          self.assertEqual([
              (12, 1, commit_data, 3775879613),
              (138, 2, tree_data, 912998690),
              (178, 3, 'test 1\n', 1373561701)
          ], actual)

      def test_iterentries(self):
          p = self.get_pack_data(pack1_sha)
          entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
          self.assertEqual(set([
              ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701),
              ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690),
              ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613),
          ]), entries)

      def test_create_index_v1(self):
          """An index written by create_index_v1 equals the fixture index."""
          p = self.get_pack_data(pack1_sha)
          filename = os.path.join(self.tempdir, 'v1test.idx')
          p.create_index_v1(filename)
          idx1 = load_pack_index(filename)
          idx2 = self.get_pack_index(pack1_sha)
          self.assertEqual(idx1, idx2)

      def test_create_index_v2(self):
          """An index written by create_index_v2 equals the fixture index."""
          p = self.get_pack_data(pack1_sha)
          filename = os.path.join(self.tempdir, 'v2test.idx')
          p.create_index_v2(filename)
          idx1 = load_pack_index(filename)
          idx2 = self.get_pack_index(pack1_sha)
          self.assertEqual(idx1, idx2)

      def test_compute_file_sha(self):
          """compute_file_sha honours buffer_size, start_ofs and end_ofs."""
          f = StringIO('abcd1234wxyz')
          self.assertEqual(sha1('abcd1234wxyz').hexdigest(),
                           compute_file_sha(f).hexdigest())
          self.assertEqual(sha1('abcd1234wxyz').hexdigest(),
                           compute_file_sha(f, buffer_size=5).hexdigest())
          self.assertEqual(sha1('abcd1234').hexdigest(),
                           compute_file_sha(f, end_ofs=-4).hexdigest())
          self.assertEqual(sha1('1234wxyz').hexdigest(),
                           compute_file_sha(f, start_ofs=4).hexdigest())
          self.assertEqual(
              sha1('1234').hexdigest(),
              compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest())
  class TestPack(PackTests):
      """Tests for Pack objects built from the fixture pack."""

      def test_len(self):
          p = self.get_pack(pack1_sha)
          self.assertEqual(3, len(p))

      def test_contains(self):
          p = self.get_pack(pack1_sha)
          self.assertTrue(tree_sha in p)

      def test_get(self):
          p = self.get_pack(pack1_sha)
          self.assertEqual(type(p[tree_sha]), Tree)

      def test_iter(self):
          p = self.get_pack(pack1_sha)
          self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p))

      def test_iterobjects(self):
          p = self.get_pack(pack1_sha)
          expected = set([p[s] for s in [commit_sha, tree_sha, a_sha]])
          self.assertEqual(expected, set(list(p.iterobjects())))

      def test_pack_tuples(self):
          p = self.get_pack(pack1_sha)
          tuples = p.pack_tuples()
          expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
          self.assertEqual(expected, set(list(tuples)))
          # Compared a second time on purpose: presumably checks that the
          # result can be iterated more than once.
          self.assertEqual(expected, set(list(tuples)))
          self.assertEqual(3, len(tuples))

      def test_get_object_at(self):
          """Tests random access for non-delta objects"""
          p = self.get_pack(pack1_sha)
          obj = p[a_sha]
          self.assertEqual(obj.type_name, 'blob')
          self.assertEqual(obj.sha().hexdigest(), a_sha)
          obj = p[tree_sha]
          self.assertEqual(obj.type_name, 'tree')
          self.assertEqual(obj.sha().hexdigest(), tree_sha)
          obj = p[commit_sha]
          self.assertEqual(obj.type_name, 'commit')
          self.assertEqual(obj.sha().hexdigest(), commit_sha)

      def test_copy(self):
          """A pack rewritten with write_pack compares equal to its source."""
          origpack = self.get_pack(pack1_sha)

          try:
              self.assertSucceeds(origpack.index.check)
              basename = os.path.join(self.tempdir, 'Elch')
              write_pack(basename, origpack.pack_tuples())
              newpack = Pack(basename)

              try:
                  self.assertEqual(origpack, newpack)
                  self.assertSucceeds(newpack.index.check)
                  self.assertEqual(origpack.name(), newpack.name())
                  self.assertEqual(origpack.index.get_pack_checksum(),
                                   newpack.index.get_pack_checksum())

                  # The index checksums are only required to match when both
                  # indexes use the same version.
                  wrong_version = (
                      origpack.index.version != newpack.index.version)
                  orig_checksum = origpack.index.get_stored_checksum()
                  new_checksum = newpack.index.get_stored_checksum()
                  self.assertTrue(
                      wrong_version or orig_checksum == new_checksum)
              finally:
                  newpack.close()
          finally:
              origpack.close()

      def test_commit_obj(self):
          p = self.get_pack(pack1_sha)
          commit = p[commit_sha]
          self.assertEqual('James Westby <jw+debian@jameswestby.net>',
                           commit.author)
          self.assertEqual([], commit.parents)

      def _copy_pack(self, origpack):
          """Write ``origpack``'s objects to a new pack in tempdir; return it."""
          basename = os.path.join(self.tempdir, 'somepack')
          write_pack(basename, origpack.pack_tuples())
          return Pack(basename)

      def test_keep_no_message(self):
          p = self.get_pack(pack1_sha)
          p = self._copy_pack(p)

          keepfile_name = p.keep()
          # file should exist
          self.assertTrue(os.path.exists(keepfile_name))

          with open(keepfile_name, 'r') as f:
              buf = f.read()
          self.assertEqual('', buf)

      def test_keep_message(self):
          p = self.get_pack(pack1_sha)
          p = self._copy_pack(p)

          msg = 'some message'
          keepfile_name = p.keep(msg)

          # file should exist
          self.assertTrue(os.path.exists(keepfile_name))

          # and contain the right message, with a linefeed
          with open(keepfile_name, 'r') as f:
              buf = f.read()
          self.assertEqual(msg + '\n', buf)

      def test_name(self):
          p = self.get_pack(pack1_sha)
          self.assertEqual(pack1_sha, p.name())

      def test_length_mismatch(self):
          """A header announcing the wrong object count is rejected."""
          data = self.get_pack_data(pack1_sha)
          index = self.get_pack_index(pack1_sha)
          Pack.from_objects(data, index).check_length_and_checksum()

          # Skip the first 12 bytes of the real file and put a header
          # announcing 9999 objects in front of the rest.
          data._file.seek(12)
          bad_file = StringIO()
          write_pack_header(bad_file, 9999)
          bad_file.write(data._file.read())
          bad_file = StringIO(bad_file.getvalue())
          bad_data = PackData('', file=bad_file)
          bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
          self.assertRaises(AssertionError, lambda: bad_pack.data)
          self.assertRaises(AssertionError,
                            lambda: bad_pack.check_length_and_checksum())

      def test_checksum_mismatch(self):
          """A pack whose final 20 bytes were overwritten is rejected."""
          data = self.get_pack_data(pack1_sha)
          index = self.get_pack_index(pack1_sha)
          Pack.from_objects(data, index).check_length_and_checksum()

          data._file.seek(0)
          # Replace the last 20 bytes of the pack with 0xff bytes.
          bad_file = StringIO(data._file.read()[:-20] + ('\xff' * 20))
          bad_data = PackData('', file=bad_file)
          bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
          self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
          self.assertRaises(ChecksumMismatch, lambda:
                            bad_pack.check_length_and_checksum())

      def test_iterobjects_2(self):
          """iterobjects() yields one object of the right class per index entry.

          Used to be a second ``test_iterobjects`` definition, which replaced
          the first one in the class body so that the first never ran.
          """
          p = self.get_pack(pack1_sha)
          objs = dict((o.id, o) for o in p.iterobjects())
          self.assertEqual(3, len(objs))
          self.assertEqual(sorted(objs), sorted(p.index))
          self.assertTrue(isinstance(objs[a_sha], Blob))
          self.assertTrue(isinstance(objs[tree_sha], Tree))
          self.assertTrue(isinstance(objs[commit_sha], Commit))
  class TestThinPack(PackTests):
      """Tests for a pack whose deltas refer to an object outside the pack."""

      def setUp(self):
          super(TestThinPack, self).setUp()
          self.store = MemoryObjectStore()
          self.blobs = dict(
              (content, make_object(Blob, data=content))
              for content in ('foo', 'bar', 'foo1234', 'bar2468'))
          for base in ('foo', 'bar'):
              self.store.add_object(self.blobs[base])

          # Build a thin pack. The 'foo1234' delta refers to 'foo', which is
          # only in the store (an external reference); the 'bar2468' delta
          # refers to 'bar', which is also written into the pack (an internal
          # reference).
          self.pack_dir = tempfile.mkdtemp()
          self.addCleanup(shutil.rmtree, self.pack_dir)
          self.pack_prefix = os.path.join(self.pack_dir, 'pack')

          pack_entries = [
              (REF_DELTA, (self.blobs['foo'].id, 'foo1234')),
              (Blob.type_num, 'bar'),
              (REF_DELTA, (self.blobs['bar'].id, 'bar2468'))]
          with open(self.pack_prefix + '.pack', 'wb') as pack_file:
              build_pack(pack_file, pack_entries, store=self.store)

          # Index the new pack.
          resolving_pack = self.make_pack(True)
          pack_data = PackData(resolving_pack._data_path)
          pack_data.pack = resolving_pack
          pack_data.create_index(self.pack_prefix + '.idx')

          # From here on 'bar' is available from the pack only.
          del self.store[self.blobs['bar'].id]

      def make_pack(self, resolve_ext_ref):
          """Open the test pack, optionally resolving external refs via the store."""
          if resolve_ext_ref:
              resolver = self.store.get_raw
          else:
              resolver = None
          return Pack(self.pack_prefix, resolve_ext_ref=resolver)

      def test_get_raw(self):
          wanted = self.blobs['foo1234'].id
          # Without a resolver the external base of 'foo1234' cannot be found.
          self.assertRaises(KeyError, self.make_pack(False).get_raw, wanted)
          self.assertEqual((3, 'foo1234'), self.make_pack(True).get_raw(wanted))

      def test_iterobjects(self):
          self.assertRaises(KeyError, list, self.make_pack(False).iterobjects())
          expected_ids = sorted([self.blobs['foo1234'].id,
                                 self.blobs['bar'].id,
                                 self.blobs['bar2468'].id])
          found_ids = sorted(o.id for o in self.make_pack(True).iterobjects())
          self.assertEqual(expected_ids, found_ids)
  class WritePackTests(TestCase):
      """Tests for the low-level pack writing helpers."""

      def test_write_pack_header(self):
          f = StringIO()
          write_pack_header(f, 42)
          # 42 is 0x2a, i.e. '*' as the last byte of the header.
          self.assertEqual('PACK\x00\x00\x00\x02\x00\x00\x00*',
                           f.getvalue())

      def test_write_pack_object(self):
          """An object written after other data can be read back from its offset."""
          f = StringIO()
          f.write('header')
          offset = f.tell()
          crc32 = write_pack_object(f, Blob.type_num, 'blob')
          # The CRC covers only what write_pack_object wrote, not 'header'.
          self.assertEqual(crc32,
                           zlib.crc32(f.getvalue()[offset:]) & 0xffffffff)

          f.write('x')  # unpack_object needs extra trailing data.
          f.seek(offset)
          unpacked, unused = unpack_object(f.read, compute_crc32=True)
          self.assertEqual(Blob.type_num, unpacked.pack_type_num)
          self.assertEqual(Blob.type_num, unpacked.obj_type_num)
          self.assertEqual(['blob'], unpacked.decomp_chunks)
          self.assertEqual(crc32, unpacked.crc32)
          self.assertEqual('x', unused)

      def test_write_pack_object_sha(self):
          """The ``sha`` argument is updated with the bytes that were written."""
          f = StringIO()
          f.write('header')
          offset = f.tell()
          sha_a = sha1('foo')
          sha_b = sha_a.copy()
          write_pack_object(f, Blob.type_num, 'blob', sha=sha_a)
          self.assertNotEqual(sha_a.digest(), sha_b.digest())
          # Feeding the same bytes to the copy must give the same digest.
          sha_b.update(f.getvalue()[offset:])
          self.assertEqual(sha_a.digest(), sha_b.digest())
  # Binary pack checksum handed to the index writers under test.
  pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  class BaseTestPackIndexWriting(object):
      """Mixin with index-writing tests shared by all index implementations.

      Concrete classes provide ``index()`` and set ``_has_crc32_checksum``
      and ``_supports_large``.
      """

      def assertSucceeds(self, func, *args, **kwargs):
          """Call ``func`` and fail the test if it raises ChecksumMismatch."""
          try:
              func(*args, **kwargs)
          except ChecksumMismatch as mismatch:
              self.fail(mismatch)

      def index(self, filename, entries, pack_checksum):
          """Build an index from ``entries``; implemented by concrete classes."""
          raise NotImplementedError(self.index)

      def _assert_entries_match(self, expected_entries, idx):
          """Compare the entries read back from ``idx`` with ``expected_entries``."""
          actual_entries = list(idx.iterentries())
          self.assertEqual(len(expected_entries), len(actual_entries))
          for expected, actual in zip(expected_entries, actual_entries):
              my_sha, my_offset, my_crc = expected
              actual_sha, actual_offset, actual_crc = actual
              self.assertEqual(my_sha, actual_sha)
              self.assertEqual(my_offset, actual_offset)
              if self._has_crc32_checksum:
                  self.assertEqual(my_crc, actual_crc)
              else:
                  self.assertTrue(actual_crc is None)

      def test_empty(self):
          idx = self.index('empty.idx', [], pack_checksum)
          self.assertEqual(idx.get_pack_checksum(), pack_checksum)
          self.assertEqual(0, len(idx))

      def test_large(self):
          """Offsets that need 64 bits are stored, or rejected with TypeError."""
          big_offset = 0xf2972d0830529b87
          entry1_sha = hex_to_sha('4e6388232ec39792661e2e75db8fb117fc869ce6')
          entry2_sha = hex_to_sha('e98f071751bd77f59967bfa671cd2caebdccc9a2')
          entries = [(entry1_sha, big_offset, 24),
                     (entry2_sha, (~big_offset) & (2 ** 64 - 1), 92)]
          if not self._supports_large:
              self.assertRaises(TypeError, self.index, 'single.idx',
                                entries, pack_checksum)
              return
          idx = self.index('single.idx', entries, pack_checksum)
          self.assertEqual(idx.get_pack_checksum(), pack_checksum)
          self.assertEqual(2, len(idx))
          self._assert_entries_match(entries, idx)

      def test_single(self):
          entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
          my_entries = [(entry_sha, 178, 42)]
          idx = self.index('single.idx', my_entries, pack_checksum)
          self.assertEqual(idx.get_pack_checksum(), pack_checksum)
          self.assertEqual(1, len(idx))
          self._assert_entries_match(my_entries, idx)
  class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
      """Index-writing tests for writers that produce an index file on disk.

      Concrete classes set ``_write_fn`` and ``_expected_version``.
      """

      def setUp(self):
          self.tempdir = tempfile.mkdtemp()

      def tearDown(self):
          shutil.rmtree(self.tempdir)

      def index(self, filename, entries, pack_checksum):
          """Write the entries to ``filename`` in tempdir, then load and check it."""
          index_path = os.path.join(self.tempdir, filename)
          self.writeIndex(index_path, entries, pack_checksum)
          loaded = load_pack_index(index_path)
          self.assertSucceeds(loaded.check)
          self.assertEqual(loaded.version, self._expected_version)
          return loaded

      def writeIndex(self, filename, entries, pack_checksum):
          """Write the entries to ``filename`` using ``self._write_fn``."""
          # FIXME: Write to StringIO instead rather than hitting disk ?
          out = GitFile(filename, "wb")
          try:
              self._write_fn(out, entries, pack_checksum)
          finally:
              out.close()
  class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
      """Runs the shared index tests against MemoryPackIndex."""

      def setUp(self):
          TestCase.setUp(self)
          self._supports_large = True
          self._has_crc32_checksum = True

      def index(self, filename, entries, pack_checksum):
          """Build the index in memory; ``filename`` is not used."""
          memory_index = MemoryPackIndex(entries, pack_checksum)
          return memory_index

      def tearDown(self):
          TestCase.tearDown(self)
  class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
      """Runs the shared index tests against write_pack_index_v1."""

      def setUp(self):
          TestCase.setUp(self)
          BaseTestFilePackIndexWriting.setUp(self)
          self._write_fn = write_pack_index_v1
          self._expected_version = 1
          # The shared tests expect no CRC32 values and a TypeError for large
          # offsets from this writer.
          self._has_crc32_checksum = False
          self._supports_large = False

      def tearDown(self):
          TestCase.tearDown(self)
          BaseTestFilePackIndexWriting.tearDown(self)
  class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
      """Runs the shared index tests against write_pack_index_v2."""

      def setUp(self):
          TestCase.setUp(self)
          BaseTestFilePackIndexWriting.setUp(self)
          self._write_fn = write_pack_index_v2
          self._expected_version = 2
          # The shared tests expect CRC32 values and working large offsets
          # from this writer.
          self._has_crc32_checksum = True
          self._supports_large = True

      def tearDown(self):
          TestCase.tearDown(self)
          BaseTestFilePackIndexWriting.tearDown(self)
  class ReadZlibTests(TestCase):
      """Tests for read_zlib_chunks on a compressed object followed by extra data."""

      decomp = (
          'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
          'parent None\n'
          'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
          'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
          '\n'
          "Provide replacement for mmap()'s offset argument.")
      comp = zlib.compress(decomp)
      # Data placed after the compressed stream, standing in for whatever
      # follows the object.
      extra = 'nextobject'

      def setUp(self):
          super(ReadZlibTests, self).setUp()
          self.read = StringIO(self.comp + self.extra).read
          self.unpacked = UnpackedObject(
              Tree.type_num, None, len(self.decomp), 0)

      def test_decompress_size(self):
          """A declared length that does not match the data is an error."""
          good_decomp_len = len(self.decomp)
          self.unpacked.decomp_len = -1
          self.assertRaises(ValueError, read_zlib_chunks, self.read,
                            self.unpacked)
          self.unpacked.decomp_len = good_decomp_len - 1
          self.assertRaises(zlib.error, read_zlib_chunks, self.read,
                            self.unpacked)
          self.unpacked.decomp_len = good_decomp_len + 1
          self.assertRaises(zlib.error, read_zlib_chunks, self.read,
                            self.unpacked)

      def test_decompress_truncated(self):
          read = StringIO(self.comp[:10]).read
          self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)

          # The complete stream with nothing after it is rejected as well.
          read = StringIO(self.comp).read
          self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)

      def test_decompress_empty(self):
          unpacked = UnpackedObject(Tree.type_num, None, 0, None)
          comp = zlib.compress('')
          read = StringIO(comp + self.extra).read
          unused = read_zlib_chunks(read, unpacked)
          self.assertEqual('', ''.join(unpacked.decomp_chunks))
          # assertNotEqual replaces the deprecated assertNotEquals alias.
          self.assertNotEqual('', unused)
          self.assertEqual(self.extra, unused + read())

      def test_decompress_no_crc32(self):
          self.unpacked.crc32 = None
          read_zlib_chunks(self.read, self.unpacked)
          self.assertEqual(None, self.unpacked.crc32)

      def _do_decompress_test(self, buffer_size, **kwargs):
          """Decompress with ``buffer_size``; check data, CRC and leftover bytes."""
          unused = read_zlib_chunks(self.read, self.unpacked,
                                    buffer_size=buffer_size, **kwargs)
          self.assertEqual(self.decomp, ''.join(self.unpacked.decomp_chunks))
          self.assertEqual(zlib.crc32(self.comp), self.unpacked.crc32)
          # assertNotEqual replaces the deprecated assertNotEquals alias.
          self.assertNotEqual('', unused)
          # The returned leftover plus the unread rest of the stream must be
          # exactly the trailing data.
          self.assertEqual(self.extra, unused + self.read())

      def test_simple_decompress(self):
          self._do_decompress_test(4096)
          self.assertEqual(None, self.unpacked.comp_chunks)

      # These buffer sizes are not intended to be realistic, but rather simulate
      # larger buffer sizes that may end at various places.
      def test_decompress_buffer_size_1(self):
          self._do_decompress_test(1)

      def test_decompress_buffer_size_2(self):
          self._do_decompress_test(2)

      def test_decompress_buffer_size_3(self):
          self._do_decompress_test(3)

      def test_decompress_buffer_size_4(self):
          self._do_decompress_test(4)

      def test_decompress_include_comp(self):
          self._do_decompress_test(4096, include_comp=True)
          self.assertEqual(self.comp, ''.join(self.unpacked.comp_chunks))
  class DeltifyTests(TestCase):
      """Tests for deltify_pack_objects."""

      def test_empty(self):
          result = list(deltify_pack_objects([]))
          self.assertEqual([], result)

      def test_single(self):
          blob = Blob.from_string("foo")
          # A single object is emitted without a delta base.
          expected = [
              (blob.type_num, blob.sha().digest(), None, blob.as_raw_string())]
          self.assertEqual(expected, list(deltify_pack_objects([(blob, "")])))

      def test_simple_delta(self):
          base_blob = Blob.from_string("a" * 101)
          target_blob = Blob.from_string("a" * 100)
          delta = create_delta(base_blob.as_raw_string(),
                               target_blob.as_raw_string())
          # The first blob is emitted whole; the second as a delta against it.
          expected = [
              (base_blob.type_num, base_blob.sha().digest(), None,
               base_blob.as_raw_string()),
              (target_blob.type_num, target_blob.sha().digest(),
               base_blob.sha().digest(), delta)
          ]
          self.assertEqual(
              expected,
              list(deltify_pack_objects([(base_blob, ""), (target_blob, "")])))
  class TestPackStreamReader(TestCase):
      """Tests for reading objects from a stream with PackStreamReader."""

      def test_read_objects_emtpy(self):
          # A pack built from no entries.
          stream = StringIO()
          build_pack(stream, [])
          reader = PackStreamReader(stream.read)
          objects = list(reader.read_objects())
          self.assertEqual(0, len(objects))

      def test_read_objects(self):
          stream = StringIO()
          entries = build_pack(stream, [
              (Blob.type_num, 'blob'),
              (OFS_DELTA, (0, 'blob1')),
          ])
          reader = PackStreamReader(stream.read)
          objects = list(reader.read_objects(compute_crc32=True))
          self.assertEqual(2, len(objects))

          unpacked_blob, unpacked_delta = objects

          # The full object.
          blob_entry = entries[0]
          self.assertEqual(blob_entry[0], unpacked_blob.offset)
          self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
          self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
          self.assertEqual(None, unpacked_blob.delta_base)
          self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
          self.assertEqual(blob_entry[4], unpacked_blob.crc32)

          # The delta: its object type is not set, and its base is given as
          # the distance back to the blob.
          delta_entry = entries[1]
          self.assertEqual(delta_entry[0], unpacked_delta.offset)
          self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
          self.assertEqual(None, unpacked_delta.obj_type_num)
          self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
                           unpacked_delta.delta_base)
          delta = create_delta('blob', 'blob1')
          self.assertEqual(delta, ''.join(unpacked_delta.decomp_chunks))
          self.assertEqual(delta_entry[4], unpacked_delta.crc32)

      def test_read_objects_buffered(self):
          stream = StringIO()
          build_pack(stream, [
              (Blob.type_num, 'blob'),
              (OFS_DELTA, (0, 'blob1')),
          ])
          # Same pack, read with a 4-byte zlib buffer.
          reader = PackStreamReader(stream.read, zlib_bufsize=4)
          objects = list(reader.read_objects())
          self.assertEqual(2, len(objects))

      def test_read_objects_empty(self):
          # A stream with no bytes at all, not even a pack header.
          reader = PackStreamReader(StringIO().read)
          self.assertEqual([], list(reader.read_objects()))
  class TestPackIterator(DeltaChainIterator):
      """DeltaChainIterator that asserts no offset is resolved more than once."""

      _compute_crc32 = True

      def __init__(self, *args, **kwargs):
          super(TestPackIterator, self).__init__(*args, **kwargs)
          # Offsets already passed to _resolve_object.
          self._unpacked_offsets = set()

      def _result(self, unpacked):
          """Return entries in the same format as build_pack."""
          offset = unpacked.offset
          type_num = unpacked.obj_type_num
          data = ''.join(unpacked.obj_chunks)
          return (offset, type_num, data, unpacked.sha(), unpacked.crc32)

      def _resolve_object(self, offset, pack_type_num, base_chunks):
          """Record ``offset`` as resolved, then defer to the base class."""
          seen_offsets = self._unpacked_offsets
          assert offset not in seen_offsets, (
              'Attempted to re-inflate offset %i' % offset)
          seen_offsets.add(offset)
          parent = super(TestPackIterator, self)
          return parent._resolve_object(offset, pack_type_num, base_chunks)
  665. class DeltaChainIteratorTests(TestCase):
  def setUp(self):
      """Start each test with an empty store and no recorded lookups."""
      super(DeltaChainIteratorTests, self).setUp()
      # Hex shas already requested through get_raw_no_repeat.
      self.fetched = set()
      self.store = MemoryObjectStore()
  def store_blobs(self, blobs_data):
      """Create a Blob per item of ``blobs_data``, add each to the store, return them."""
      created = []
      for content in blobs_data:
          new_blob = make_object(Blob, data=content)
          created.append(new_blob)
          self.store.add_object(new_blob)
      return created
  def get_raw_no_repeat(self, bin_sha):
      """Wrapper around store.get_raw that doesn't allow repeat lookups."""
      hex_sha = sha_to_hex(bin_sha)
      already_fetched = hex_sha in self.fetched
      self.assertFalse(already_fetched,
                       'Attempted to re-fetch object %s' % hex_sha)
      self.fetched.add(hex_sha)
      return self.store.get_raw(hex_sha)
  684. def make_pack_iter(self, f, thin=None):
  685. if thin is None:
  686. thin = bool(list(self.store))
  687. resolve_ext_ref = thin and self.get_raw_no_repeat or None
  688. data = PackData('test.pack', file=f)
  689. return TestPackIterator.for_pack_data(
  690. data, resolve_ext_ref=resolve_ext_ref)
  691. def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
  692. expected = [entries[i] for i in expected_indexes]
  693. self.assertEqual(expected, list(pack_iter._walk_all_chains()))
  694. def test_no_deltas(self):
  695. f = StringIO()
  696. entries = build_pack(f, [
  697. (Commit.type_num, 'commit'),
  698. (Blob.type_num, 'blob'),
  699. (Tree.type_num, 'tree'),
  700. ])
  701. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  702. def test_ofs_deltas(self):
  703. f = StringIO()
  704. entries = build_pack(f, [
  705. (Blob.type_num, 'blob'),
  706. (OFS_DELTA, (0, 'blob1')),
  707. (OFS_DELTA, (0, 'blob2')),
  708. ])
  709. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  710. def test_ofs_deltas_chain(self):
  711. f = StringIO()
  712. entries = build_pack(f, [
  713. (Blob.type_num, 'blob'),
  714. (OFS_DELTA, (0, 'blob1')),
  715. (OFS_DELTA, (1, 'blob2')),
  716. ])
  717. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  718. def test_ref_deltas(self):
  719. f = StringIO()
  720. entries = build_pack(f, [
  721. (REF_DELTA, (1, 'blob1')),
  722. (Blob.type_num, ('blob')),
  723. (REF_DELTA, (1, 'blob2')),
  724. ])
  725. self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))
  726. def test_ref_deltas_chain(self):
  727. f = StringIO()
  728. entries = build_pack(f, [
  729. (REF_DELTA, (2, 'blob1')),
  730. (Blob.type_num, ('blob')),
  731. (REF_DELTA, (1, 'blob2')),
  732. ])
  733. self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
  734. def test_ofs_and_ref_deltas(self):
  735. # Deltas pending on this offset are popped before deltas depending on
  736. # this ref.
  737. f = StringIO()
  738. entries = build_pack(f, [
  739. (REF_DELTA, (1, 'blob1')),
  740. (Blob.type_num, ('blob')),
  741. (OFS_DELTA, (1, 'blob2')),
  742. ])
  743. self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
  744. def test_mixed_chain(self):
  745. f = StringIO()
  746. entries = build_pack(f, [
  747. (Blob.type_num, 'blob'),
  748. (REF_DELTA, (2, 'blob2')),
  749. (OFS_DELTA, (0, 'blob1')),
  750. (OFS_DELTA, (1, 'blob3')),
  751. (OFS_DELTA, (0, 'bob')),
  752. ])
  753. self.assertEntriesMatch([0, 2, 1, 3, 4], entries,
  754. self.make_pack_iter(f))
  755. def test_long_chain(self):
  756. n = 100
  757. objects_spec = [(Blob.type_num, 'blob')]
  758. for i in range(n):
  759. objects_spec.append((OFS_DELTA, (i, 'blob%i' % i)))
  760. f = StringIO()
  761. entries = build_pack(f, objects_spec)
  762. self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))
  763. def test_branchy_chain(self):
  764. n = 100
  765. objects_spec = [(Blob.type_num, 'blob')]
  766. for i in range(n):
  767. objects_spec.append((OFS_DELTA, (0, 'blob%i' % i)))
  768. f = StringIO()
  769. entries = build_pack(f, objects_spec)
  770. self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))
  771. def test_ext_ref(self):
  772. blob, = self.store_blobs(['blob'])
  773. f = StringIO()
  774. entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
  775. store=self.store)
  776. pack_iter = self.make_pack_iter(f)
  777. self.assertEntriesMatch([0], entries, pack_iter)
  778. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  779. def test_ext_ref_chain(self):
  780. blob, = self.store_blobs(['blob'])
  781. f = StringIO()
  782. entries = build_pack(f, [
  783. (REF_DELTA, (1, 'blob2')),
  784. (REF_DELTA, (blob.id, 'blob1')),
  785. ], store=self.store)
  786. pack_iter = self.make_pack_iter(f)
  787. self.assertEntriesMatch([1, 0], entries, pack_iter)
  788. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  789. def test_ext_ref_multiple_times(self):
  790. blob, = self.store_blobs(['blob'])
  791. f = StringIO()
  792. entries = build_pack(f, [
  793. (REF_DELTA, (blob.id, 'blob1')),
  794. (REF_DELTA, (blob.id, 'blob2')),
  795. ], store=self.store)
  796. pack_iter = self.make_pack_iter(f)
  797. self.assertEntriesMatch([0, 1], entries, pack_iter)
  798. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  799. def test_multiple_ext_refs(self):
  800. b1, b2 = self.store_blobs(['foo', 'bar'])
  801. f = StringIO()
  802. entries = build_pack(f, [
  803. (REF_DELTA, (b1.id, 'foo1')),
  804. (REF_DELTA, (b2.id, 'bar2')),
  805. ], store=self.store)
  806. pack_iter = self.make_pack_iter(f)
  807. self.assertEntriesMatch([0, 1], entries, pack_iter)
  808. self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)],
  809. pack_iter.ext_refs())
  810. def test_bad_ext_ref_non_thin_pack(self):
  811. blob, = self.store_blobs(['blob'])
  812. f = StringIO()
  813. entries = build_pack(f, [(REF_DELTA, (blob.id, 'blob1'))],
  814. store=self.store)
  815. pack_iter = self.make_pack_iter(f, thin=False)
  816. try:
  817. list(pack_iter._walk_all_chains())
  818. self.fail()
  819. except KeyError as e:
  820. self.assertEqual(([blob.id],), e.args)
  821. def test_bad_ext_ref_thin_pack(self):
  822. b1, b2, b3 = self.store_blobs(['foo', 'bar', 'baz'])
  823. f = StringIO()
  824. entries = build_pack(f, [
  825. (REF_DELTA, (1, 'foo99')),
  826. (REF_DELTA, (b1.id, 'foo1')),
  827. (REF_DELTA, (b2.id, 'bar2')),
  828. (REF_DELTA, (b3.id, 'baz3')),
  829. ], store=self.store)
  830. del self.store[b2.id]
  831. del self.store[b3.id]
  832. pack_iter = self.make_pack_iter(f)
  833. try:
  834. list(pack_iter._walk_all_chains())
  835. self.fail()
  836. except KeyError as e:
  837. self.assertEqual((sorted([b2.id, b3.id]),), e.args)