test_pack.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for Dulwich packs."""
  22. from io import BytesIO
  23. from hashlib import sha1
  24. import os
  25. import shutil
  26. import tempfile
  27. import zlib
  28. from dulwich.errors import (
  29. ApplyDeltaError,
  30. ChecksumMismatch,
  31. )
  32. from dulwich.file import (
  33. GitFile,
  34. )
  35. from dulwich.object_store import (
  36. MemoryObjectStore,
  37. )
  38. from dulwich.objects import (
  39. hex_to_sha,
  40. sha_to_hex,
  41. Commit,
  42. Tree,
  43. Blob,
  44. )
  45. from dulwich.pack import (
  46. OFS_DELTA,
  47. REF_DELTA,
  48. MemoryPackIndex,
  49. Pack,
  50. PackData,
  51. apply_delta,
  52. create_delta,
  53. deltify_pack_objects,
  54. load_pack_index,
  55. UnpackedObject,
  56. read_zlib_chunks,
  57. write_pack_header,
  58. write_pack_index_v1,
  59. write_pack_index_v2,
  60. write_pack_object,
  61. write_pack,
  62. unpack_object,
  63. compute_file_sha,
  64. PackStreamReader,
  65. DeltaChainIterator,
  66. _delta_encode_size,
  67. _encode_copy_operation,
  68. )
  69. from dulwich.tests import (
  70. TestCase,
  71. )
  72. from dulwich.tests.utils import (
  73. make_object,
  74. build_pack,
  75. )
# Hex SHA-1 (as bytes) that names the sample pack; the PackTests helpers use
# it to build the 'pack-<sha>' file names looked up under the data directory.
pack1_sha = b'bc63ddad95e7321ee734ea11a7a62d314e0d7481'

# Hex ids of the three objects the tests expect in that pack: a blob, a tree
# and a commit respectively (see the type checks in TestPack).
a_sha = b'6f670c0fb53f9463760b7295fbb814e965fb20c8'
tree_sha = b'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
commit_sha = b'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  80. class PackTests(TestCase):
  81. """Base class for testing packs"""
  82. def setUp(self):
  83. super(PackTests, self).setUp()
  84. self.tempdir = tempfile.mkdtemp()
  85. self.addCleanup(shutil.rmtree, self.tempdir)
  86. datadir = os.path.abspath(os.path.join(os.path.dirname(__file__),
  87. 'data/packs'))
  88. def get_pack_index(self, sha):
  89. """Returns a PackIndex from the datadir with the given sha"""
  90. return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha.decode('ascii')))
  91. def get_pack_data(self, sha):
  92. """Returns a PackData object from the datadir with the given sha"""
  93. return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha.decode('ascii')))
  94. def get_pack(self, sha):
  95. return Pack(os.path.join(self.datadir, 'pack-%s' % sha.decode('ascii')))
  96. def assertSucceeds(self, func, *args, **kwargs):
  97. try:
  98. func(*args, **kwargs)
  99. except ChecksumMismatch as e:
  100. self.fail(e)
  101. class PackIndexTests(PackTests):
  102. """Class that tests the index of packfiles"""
  103. def test_object_index(self):
  104. """Tests that the correct object offset is returned from the index."""
  105. p = self.get_pack_index(pack1_sha)
  106. self.assertRaises(KeyError, p.object_index, pack1_sha)
  107. self.assertEqual(p.object_index(a_sha), 178)
  108. self.assertEqual(p.object_index(tree_sha), 138)
  109. self.assertEqual(p.object_index(commit_sha), 12)
  110. def test_index_len(self):
  111. p = self.get_pack_index(pack1_sha)
  112. self.assertEqual(3, len(p))
  113. def test_get_stored_checksum(self):
  114. p = self.get_pack_index(pack1_sha)
  115. self.assertEqual(b'f2848e2ad16f329ae1c92e3b95e91888daa5bd01',
  116. sha_to_hex(p.get_stored_checksum()))
  117. self.assertEqual(b'721980e866af9a5f93ad674144e1459b8ba3e7b7',
  118. sha_to_hex(p.get_pack_checksum()))
  119. def test_index_check(self):
  120. p = self.get_pack_index(pack1_sha)
  121. self.assertSucceeds(p.check)
  122. def test_iterentries(self):
  123. p = self.get_pack_index(pack1_sha)
  124. entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
  125. self.assertEqual([
  126. (b'6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
  127. (b'b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
  128. (b'f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
  129. ], entries)
  130. def test_iter(self):
  131. p = self.get_pack_index(pack1_sha)
  132. self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p))
  133. class TestPackDeltas(TestCase):
  134. test_string1 = b'The answer was flailing in the wind'
  135. test_string2 = b'The answer was falling down the pipe'
  136. test_string3 = b'zzzzz'
  137. test_string_empty = b''
  138. test_string_big = b'Z' * 8192
  139. test_string_huge = b'Z' * 100000
  140. def _test_roundtrip(self, base, target):
  141. self.assertEqual(target,
  142. b''.join(apply_delta(base, create_delta(base, target))))
  143. def test_nochange(self):
  144. self._test_roundtrip(self.test_string1, self.test_string1)
  145. def test_nochange_huge(self):
  146. self._test_roundtrip(self.test_string_huge, self.test_string_huge)
  147. def test_change(self):
  148. self._test_roundtrip(self.test_string1, self.test_string2)
  149. def test_rewrite(self):
  150. self._test_roundtrip(self.test_string1, self.test_string3)
  151. def test_empty_to_big(self):
  152. self._test_roundtrip(self.test_string_empty, self.test_string_big)
  153. def test_empty_to_huge(self):
  154. self._test_roundtrip(self.test_string_empty, self.test_string_huge)
  155. def test_huge_copy(self):
  156. self._test_roundtrip(self.test_string_huge + self.test_string1,
  157. self.test_string_huge + self.test_string2)
  158. def test_dest_overflow(self):
  159. self.assertRaises(
  160. ApplyDeltaError,
  161. apply_delta, b'a'*0x10000, b'\x80\x80\x04\x80\x80\x04\x80' + b'a'*0x10000)
  162. self.assertRaises(
  163. ApplyDeltaError,
  164. apply_delta, b'', b'\x00\x80\x02\xb0\x11\x11')
  165. def test_pypy_issue(self):
  166. # Test for https://github.com/jelmer/dulwich/issues/509 /
  167. # https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work
  168. chunks = [
  169. b'tree 03207ccf58880a748188836155ceed72f03d65d6\n'
  170. b'parent 408fbab530fd4abe49249a636a10f10f44d07a21\n'
  171. b'author Victor Stinner <victor.stinner@gmail.com> 1421355207 +0100\n'
  172. b'committer Victor Stinner <victor.stinner@gmail.com> 1421355207 +0100\n'
  173. b'\n'
  174. b'Backout changeset 3a06020af8cf\n'
  175. b'\nStreamWriter: close() now clears the reference to the transport\n'
  176. b'\nStreamWriter now raises an exception if it is closed: write(), writelines(),\n'
  177. b'write_eof(), can_write_eof(), get_extra_info(), drain().\n']
  178. delta = [
  179. b'\xcd\x03\xad\x03]tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n'
  180. b'parent 20a103cc90135494162e819f98d0edfc1f1fba6b\x91]7\x0510738'
  181. b'\x91\x99@\x0b10738 +0100\x93\x04\x01\xc9']
  182. res = apply_delta(chunks, delta)
  183. expected = [
  184. b'tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n'
  185. b'parent 20a103cc90135494162e819f98d0edfc1f1fba6b',
  186. b'\nauthor Victor Stinner <victor.stinner@gmail.com> 14213',
  187. b'10738',
  188. b' +0100\ncommitter Victor Stinner <victor.stinner@gmail.com> 14213',
  189. b'10738 +0100',
  190. b'\n\nStreamWriter: close() now clears the reference to the transport\n\n'
  191. b'StreamWriter now raises an exception if it is closed: write(), writelines(),\n'
  192. b'write_eof(), can_write_eof(), get_extra_info(), drain().\n']
  193. self.assertEqual(b''.join(expected), b''.join(res))
  194. class TestPackData(PackTests):
  195. """Tests getting the data from the packfile."""
  196. def test_create_pack(self):
  197. self.get_pack_data(pack1_sha).close()
  198. def test_from_file(self):
  199. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha.decode('ascii'))
  200. with open(path, 'rb') as f:
  201. PackData.from_file(f, os.path.getsize(path))
  202. def test_pack_len(self):
  203. with self.get_pack_data(pack1_sha) as p:
  204. self.assertEqual(3, len(p))
  205. def test_index_check(self):
  206. with self.get_pack_data(pack1_sha) as p:
  207. self.assertSucceeds(p.check)
  208. def test_iterobjects(self):
  209. with self.get_pack_data(pack1_sha) as p:
  210. commit_data = (b'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
  211. b'author James Westby <jw+debian@jameswestby.net> '
  212. b'1174945067 +0100\n'
  213. b'committer James Westby <jw+debian@jameswestby.net> '
  214. b'1174945067 +0100\n'
  215. b'\n'
  216. b'Test commit\n')
  217. blob_sha = b'6f670c0fb53f9463760b7295fbb814e965fb20c8'
  218. tree_data = b'100644 a\0' + hex_to_sha(blob_sha)
  219. actual = []
  220. for offset, type_num, chunks, crc32 in p.iterobjects():
  221. actual.append((offset, type_num, b''.join(chunks), crc32))
  222. self.assertEqual([
  223. (12, 1, commit_data, 3775879613),
  224. (138, 2, tree_data, 912998690),
  225. (178, 3, b'test 1\n', 1373561701)
  226. ], actual)
  227. def test_iterentries(self):
  228. with self.get_pack_data(pack1_sha) as p:
  229. entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
  230. self.assertEqual(set([
  231. (b'6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701),
  232. (b'b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690),
  233. (b'f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613),
  234. ]), entries)
  235. def test_create_index_v1(self):
  236. with self.get_pack_data(pack1_sha) as p:
  237. filename = os.path.join(self.tempdir, 'v1test.idx')
  238. p.create_index_v1(filename)
  239. idx1 = load_pack_index(filename)
  240. idx2 = self.get_pack_index(pack1_sha)
  241. self.assertEqual(idx1, idx2)
  242. def test_create_index_v2(self):
  243. with self.get_pack_data(pack1_sha) as p:
  244. filename = os.path.join(self.tempdir, 'v2test.idx')
  245. p.create_index_v2(filename)
  246. idx1 = load_pack_index(filename)
  247. idx2 = self.get_pack_index(pack1_sha)
  248. self.assertEqual(idx1, idx2)
  249. def test_compute_file_sha(self):
  250. f = BytesIO(b'abcd1234wxyz')
  251. self.assertEqual(sha1(b'abcd1234wxyz').hexdigest(),
  252. compute_file_sha(f).hexdigest())
  253. self.assertEqual(sha1(b'abcd1234wxyz').hexdigest(),
  254. compute_file_sha(f, buffer_size=5).hexdigest())
  255. self.assertEqual(sha1(b'abcd1234').hexdigest(),
  256. compute_file_sha(f, end_ofs=-4).hexdigest())
  257. self.assertEqual(sha1(b'1234wxyz').hexdigest(),
  258. compute_file_sha(f, start_ofs=4).hexdigest())
  259. self.assertEqual(
  260. sha1(b'1234').hexdigest(),
  261. compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest())
  262. def test_compute_file_sha_short_file(self):
  263. f = BytesIO(b'abcd1234wxyz')
  264. self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20)
  265. self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20)
  266. self.assertRaises(AssertionError, compute_file_sha, f, start_ofs=10,
  267. end_ofs=-12)
  268. class TestPack(PackTests):
  269. def test_len(self):
  270. with self.get_pack(pack1_sha) as p:
  271. self.assertEqual(3, len(p))
  272. def test_contains(self):
  273. with self.get_pack(pack1_sha) as p:
  274. self.assertTrue(tree_sha in p)
  275. def test_get(self):
  276. with self.get_pack(pack1_sha) as p:
  277. self.assertEqual(type(p[tree_sha]), Tree)
  278. def test_iter(self):
  279. with self.get_pack(pack1_sha) as p:
  280. self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p))
  281. def test_iterobjects(self):
  282. with self.get_pack(pack1_sha) as p:
  283. expected = set([p[s] for s in [commit_sha, tree_sha, a_sha]])
  284. self.assertEqual(expected, set(list(p.iterobjects())))
  285. def test_pack_tuples(self):
  286. with self.get_pack(pack1_sha) as p:
  287. tuples = p.pack_tuples()
  288. expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
  289. self.assertEqual(expected, set(list(tuples)))
  290. self.assertEqual(expected, set(list(tuples)))
  291. self.assertEqual(3, len(tuples))
  292. def test_get_object_at(self):
  293. """Tests random access for non-delta objects"""
  294. with self.get_pack(pack1_sha) as p:
  295. obj = p[a_sha]
  296. self.assertEqual(obj.type_name, b'blob')
  297. self.assertEqual(obj.sha().hexdigest().encode('ascii'), a_sha)
  298. obj = p[tree_sha]
  299. self.assertEqual(obj.type_name, b'tree')
  300. self.assertEqual(obj.sha().hexdigest().encode('ascii'), tree_sha)
  301. obj = p[commit_sha]
  302. self.assertEqual(obj.type_name, b'commit')
  303. self.assertEqual(obj.sha().hexdigest().encode('ascii'), commit_sha)
  304. def test_copy(self):
  305. with self.get_pack(pack1_sha) as origpack:
  306. self.assertSucceeds(origpack.index.check)
  307. basename = os.path.join(self.tempdir, 'Elch')
  308. write_pack(basename, origpack.pack_tuples())
  309. with Pack(basename) as newpack:
  310. self.assertEqual(origpack, newpack)
  311. self.assertSucceeds(newpack.index.check)
  312. self.assertEqual(origpack.name(), newpack.name())
  313. self.assertEqual(origpack.index.get_pack_checksum(),
  314. newpack.index.get_pack_checksum())
  315. wrong_version = origpack.index.version != newpack.index.version
  316. orig_checksum = origpack.index.get_stored_checksum()
  317. new_checksum = newpack.index.get_stored_checksum()
  318. self.assertTrue(wrong_version or orig_checksum == new_checksum)
  319. def test_commit_obj(self):
  320. with self.get_pack(pack1_sha) as p:
  321. commit = p[commit_sha]
  322. self.assertEqual(b'James Westby <jw+debian@jameswestby.net>',
  323. commit.author)
  324. self.assertEqual([], commit.parents)
  325. def _copy_pack(self, origpack):
  326. basename = os.path.join(self.tempdir, 'somepack')
  327. write_pack(basename, origpack.pack_tuples())
  328. return Pack(basename)
  329. def test_keep_no_message(self):
  330. with self.get_pack(pack1_sha) as p:
  331. p = self._copy_pack(p)
  332. with p:
  333. keepfile_name = p.keep()
  334. # file should exist
  335. self.assertTrue(os.path.exists(keepfile_name))
  336. with open(keepfile_name, 'r') as f:
  337. buf = f.read()
  338. self.assertEqual('', buf)
  339. def test_keep_message(self):
  340. with self.get_pack(pack1_sha) as p:
  341. p = self._copy_pack(p)
  342. msg = b'some message'
  343. with p:
  344. keepfile_name = p.keep(msg)
  345. # file should exist
  346. self.assertTrue(os.path.exists(keepfile_name))
  347. # and contain the right message, with a linefeed
  348. with open(keepfile_name, 'rb') as f:
  349. buf = f.read()
  350. self.assertEqual(msg + b'\n', buf)
  351. def test_name(self):
  352. with self.get_pack(pack1_sha) as p:
  353. self.assertEqual(pack1_sha, p.name())
  354. def test_length_mismatch(self):
  355. with self.get_pack_data(pack1_sha) as data:
  356. index = self.get_pack_index(pack1_sha)
  357. Pack.from_objects(data, index).check_length_and_checksum()
  358. data._file.seek(12)
  359. bad_file = BytesIO()
  360. write_pack_header(bad_file, 9999)
  361. bad_file.write(data._file.read())
  362. bad_file = BytesIO(bad_file.getvalue())
  363. bad_data = PackData('', file=bad_file)
  364. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  365. self.assertRaises(AssertionError, lambda: bad_pack.data)
  366. self.assertRaises(AssertionError,
  367. lambda: bad_pack.check_length_and_checksum())
  368. def test_checksum_mismatch(self):
  369. with self.get_pack_data(pack1_sha) as data:
  370. index = self.get_pack_index(pack1_sha)
  371. Pack.from_objects(data, index).check_length_and_checksum()
  372. data._file.seek(0)
  373. bad_file = BytesIO(data._file.read()[:-20] + (b'\xff' * 20))
  374. bad_data = PackData('', file=bad_file)
  375. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  376. self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
  377. self.assertRaises(ChecksumMismatch, lambda:
  378. bad_pack.check_length_and_checksum())
  379. def test_iterobjects_2(self):
  380. with self.get_pack(pack1_sha) as p:
  381. objs = dict((o.id, o) for o in p.iterobjects())
  382. self.assertEqual(3, len(objs))
  383. self.assertEqual(sorted(objs), sorted(p.index))
  384. self.assertTrue(isinstance(objs[a_sha], Blob))
  385. self.assertTrue(isinstance(objs[tree_sha], Tree))
  386. self.assertTrue(isinstance(objs[commit_sha], Commit))
  387. class TestThinPack(PackTests):
  388. def setUp(self):
  389. super(TestThinPack, self).setUp()
  390. self.store = MemoryObjectStore()
  391. self.blobs = {}
  392. for blob in (b'foo', b'bar', b'foo1234', b'bar2468'):
  393. self.blobs[blob] = make_object(Blob, data=blob)
  394. self.store.add_object(self.blobs[b'foo'])
  395. self.store.add_object(self.blobs[b'bar'])
  396. # Build a thin pack. 'foo' is as an external reference, 'bar' an
  397. # internal reference.
  398. self.pack_dir = tempfile.mkdtemp()
  399. self.addCleanup(shutil.rmtree, self.pack_dir)
  400. self.pack_prefix = os.path.join(self.pack_dir, 'pack')
  401. with open(self.pack_prefix + '.pack', 'wb') as f:
  402. build_pack(f, [
  403. (REF_DELTA, (self.blobs[b'foo'].id, b'foo1234')),
  404. (Blob.type_num, b'bar'),
  405. (REF_DELTA, (self.blobs[b'bar'].id, b'bar2468'))],
  406. store=self.store)
  407. # Index the new pack.
  408. with self.make_pack(True) as pack:
  409. with PackData(pack._data_path) as data:
  410. data.pack = pack
  411. data.create_index(self.pack_prefix + '.idx')
  412. del self.store[self.blobs[b'bar'].id]
  413. def make_pack(self, resolve_ext_ref):
  414. return Pack(
  415. self.pack_prefix,
  416. resolve_ext_ref=self.store.get_raw if resolve_ext_ref else None)
  417. def test_get_raw(self):
  418. with self.make_pack(False) as p:
  419. self.assertRaises(
  420. KeyError, p.get_raw, self.blobs[b'foo1234'].id)
  421. with self.make_pack(True) as p:
  422. self.assertEqual(
  423. (3, b'foo1234'),
  424. p.get_raw(self.blobs[b'foo1234'].id))
  425. def test_iterobjects(self):
  426. with self.make_pack(False) as p:
  427. self.assertRaises(KeyError, list, p.iterobjects())
  428. with self.make_pack(True) as p:
  429. self.assertEqual(
  430. sorted([self.blobs[b'foo1234'].id, self.blobs[b'bar'].id,
  431. self.blobs[b'bar2468'].id]),
  432. sorted(o.id for o in p.iterobjects()))
  433. class WritePackTests(TestCase):
  434. def test_write_pack_header(self):
  435. f = BytesIO()
  436. write_pack_header(f, 42)
  437. self.assertEqual(b'PACK\x00\x00\x00\x02\x00\x00\x00*',
  438. f.getvalue())
  439. def test_write_pack_object(self):
  440. f = BytesIO()
  441. f.write(b'header')
  442. offset = f.tell()
  443. crc32 = write_pack_object(f, Blob.type_num, b'blob')
  444. self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xffffffff)
  445. f.write(b'x') # unpack_object needs extra trailing data.
  446. f.seek(offset)
  447. unpacked, unused = unpack_object(f.read, compute_crc32=True)
  448. self.assertEqual(Blob.type_num, unpacked.pack_type_num)
  449. self.assertEqual(Blob.type_num, unpacked.obj_type_num)
  450. self.assertEqual([b'blob'], unpacked.decomp_chunks)
  451. self.assertEqual(crc32, unpacked.crc32)
  452. self.assertEqual(b'x', unused)
  453. def test_write_pack_object_sha(self):
  454. f = BytesIO()
  455. f.write(b'header')
  456. offset = f.tell()
  457. sha_a = sha1(b'foo')
  458. sha_b = sha_a.copy()
  459. write_pack_object(f, Blob.type_num, b'blob', sha=sha_a)
  460. self.assertNotEqual(sha_a.digest(), sha_b.digest())
  461. sha_b.update(f.getvalue()[offset:])
  462. self.assertEqual(sha_a.digest(), sha_b.digest())
  463. pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  464. class BaseTestPackIndexWriting(object):
  465. def assertSucceeds(self, func, *args, **kwargs):
  466. try:
  467. func(*args, **kwargs)
  468. except ChecksumMismatch as e:
  469. self.fail(e)
  470. def index(self, filename, entries, pack_checksum):
  471. raise NotImplementedError(self.index)
  472. def test_empty(self):
  473. idx = self.index('empty.idx', [], pack_checksum)
  474. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  475. self.assertEqual(0, len(idx))
  476. def test_large(self):
  477. entry1_sha = hex_to_sha('4e6388232ec39792661e2e75db8fb117fc869ce6')
  478. entry2_sha = hex_to_sha('e98f071751bd77f59967bfa671cd2caebdccc9a2')
  479. entries = [(entry1_sha, 0xf2972d0830529b87, 24),
  480. (entry2_sha, (~0xf2972d0830529b87)&(2**64-1), 92)]
  481. if not self._supports_large:
  482. self.assertRaises(TypeError, self.index, 'single.idx',
  483. entries, pack_checksum)
  484. return
  485. idx = self.index('single.idx', entries, pack_checksum)
  486. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  487. self.assertEqual(2, len(idx))
  488. actual_entries = list(idx.iterentries())
  489. self.assertEqual(len(entries), len(actual_entries))
  490. for mine, actual in zip(entries, actual_entries):
  491. my_sha, my_offset, my_crc = mine
  492. actual_sha, actual_offset, actual_crc = actual
  493. self.assertEqual(my_sha, actual_sha)
  494. self.assertEqual(my_offset, actual_offset)
  495. if self._has_crc32_checksum:
  496. self.assertEqual(my_crc, actual_crc)
  497. else:
  498. self.assertTrue(actual_crc is None)
  499. def test_single(self):
  500. entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
  501. my_entries = [(entry_sha, 178, 42)]
  502. idx = self.index('single.idx', my_entries, pack_checksum)
  503. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  504. self.assertEqual(1, len(idx))
  505. actual_entries = list(idx.iterentries())
  506. self.assertEqual(len(my_entries), len(actual_entries))
  507. for mine, actual in zip(my_entries, actual_entries):
  508. my_sha, my_offset, my_crc = mine
  509. actual_sha, actual_offset, actual_crc = actual
  510. self.assertEqual(my_sha, actual_sha)
  511. self.assertEqual(my_offset, actual_offset)
  512. if self._has_crc32_checksum:
  513. self.assertEqual(my_crc, actual_crc)
  514. else:
  515. self.assertTrue(actual_crc is None)
  516. class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
  517. def setUp(self):
  518. self.tempdir = tempfile.mkdtemp()
  519. def tearDown(self):
  520. shutil.rmtree(self.tempdir)
  521. def index(self, filename, entries, pack_checksum):
  522. path = os.path.join(self.tempdir, filename)
  523. self.writeIndex(path, entries, pack_checksum)
  524. idx = load_pack_index(path)
  525. self.assertSucceeds(idx.check)
  526. self.assertEqual(idx.version, self._expected_version)
  527. return idx
  528. def writeIndex(self, filename, entries, pack_checksum):
  529. # FIXME: Write to BytesIO instead rather than hitting disk ?
  530. with GitFile(filename, "wb") as f:
  531. self._write_fn(f, entries, pack_checksum)
  532. class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
  533. def setUp(self):
  534. TestCase.setUp(self)
  535. self._has_crc32_checksum = True
  536. self._supports_large = True
  537. def index(self, filename, entries, pack_checksum):
  538. return MemoryPackIndex(entries, pack_checksum)
  539. def tearDown(self):
  540. TestCase.tearDown(self)
  541. class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
  542. def setUp(self):
  543. TestCase.setUp(self)
  544. BaseTestFilePackIndexWriting.setUp(self)
  545. self._has_crc32_checksum = False
  546. self._expected_version = 1
  547. self._supports_large = False
  548. self._write_fn = write_pack_index_v1
  549. def tearDown(self):
  550. TestCase.tearDown(self)
  551. BaseTestFilePackIndexWriting.tearDown(self)
  552. class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
  553. def setUp(self):
  554. TestCase.setUp(self)
  555. BaseTestFilePackIndexWriting.setUp(self)
  556. self._has_crc32_checksum = True
  557. self._supports_large = True
  558. self._expected_version = 2
  559. self._write_fn = write_pack_index_v2
  560. def tearDown(self):
  561. TestCase.tearDown(self)
  562. BaseTestFilePackIndexWriting.tearDown(self)
  563. class ReadZlibTests(TestCase):
  564. decomp = (
  565. b'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
  566. b'parent None\n'
  567. b'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  568. b'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  569. b'\n'
  570. b"Provide replacement for mmap()'s offset argument.")
  571. comp = zlib.compress(decomp)
  572. extra = b'nextobject'
  573. def setUp(self):
  574. super(ReadZlibTests, self).setUp()
  575. self.read = BytesIO(self.comp + self.extra).read
  576. self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0)
  577. def test_decompress_size(self):
  578. good_decomp_len = len(self.decomp)
  579. self.unpacked.decomp_len = -1
  580. self.assertRaises(ValueError, read_zlib_chunks, self.read,
  581. self.unpacked)
  582. self.unpacked.decomp_len = good_decomp_len - 1
  583. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  584. self.unpacked)
  585. self.unpacked.decomp_len = good_decomp_len + 1
  586. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  587. self.unpacked)
  588. def test_decompress_truncated(self):
  589. read = BytesIO(self.comp[:10]).read
  590. self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
  591. read = BytesIO(self.comp).read
  592. self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
  593. def test_decompress_empty(self):
  594. unpacked = UnpackedObject(Tree.type_num, None, 0, None)
  595. comp = zlib.compress(b'')
  596. read = BytesIO(comp + self.extra).read
  597. unused = read_zlib_chunks(read, unpacked)
  598. self.assertEqual(b'', b''.join(unpacked.decomp_chunks))
  599. self.assertNotEqual(b'', unused)
  600. self.assertEqual(self.extra, unused + read())
  601. def test_decompress_no_crc32(self):
  602. self.unpacked.crc32 = None
  603. read_zlib_chunks(self.read, self.unpacked)
  604. self.assertEqual(None, self.unpacked.crc32)
  605. def _do_decompress_test(self, buffer_size, **kwargs):
  606. unused = read_zlib_chunks(self.read, self.unpacked,
  607. buffer_size=buffer_size, **kwargs)
  608. self.assertEqual(self.decomp, b''.join(self.unpacked.decomp_chunks))
  609. self.assertEqual(zlib.crc32(self.comp), self.unpacked.crc32)
  610. self.assertNotEqual(b'', unused)
  611. self.assertEqual(self.extra, unused + self.read())
  612. def test_simple_decompress(self):
  613. self._do_decompress_test(4096)
  614. self.assertEqual(None, self.unpacked.comp_chunks)
  615. # These buffer sizes are not intended to be realistic, but rather simulate
  616. # larger buffer sizes that may end at various places.
  617. def test_decompress_buffer_size_1(self):
  618. self._do_decompress_test(1)
  619. def test_decompress_buffer_size_2(self):
  620. self._do_decompress_test(2)
  621. def test_decompress_buffer_size_3(self):
  622. self._do_decompress_test(3)
  623. def test_decompress_buffer_size_4(self):
  624. self._do_decompress_test(4)
  625. def test_decompress_include_comp(self):
  626. self._do_decompress_test(4096, include_comp=True)
  627. self.assertEqual(self.comp, b''.join(self.unpacked.comp_chunks))
  628. class DeltifyTests(TestCase):
  629. def test_empty(self):
  630. self.assertEqual([], list(deltify_pack_objects([])))
  631. def test_single(self):
  632. b = Blob.from_string(b"foo")
  633. self.assertEqual(
  634. [(b.type_num, b.sha().digest(), None, b.as_raw_string())],
  635. list(deltify_pack_objects([(b, b"")])))
  636. def test_simple_delta(self):
  637. b1 = Blob.from_string(b"a" * 101)
  638. b2 = Blob.from_string(b"a" * 100)
  639. delta = create_delta(b1.as_raw_string(), b2.as_raw_string())
  640. self.assertEqual([
  641. (b1.type_num, b1.sha().digest(), None, b1.as_raw_string()),
  642. (b2.type_num, b2.sha().digest(), b1.sha().digest(), delta)
  643. ],
  644. list(deltify_pack_objects([(b1, b""), (b2, b"")])))
  class TestPackStreamReader(TestCase):
      """Tests for reading objects from a stream with PackStreamReader."""

      def test_read_objects_emtpy(self):
          """A pack built from an empty object list yields no objects.

          NOTE: the method name keeps its historical misspelling ("emtpy").
          Correcting it would collide with ``test_read_objects_empty`` in
          this class, and the later definition would silently replace this
          one.
          """
          f = BytesIO()
          build_pack(f, [])
          reader = PackStreamReader(f.read)
          self.assertEqual(0, len(list(reader.read_objects())))

      def test_read_objects(self):
          """Read a blob plus an OFS_DELTA and check every unpacked field.

          ``entries`` comes from build_pack; index 0 of each entry is
          compared with the unpacked offset and index 4 with the crc32.
          """
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (0, b'blob1')),
          ])
          reader = PackStreamReader(f.read)
          objects = list(reader.read_objects(compute_crc32=True))
          self.assertEqual(2, len(objects))

          unpacked_blob, unpacked_delta = objects

          # The full object: pack type and object type are both the blob's.
          self.assertEqual(entries[0][0], unpacked_blob.offset)
          self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
          self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
          self.assertIsNone(unpacked_blob.delta_base)
          self.assertEqual(b'blob', b''.join(unpacked_blob.decomp_chunks))
          self.assertEqual(entries[0][4], unpacked_blob.crc32)

          # The delta: its object type is expected to be None, and its
          # delta_base is the distance back to the blob's offset.
          self.assertEqual(entries[1][0], unpacked_delta.offset)
          self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
          self.assertIsNone(unpacked_delta.obj_type_num)
          self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
                           unpacked_delta.delta_base)
          delta = create_delta(b'blob', b'blob1')
          self.assertEqual(delta, b''.join(unpacked_delta.decomp_chunks))
          self.assertEqual(entries[1][4], unpacked_delta.crc32)

      def test_read_objects_buffered(self):
          """Both objects are still read when ``zlib_bufsize`` is only 4."""
          f = BytesIO()
          build_pack(f, [
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (0, b'blob1')),
          ])
          reader = PackStreamReader(f.read, zlib_bufsize=4)
          self.assertEqual(2, len(list(reader.read_objects())))

      def test_read_objects_empty(self):
          """A completely empty stream yields an empty list of objects."""
          reader = PackStreamReader(BytesIO().read)
          self.assertEqual([], list(reader.read_objects()))
  class TestPackIterator(DeltaChainIterator):
      """DeltaChainIterator used by the tests in this module.

      It yields results shaped like build_pack entries and refuses to
      resolve the same pack offset more than once.
      """

      _compute_crc32 = True

      def __init__(self, *args, **kwargs):
          super(TestPackIterator, self).__init__(*args, **kwargs)
          # Offsets already passed to _resolve_object.
          self._unpacked_offsets = set()

      def _result(self, unpacked):
          """Return entries in the same format as build_pack."""
          return (unpacked.offset, unpacked.obj_type_num,
                  b''.join(unpacked.obj_chunks), unpacked.sha(),
                  unpacked.crc32)

      def _resolve_object(self, offset, pack_type_num, base_chunks):
          """Resolve the object at ``offset``, rejecting repeated offsets.

          Raises:
            AssertionError: if ``offset`` has already been resolved.
          """
          # Raised explicitly rather than via ``assert`` so that the guard
          # is not stripped when Python runs with -O.
          if offset in self._unpacked_offsets:
              raise AssertionError(
                  'Attempted to re-inflate offset %i' % offset)
          self._unpacked_offsets.add(offset)
          return super(TestPackIterator, self)._resolve_object(
              offset, pack_type_num, base_chunks)
  class DeltaChainIteratorTests(TestCase):
      """Check the order in which delta chains in a pack are walked.

      Packs are generated with build_pack and walked with
      TestPackIterator. External delta bases are served from an in-memory
      object store through ``get_raw_no_repeat``, which fails the test if
      the same object is looked up twice.
      """

      def setUp(self):
          super(DeltaChainIteratorTests, self).setUp()
          # Store that supplies external (out-of-pack) delta bases.
          self.store = MemoryObjectStore()
          # Hex SHAs already handed out by get_raw_no_repeat.
          self.fetched = set()

      def store_blobs(self, blobs_data):
          """Create one blob per item of ``blobs_data`` and add it to the store.

          Returns:
            The created blobs, in the same order as ``blobs_data``.
          """
          blobs = []
          for data in blobs_data:
              blob = make_object(Blob, data=data)
              blobs.append(blob)
              self.store.add_object(blob)
          return blobs

      def get_raw_no_repeat(self, bin_sha):
          """Wrapper around store.get_raw that doesn't allow repeat lookups."""
          hex_sha = sha_to_hex(bin_sha)
          self.assertNotIn(hex_sha, self.fetched,
                           'Attempted to re-fetch object %s' % hex_sha)
          self.fetched.add(hex_sha)
          return self.store.get_raw(hex_sha)

      def make_pack_iter(self, f, thin=None):
          """Build a TestPackIterator over the pack held in file ``f``.

          Args:
            f: file object containing the pack data.
            thin: whether external references may be resolved. When None,
              this is true exactly when the store holds at least one object.
          """
          if thin is None:
              thin = bool(list(self.store))
          # Only a thin pack gets a resolver for external bases.
          resolve_ext_ref = self.get_raw_no_repeat if thin else None
          data = PackData('test.pack', file=f)
          return TestPackIterator.for_pack_data(
              data, resolve_ext_ref=resolve_ext_ref)

      def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
          """Assert that ``pack_iter`` walks ``entries`` in the given order.

          Args:
            expected_indexes: indexes into ``entries`` in expected order.
            entries: the entries returned by build_pack.
            pack_iter: iterator whose ``_walk_all_chains`` output is checked.
          """
          expected = [entries[i] for i in expected_indexes]
          self.assertEqual(expected, list(pack_iter._walk_all_chains()))

      def test_no_deltas(self):
          """Three full objects are expected in pack order."""
          f = BytesIO()
          entries = build_pack(f, [
              (Commit.type_num, b'commit'),
              (Blob.type_num, b'blob'),
              (Tree.type_num, b'tree'),
          ])
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ofs_deltas(self):
          """Two OFS_DELTA specs that both reference entry 0."""
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (0, b'blob1')),
              (OFS_DELTA, (0, b'blob2')),
          ])
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ofs_deltas_chain(self):
          """OFS_DELTA specs chained 0 <- 1 <- 2."""
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (0, b'blob1')),
              (OFS_DELTA, (1, b'blob2')),
          ])
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ref_deltas(self):
          """Two REF_DELTA specs referencing entry 1; expected order 1, 0, 2."""
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (1, b'blob1')),
              (Blob.type_num, b'blob'),
              (REF_DELTA, (1, b'blob2')),
          ])
          self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))

      def test_ref_deltas_chain(self):
          """REF_DELTA specs chained 1 <- 2 <- 0; expected order 1, 2, 0."""
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (2, b'blob1')),
              (Blob.type_num, b'blob'),
              (REF_DELTA, (1, b'blob2')),
          ])
          self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))

      def test_ofs_and_ref_deltas(self):
          """A REF_DELTA and an OFS_DELTA both referencing entry 1."""
          # Deltas pending on this offset are popped before deltas depending
          # on this ref.
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (1, b'blob1')),
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (1, b'blob2')),
          ])
          self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))

      def test_mixed_chain(self):
          """Mixed REF_DELTA/OFS_DELTA specs; expected order 0, 2, 4, 1, 3."""
          f = BytesIO()
          entries = build_pack(f, [
              (Blob.type_num, b'blob'),
              (REF_DELTA, (2, b'blob2')),
              (OFS_DELTA, (0, b'blob1')),
              (OFS_DELTA, (1, b'blob3')),
              (OFS_DELTA, (0, b'bob')),
          ])
          self.assertEntriesMatch([0, 2, 4, 1, 3], entries,
                                  self.make_pack_iter(f))

      def test_long_chain(self):
          """100 OFS_DELTA specs, each referencing the entry just before it."""
          n = 100
          objects_spec = [(Blob.type_num, b'blob')]
          objects_spec.extend(
              (OFS_DELTA, (i, b'blob' + str(i).encode('ascii')))
              for i in range(n))
          f = BytesIO()
          entries = build_pack(f, objects_spec)
          self.assertEntriesMatch(range(n + 1), entries,
                                  self.make_pack_iter(f))

      def test_branchy_chain(self):
          """100 OFS_DELTA specs that all reference entry 0."""
          n = 100
          objects_spec = [(Blob.type_num, b'blob')]
          objects_spec.extend(
              (OFS_DELTA, (0, b'blob' + str(i).encode('ascii')))
              for i in range(n))
          f = BytesIO()
          entries = build_pack(f, objects_spec)
          self.assertEntriesMatch(range(n + 1), entries,
                                  self.make_pack_iter(f))

      def test_ext_ref(self):
          """A REF_DELTA against a stored blob is reported by ext_refs()."""
          blob, = self.store_blobs([b'blob'])
          f = BytesIO()
          entries = build_pack(f, [(REF_DELTA, (blob.id, b'blob1'))],
                               store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_chain(self):
          """Entry 1 references a stored blob and entry 0 references entry 1."""
          blob, = self.store_blobs([b'blob'])
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (1, b'blob2')),
              (REF_DELTA, (blob.id, b'blob1')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([1, 0], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_chain_degenerate(self):
          """A REF_DELTA expands to an object the store already holds."""
          # Test a degenerate case where the sender is sending a REF_DELTA
          # object that expands to an object already in the repository.
          blob, = self.store_blobs([b'blob'])
          blob2, = self.store_blobs([b'blob2'])
          # Precondition on the id ordering of the two stored blobs.
          # NOTE(review): why this ordering matters is not visible from
          # this test alone; it is kept as in the original.
          self.assertLess(blob.id, blob2.id)
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (blob.id, b'blob2')),
              (REF_DELTA, (0, b'blob3')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_multiple_times(self):
          """Two deltas on one stored blob report that blob once in ext_refs()."""
          blob, = self.store_blobs([b'blob'])
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (blob.id, b'blob1')),
              (REF_DELTA, (blob.id, b'blob2')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_multiple_ext_refs(self):
          """Deltas on two different stored blobs report both in ext_refs()."""
          b1, b2 = self.store_blobs([b'foo', b'bar'])
          f = BytesIO()
          entries = build_pack(f, [
              (REF_DELTA, (b1.id, b'foo1')),
              (REF_DELTA, (b2.id, b'bar2')),
          ], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)],
                           pack_iter.ext_refs())

      def test_bad_ext_ref_non_thin_pack(self):
          """With thin=False, an external reference raises KeyError.

          The exception's args are expected to be ``([blob.id],)``.
          """
          blob, = self.store_blobs([b'blob'])
          f = BytesIO()
          build_pack(f, [(REF_DELTA, (blob.id, b'blob1'))],
                     store=self.store)
          pack_iter = self.make_pack_iter(f, thin=False)
          with self.assertRaises(KeyError) as ctx:
              list(pack_iter._walk_all_chains())
          self.assertEqual(([blob.id],), ctx.exception.args)

      def test_bad_ext_ref_thin_pack(self):
          """Bases deleted from the store are reported in the KeyError.

          The first argument of the exception is expected to contain the
          ids of both removed blobs; order is not significant.
          """
          b1, b2, b3 = self.store_blobs([b'foo', b'bar', b'baz'])
          f = BytesIO()
          build_pack(f, [
              (REF_DELTA, (1, b'foo99')),
              (REF_DELTA, (b1.id, b'foo1')),
              (REF_DELTA, (b2.id, b'bar2')),
              (REF_DELTA, (b3.id, b'baz3')),
          ], store=self.store)
          # Remove two of the three bases after the pack has been built.
          del self.store[b2.id]
          del self.store[b3.id]
          pack_iter = self.make_pack_iter(f)
          with self.assertRaises(KeyError) as ctx:
              list(pack_iter._walk_all_chains())
          self.assertEqual(sorted([b2.id, b3.id]),
                           sorted(ctx.exception.args[0]))
  class DeltaEncodeSizeTests(TestCase):
      """Pin the bytes returned by ``_delta_encode_size`` for known sizes."""

      def test_basic(self):
          """Check each known size against its expected encoded bytes."""
          cases = (
              (0, b'\x00'),
              (1, b'\x01'),
              (250, b'\xfa\x01'),
              (1000, b'\xe8\x07'),
              (100000, b'\xa0\x8d\x06'),
          )
          for size, encoded in cases:
              self.assertEqual(encoded, _delta_encode_size(size))
  class EncodeCopyOperationTests(TestCase):
      """Pin the bytes returned by ``_encode_copy_operation``."""

      def test_basic(self):
          """Check each argument pair against its expected encoded bytes."""
          cases = (
              ((0, 0), b'\x80'),
              ((1, 10), b'\x91\x01\x0a'),
              ((100, 1000), b'\xb1\x64\xe8\x03'),
              ((1000, 1), b'\x93\xe8\x03\x01'),
          )
          for args, encoded in cases:
              self.assertEqual(encoded, _encode_copy_operation(*args))