2
0

test_pack.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License, or (at your option) any later version of the license.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Tests for Dulwich packs."""
  20. from cStringIO import StringIO
  21. import os
  22. import shutil
  23. import tempfile
  24. import zlib
  25. from dulwich.errors import (
  26. ChecksumMismatch,
  27. )
  28. from dulwich.file import (
  29. GitFile,
  30. )
  31. from dulwich.objects import (
  32. hex_to_sha,
  33. sha_to_hex,
  34. Tree,
  35. )
  36. from dulwich.pack import (
  37. MemoryPackIndex,
  38. Pack,
  39. PackData,
  40. ThinPackData,
  41. apply_delta,
  42. create_delta,
  43. load_pack_index,
  44. read_zlib_chunks,
  45. write_pack_header,
  46. write_pack_index_v1,
  47. write_pack_index_v2,
  48. write_pack,
  49. )
  50. from dulwich.tests import (
  51. TestCase,
  52. )
  53. pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'
  54. a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  55. tree_sha = 'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
  56. commit_sha = 'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  57. class PackTests(TestCase):
  58. """Base class for testing packs"""
  59. def setUp(self):
  60. super(PackTests, self).setUp()
  61. self.tempdir = tempfile.mkdtemp()
  62. def tearDown(self):
  63. shutil.rmtree(self.tempdir)
  64. super(PackTests, self).tearDown()
  65. datadir = os.path.abspath(os.path.join(os.path.dirname(__file__),
  66. 'data/packs'))
  67. def get_pack_index(self, sha):
  68. """Returns a PackIndex from the datadir with the given sha"""
  69. return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha))
  70. def get_pack_data(self, sha):
  71. """Returns a PackData object from the datadir with the given sha"""
  72. return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha))
  73. def get_pack(self, sha):
  74. return Pack(os.path.join(self.datadir, 'pack-%s' % sha))
  75. def assertSucceeds(self, func, *args, **kwargs):
  76. try:
  77. func(*args, **kwargs)
  78. except ChecksumMismatch, e:
  79. self.fail(e)
  80. class PackIndexTests(PackTests):
  81. """Class that tests the index of packfiles"""
  82. def test_object_index(self):
  83. """Tests that the correct object offset is returned from the index."""
  84. p = self.get_pack_index(pack1_sha)
  85. self.assertRaises(KeyError, p.object_index, pack1_sha)
  86. self.assertEqual(p.object_index(a_sha), 178)
  87. self.assertEqual(p.object_index(tree_sha), 138)
  88. self.assertEqual(p.object_index(commit_sha), 12)
  89. def test_index_len(self):
  90. p = self.get_pack_index(pack1_sha)
  91. self.assertEquals(3, len(p))
  92. def test_get_stored_checksum(self):
  93. p = self.get_pack_index(pack1_sha)
  94. self.assertEquals('f2848e2ad16f329ae1c92e3b95e91888daa5bd01',
  95. sha_to_hex(p.get_stored_checksum()))
  96. self.assertEquals('721980e866af9a5f93ad674144e1459b8ba3e7b7',
  97. sha_to_hex(p.get_pack_checksum()))
  98. def test_index_check(self):
  99. p = self.get_pack_index(pack1_sha)
  100. self.assertSucceeds(p.check)
  101. def test_iterentries(self):
  102. p = self.get_pack_index(pack1_sha)
  103. entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
  104. self.assertEquals([
  105. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
  106. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
  107. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
  108. ], entries)
  109. def test_iter(self):
  110. p = self.get_pack_index(pack1_sha)
  111. self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
  112. class TestPackDeltas(TestCase):
  113. test_string1 = 'The answer was flailing in the wind'
  114. test_string2 = 'The answer was falling down the pipe'
  115. test_string3 = 'zzzzz'
  116. test_string_empty = ''
  117. test_string_big = 'Z' * 8192
  118. def _test_roundtrip(self, base, target):
  119. self.assertEquals(target,
  120. ''.join(apply_delta(base, create_delta(base, target))))
  121. def test_nochange(self):
  122. self._test_roundtrip(self.test_string1, self.test_string1)
  123. def test_change(self):
  124. self._test_roundtrip(self.test_string1, self.test_string2)
  125. def test_rewrite(self):
  126. self._test_roundtrip(self.test_string1, self.test_string3)
  127. def test_overflow(self):
  128. self._test_roundtrip(self.test_string_empty, self.test_string_big)
  129. class TestPackData(PackTests):
  130. """Tests getting the data from the packfile."""
  131. def test_create_pack(self):
  132. p = self.get_pack_data(pack1_sha)
  133. def test_from_file(self):
  134. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
  135. PackData.from_file(open(path), os.path.getsize(path))
  136. # TODO: more ThinPackData tests.
  137. def test_thin_from_file(self):
  138. test_sha = '1' * 40
  139. def resolve(sha):
  140. self.assertEqual(test_sha, sha)
  141. return 3, 'data'
  142. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
  143. data = ThinPackData.from_file(resolve, open(path),
  144. os.path.getsize(path))
  145. idx = self.get_pack_index(pack1_sha)
  146. Pack.from_objects(data, idx)
  147. self.assertEqual((None, 3, 'data'), data.get_ref(test_sha))
  148. def test_pack_len(self):
  149. p = self.get_pack_data(pack1_sha)
  150. self.assertEquals(3, len(p))
  151. def test_index_check(self):
  152. p = self.get_pack_data(pack1_sha)
  153. self.assertSucceeds(p.check)
  154. def test_iterobjects(self):
  155. p = self.get_pack_data(pack1_sha)
  156. commit_data = ('tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
  157. 'author James Westby <jw+debian@jameswestby.net> '
  158. '1174945067 +0100\n'
  159. 'committer James Westby <jw+debian@jameswestby.net> '
  160. '1174945067 +0100\n'
  161. '\n'
  162. 'Test commit\n')
  163. blob_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  164. tree_data = '100644 a\0%s' % hex_to_sha(blob_sha)
  165. actual = []
  166. for offset, type_num, chunks, crc32 in p.iterobjects():
  167. actual.append((offset, type_num, ''.join(chunks), crc32))
  168. self.assertEquals([
  169. (12, 1, commit_data, 3775879613L),
  170. (138, 2, tree_data, 912998690L),
  171. (178, 3, 'test 1\n', 1373561701L)
  172. ], actual)
  173. def test_iterentries(self):
  174. p = self.get_pack_data(pack1_sha)
  175. entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
  176. self.assertEquals(set([
  177. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701L),
  178. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690L),
  179. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613L),
  180. ]), entries)
  181. def test_create_index_v1(self):
  182. p = self.get_pack_data(pack1_sha)
  183. filename = os.path.join(self.tempdir, 'v1test.idx')
  184. p.create_index_v1(filename)
  185. idx1 = load_pack_index(filename)
  186. idx2 = self.get_pack_index(pack1_sha)
  187. self.assertEquals(idx1, idx2)
  188. def test_create_index_v2(self):
  189. p = self.get_pack_data(pack1_sha)
  190. filename = os.path.join(self.tempdir, 'v2test.idx')
  191. p.create_index_v2(filename)
  192. idx1 = load_pack_index(filename)
  193. idx2 = self.get_pack_index(pack1_sha)
  194. self.assertEquals(idx1, idx2)
  195. class TestPack(PackTests):
  196. def test_len(self):
  197. p = self.get_pack(pack1_sha)
  198. self.assertEquals(3, len(p))
  199. def test_contains(self):
  200. p = self.get_pack(pack1_sha)
  201. self.assertTrue(tree_sha in p)
  202. def test_get(self):
  203. p = self.get_pack(pack1_sha)
  204. self.assertEquals(type(p[tree_sha]), Tree)
  205. def test_iter(self):
  206. p = self.get_pack(pack1_sha)
  207. self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
  208. def test_get_object_at(self):
  209. """Tests random access for non-delta objects"""
  210. p = self.get_pack(pack1_sha)
  211. obj = p[a_sha]
  212. self.assertEqual(obj.type_name, 'blob')
  213. self.assertEqual(obj.sha().hexdigest(), a_sha)
  214. obj = p[tree_sha]
  215. self.assertEqual(obj.type_name, 'tree')
  216. self.assertEqual(obj.sha().hexdigest(), tree_sha)
  217. obj = p[commit_sha]
  218. self.assertEqual(obj.type_name, 'commit')
  219. self.assertEqual(obj.sha().hexdigest(), commit_sha)
  220. def test_copy(self):
  221. origpack = self.get_pack(pack1_sha)
  222. try:
  223. self.assertSucceeds(origpack.index.check)
  224. basename = os.path.join(self.tempdir, 'Elch')
  225. write_pack(basename, [(x, '') for x in origpack.iterobjects()],
  226. len(origpack))
  227. newpack = Pack(basename)
  228. try:
  229. self.assertEquals(origpack, newpack)
  230. self.assertSucceeds(newpack.index.check)
  231. self.assertEquals(origpack.name(), newpack.name())
  232. self.assertEquals(origpack.index.get_pack_checksum(),
  233. newpack.index.get_pack_checksum())
  234. wrong_version = origpack.index.version != newpack.index.version
  235. orig_checksum = origpack.index.get_stored_checksum()
  236. new_checksum = newpack.index.get_stored_checksum()
  237. self.assertTrue(wrong_version or orig_checksum == new_checksum)
  238. finally:
  239. newpack.close()
  240. finally:
  241. origpack.close()
  242. def test_commit_obj(self):
  243. p = self.get_pack(pack1_sha)
  244. commit = p[commit_sha]
  245. self.assertEquals('James Westby <jw+debian@jameswestby.net>',
  246. commit.author)
  247. self.assertEquals([], commit.parents)
  248. def test_name(self):
  249. p = self.get_pack(pack1_sha)
  250. self.assertEquals(pack1_sha, p.name())
  251. class WritePackHeaderTests(TestCase):
  252. def test_simple(self):
  253. f = StringIO()
  254. write_pack_header(f, 42)
  255. self.assertEquals('PACK\x00\x00\x00\x02\x00\x00\x00*',
  256. f.getvalue())
  257. pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  258. class BaseTestPackIndexWriting(object):
  259. def assertSucceeds(self, func, *args, **kwargs):
  260. try:
  261. func(*args, **kwargs)
  262. except ChecksumMismatch, e:
  263. self.fail(e)
  264. def index(self, filename, entries, pack_checksum):
  265. raise NotImplementedError(self.index)
  266. def test_empty(self):
  267. idx = self.index('empty.idx', [], pack_checksum)
  268. self.assertEquals(idx.get_pack_checksum(), pack_checksum)
  269. self.assertEquals(0, len(idx))
  270. def test_single(self):
  271. entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
  272. my_entries = [(entry_sha, 178, 42)]
  273. idx = self.index('single.idx', my_entries, pack_checksum)
  274. self.assertEquals(idx.get_pack_checksum(), pack_checksum)
  275. self.assertEquals(1, len(idx))
  276. actual_entries = list(idx.iterentries())
  277. self.assertEquals(len(my_entries), len(actual_entries))
  278. for mine, actual in zip(my_entries, actual_entries):
  279. my_sha, my_offset, my_crc = mine
  280. actual_sha, actual_offset, actual_crc = actual
  281. self.assertEquals(my_sha, actual_sha)
  282. self.assertEquals(my_offset, actual_offset)
  283. if self._has_crc32_checksum:
  284. self.assertEquals(my_crc, actual_crc)
  285. else:
  286. self.assertTrue(actual_crc is None)
  287. class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
  288. def setUp(self):
  289. self.tempdir = tempfile.mkdtemp()
  290. def tearDown(self):
  291. shutil.rmtree(self.tempdir)
  292. def index(self, filename, entries, pack_checksum):
  293. path = os.path.join(self.tempdir, filename)
  294. self.writeIndex(path, entries, pack_checksum)
  295. idx = load_pack_index(path)
  296. self.assertSucceeds(idx.check)
  297. self.assertEquals(idx.version, self._expected_version)
  298. return idx
  299. def writeIndex(self, filename, entries, pack_checksum):
  300. # FIXME: Write to StringIO instead rather than hitting disk ?
  301. f = GitFile(filename, "wb")
  302. try:
  303. self._write_fn(f, entries, pack_checksum)
  304. finally:
  305. f.close()
  306. class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
  307. def setUp(self):
  308. TestCase.setUp(self)
  309. self._has_crc32_checksum = True
  310. def index(self, filename, entries, pack_checksum):
  311. return MemoryPackIndex(entries, pack_checksum)
  312. def tearDown(self):
  313. TestCase.tearDown(self)
  314. class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
  315. def setUp(self):
  316. TestCase.setUp(self)
  317. BaseTestFilePackIndexWriting.setUp(self)
  318. self._has_crc32_checksum = False
  319. self._expected_version = 1
  320. self._write_fn = write_pack_index_v1
  321. def tearDown(self):
  322. TestCase.tearDown(self)
  323. BaseTestFilePackIndexWriting.tearDown(self)
  324. class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
  325. def setUp(self):
  326. TestCase.setUp(self)
  327. BaseTestFilePackIndexWriting.setUp(self)
  328. self._has_crc32_checksum = True
  329. self._expected_version = 2
  330. self._write_fn = write_pack_index_v2
  331. def tearDown(self):
  332. TestCase.tearDown(self)
  333. BaseTestFilePackIndexWriting.tearDown(self)
  334. class ReadZlibTests(TestCase):
  335. decomp = (
  336. 'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
  337. 'parent None\n'
  338. 'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  339. 'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  340. '\n'
  341. "Provide replacement for mmap()'s offset argument.")
  342. comp = zlib.compress(decomp)
  343. extra = 'nextobject'
  344. def setUp(self):
  345. super(ReadZlibTests, self).setUp()
  346. self.read = StringIO(self.comp + self.extra).read
  347. def test_decompress_size(self):
  348. good_decomp_len = len(self.decomp)
  349. self.assertRaises(ValueError, read_zlib_chunks, self.read, -1)
  350. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  351. good_decomp_len - 1)
  352. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  353. good_decomp_len + 1)
  354. def test_decompress_truncated(self):
  355. read = StringIO(self.comp[:10]).read
  356. self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
  357. read = StringIO(self.comp).read
  358. self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
  359. def test_decompress_empty(self):
  360. comp = zlib.compress('')
  361. read = StringIO(comp + self.extra).read
  362. decomp, comp_len, unused_data = read_zlib_chunks(read, 0)
  363. self.assertEqual('', ''.join(decomp))
  364. self.assertEqual(len(comp), comp_len)
  365. self.assertNotEquals('', unused_data)
  366. self.assertEquals(self.extra, unused_data + read())
  367. def _do_decompress_test(self, buffer_size):
  368. decomp, comp_len, unused_data = read_zlib_chunks(
  369. self.read, len(self.decomp), buffer_size=buffer_size)
  370. self.assertEquals(self.decomp, ''.join(decomp))
  371. self.assertEquals(len(self.comp), comp_len)
  372. self.assertNotEquals('', unused_data)
  373. self.assertEquals(self.extra, unused_data + self.read())
  374. def test_simple_decompress(self):
  375. self._do_decompress_test(4096)
  376. # These buffer sizes are not intended to be realistic, but rather simulate
  377. # larger buffer sizes that may end at various places.
  378. def test_decompress_buffer_size_1(self):
  379. self._do_decompress_test(1)
  380. def test_decompress_buffer_size_2(self):
  381. self._do_decompress_test(2)
  382. def test_decompress_buffer_size_3(self):
  383. self._do_decompress_test(3)
  384. def test_decompress_buffer_size_4(self):
  385. self._do_decompress_test(4)