test_pack.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License, or (at your option) any later version of the license.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Tests for Dulwich packs."""
  20. from cStringIO import StringIO
  21. import os
  22. import shutil
  23. import tempfile
  24. import zlib
  25. from dulwich.errors import (
  26. ChecksumMismatch,
  27. )
  28. from dulwich.file import (
  29. GitFile,
  30. )
  31. from dulwich.objects import (
  32. hex_to_sha,
  33. sha_to_hex,
  34. Tree,
  35. )
  36. from dulwich.pack import (
  37. MemoryPackIndex,
  38. Pack,
  39. PackData,
  40. ThinPackData,
  41. apply_delta,
  42. create_delta,
  43. load_pack_index,
  44. read_zlib_chunks,
  45. write_pack_header,
  46. write_pack_index_v1,
  47. write_pack_index_v2,
  48. write_pack,
  49. )
  50. from dulwich.tests import (
  51. TestCase,
  52. )
# Hex SHA naming the fixture pack: the tests load 'pack-<sha>.pack' and
# 'pack-<sha>.idx' for it from the data/packs directory.
pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'
# Hex SHAs of the three objects in that pack; the tests below expect them to
# be, respectively, a blob, a tree and a commit.
a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
tree_sha = 'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
commit_sha = 'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  57. class PackTests(TestCase):
  58. """Base class for testing packs"""
  59. def setUp(self):
  60. super(PackTests, self).setUp()
  61. self.tempdir = tempfile.mkdtemp()
  62. def tearDown(self):
  63. shutil.rmtree(self.tempdir)
  64. super(PackTests, self).tearDown()
  65. datadir = os.path.join(os.path.dirname(__file__), 'data/packs')
  66. def get_pack_index(self, sha):
  67. """Returns a PackIndex from the datadir with the given sha"""
  68. return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha))
  69. def get_pack_data(self, sha):
  70. """Returns a PackData object from the datadir with the given sha"""
  71. return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha))
  72. def get_pack(self, sha):
  73. return Pack(os.path.join(self.datadir, 'pack-%s' % sha))
  74. def assertSucceeds(self, func, *args, **kwargs):
  75. try:
  76. func(*args, **kwargs)
  77. except ChecksumMismatch, e:
  78. self.fail(e)
  79. class PackIndexTests(PackTests):
  80. """Class that tests the index of packfiles"""
  81. def test_object_index(self):
  82. """Tests that the correct object offset is returned from the index."""
  83. p = self.get_pack_index(pack1_sha)
  84. self.assertRaises(KeyError, p.object_index, pack1_sha)
  85. self.assertEqual(p.object_index(a_sha), 178)
  86. self.assertEqual(p.object_index(tree_sha), 138)
  87. self.assertEqual(p.object_index(commit_sha), 12)
  88. def test_index_len(self):
  89. p = self.get_pack_index(pack1_sha)
  90. self.assertEquals(3, len(p))
  91. def test_get_stored_checksum(self):
  92. p = self.get_pack_index(pack1_sha)
  93. self.assertEquals('f2848e2ad16f329ae1c92e3b95e91888daa5bd01',
  94. sha_to_hex(p.get_stored_checksum()))
  95. self.assertEquals('721980e866af9a5f93ad674144e1459b8ba3e7b7',
  96. sha_to_hex(p.get_pack_checksum()))
  97. def test_index_check(self):
  98. p = self.get_pack_index(pack1_sha)
  99. self.assertSucceeds(p.check)
  100. def test_iterentries(self):
  101. p = self.get_pack_index(pack1_sha)
  102. entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
  103. self.assertEquals([
  104. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
  105. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
  106. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
  107. ], entries)
  108. def test_iter(self):
  109. p = self.get_pack_index(pack1_sha)
  110. self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
  111. class TestPackDeltas(TestCase):
  112. test_string1 = 'The answer was flailing in the wind'
  113. test_string2 = 'The answer was falling down the pipe'
  114. test_string3 = 'zzzzz'
  115. test_string_empty = ''
  116. test_string_big = 'Z' * 8192
  117. def _test_roundtrip(self, base, target):
  118. self.assertEquals(target,
  119. ''.join(apply_delta(base, create_delta(base, target))))
  120. def test_nochange(self):
  121. self._test_roundtrip(self.test_string1, self.test_string1)
  122. def test_change(self):
  123. self._test_roundtrip(self.test_string1, self.test_string2)
  124. def test_rewrite(self):
  125. self._test_roundtrip(self.test_string1, self.test_string3)
  126. def test_overflow(self):
  127. self._test_roundtrip(self.test_string_empty, self.test_string_big)
  128. class TestPackData(PackTests):
  129. """Tests getting the data from the packfile."""
  130. def test_create_pack(self):
  131. p = self.get_pack_data(pack1_sha)
  132. def test_from_file(self):
  133. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
  134. PackData.from_file(open(path), os.path.getsize(path))
  135. # TODO: more ThinPackData tests.
  136. def test_thin_from_file(self):
  137. test_sha = '1' * 40
  138. def resolve(sha):
  139. self.assertEqual(test_sha, sha)
  140. return 3, 'data'
  141. path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
  142. data = ThinPackData.from_file(resolve, open(path),
  143. os.path.getsize(path))
  144. idx = self.get_pack_index(pack1_sha)
  145. Pack.from_objects(data, idx)
  146. self.assertEqual((None, 3, 'data'), data.get_ref(test_sha))
  147. def test_pack_len(self):
  148. p = self.get_pack_data(pack1_sha)
  149. self.assertEquals(3, len(p))
  150. def test_index_check(self):
  151. p = self.get_pack_data(pack1_sha)
  152. self.assertSucceeds(p.check)
  153. def test_iterobjects(self):
  154. p = self.get_pack_data(pack1_sha)
  155. commit_data = ('tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
  156. 'author James Westby <jw+debian@jameswestby.net> '
  157. '1174945067 +0100\n'
  158. 'committer James Westby <jw+debian@jameswestby.net> '
  159. '1174945067 +0100\n'
  160. '\n'
  161. 'Test commit\n')
  162. blob_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
  163. tree_data = '100644 a\0%s' % hex_to_sha(blob_sha)
  164. actual = []
  165. for offset, type_num, chunks, crc32 in p.iterobjects():
  166. actual.append((offset, type_num, ''.join(chunks), crc32))
  167. self.assertEquals([
  168. (12, 1, commit_data, 3775879613L),
  169. (138, 2, tree_data, 912998690L),
  170. (178, 3, 'test 1\n', 1373561701L)
  171. ], actual)
  172. def test_iterentries(self):
  173. p = self.get_pack_data(pack1_sha)
  174. entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
  175. self.assertEquals(set([
  176. ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701L),
  177. ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690L),
  178. ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613L),
  179. ]), entries)
  180. def test_create_index_v1(self):
  181. p = self.get_pack_data(pack1_sha)
  182. filename = os.path.join(self.tempdir, 'v1test.idx')
  183. p.create_index_v1(filename)
  184. idx1 = load_pack_index(filename)
  185. idx2 = self.get_pack_index(pack1_sha)
  186. self.assertEquals(idx1, idx2)
  187. def test_create_index_v2(self):
  188. p = self.get_pack_data(pack1_sha)
  189. filename = os.path.join(self.tempdir, 'v2test.idx')
  190. p.create_index_v2(filename)
  191. idx1 = load_pack_index(filename)
  192. idx2 = self.get_pack_index(pack1_sha)
  193. self.assertEquals(idx1, idx2)
  194. class TestPack(PackTests):
  195. def test_len(self):
  196. p = self.get_pack(pack1_sha)
  197. self.assertEquals(3, len(p))
  198. def test_contains(self):
  199. p = self.get_pack(pack1_sha)
  200. self.assertTrue(tree_sha in p)
  201. def test_get(self):
  202. p = self.get_pack(pack1_sha)
  203. self.assertEquals(type(p[tree_sha]), Tree)
  204. def test_iter(self):
  205. p = self.get_pack(pack1_sha)
  206. self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
  207. def test_get_object_at(self):
  208. """Tests random access for non-delta objects"""
  209. p = self.get_pack(pack1_sha)
  210. obj = p[a_sha]
  211. self.assertEqual(obj.type_name, 'blob')
  212. self.assertEqual(obj.sha().hexdigest(), a_sha)
  213. obj = p[tree_sha]
  214. self.assertEqual(obj.type_name, 'tree')
  215. self.assertEqual(obj.sha().hexdigest(), tree_sha)
  216. obj = p[commit_sha]
  217. self.assertEqual(obj.type_name, 'commit')
  218. self.assertEqual(obj.sha().hexdigest(), commit_sha)
  219. def test_copy(self):
  220. origpack = self.get_pack(pack1_sha)
  221. self.assertSucceeds(origpack.index.check)
  222. basename = os.path.join(self.tempdir, 'Elch')
  223. write_pack(basename, [(x, '') for x in origpack.iterobjects()],
  224. len(origpack))
  225. newpack = Pack(basename)
  226. self.assertEquals(origpack, newpack)
  227. self.assertSucceeds(newpack.index.check)
  228. self.assertEquals(origpack.name(), newpack.name())
  229. self.assertEquals(origpack.index.get_pack_checksum(),
  230. newpack.index.get_pack_checksum())
  231. wrong_version = origpack.index.version != newpack.index.version
  232. orig_checksum = origpack.index.get_stored_checksum()
  233. new_checksum = newpack.index.get_stored_checksum()
  234. self.assertTrue(wrong_version or orig_checksum == new_checksum)
  235. def test_commit_obj(self):
  236. p = self.get_pack(pack1_sha)
  237. commit = p[commit_sha]
  238. self.assertEquals('James Westby <jw+debian@jameswestby.net>',
  239. commit.author)
  240. self.assertEquals([], commit.parents)
  241. def test_name(self):
  242. p = self.get_pack(pack1_sha)
  243. self.assertEquals(pack1_sha, p.name())
  244. class WritePackHeaderTests(TestCase):
  245. def test_simple(self):
  246. f = StringIO()
  247. write_pack_header(f, 42)
  248. self.assertEquals('PACK\x00\x00\x00\x02\x00\x00\x00*',
  249. f.getvalue())
# Pack checksum (converted from hex with hex_to_sha) passed to the index
# writers in the tests below and expected back from get_pack_checksum().
pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  251. class BaseTestPackIndexWriting(object):
  252. def assertSucceeds(self, func, *args, **kwargs):
  253. try:
  254. func(*args, **kwargs)
  255. except ChecksumMismatch, e:
  256. self.fail(e)
  257. def index(self, filename, entries, pack_checksum):
  258. raise NotImplementedError(self.index)
  259. def test_empty(self):
  260. idx = self.index('empty.idx', [], pack_checksum)
  261. self.assertEquals(idx.get_pack_checksum(), pack_checksum)
  262. self.assertEquals(0, len(idx))
  263. def test_single(self):
  264. entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
  265. my_entries = [(entry_sha, 178, 42)]
  266. idx = self.index('single.idx', my_entries, pack_checksum)
  267. self.assertEquals(idx.get_pack_checksum(), pack_checksum)
  268. self.assertEquals(1, len(idx))
  269. actual_entries = list(idx.iterentries())
  270. self.assertEquals(len(my_entries), len(actual_entries))
  271. for mine, actual in zip(my_entries, actual_entries):
  272. my_sha, my_offset, my_crc = mine
  273. actual_sha, actual_offset, actual_crc = actual
  274. self.assertEquals(my_sha, actual_sha)
  275. self.assertEquals(my_offset, actual_offset)
  276. if self._has_crc32_checksum:
  277. self.assertEquals(my_crc, actual_crc)
  278. else:
  279. self.assertTrue(actual_crc is None)
  280. class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
  281. def setUp(self):
  282. self.tempdir = tempfile.mkdtemp()
  283. def tearDown(self):
  284. shutil.rmtree(self.tempdir)
  285. def index(self, filename, entries, pack_checksum):
  286. path = os.path.join(self.tempdir, filename)
  287. self.writeIndex(path, entries, pack_checksum)
  288. idx = load_pack_index(path)
  289. self.assertSucceeds(idx.check)
  290. self.assertEquals(idx.version, self._expected_version)
  291. return idx
  292. def writeIndex(self, filename, entries, pack_checksum):
  293. # FIXME: Write to StringIO instead rather than hitting disk ?
  294. f = GitFile(filename, "wb")
  295. try:
  296. self._write_fn(f, entries, pack_checksum)
  297. finally:
  298. f.close()
  299. class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
  300. def setUp(self):
  301. TestCase.setUp(self)
  302. self._has_crc32_checksum = True
  303. def index(self, filename, entries, pack_checksum):
  304. return MemoryPackIndex(entries, pack_checksum)
  305. def tearDown(self):
  306. TestCase.tearDown(self)
  307. class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
  308. def setUp(self):
  309. TestCase.setUp(self)
  310. BaseTestFilePackIndexWriting.setUp(self)
  311. self._has_crc32_checksum = False
  312. self._expected_version = 1
  313. self._write_fn = write_pack_index_v1
  314. def tearDown(self):
  315. TestCase.tearDown(self)
  316. BaseTestFilePackIndexWriting.tearDown(self)
  317. class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
  318. def setUp(self):
  319. TestCase.setUp(self)
  320. BaseTestFilePackIndexWriting.setUp(self)
  321. self._has_crc32_checksum = True
  322. self._expected_version = 2
  323. self._write_fn = write_pack_index_v2
  324. def tearDown(self):
  325. TestCase.tearDown(self)
  326. BaseTestFilePackIndexWriting.tearDown(self)
  327. class ReadZlibTests(TestCase):
  328. decomp = (
  329. 'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
  330. 'parent None\n'
  331. 'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  332. 'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
  333. '\n'
  334. "Provide replacement for mmap()'s offset argument.")
  335. comp = zlib.compress(decomp)
  336. extra = 'nextobject'
  337. def setUp(self):
  338. super(ReadZlibTests, self).setUp()
  339. self.read = StringIO(self.comp + self.extra).read
  340. def test_decompress_size(self):
  341. good_decomp_len = len(self.decomp)
  342. self.assertRaises(ValueError, read_zlib_chunks, self.read, -1)
  343. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  344. good_decomp_len - 1)
  345. self.assertRaises(zlib.error, read_zlib_chunks, self.read,
  346. good_decomp_len + 1)
  347. def test_decompress_truncated(self):
  348. read = StringIO(self.comp[:10]).read
  349. self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
  350. read = StringIO(self.comp).read
  351. self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
  352. def test_decompress_empty(self):
  353. comp = zlib.compress('')
  354. read = StringIO(comp + self.extra).read
  355. decomp, comp_len, unused_data = read_zlib_chunks(read, 0)
  356. self.assertEqual('', ''.join(decomp))
  357. self.assertEqual(len(comp), comp_len)
  358. self.assertNotEquals('', unused_data)
  359. self.assertEquals(self.extra, unused_data + read())
  360. def _do_decompress_test(self, buffer_size):
  361. decomp, comp_len, unused_data = read_zlib_chunks(
  362. self.read, len(self.decomp), buffer_size=buffer_size)
  363. self.assertEquals(self.decomp, ''.join(decomp))
  364. self.assertEquals(len(self.comp), comp_len)
  365. self.assertNotEquals('', unused_data)
  366. self.assertEquals(self.extra, unused_data + self.read())
  367. def test_simple_decompress(self):
  368. self._do_decompress_test(4096)
  369. # These buffer sizes are not intended to be realistic, but rather simulate
  370. # larger buffer sizes that may end at various places.
  371. def test_decompress_buffer_size_1(self):
  372. self._do_decompress_test(1)
  373. def test_decompress_buffer_size_2(self):
  374. self._do_decompress_test(2)
  375. def test_decompress_buffer_size_3(self):
  376. self._do_decompress_test(3)
  377. def test_decompress_buffer_size_4(self):
  378. self._do_decompress_test(4)