test_pack.py 40 KB


  1. # test_pack.py -- Tests for the handling of git packs.
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for Dulwich packs."""
  22. from io import BytesIO
  23. from hashlib import sha1
  24. import os
  25. import shutil
  26. import tempfile
  27. import zlib
  28. from dulwich.errors import (
  29. ApplyDeltaError,
  30. ChecksumMismatch,
  31. )
  32. from dulwich.file import (
  33. GitFile,
  34. )
  35. from dulwich.object_store import (
  36. MemoryObjectStore,
  37. )
  38. from dulwich.objects import (
  39. hex_to_sha,
  40. sha_to_hex,
  41. Commit,
  42. Tree,
  43. Blob,
  44. )
  45. from dulwich.pack import (
  46. OFS_DELTA,
  47. REF_DELTA,
  48. MemoryPackIndex,
  49. Pack,
  50. PackData,
  51. apply_delta,
  52. create_delta,
  53. deltify_pack_objects,
  54. load_pack_index,
  55. UnpackedObject,
  56. read_zlib_chunks,
  57. write_pack_header,
  58. write_pack_index_v1,
  59. write_pack_index_v2,
  60. write_pack_object,
  61. write_pack,
  62. unpack_object,
  63. compute_file_sha,
  64. PackStreamReader,
  65. DeltaChainIterator,
  66. _delta_encode_size,
  67. _encode_copy_operation,
  68. )
  69. from dulwich.tests import (
  70. TestCase,
  71. )
  72. from dulwich.tests.utils import (
  73. make_object,
  74. build_pack,
  75. )
  # Hex SHA-1 names used throughout this module: the fixture pack itself,
  # followed by the blob, tree and commit objects looked up inside it.
  pack1_sha = b'bc63ddad95e7321ee734ea11a7a62d314e0d7481'

  a_sha = b'6f670c0fb53f9463760b7295fbb814e965fb20c8'
  tree_sha = b'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
  commit_sha = b'f18faa16531ac570a3fdc8c7ca16682548dafd12'
  class PackTests(TestCase):
      """Shared fixture helpers for tests that read the sample packs."""

      # Directory holding the pack fixtures, resolved relative to this module.
      datadir = os.path.abspath(
          os.path.join(os.path.dirname(__file__), 'data/packs'))

      def setUp(self):
          """Create a scratch directory that is removed during cleanup."""
          super(PackTests, self).setUp()
          self.tempdir = tempfile.mkdtemp()
          self.addCleanup(shutil.rmtree, self.tempdir)

      def get_pack_index(self, sha):
          """Return the PackIndex stored in the datadir for the given sha."""
          idx_name = 'pack-%s.idx' % sha.decode('ascii')
          return load_pack_index(os.path.join(self.datadir, idx_name))

      def get_pack_data(self, sha):
          """Return the PackData stored in the datadir for the given sha."""
          pack_name = 'pack-%s.pack' % sha.decode('ascii')
          return PackData(os.path.join(self.datadir, pack_name))

      def get_pack(self, sha):
          """Return a Pack opened from the datadir basename for the sha."""
          basename = 'pack-%s' % sha.decode('ascii')
          return Pack(os.path.join(self.datadir, basename))

      def assertSucceeds(self, func, *args, **kwargs):
          """Call ``func`` and fail the test if it raises ChecksumMismatch.

          Any other exception propagates unchanged.
          """
          try:
              func(*args, **kwargs)
          except ChecksumMismatch as exc:
              self.fail(exc)
  class PackIndexTests(PackTests):
      """Tests for reading the index of the fixture pack."""

      def test_object_index(self):
          """The index reports the expected offset for every stored object."""
          index = self.get_pack_index(pack1_sha)
          # The pack's own name is not an object contained in the index.
          self.assertRaises(KeyError, index.object_index, pack1_sha)
          known_offsets = ((a_sha, 178), (tree_sha, 138), (commit_sha, 12))
          for sha, offset in known_offsets:
              self.assertEqual(index.object_index(sha), offset)

      def test_index_len(self):
          index = self.get_pack_index(pack1_sha)
          self.assertEqual(3, len(index))

      def test_get_stored_checksum(self):
          index = self.get_pack_index(pack1_sha)
          stored = sha_to_hex(index.get_stored_checksum())
          self.assertEqual(
              b'f2848e2ad16f329ae1c92e3b95e91888daa5bd01', stored)
          pack_sum = sha_to_hex(index.get_pack_checksum())
          self.assertEqual(
              b'721980e866af9a5f93ad674144e1459b8ba3e7b7', pack_sum)

      def test_index_check(self):
          index = self.get_pack_index(pack1_sha)
          self.assertSucceeds(index.check)

      def test_iterentries(self):
          index = self.get_pack_index(pack1_sha)
          expected = [
              (b'6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
              (b'b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
              (b'f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None),
          ]
          found = []
          for bin_sha, offset, crc in index.iterentries():
              found.append((sha_to_hex(bin_sha), offset, crc))
          self.assertEqual(expected, found)

      def test_iter(self):
          index = self.get_pack_index(pack1_sha)
          self.assertEqual({tree_sha, commit_sha, a_sha}, set(index))
  class TestPackDeltas(TestCase):
      """Round-trip and error tests for create_delta / apply_delta."""

      test_string1 = b'The answer was flailing in the wind'
      test_string2 = b'The answer was falling down the pipe'
      test_string3 = b'zzzzz'

      test_string_empty = b''
      test_string_big = b'Z' * 8192
      test_string_huge = b'Z' * 100000

      def _test_roundtrip(self, base, target):
          """Assert that the delta from base to target rebuilds target."""
          delta = create_delta(base, target)
          rebuilt = b''.join(apply_delta(base, delta))
          self.assertEqual(target, rebuilt)

      def test_nochange(self):
          self._test_roundtrip(self.test_string1, self.test_string1)

      def test_nochange_huge(self):
          self._test_roundtrip(self.test_string_huge, self.test_string_huge)

      def test_change(self):
          self._test_roundtrip(self.test_string1, self.test_string2)

      def test_rewrite(self):
          self._test_roundtrip(self.test_string1, self.test_string3)

      def test_empty_to_big(self):
          self._test_roundtrip(self.test_string_empty, self.test_string_big)

      def test_empty_to_huge(self):
          self._test_roundtrip(self.test_string_empty, self.test_string_huge)

      def test_huge_copy(self):
          base = self.test_string_huge + self.test_string1
          target = self.test_string_huge + self.test_string2
          self._test_roundtrip(base, target)

      def test_dest_overflow(self):
          """Both hand-built deltas must be rejected with ApplyDeltaError."""
          big_base = b'a'*0x10000
          bad_delta = b'\x80\x80\x04\x80\x80\x04\x80' + b'a'*0x10000
          self.assertRaises(ApplyDeltaError, apply_delta, big_base, bad_delta)
          self.assertRaises(
              ApplyDeltaError,
              apply_delta, b'', b'\x00\x80\x02\xb0\x11\x11')

      def test_pypy_issue(self):
          # Test for https://github.com/jelmer/dulwich/issues/509 /
          # https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work
          chunks = [
              b'tree 03207ccf58880a748188836155ceed72f03d65d6\n'
              b'parent 408fbab530fd4abe49249a636a10f10f44d07a21\n'
              b'author Victor Stinner <victor.stinner@gmail.com> '
              b'1421355207 +0100\n'
              b'committer Victor Stinner <victor.stinner@gmail.com> '
              b'1421355207 +0100\n'
              b'\n'
              b'Backout changeset 3a06020af8cf\n'
              b'\nStreamWriter: close() now clears the reference to the '
              b'transport\n'
              b'\nStreamWriter now raises an exception if it is closed: '
              b'write(), writelines(),\n'
              b'write_eof(), can_write_eof(), get_extra_info(), drain().\n']
          delta = [
              b'\xcd\x03\xad\x03]tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n'
              b'parent 20a103cc90135494162e819f98d0edfc1f1fba6b\x91]7\x0510738'
              b'\x91\x99@\x0b10738 +0100\x93\x04\x01\xc9']
          expected = [
              b'tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n'
              b'parent 20a103cc90135494162e819f98d0edfc1f1fba6b',
              b'\nauthor Victor Stinner <victor.stinner@gmail.com> 14213',
              b'10738',
              b' +0100\ncommitter Victor Stinner <victor.stinner@gmail.com> '
              b'14213',
              b'10738 +0100',
              b'\n\nStreamWriter: close() now clears the reference to the '
              b'transport\n\n'
              b'StreamWriter now raises an exception if it is closed: '
              b'write(), writelines(),\n'
              b'write_eof(), can_write_eof(), get_extra_info(), drain().\n']
          result = apply_delta(chunks, delta)
          # Only the concatenated content is compared, not the chunking.
          self.assertEqual(b''.join(expected), b''.join(result))
  class TestPackData(PackTests):
      """Tests for reading object data out of the fixture pack file."""

      def test_create_pack(self):
          self.get_pack_data(pack1_sha).close()

      def test_from_file(self):
          pack_name = 'pack-%s.pack' % pack1_sha.decode('ascii')
          path = os.path.join(self.datadir, pack_name)
          with open(path, 'rb') as f:
              PackData.from_file(f, os.path.getsize(path))

      def test_pack_len(self):
          with self.get_pack_data(pack1_sha) as data:
              self.assertEqual(3, len(data))

      def test_index_check(self):
          with self.get_pack_data(pack1_sha) as data:
              self.assertSucceeds(data.check)

      def test_iterobjects(self):
          with self.get_pack_data(pack1_sha) as data:
              commit_data = (
                  b'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
                  b'author James Westby <jw+debian@jameswestby.net> '
                  b'1174945067 +0100\n'
                  b'committer James Westby <jw+debian@jameswestby.net> '
                  b'1174945067 +0100\n'
                  b'\n'
                  b'Test commit\n')
              blob_sha = b'6f670c0fb53f9463760b7295fbb814e965fb20c8'
              tree_data = b'100644 a\0' + hex_to_sha(blob_sha)
              expected = [
                  (12, 1, commit_data, 3775879613),
                  (138, 2, tree_data, 912998690),
                  (178, 3, b'test 1\n', 1373561701),
              ]
              # Join the chunks so entries compare as plain byte strings.
              actual = [
                  (offset, type_num, b''.join(chunks), crc32)
                  for offset, type_num, chunks, crc32 in data.iterobjects()]
              self.assertEqual(expected, actual)

      def test_iterentries(self):
          with self.get_pack_data(pack1_sha) as data:
              expected = {
                  (b'6f670c0fb53f9463760b7295fbb814e965fb20c8', 178,
                   1373561701),
                  (b'b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138,
                   912998690),
                  (b'f18faa16531ac570a3fdc8c7ca16682548dafd12', 12,
                   3775879613),
              }
              entries = {
                  (sha_to_hex(bin_sha), offset, crc)
                  for bin_sha, offset, crc in data.iterentries()}
              self.assertEqual(expected, entries)

      def test_create_index_v1(self):
          with self.get_pack_data(pack1_sha) as data:
              filename = os.path.join(self.tempdir, 'v1test.idx')
              data.create_index_v1(filename)
              written = load_pack_index(filename)
              reference = self.get_pack_index(pack1_sha)
              self.assertEqual(written, reference)

      def test_create_index_v2(self):
          with self.get_pack_data(pack1_sha) as data:
              filename = os.path.join(self.tempdir, 'v2test.idx')
              data.create_index_v2(filename)
              written = load_pack_index(filename)
              reference = self.get_pack_index(pack1_sha)
              self.assertEqual(written, reference)

      def test_compute_file_sha(self):
          """compute_file_sha hashes the byte range selected by its options."""
          f = BytesIO(b'abcd1234wxyz')
          cases = [
              (b'abcd1234wxyz', {}),
              (b'abcd1234wxyz', {'buffer_size': 5}),
              (b'abcd1234', {'end_ofs': -4}),
              (b'1234wxyz', {'start_ofs': 4}),
              (b'1234', {'start_ofs': 4, 'end_ofs': -4}),
          ]
          for hashed_bytes, options in cases:
              self.assertEqual(
                  sha1(hashed_bytes).hexdigest(),
                  compute_file_sha(f, **options).hexdigest())

      def test_compute_file_sha_short_file(self):
          f = BytesIO(b'abcd1234wxyz')
          self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20)
          self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20)
          self.assertRaises(
              AssertionError, compute_file_sha, f, start_ofs=10, end_ofs=-12)
  class TestPack(PackTests):
      """Tests for the Pack object (pack data combined with its index)."""

      def test_len(self):
          with self.get_pack(pack1_sha) as pack:
              self.assertEqual(3, len(pack))

      def test_contains(self):
          with self.get_pack(pack1_sha) as pack:
              self.assertIn(tree_sha, pack)

      def test_get(self):
          with self.get_pack(pack1_sha) as pack:
              self.assertEqual(type(pack[tree_sha]), Tree)

      def test_iter(self):
          with self.get_pack(pack1_sha) as pack:
              self.assertEqual({tree_sha, commit_sha, a_sha}, set(pack))

      def test_iterobjects(self):
          with self.get_pack(pack1_sha) as pack:
              expected = {pack[sha] for sha in [commit_sha, tree_sha, a_sha]}
              self.assertEqual(expected, set(pack.iterobjects()))

      def test_pack_tuples(self):
          with self.get_pack(pack1_sha) as pack:
              tuples = pack.pack_tuples()
              expected = {
                  (pack[sha], None) for sha in [commit_sha, tree_sha, a_sha]}
              # Compared twice on purpose: the result must be re-iterable.
              self.assertEqual(expected, set(tuples))
              self.assertEqual(expected, set(tuples))
              self.assertEqual(3, len(tuples))

      def test_get_object_at(self):
          """Tests random access for non-delta objects"""
          by_type = (
              (a_sha, b'blob'),
              (tree_sha, b'tree'),
              (commit_sha, b'commit'),
          )
          with self.get_pack(pack1_sha) as pack:
              for sha, type_name in by_type:
                  obj = pack[sha]
                  self.assertEqual(obj.type_name, type_name)
                  self.assertEqual(
                      obj.sha().hexdigest().encode('ascii'), sha)

      def test_copy(self):
          with self.get_pack(pack1_sha) as origpack:
              self.assertSucceeds(origpack.index.check)
              basename = os.path.join(self.tempdir, 'Elch')
              write_pack(basename, origpack.pack_tuples())

              with Pack(basename) as newpack:
                  self.assertEqual(origpack, newpack)
                  self.assertSucceeds(newpack.index.check)
                  self.assertEqual(origpack.name(), newpack.name())
                  self.assertEqual(origpack.index.get_pack_checksum(),
                                   newpack.index.get_pack_checksum())

                  # Index checksums must agree unless the versions differ.
                  wrong_version = (
                      origpack.index.version != newpack.index.version)
                  orig_checksum = origpack.index.get_stored_checksum()
                  new_checksum = newpack.index.get_stored_checksum()
                  self.assertTrue(
                      wrong_version or orig_checksum == new_checksum)

      def test_commit_obj(self):
          with self.get_pack(pack1_sha) as pack:
              commit = pack[commit_sha]
              self.assertEqual(
                  b'James Westby <jw+debian@jameswestby.net>', commit.author)
              self.assertEqual([], commit.parents)

      def _copy_pack(self, origpack):
          """Write origpack's objects to a new pack in tempdir and open it."""
          basename = os.path.join(self.tempdir, 'somepack')
          write_pack(basename, origpack.pack_tuples())
          return Pack(basename)

      def test_keep_no_message(self):
          with self.get_pack(pack1_sha) as fixture:
              copied = self._copy_pack(fixture)

          with copied:
              keepfile_name = copied.keep()

          # file should exist
          self.assertTrue(os.path.exists(keepfile_name))

          with open(keepfile_name, 'r') as f:
              contents = f.read()
              self.assertEqual('', contents)

      def test_keep_message(self):
          with self.get_pack(pack1_sha) as fixture:
              copied = self._copy_pack(fixture)

          msg = b'some message'
          with copied:
              keepfile_name = copied.keep(msg)

          # file should exist
          self.assertTrue(os.path.exists(keepfile_name))

          # and contain the right message, with a linefeed
          with open(keepfile_name, 'rb') as f:
              contents = f.read()
              self.assertEqual(msg + b'\n', contents)

      def test_name(self):
          with self.get_pack(pack1_sha) as pack:
              self.assertEqual(pack1_sha, pack.name())

      def test_length_mismatch(self):
          with self.get_pack_data(pack1_sha) as data:
              index = self.get_pack_index(pack1_sha)
              Pack.from_objects(data, index).check_length_and_checksum()

              # Re-emit the body (everything after byte 12) behind a fresh
              # header written with an object count of 9999.
              data._file.seek(12)
              scratch = BytesIO()
              write_pack_header(scratch, 9999)
              scratch.write(data._file.read())
              bad_file = BytesIO(scratch.getvalue())

              bad_data = PackData('', file=bad_file)
              bad_pack = Pack.from_lazy_objects(
                  lambda: bad_data, lambda: index)
              self.assertRaises(AssertionError, lambda: bad_pack.data)
              self.assertRaises(
                  AssertionError,
                  lambda: bad_pack.check_length_and_checksum())

      def test_checksum_mismatch(self):
          with self.get_pack_data(pack1_sha) as data:
              index = self.get_pack_index(pack1_sha)
              Pack.from_objects(data, index).check_length_and_checksum()

              # Overwrite the final 20 bytes of the pack with 0xff bytes.
              data._file.seek(0)
              corrupted = data._file.read()[:-20] + (b'\xff' * 20)
              bad_file = BytesIO(corrupted)

              bad_data = PackData('', file=bad_file)
              bad_pack = Pack.from_lazy_objects(
                  lambda: bad_data, lambda: index)
              self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
              self.assertRaises(
                  ChecksumMismatch,
                  lambda: bad_pack.check_length_and_checksum())

      def test_iterobjects_2(self):
          with self.get_pack(pack1_sha) as pack:
              objs = {obj.id: obj for obj in pack.iterobjects()}
              self.assertEqual(3, len(objs))
              self.assertEqual(sorted(objs), sorted(pack.index))
              self.assertIsInstance(objs[a_sha], Blob)
              self.assertIsInstance(objs[tree_sha], Tree)
              self.assertIsInstance(objs[commit_sha], Commit)
  class TestThinPack(PackTests):
      """Tests for a pack whose deltas reference an object outside it."""

      def setUp(self):
          super(TestThinPack, self).setUp()
          self.store = MemoryObjectStore()
          self.blobs = {
              content: make_object(Blob, data=content)
              for content in (b'foo', b'bar', b'foo1234', b'bar2468')}
          for name in (b'foo', b'bar'):
              self.store.add_object(self.blobs[name])

          # Build a thin pack. 'foo' is an external reference, 'bar' an
          # internal reference.
          self.pack_dir = tempfile.mkdtemp()
          self.addCleanup(shutil.rmtree, self.pack_dir)
          self.pack_prefix = os.path.join(self.pack_dir, 'pack')

          pack_entries = [
              (REF_DELTA, (self.blobs[b'foo'].id, b'foo1234')),
              (Blob.type_num, b'bar'),
              (REF_DELTA, (self.blobs[b'bar'].id, b'bar2468')),
          ]
          with open(self.pack_prefix + '.pack', 'wb') as f:
              build_pack(f, pack_entries, store=self.store)

          # Index the new pack.
          with self.make_pack(True) as pack:
              with PackData(pack._data_path) as data:
                  data.pack = pack
                  data.create_index(self.pack_prefix + '.idx')

          # From here on 'bar' exists only inside the pack.
          del self.store[self.blobs[b'bar'].id]

      def make_pack(self, resolve_ext_ref):
          """Open the thin pack, optionally resolving refs via the store."""
          resolver = self.store.get_raw if resolve_ext_ref else None
          return Pack(self.pack_prefix, resolve_ext_ref=resolver)

      def test_get_raw(self):
          wanted = self.blobs[b'foo1234'].id
          with self.make_pack(False) as pack:
              self.assertRaises(KeyError, pack.get_raw, wanted)
          with self.make_pack(True) as pack:
              self.assertEqual((3, b'foo1234'), pack.get_raw(wanted))

      def test_iterobjects(self):
          with self.make_pack(False) as pack:
              self.assertRaises(KeyError, list, pack.iterobjects())
          with self.make_pack(True) as pack:
              expected_ids = sorted([
                  self.blobs[b'foo1234'].id,
                  self.blobs[b'bar'].id,
                  self.blobs[b'bar2468'].id,
              ])
              self.assertEqual(
                  expected_ids,
                  sorted(obj.id for obj in pack.iterobjects()))
  class WritePackTests(TestCase):
      """Tests for the low-level pack header and object writers."""

      def test_write_pack_header(self):
          out = BytesIO()
          write_pack_header(out, 42)
          self.assertEqual(
              b'PACK\x00\x00\x00\x02\x00\x00\x00*', out.getvalue())

      def test_write_pack_object(self):
          out = BytesIO()
          out.write(b'header')
          offset = out.tell()
          crc32 = write_pack_object(out, Blob.type_num, b'blob')
          # The CRC covers only the object bytes after the 6-byte prefix.
          written = out.getvalue()[6:]
          self.assertEqual(crc32, zlib.crc32(written) & 0xffffffff)

          out.write(b'x')  # unpack_object needs extra trailing data.
          out.seek(offset)
          unpacked, unused = unpack_object(out.read, compute_crc32=True)
          self.assertEqual(Blob.type_num, unpacked.pack_type_num)
          self.assertEqual(Blob.type_num, unpacked.obj_type_num)
          self.assertEqual([b'blob'], unpacked.decomp_chunks)
          self.assertEqual(crc32, unpacked.crc32)
          self.assertEqual(b'x', unused)

      def test_write_pack_object_sha(self):
          out = BytesIO()
          out.write(b'header')
          offset = out.tell()
          sha_a = sha1(b'foo')
          sha_b = sha_a.copy()
          write_pack_object(out, Blob.type_num, b'blob', sha=sha_a)
          # The writer must have updated the hash object it was given.
          self.assertNotEqual(sha_a.digest(), sha_b.digest())
          sha_b.update(out.getvalue()[offset:])
          self.assertEqual(sha_a.digest(), sha_b.digest())
  # Pack checksum handed to every index writer in the tests below, in the
  # binary form returned by hex_to_sha.
  pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
  class BaseTestPackIndexWriting(object):
      """Mixin holding the index-writing tests shared by all index formats.

      Concrete test classes must override ``index()`` and set the
      ``_has_crc32_checksum`` and ``_supports_large`` attributes before the
      tests run.
      """

      def assertSucceeds(self, func, *args, **kwargs):
          """Call ``func`` and fail the test if it raises ChecksumMismatch."""
          try:
              func(*args, **kwargs)
          except ChecksumMismatch as e:
              self.fail(e)

      def index(self, filename, entries, pack_checksum):
          """Build an index from ``entries``; subclasses must override."""
          raise NotImplementedError(self.index)

      def _assert_entries_match(self, expected_entries, idx):
          """Compare ``idx.iterentries()`` with the entries that were written.

          Each entry is a ``(sha, offset, crc32)`` tuple. The CRC is compared
          only when ``self._has_crc32_checksum`` is true; otherwise the index
          is expected to report ``None`` for it.
          """
          actual_entries = list(idx.iterentries())
          self.assertEqual(len(expected_entries), len(actual_entries))
          for mine, actual in zip(expected_entries, actual_entries):
              my_sha, my_offset, my_crc = mine
              actual_sha, actual_offset, actual_crc = actual
              self.assertEqual(my_sha, actual_sha)
              self.assertEqual(my_offset, actual_offset)
              if self._has_crc32_checksum:
                  self.assertEqual(my_crc, actual_crc)
              else:
                  self.assertIsNone(actual_crc)

      def test_empty(self):
          idx = self.index('empty.idx', [], pack_checksum)
          self.assertEqual(idx.get_pack_checksum(), pack_checksum)
          self.assertEqual(0, len(idx))

      def test_large(self):
          """Offsets that need 64 bits round-trip, or are rejected."""
          entry1_sha = hex_to_sha('4e6388232ec39792661e2e75db8fb117fc869ce6')
          entry2_sha = hex_to_sha('e98f071751bd77f59967bfa671cd2caebdccc9a2')
          entries = [
              (entry1_sha, 0xf2972d0830529b87, 24),
              (entry2_sha, (~0xf2972d0830529b87) & (2 ** 64 - 1), 92)]
          if not self._supports_large:
              # Writers without large-offset support must raise TypeError.
              self.assertRaises(TypeError, self.index, 'single.idx',
                                entries, pack_checksum)
              return
          idx = self.index('single.idx', entries, pack_checksum)
          self.assertEqual(idx.get_pack_checksum(), pack_checksum)
          self.assertEqual(2, len(idx))
          self._assert_entries_match(entries, idx)

      def test_single(self):
          entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
          my_entries = [(entry_sha, 178, 42)]
          idx = self.index('single.idx', my_entries, pack_checksum)
          self.assertEqual(idx.get_pack_checksum(), pack_checksum)
          self.assertEqual(1, len(idx))
          self._assert_entries_match(my_entries, idx)
  class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
      """Index-writing tests for formats that are written to a file.

      Subclasses set ``_write_fn`` and ``_expected_version`` and call this
      class's ``setUp``/``tearDown`` explicitly.
      """

      def setUp(self):
          self.tempdir = tempfile.mkdtemp()

      def tearDown(self):
          shutil.rmtree(self.tempdir)

      def index(self, filename, entries, pack_checksum):
          """Write the entries to tempdir, reload the index and verify it."""
          target = os.path.join(self.tempdir, filename)
          self.writeIndex(target, entries, pack_checksum)
          loaded = load_pack_index(target)
          self.assertSucceeds(loaded.check)
          self.assertEqual(loaded.version, self._expected_version)
          return loaded

      def writeIndex(self, filename, entries, pack_checksum):
          """Serialise the entries to ``filename`` using ``self._write_fn``."""
          # FIXME: Write to BytesIO instead rather than hitting disk ?
          with GitFile(filename, "wb") as out:
              self._write_fn(out, entries, pack_checksum)
  class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
      """Runs the shared index tests against MemoryPackIndex."""

      def setUp(self):
          TestCase.setUp(self)
          self._supports_large = True
          self._has_crc32_checksum = True

      def tearDown(self):
          TestCase.tearDown(self)

      def index(self, filename, entries, pack_checksum):
          """Build the index in memory; ``filename`` is ignored."""
          return MemoryPackIndex(entries, pack_checksum)
  class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
      """Runs the shared index tests against write_pack_index_v1."""

      def setUp(self):
          # Both bases are initialised explicitly, TestCase first.
          TestCase.setUp(self)
          BaseTestFilePackIndexWriting.setUp(self)
          self._write_fn = write_pack_index_v1
          self._expected_version = 1
          self._has_crc32_checksum = False
          self._supports_large = False

      def tearDown(self):
          TestCase.tearDown(self)
          BaseTestFilePackIndexWriting.tearDown(self)
  class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
      """Runs the shared index tests against write_pack_index_v2."""

      def setUp(self):
          # Both bases are initialised explicitly, TestCase first.
          TestCase.setUp(self)
          BaseTestFilePackIndexWriting.setUp(self)
          self._write_fn = write_pack_index_v2
          self._expected_version = 2
          self._has_crc32_checksum = True
          self._supports_large = True

      def tearDown(self):
          TestCase.tearDown(self)
          BaseTestFilePackIndexWriting.tearDown(self)
  class ReadZlibTests(TestCase):
      """Tests for read_zlib_chunks on a compressed stream with a trailer."""

      decomp = (
          b'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
          b'parent None\n'
          b'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
          b'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
          b'\n'
          b"Provide replacement for mmap()'s offset argument.")
      comp = zlib.compress(decomp)
      # Bytes placed after the compressed stream in the test input.
      extra = b'nextobject'

      def setUp(self):
          super(ReadZlibTests, self).setUp()
          stream = BytesIO(self.comp + self.extra)
          self.read = stream.read
          self.unpacked = UnpackedObject(
              Tree.type_num, None, len(self.decomp), 0)

      def test_decompress_size(self):
          """A wrong declared decompressed length is rejected."""
          good_decomp_len = len(self.decomp)
          bad_lengths = (
              (-1, ValueError),
              (good_decomp_len - 1, zlib.error),
              (good_decomp_len + 1, zlib.error),
          )
          for decomp_len, expected_error in bad_lengths:
              self.unpacked.decomp_len = decomp_len
              self.assertRaises(
                  expected_error, read_zlib_chunks, self.read, self.unpacked)

      def test_decompress_truncated(self):
          truncated = BytesIO(self.comp[:10]).read
          self.assertRaises(
              zlib.error, read_zlib_chunks, truncated, self.unpacked)

          # The complete stream with nothing after it is rejected as well.
          no_trailer = BytesIO(self.comp).read
          self.assertRaises(
              zlib.error, read_zlib_chunks, no_trailer, self.unpacked)

      def test_decompress_empty(self):
          unpacked = UnpackedObject(Tree.type_num, None, 0, None)
          read = BytesIO(zlib.compress(b'') + self.extra).read
          unused = read_zlib_chunks(read, unpacked)
          self.assertEqual(b'', b''.join(unpacked.decomp_chunks))
          self.assertNotEqual(b'', unused)
          self.assertEqual(self.extra, unused + read())

      def test_decompress_no_crc32(self):
          self.unpacked.crc32 = None
          read_zlib_chunks(self.read, self.unpacked)
          self.assertEqual(None, self.unpacked.crc32)

      def _do_decompress_test(self, buffer_size, **kwargs):
          """Decompress with the given buffer size and check every output."""
          unused = read_zlib_chunks(
              self.read, self.unpacked, buffer_size=buffer_size, **kwargs)
          joined = b''.join(self.unpacked.decomp_chunks)
          self.assertEqual(self.decomp, joined)
          self.assertEqual(zlib.crc32(self.comp), self.unpacked.crc32)
          self.assertNotEqual(b'', unused)
          # Returned leftovers plus the unread remainder form the trailer.
          self.assertEqual(self.extra, unused + self.read())

      def test_simple_decompress(self):
          self._do_decompress_test(4096)
          self.assertEqual(None, self.unpacked.comp_chunks)

      # These buffer sizes are not intended to be realistic, but rather
      # simulate larger buffer sizes that may end at various places.
      def test_decompress_buffer_size_1(self):
          self._do_decompress_test(1)

      def test_decompress_buffer_size_2(self):
          self._do_decompress_test(2)

      def test_decompress_buffer_size_3(self):
          self._do_decompress_test(3)

      def test_decompress_buffer_size_4(self):
          self._do_decompress_test(4)

      def test_decompress_include_comp(self):
          self._do_decompress_test(4096, include_comp=True)
          self.assertEqual(self.comp, b''.join(self.unpacked.comp_chunks))
  class DeltifyTests(TestCase):
      """Tests for deltify_pack_objects."""

      def test_empty(self):
          self.assertEqual([], list(deltify_pack_objects([])))

      def test_single(self):
          blob = Blob.from_string(b"foo")
          expected = [
              (blob.type_num, blob.sha().digest(), None,
               blob.as_raw_string()),
          ]
          self.assertEqual(
              expected, list(deltify_pack_objects([(blob, b"")])))

      def test_simple_delta(self):
          longer = Blob.from_string(b"a" * 101)
          shorter = Blob.from_string(b"a" * 100)
          delta = create_delta(
              longer.as_raw_string(), shorter.as_raw_string())
          # The second entry is expected as a delta against the first.
          expected = [
              (longer.type_num, longer.sha().digest(), None,
               longer.as_raw_string()),
              (shorter.type_num, shorter.sha().digest(),
               longer.sha().digest(), delta),
          ]
          self.assertEqual(
              expected,
              list(deltify_pack_objects([(longer, b""), (shorter, b"")])))
  class TestPackStreamReader(TestCase):
      """Tests for reading objects from a pack stream."""

      # NOTE: "emtpy" is a historical misspelling; the name is kept because
      # test_read_objects_empty below is a different test.
      def test_read_objects_emtpy(self):
          stream = BytesIO()
          build_pack(stream, [])
          reader = PackStreamReader(stream.read)
          self.assertEqual(0, len(list(reader.read_objects())))

      def test_read_objects(self):
          stream = BytesIO()
          entries = build_pack(stream, [
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (0, b'blob1')),
          ])
          reader = PackStreamReader(stream.read)
          objects = list(reader.read_objects(compute_crc32=True))
          self.assertEqual(2, len(objects))

          unpacked_blob, unpacked_delta = objects
          blob_entry, delta_entry = entries[0], entries[1]

          self.assertEqual(blob_entry[0], unpacked_blob.offset)
          self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
          self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
          self.assertEqual(None, unpacked_blob.delta_base)
          self.assertEqual(b'blob', b''.join(unpacked_blob.decomp_chunks))
          self.assertEqual(blob_entry[4], unpacked_blob.crc32)

          self.assertEqual(delta_entry[0], unpacked_delta.offset)
          self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
          self.assertEqual(None, unpacked_delta.obj_type_num)
          # The delta base is the distance back to the blob's offset.
          self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
                           unpacked_delta.delta_base)
          expected_delta = create_delta(b'blob', b'blob1')
          self.assertEqual(
              expected_delta, b''.join(unpacked_delta.decomp_chunks))
          self.assertEqual(delta_entry[4], unpacked_delta.crc32)

      def test_read_objects_buffered(self):
          stream = BytesIO()
          build_pack(stream, [
              (Blob.type_num, b'blob'),
              (OFS_DELTA, (0, b'blob1')),
          ])
          reader = PackStreamReader(stream.read, zlib_bufsize=4)
          self.assertEqual(2, len(list(reader.read_objects())))

      def test_read_objects_empty(self):
          reader = PackStreamReader(BytesIO().read)
          self.assertEqual([], list(reader.read_objects()))
  class TestPackIterator(DeltaChainIterator):
      """DeltaChainIterator that forbids resolving an offset twice."""

      _compute_crc32 = True

      def __init__(self, *args, **kwargs):
          super(TestPackIterator, self).__init__(*args, **kwargs)
          # Offsets that have already gone through _resolve_object.
          self._unpacked_offsets = set()

      def _result(self, unpacked):
          """Return entries in the same format as build_pack."""
          data = b''.join(unpacked.obj_chunks)
          return (unpacked.offset, unpacked.obj_type_num, data,
                  unpacked.sha(), unpacked.crc32)

      def _resolve_object(self, offset, pack_type_num, base_chunks):
          """Delegate to the parent after asserting the offset is new."""
          assert offset not in self._unpacked_offsets, (
                  'Attempted to re-inflate offset %i' % offset)
          self._unpacked_offsets.add(offset)
          parent = super(TestPackIterator, self)
          return parent._resolve_object(offset, pack_type_num, base_chunks)
  718. class DeltaChainIteratorTests(TestCase):
  def setUp(self):
      """Start each test with an empty store and no recorded fetches."""
      super(DeltaChainIteratorTests, self).setUp()
      # SHAs already requested through get_raw_no_repeat.
      self.fetched = set()
      self.store = MemoryObjectStore()
  def store_blobs(self, blobs_data):
      """Create a Blob per data item, add each to the store, return them."""
      created = []
      for content in blobs_data:
          new_blob = make_object(Blob, data=content)
          created.append(new_blob)
          self.store.add_object(new_blob)
      return created
  def get_raw_no_repeat(self, bin_sha):
      """Wrapper around store.get_raw that doesn't allow repeat lookups."""
      hex_sha = sha_to_hex(bin_sha)
      # Fail the test as soon as the same object is requested twice.
      self.assertNotIn(hex_sha, self.fetched,
                       'Attempted to re-fetch object %s' % hex_sha)
      self.fetched.add(hex_sha)
      return self.store.get_raw(hex_sha)
  737. def make_pack_iter(self, f, thin=None):
  738. if thin is None:
  739. thin = bool(list(self.store))
  740. resolve_ext_ref = thin and self.get_raw_no_repeat or None
  741. data = PackData('test.pack', file=f)
  742. return TestPackIterator.for_pack_data(
  743. data, resolve_ext_ref=resolve_ext_ref)
  744. def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
  745. expected = [entries[i] for i in expected_indexes]
  746. self.assertEqual(expected, list(pack_iter._walk_all_chains()))
  747. def test_no_deltas(self):
  748. f = BytesIO()
  749. entries = build_pack(f, [
  750. (Commit.type_num, b'commit'),
  751. (Blob.type_num, b'blob'),
  752. (Tree.type_num, b'tree'),
  753. ])
  754. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  755. def test_ofs_deltas(self):
  756. f = BytesIO()
  757. entries = build_pack(f, [
  758. (Blob.type_num, b'blob'),
  759. (OFS_DELTA, (0, b'blob1')),
  760. (OFS_DELTA, (0, b'blob2')),
  761. ])
  762. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  763. def test_ofs_deltas_chain(self):
  764. f = BytesIO()
  765. entries = build_pack(f, [
  766. (Blob.type_num, b'blob'),
  767. (OFS_DELTA, (0, b'blob1')),
  768. (OFS_DELTA, (1, b'blob2')),
  769. ])
  770. self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
  771. def test_ref_deltas(self):
  772. f = BytesIO()
  773. entries = build_pack(f, [
  774. (REF_DELTA, (1, b'blob1')),
  775. (Blob.type_num, (b'blob')),
  776. (REF_DELTA, (1, b'blob2')),
  777. ])
  778. self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))
  779. def test_ref_deltas_chain(self):
  780. f = BytesIO()
  781. entries = build_pack(f, [
  782. (REF_DELTA, (2, b'blob1')),
  783. (Blob.type_num, (b'blob')),
  784. (REF_DELTA, (1, b'blob2')),
  785. ])
  786. self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
  787. def test_ofs_and_ref_deltas(self):
  788. # Deltas pending on this offset are popped before deltas depending on
  789. # this ref.
  790. f = BytesIO()
  791. entries = build_pack(f, [
  792. (REF_DELTA, (1, b'blob1')),
  793. (Blob.type_num, (b'blob')),
  794. (OFS_DELTA, (1, b'blob2')),
  795. ])
  796. self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))
  797. def test_mixed_chain(self):
  798. f = BytesIO()
  799. entries = build_pack(f, [
  800. (Blob.type_num, b'blob'),
  801. (REF_DELTA, (2, b'blob2')),
  802. (OFS_DELTA, (0, b'blob1')),
  803. (OFS_DELTA, (1, b'blob3')),
  804. (OFS_DELTA, (0, b'bob')),
  805. ])
  806. self.assertEntriesMatch([0, 2, 4, 1, 3], entries,
  807. self.make_pack_iter(f))
  808. def test_long_chain(self):
  809. n = 100
  810. objects_spec = [(Blob.type_num, b'blob')]
  811. for i in range(n):
  812. objects_spec.append(
  813. (OFS_DELTA, (i, b'blob' + str(i).encode('ascii'))))
  814. f = BytesIO()
  815. entries = build_pack(f, objects_spec)
  816. self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))
  817. def test_branchy_chain(self):
  818. n = 100
  819. objects_spec = [(Blob.type_num, b'blob')]
  820. for i in range(n):
  821. objects_spec.append(
  822. (OFS_DELTA, (0, b'blob' + str(i).encode('ascii'))))
  823. f = BytesIO()
  824. entries = build_pack(f, objects_spec)
  825. self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))
  826. def test_ext_ref(self):
  827. blob, = self.store_blobs([b'blob'])
  828. f = BytesIO()
  829. entries = build_pack(f, [(REF_DELTA, (blob.id, b'blob1'))],
  830. store=self.store)
  831. pack_iter = self.make_pack_iter(f)
  832. self.assertEntriesMatch([0], entries, pack_iter)
  833. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  834. def test_ext_ref_chain(self):
  835. blob, = self.store_blobs([b'blob'])
  836. f = BytesIO()
  837. entries = build_pack(f, [
  838. (REF_DELTA, (1, b'blob2')),
  839. (REF_DELTA, (blob.id, b'blob1')),
  840. ], store=self.store)
  841. pack_iter = self.make_pack_iter(f)
  842. self.assertEntriesMatch([1, 0], entries, pack_iter)
  843. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  844. def test_ext_ref_chain_degenerate(self):
  845. # Test a degenerate case where the sender is sending a REF_DELTA
  846. # object that expands to an object already in the repository.
  847. blob, = self.store_blobs([b'blob'])
  848. blob2, = self.store_blobs([b'blob2'])
  849. assert blob.id < blob2.id
  850. f = BytesIO()
  851. entries = build_pack(f, [
  852. (REF_DELTA, (blob.id, b'blob2')),
  853. (REF_DELTA, (0, b'blob3')),
  854. ], store=self.store)
  855. pack_iter = self.make_pack_iter(f)
  856. self.assertEntriesMatch([0, 1], entries, pack_iter)
  857. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  858. def test_ext_ref_multiple_times(self):
  859. blob, = self.store_blobs([b'blob'])
  860. f = BytesIO()
  861. entries = build_pack(f, [
  862. (REF_DELTA, (blob.id, b'blob1')),
  863. (REF_DELTA, (blob.id, b'blob2')),
  864. ], store=self.store)
  865. pack_iter = self.make_pack_iter(f)
  866. self.assertEntriesMatch([0, 1], entries, pack_iter)
  867. self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())
  868. def test_multiple_ext_refs(self):
  869. b1, b2 = self.store_blobs([b'foo', b'bar'])
  870. f = BytesIO()
  871. entries = build_pack(f, [
  872. (REF_DELTA, (b1.id, b'foo1')),
  873. (REF_DELTA, (b2.id, b'bar2')),
  874. ], store=self.store)
  875. pack_iter = self.make_pack_iter(f)
  876. self.assertEntriesMatch([0, 1], entries, pack_iter)
  877. self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)],
  878. pack_iter.ext_refs())
  879. def test_bad_ext_ref_non_thin_pack(self):
  880. blob, = self.store_blobs([b'blob'])
  881. f = BytesIO()
  882. build_pack(f, [(REF_DELTA, (blob.id, b'blob1'))], store=self.store)
  883. pack_iter = self.make_pack_iter(f, thin=False)
  884. try:
  885. list(pack_iter._walk_all_chains())
  886. self.fail()
  887. except KeyError as e:
  888. self.assertEqual(([blob.id],), e.args)
  889. def test_bad_ext_ref_thin_pack(self):
  890. b1, b2, b3 = self.store_blobs([b'foo', b'bar', b'baz'])
  891. f = BytesIO()
  892. build_pack(f, [
  893. (REF_DELTA, (1, b'foo99')),
  894. (REF_DELTA, (b1.id, b'foo1')),
  895. (REF_DELTA, (b2.id, b'bar2')),
  896. (REF_DELTA, (b3.id, b'baz3')),
  897. ], store=self.store)
  898. del self.store[b2.id]
  899. del self.store[b3.id]
  900. pack_iter = self.make_pack_iter(f)
  901. try:
  902. list(pack_iter._walk_all_chains())
  903. self.fail()
  904. except KeyError as e:
  905. self.assertEqual((sorted([b2.id, b3.id]),), (sorted(e.args[0]),))
  906. class DeltaEncodeSizeTests(TestCase):
  907. def test_basic(self):
  908. self.assertEqual(b'\x00', _delta_encode_size(0))
  909. self.assertEqual(b'\x01', _delta_encode_size(1))
  910. self.assertEqual(b'\xfa\x01', _delta_encode_size(250))
  911. self.assertEqual(b'\xe8\x07', _delta_encode_size(1000))
  912. self.assertEqual(b'\xa0\x8d\x06', _delta_encode_size(100000))
  913. class EncodeCopyOperationTests(TestCase):
  914. def test_basic(self):
  915. self.assertEqual(b'\x80', _encode_copy_operation(0, 0))
  916. self.assertEqual(b'\x91\x01\x0a', _encode_copy_operation(1, 10))
  917. self.assertEqual(b'\xb1\x64\xe8\x03',
  918. _encode_copy_operation(100, 1000))
  919. self.assertEqual(b'\x93\xe8\x03\x01',
  920. _encode_copy_operation(1000, 1))