# test_pack.py -- Tests for the handling of git packs.
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# Copyright (C) 2008 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  21. """Tests for Dulwich packs."""
  22. import os
  23. import shutil
  24. import sys
  25. import tempfile
  26. import zlib
  27. from hashlib import sha1
  28. from io import BytesIO
  29. from typing import Set
  30. from dulwich.errors import ApplyDeltaError, ChecksumMismatch
  31. from dulwich.file import GitFile
  32. from dulwich.object_store import MemoryObjectStore
  33. from dulwich.objects import Blob, Commit, Tree, hex_to_sha, sha_to_hex
  34. from dulwich.pack import (
  35. OFS_DELTA,
  36. REF_DELTA,
  37. DeltaChainIterator,
  38. MemoryPackIndex,
  39. Pack,
  40. PackData,
  41. PackStreamReader,
  42. UnpackedObject,
  43. UnresolvedDeltas,
  44. _delta_encode_size,
  45. _encode_copy_operation,
  46. apply_delta,
  47. compute_file_sha,
  48. create_delta,
  49. deltify_pack_objects,
  50. load_pack_index,
  51. read_zlib_chunks,
  52. unpack_object,
  53. write_pack,
  54. write_pack_header,
  55. write_pack_index_v1,
  56. write_pack_index_v2,
  57. write_pack_object,
  58. )
  59. from dulwich.tests.utils import build_pack, make_object
  60. from . import TestCase
# Hex name of the fixture pack loaded from the test data directory.
pack1_sha = b"bc63ddad95e7321ee734ea11a7a62d314e0d7481"

# Hex ids of the three objects the tests below expect inside that pack:
# a blob, a tree and a commit respectively.
a_sha = b"6f670c0fb53f9463760b7295fbb814e965fb20c8"
tree_sha = b"b2a2766a2879c209ab1176e7e778b81ae422eeaa"
commit_sha = b"f18faa16531ac570a3fdc8c7ca16682548dafd12"

# Expected ``oct(st_mode)`` of a freshly written index file; the value
# differs on win32.
indexmode = "0o100644" if sys.platform != "win32" else "0o100666"
  66. class PackTests(TestCase):
  67. """Base class for testing packs."""
  68. def setUp(self):
  69. super().setUp()
  70. self.tempdir = tempfile.mkdtemp()
  71. self.addCleanup(shutil.rmtree, self.tempdir)
  72. datadir = os.path.abspath(
  73. os.path.join(os.path.dirname(__file__), "../testdata/packs")
  74. )
  75. def get_pack_index(self, sha):
  76. """Returns a PackIndex from the datadir with the given sha."""
  77. return load_pack_index(
  78. os.path.join(self.datadir, "pack-{}.idx".format(sha.decode("ascii")))
  79. )
  80. def get_pack_data(self, sha):
  81. """Returns a PackData object from the datadir with the given sha."""
  82. return PackData(
  83. os.path.join(self.datadir, "pack-{}.pack".format(sha.decode("ascii")))
  84. )
  85. def get_pack(self, sha):
  86. return Pack(os.path.join(self.datadir, "pack-{}".format(sha.decode("ascii"))))
  87. def assertSucceeds(self, func, *args, **kwargs):
  88. try:
  89. func(*args, **kwargs)
  90. except ChecksumMismatch as e:
  91. self.fail(e)
  92. class PackIndexTests(PackTests):
  93. """Class that tests the index of packfiles."""
  94. def test_object_offset(self):
  95. """Tests that the correct object offset is returned from the index."""
  96. p = self.get_pack_index(pack1_sha)
  97. self.assertRaises(KeyError, p.object_offset, pack1_sha)
  98. self.assertEqual(p.object_offset(a_sha), 178)
  99. self.assertEqual(p.object_offset(tree_sha), 138)
  100. self.assertEqual(p.object_offset(commit_sha), 12)
  101. def test_object_sha1(self):
  102. """Tests that the correct object offset is returned from the index."""
  103. p = self.get_pack_index(pack1_sha)
  104. self.assertRaises(KeyError, p.object_sha1, 876)
  105. self.assertEqual(p.object_sha1(178), hex_to_sha(a_sha))
  106. self.assertEqual(p.object_sha1(138), hex_to_sha(tree_sha))
  107. self.assertEqual(p.object_sha1(12), hex_to_sha(commit_sha))
  108. def test_index_len(self):
  109. p = self.get_pack_index(pack1_sha)
  110. self.assertEqual(3, len(p))
  111. def test_get_stored_checksum(self):
  112. p = self.get_pack_index(pack1_sha)
  113. self.assertEqual(
  114. b"f2848e2ad16f329ae1c92e3b95e91888daa5bd01",
  115. sha_to_hex(p.get_stored_checksum()),
  116. )
  117. self.assertEqual(
  118. b"721980e866af9a5f93ad674144e1459b8ba3e7b7",
  119. sha_to_hex(p.get_pack_checksum()),
  120. )
  121. def test_index_check(self):
  122. p = self.get_pack_index(pack1_sha)
  123. self.assertSucceeds(p.check)
  124. def test_iterentries(self):
  125. p = self.get_pack_index(pack1_sha)
  126. entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
  127. self.assertEqual(
  128. [
  129. (b"6f670c0fb53f9463760b7295fbb814e965fb20c8", 178, None),
  130. (b"b2a2766a2879c209ab1176e7e778b81ae422eeaa", 138, None),
  131. (b"f18faa16531ac570a3fdc8c7ca16682548dafd12", 12, None),
  132. ],
  133. entries,
  134. )
  135. def test_iter(self):
  136. p = self.get_pack_index(pack1_sha)
  137. self.assertEqual({tree_sha, commit_sha, a_sha}, set(p))
  138. class TestPackDeltas(TestCase):
  139. test_string1 = b"The answer was flailing in the wind"
  140. test_string2 = b"The answer was falling down the pipe"
  141. test_string3 = b"zzzzz"
  142. test_string_empty = b""
  143. test_string_big = b"Z" * 8192
  144. test_string_huge = b"Z" * 100000
  145. def _test_roundtrip(self, base, target):
  146. self.assertEqual(
  147. target, b"".join(apply_delta(base, list(create_delta(base, target))))
  148. )
  149. def test_nochange(self):
  150. self._test_roundtrip(self.test_string1, self.test_string1)
  151. def test_nochange_huge(self):
  152. self._test_roundtrip(self.test_string_huge, self.test_string_huge)
  153. def test_change(self):
  154. self._test_roundtrip(self.test_string1, self.test_string2)
  155. def test_rewrite(self):
  156. self._test_roundtrip(self.test_string1, self.test_string3)
  157. def test_empty_to_big(self):
  158. self._test_roundtrip(self.test_string_empty, self.test_string_big)
  159. def test_empty_to_huge(self):
  160. self._test_roundtrip(self.test_string_empty, self.test_string_huge)
  161. def test_huge_copy(self):
  162. self._test_roundtrip(
  163. self.test_string_huge + self.test_string1,
  164. self.test_string_huge + self.test_string2,
  165. )
  166. def test_dest_overflow(self):
  167. self.assertRaises(
  168. ApplyDeltaError,
  169. apply_delta,
  170. b"a" * 0x10000,
  171. b"\x80\x80\x04\x80\x80\x04\x80" + b"a" * 0x10000,
  172. )
  173. self.assertRaises(
  174. ApplyDeltaError, apply_delta, b"", b"\x00\x80\x02\xb0\x11\x11"
  175. )
  176. def test_pypy_issue(self):
  177. # Test for https://github.com/jelmer/dulwich/issues/509 /
  178. # https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work
  179. chunks = [
  180. b"tree 03207ccf58880a748188836155ceed72f03d65d6\n"
  181. b"parent 408fbab530fd4abe49249a636a10f10f44d07a21\n"
  182. b"author Victor Stinner <victor.stinner@gmail.com> "
  183. b"1421355207 +0100\n"
  184. b"committer Victor Stinner <victor.stinner@gmail.com> "
  185. b"1421355207 +0100\n"
  186. b"\n"
  187. b"Backout changeset 3a06020af8cf\n"
  188. b"\nStreamWriter: close() now clears the reference to the "
  189. b"transport\n"
  190. b"\nStreamWriter now raises an exception if it is closed: "
  191. b"write(), writelines(),\n"
  192. b"write_eof(), can_write_eof(), get_extra_info(), drain().\n"
  193. ]
  194. delta = [
  195. b"\xcd\x03\xad\x03]tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n"
  196. b"parent 20a103cc90135494162e819f98d0edfc1f1fba6b\x91]7\x0510738"
  197. b"\x91\x99@\x0b10738 +0100\x93\x04\x01\xc9"
  198. ]
  199. res = apply_delta(chunks, delta)
  200. expected = [
  201. b"tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n"
  202. b"parent 20a103cc90135494162e819f98d0edfc1f1fba6b",
  203. b"\nauthor Victor Stinner <victor.stinner@gmail.com> 14213",
  204. b"10738",
  205. b" +0100\ncommitter Victor Stinner <victor.stinner@gmail.com> " b"14213",
  206. b"10738 +0100",
  207. b"\n\nStreamWriter: close() now clears the reference to the "
  208. b"transport\n\n"
  209. b"StreamWriter now raises an exception if it is closed: "
  210. b"write(), writelines(),\n"
  211. b"write_eof(), can_write_eof(), get_extra_info(), drain().\n",
  212. ]
  213. self.assertEqual(b"".join(expected), b"".join(res))
  214. class TestPackData(PackTests):
  215. """Tests getting the data from the packfile."""
  216. def test_create_pack(self):
  217. self.get_pack_data(pack1_sha).close()
  218. def test_from_file(self):
  219. path = os.path.join(
  220. self.datadir, "pack-{}.pack".format(pack1_sha.decode("ascii"))
  221. )
  222. with open(path, "rb") as f:
  223. PackData.from_file(f, os.path.getsize(path))
  224. def test_pack_len(self):
  225. with self.get_pack_data(pack1_sha) as p:
  226. self.assertEqual(3, len(p))
  227. def test_index_check(self):
  228. with self.get_pack_data(pack1_sha) as p:
  229. self.assertSucceeds(p.check)
  230. def test_iter_unpacked(self):
  231. with self.get_pack_data(pack1_sha) as p:
  232. commit_data = (
  233. b"tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n"
  234. b"author James Westby <jw+debian@jameswestby.net> "
  235. b"1174945067 +0100\n"
  236. b"committer James Westby <jw+debian@jameswestby.net> "
  237. b"1174945067 +0100\n"
  238. b"\n"
  239. b"Test commit\n"
  240. )
  241. blob_sha = b"6f670c0fb53f9463760b7295fbb814e965fb20c8"
  242. tree_data = b"100644 a\0" + hex_to_sha(blob_sha)
  243. actual = list(p.iter_unpacked())
  244. self.assertEqual(
  245. [
  246. UnpackedObject(
  247. offset=12,
  248. pack_type_num=1,
  249. decomp_chunks=[commit_data],
  250. crc32=None,
  251. ),
  252. UnpackedObject(
  253. offset=138,
  254. pack_type_num=2,
  255. decomp_chunks=[tree_data],
  256. crc32=None,
  257. ),
  258. UnpackedObject(
  259. offset=178,
  260. pack_type_num=3,
  261. decomp_chunks=[b"test 1\n"],
  262. crc32=None,
  263. ),
  264. ],
  265. actual,
  266. )
  267. def test_iterentries(self):
  268. with self.get_pack_data(pack1_sha) as p:
  269. entries = {(sha_to_hex(s), o, c) for s, o, c in p.iterentries()}
  270. self.assertEqual(
  271. {
  272. (
  273. b"6f670c0fb53f9463760b7295fbb814e965fb20c8",
  274. 178,
  275. 1373561701,
  276. ),
  277. (
  278. b"b2a2766a2879c209ab1176e7e778b81ae422eeaa",
  279. 138,
  280. 912998690,
  281. ),
  282. (
  283. b"f18faa16531ac570a3fdc8c7ca16682548dafd12",
  284. 12,
  285. 3775879613,
  286. ),
  287. },
  288. entries,
  289. )
  290. def test_create_index_v1(self):
  291. with self.get_pack_data(pack1_sha) as p:
  292. filename = os.path.join(self.tempdir, "v1test.idx")
  293. p.create_index_v1(filename)
  294. idx1 = load_pack_index(filename)
  295. idx2 = self.get_pack_index(pack1_sha)
  296. self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
  297. self.assertEqual(idx1, idx2)
  298. def test_create_index_v2(self):
  299. with self.get_pack_data(pack1_sha) as p:
  300. filename = os.path.join(self.tempdir, "v2test.idx")
  301. p.create_index_v2(filename)
  302. idx1 = load_pack_index(filename)
  303. idx2 = self.get_pack_index(pack1_sha)
  304. self.assertEqual(oct(os.stat(filename).st_mode), indexmode)
  305. self.assertEqual(idx1, idx2)
  306. def test_compute_file_sha(self):
  307. f = BytesIO(b"abcd1234wxyz")
  308. self.assertEqual(
  309. sha1(b"abcd1234wxyz").hexdigest(), compute_file_sha(f).hexdigest()
  310. )
  311. self.assertEqual(
  312. sha1(b"abcd1234wxyz").hexdigest(),
  313. compute_file_sha(f, buffer_size=5).hexdigest(),
  314. )
  315. self.assertEqual(
  316. sha1(b"abcd1234").hexdigest(),
  317. compute_file_sha(f, end_ofs=-4).hexdigest(),
  318. )
  319. self.assertEqual(
  320. sha1(b"1234wxyz").hexdigest(),
  321. compute_file_sha(f, start_ofs=4).hexdigest(),
  322. )
  323. self.assertEqual(
  324. sha1(b"1234").hexdigest(),
  325. compute_file_sha(f, start_ofs=4, end_ofs=-4).hexdigest(),
  326. )
  327. def test_compute_file_sha_short_file(self):
  328. f = BytesIO(b"abcd1234wxyz")
  329. self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20)
  330. self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20)
  331. self.assertRaises(
  332. AssertionError, compute_file_sha, f, start_ofs=10, end_ofs=-12
  333. )
  334. class TestPack(PackTests):
  335. def test_len(self):
  336. with self.get_pack(pack1_sha) as p:
  337. self.assertEqual(3, len(p))
  338. def test_contains(self):
  339. with self.get_pack(pack1_sha) as p:
  340. self.assertIn(tree_sha, p)
  341. def test_get(self):
  342. with self.get_pack(pack1_sha) as p:
  343. self.assertEqual(type(p[tree_sha]), Tree)
  344. def test_iter(self):
  345. with self.get_pack(pack1_sha) as p:
  346. self.assertEqual({tree_sha, commit_sha, a_sha}, set(p))
  347. def test_iterobjects(self):
  348. with self.get_pack(pack1_sha) as p:
  349. expected = {p[s] for s in [commit_sha, tree_sha, a_sha]}
  350. self.assertEqual(expected, set(list(p.iterobjects())))
  351. def test_pack_tuples(self):
  352. with self.get_pack(pack1_sha) as p:
  353. tuples = p.pack_tuples()
  354. expected = {(p[s], None) for s in [commit_sha, tree_sha, a_sha]}
  355. self.assertEqual(expected, set(list(tuples)))
  356. self.assertEqual(expected, set(list(tuples)))
  357. self.assertEqual(3, len(tuples))
  358. def test_get_object_at(self):
  359. """Tests random access for non-delta objects."""
  360. with self.get_pack(pack1_sha) as p:
  361. obj = p[a_sha]
  362. self.assertEqual(obj.type_name, b"blob")
  363. self.assertEqual(obj.sha().hexdigest().encode("ascii"), a_sha)
  364. obj = p[tree_sha]
  365. self.assertEqual(obj.type_name, b"tree")
  366. self.assertEqual(obj.sha().hexdigest().encode("ascii"), tree_sha)
  367. obj = p[commit_sha]
  368. self.assertEqual(obj.type_name, b"commit")
  369. self.assertEqual(obj.sha().hexdigest().encode("ascii"), commit_sha)
  370. def test_copy(self):
  371. with self.get_pack(pack1_sha) as origpack:
  372. self.assertSucceeds(origpack.index.check)
  373. basename = os.path.join(self.tempdir, "Elch")
  374. write_pack(basename, origpack.pack_tuples())
  375. with Pack(basename) as newpack:
  376. self.assertEqual(origpack, newpack)
  377. self.assertSucceeds(newpack.index.check)
  378. self.assertEqual(origpack.name(), newpack.name())
  379. self.assertEqual(
  380. origpack.index.get_pack_checksum(),
  381. newpack.index.get_pack_checksum(),
  382. )
  383. wrong_version = origpack.index.version != newpack.index.version
  384. orig_checksum = origpack.index.get_stored_checksum()
  385. new_checksum = newpack.index.get_stored_checksum()
  386. self.assertTrue(wrong_version or orig_checksum == new_checksum)
  387. def test_commit_obj(self):
  388. with self.get_pack(pack1_sha) as p:
  389. commit = p[commit_sha]
  390. self.assertEqual(b"James Westby <jw+debian@jameswestby.net>", commit.author)
  391. self.assertEqual([], commit.parents)
  392. def _copy_pack(self, origpack):
  393. basename = os.path.join(self.tempdir, "somepack")
  394. write_pack(basename, origpack.pack_tuples())
  395. return Pack(basename)
  396. def test_keep_no_message(self):
  397. with self.get_pack(pack1_sha) as p:
  398. p = self._copy_pack(p)
  399. with p:
  400. keepfile_name = p.keep()
  401. # file should exist
  402. self.assertTrue(os.path.exists(keepfile_name))
  403. with open(keepfile_name) as f:
  404. buf = f.read()
  405. self.assertEqual("", buf)
  406. def test_keep_message(self):
  407. with self.get_pack(pack1_sha) as p:
  408. p = self._copy_pack(p)
  409. msg = b"some message"
  410. with p:
  411. keepfile_name = p.keep(msg)
  412. # file should exist
  413. self.assertTrue(os.path.exists(keepfile_name))
  414. # and contain the right message, with a linefeed
  415. with open(keepfile_name, "rb") as f:
  416. buf = f.read()
  417. self.assertEqual(msg + b"\n", buf)
  418. def test_name(self):
  419. with self.get_pack(pack1_sha) as p:
  420. self.assertEqual(pack1_sha, p.name())
  421. def test_length_mismatch(self):
  422. with self.get_pack_data(pack1_sha) as data:
  423. index = self.get_pack_index(pack1_sha)
  424. Pack.from_objects(data, index).check_length_and_checksum()
  425. data._file.seek(12)
  426. bad_file = BytesIO()
  427. write_pack_header(bad_file.write, 9999)
  428. bad_file.write(data._file.read())
  429. bad_file = BytesIO(bad_file.getvalue())
  430. bad_data = PackData("", file=bad_file)
  431. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  432. self.assertRaises(AssertionError, lambda: bad_pack.data)
  433. self.assertRaises(AssertionError, bad_pack.check_length_and_checksum)
  434. def test_checksum_mismatch(self):
  435. with self.get_pack_data(pack1_sha) as data:
  436. index = self.get_pack_index(pack1_sha)
  437. Pack.from_objects(data, index).check_length_and_checksum()
  438. data._file.seek(0)
  439. bad_file = BytesIO(data._file.read()[:-20] + (b"\xff" * 20))
  440. bad_data = PackData("", file=bad_file)
  441. bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index)
  442. self.assertRaises(ChecksumMismatch, lambda: bad_pack.data)
  443. self.assertRaises(ChecksumMismatch, bad_pack.check_length_and_checksum)
  444. def test_iterobjects_2(self):
  445. with self.get_pack(pack1_sha) as p:
  446. objs = {o.id: o for o in p.iterobjects()}
  447. self.assertEqual(3, len(objs))
  448. self.assertEqual(sorted(objs), sorted(p.index))
  449. self.assertIsInstance(objs[a_sha], Blob)
  450. self.assertIsInstance(objs[tree_sha], Tree)
  451. self.assertIsInstance(objs[commit_sha], Commit)
  452. def test_iterobjects_subset(self):
  453. with self.get_pack(pack1_sha) as p:
  454. objs = {o.id: o for o in p.iterobjects_subset([commit_sha])}
  455. self.assertEqual(1, len(objs))
  456. self.assertIsInstance(objs[commit_sha], Commit)
  457. class TestThinPack(PackTests):
  458. def setUp(self):
  459. super().setUp()
  460. self.store = MemoryObjectStore()
  461. self.blobs = {}
  462. for blob in (b"foo", b"bar", b"foo1234", b"bar2468"):
  463. self.blobs[blob] = make_object(Blob, data=blob)
  464. self.store.add_object(self.blobs[b"foo"])
  465. self.store.add_object(self.blobs[b"bar"])
  466. # Build a thin pack. 'foo' is as an external reference, 'bar' an
  467. # internal reference.
  468. self.pack_dir = tempfile.mkdtemp()
  469. self.addCleanup(shutil.rmtree, self.pack_dir)
  470. self.pack_prefix = os.path.join(self.pack_dir, "pack")
  471. with open(self.pack_prefix + ".pack", "wb") as f:
  472. build_pack(
  473. f,
  474. [
  475. (REF_DELTA, (self.blobs[b"foo"].id, b"foo1234")),
  476. (Blob.type_num, b"bar"),
  477. (REF_DELTA, (self.blobs[b"bar"].id, b"bar2468")),
  478. ],
  479. store=self.store,
  480. )
  481. # Index the new pack.
  482. with self.make_pack(True) as pack:
  483. with PackData(pack._data_path) as data:
  484. data.create_index(
  485. self.pack_prefix + ".idx", resolve_ext_ref=pack.resolve_ext_ref
  486. )
  487. del self.store[self.blobs[b"bar"].id]
  488. def make_pack(self, resolve_ext_ref):
  489. return Pack(
  490. self.pack_prefix,
  491. resolve_ext_ref=self.store.get_raw if resolve_ext_ref else None,
  492. )
  493. def test_get_raw(self):
  494. with self.make_pack(False) as p:
  495. self.assertRaises(KeyError, p.get_raw, self.blobs[b"foo1234"].id)
  496. with self.make_pack(True) as p:
  497. self.assertEqual((3, b"foo1234"), p.get_raw(self.blobs[b"foo1234"].id))
  498. def test_get_unpacked_object(self):
  499. self.maxDiff = None
  500. with self.make_pack(False) as p:
  501. expected = UnpackedObject(
  502. 7,
  503. delta_base=b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c",
  504. decomp_chunks=[b"\x03\x07\x90\x03\x041234"],
  505. )
  506. expected.offset = 12
  507. got = p.get_unpacked_object(self.blobs[b"foo1234"].id)
  508. self.assertEqual(expected, got)
  509. with self.make_pack(True) as p:
  510. expected = UnpackedObject(
  511. 7,
  512. delta_base=b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c",
  513. decomp_chunks=[b"\x03\x07\x90\x03\x041234"],
  514. )
  515. expected.offset = 12
  516. got = p.get_unpacked_object(self.blobs[b"foo1234"].id)
  517. self.assertEqual(
  518. expected,
  519. got,
  520. )
  521. def test_iterobjects(self):
  522. with self.make_pack(False) as p:
  523. self.assertRaises(UnresolvedDeltas, list, p.iterobjects())
  524. with self.make_pack(True) as p:
  525. self.assertEqual(
  526. sorted(
  527. [
  528. self.blobs[b"foo1234"].id,
  529. self.blobs[b"bar"].id,
  530. self.blobs[b"bar2468"].id,
  531. ]
  532. ),
  533. sorted(o.id for o in p.iterobjects()),
  534. )
  535. class WritePackTests(TestCase):
  536. def test_write_pack_header(self):
  537. f = BytesIO()
  538. write_pack_header(f.write, 42)
  539. self.assertEqual(b"PACK\x00\x00\x00\x02\x00\x00\x00*", f.getvalue())
  540. def test_write_pack_object(self):
  541. f = BytesIO()
  542. f.write(b"header")
  543. offset = f.tell()
  544. crc32 = write_pack_object(f.write, Blob.type_num, b"blob")
  545. self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xFFFFFFFF)
  546. f.write(b"x") # unpack_object needs extra trailing data.
  547. f.seek(offset)
  548. unpacked, unused = unpack_object(f.read, compute_crc32=True)
  549. self.assertEqual(Blob.type_num, unpacked.pack_type_num)
  550. self.assertEqual(Blob.type_num, unpacked.obj_type_num)
  551. self.assertEqual([b"blob"], unpacked.decomp_chunks)
  552. self.assertEqual(crc32, unpacked.crc32)
  553. self.assertEqual(b"x", unused)
  554. def test_write_pack_object_sha(self):
  555. f = BytesIO()
  556. f.write(b"header")
  557. offset = f.tell()
  558. sha_a = sha1(b"foo")
  559. sha_b = sha_a.copy()
  560. write_pack_object(f.write, Blob.type_num, b"blob", sha=sha_a)
  561. self.assertNotEqual(sha_a.digest(), sha_b.digest())
  562. sha_b.update(f.getvalue()[offset:])
  563. self.assertEqual(sha_a.digest(), sha_b.digest())
  564. def test_write_pack_object_compression_level(self):
  565. f = BytesIO()
  566. f.write(b"header")
  567. offset = f.tell()
  568. sha_a = sha1(b"foo")
  569. sha_b = sha_a.copy()
  570. write_pack_object(
  571. f.write, Blob.type_num, b"blob", sha=sha_a, compression_level=6
  572. )
  573. self.assertNotEqual(sha_a.digest(), sha_b.digest())
  574. sha_b.update(f.getvalue()[offset:])
  575. self.assertEqual(sha_a.digest(), sha_b.digest())
# Binary pack checksum handed to the index writers in the tests below.
pack_checksum = hex_to_sha("721980e866af9a5f93ad674144e1459b8ba3e7b7")
  577. class BaseTestPackIndexWriting:
  578. def assertSucceeds(self, func, *args, **kwargs):
  579. try:
  580. func(*args, **kwargs)
  581. except ChecksumMismatch as e:
  582. self.fail(e)
  583. def index(self, filename, entries, pack_checksum):
  584. raise NotImplementedError(self.index)
  585. def test_empty(self):
  586. idx = self.index("empty.idx", [], pack_checksum)
  587. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  588. self.assertEqual(0, len(idx))
  589. def test_large(self):
  590. entry1_sha = hex_to_sha("4e6388232ec39792661e2e75db8fb117fc869ce6")
  591. entry2_sha = hex_to_sha("e98f071751bd77f59967bfa671cd2caebdccc9a2")
  592. entries = [
  593. (entry1_sha, 0xF2972D0830529B87, 24),
  594. (entry2_sha, (~0xF2972D0830529B87) & (2**64 - 1), 92),
  595. ]
  596. if not self._supports_large:
  597. self.assertRaises(
  598. TypeError, self.index, "single.idx", entries, pack_checksum
  599. )
  600. return
  601. idx = self.index("single.idx", entries, pack_checksum)
  602. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  603. self.assertEqual(2, len(idx))
  604. actual_entries = list(idx.iterentries())
  605. self.assertEqual(len(entries), len(actual_entries))
  606. for mine, actual in zip(entries, actual_entries):
  607. my_sha, my_offset, my_crc = mine
  608. actual_sha, actual_offset, actual_crc = actual
  609. self.assertEqual(my_sha, actual_sha)
  610. self.assertEqual(my_offset, actual_offset)
  611. if self._has_crc32_checksum:
  612. self.assertEqual(my_crc, actual_crc)
  613. else:
  614. self.assertIsNone(actual_crc)
  615. def test_single(self):
  616. entry_sha = hex_to_sha("6f670c0fb53f9463760b7295fbb814e965fb20c8")
  617. my_entries = [(entry_sha, 178, 42)]
  618. idx = self.index("single.idx", my_entries, pack_checksum)
  619. self.assertEqual(idx.get_pack_checksum(), pack_checksum)
  620. self.assertEqual(1, len(idx))
  621. actual_entries = list(idx.iterentries())
  622. self.assertEqual(len(my_entries), len(actual_entries))
  623. for mine, actual in zip(my_entries, actual_entries):
  624. my_sha, my_offset, my_crc = mine
  625. actual_sha, actual_offset, actual_crc = actual
  626. self.assertEqual(my_sha, actual_sha)
  627. self.assertEqual(my_offset, actual_offset)
  628. if self._has_crc32_checksum:
  629. self.assertEqual(my_crc, actual_crc)
  630. else:
  631. self.assertIsNone(actual_crc)
  632. class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
  633. def setUp(self):
  634. self.tempdir = tempfile.mkdtemp()
  635. def tearDown(self):
  636. shutil.rmtree(self.tempdir)
  637. def index(self, filename, entries, pack_checksum):
  638. path = os.path.join(self.tempdir, filename)
  639. self.writeIndex(path, entries, pack_checksum)
  640. idx = load_pack_index(path)
  641. self.assertSucceeds(idx.check)
  642. self.assertEqual(idx.version, self._expected_version)
  643. return idx
  644. def writeIndex(self, filename, entries, pack_checksum):
  645. # FIXME: Write to BytesIO instead rather than hitting disk ?
  646. with GitFile(filename, "wb") as f:
  647. self._write_fn(f, entries, pack_checksum)
  648. class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
  649. def setUp(self):
  650. TestCase.setUp(self)
  651. self._has_crc32_checksum = True
  652. self._supports_large = True
  653. def index(self, filename, entries, pack_checksum):
  654. return MemoryPackIndex(entries, pack_checksum)
  655. def tearDown(self):
  656. TestCase.tearDown(self)
  657. class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
  658. def setUp(self):
  659. TestCase.setUp(self)
  660. BaseTestFilePackIndexWriting.setUp(self)
  661. self._has_crc32_checksum = False
  662. self._expected_version = 1
  663. self._supports_large = False
  664. self._write_fn = write_pack_index_v1
  665. def tearDown(self):
  666. TestCase.tearDown(self)
  667. BaseTestFilePackIndexWriting.tearDown(self)
  668. class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
  669. def setUp(self):
  670. TestCase.setUp(self)
  671. BaseTestFilePackIndexWriting.setUp(self)
  672. self._has_crc32_checksum = True
  673. self._supports_large = True
  674. self._expected_version = 2
  675. self._write_fn = write_pack_index_v2
  676. def tearDown(self):
  677. TestCase.tearDown(self)
  678. BaseTestFilePackIndexWriting.tearDown(self)
  class ReadZlibTests(TestCase):
      """Tests for ``read_zlib_chunks``.

      The fixture is a zlib-compressed commit-like payload (``comp``) followed
      by trailing bytes (``extra``) that stand in for the next object in a
      stream.
      """

      decomp = (
          b"tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n"
          b"parent None\n"
          b"author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n"
          b"committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n"
          b"\n"
          b"Provide replacement for mmap()'s offset argument."
      )
      comp = zlib.compress(decomp)
      extra = b"nextobject"

      def setUp(self):
          """Create a fresh reader and an ``UnpackedObject`` sized for ``decomp``."""
          super().setUp()
          self.read = self._fresh_read()
          self.unpacked = UnpackedObject(
              Tree.type_num, decomp_len=len(self.decomp), crc32=0
          )

      def _fresh_read(self):
          """Return a ``read`` callable positioned at the start of comp + extra."""
          return BytesIO(self.comp + self.extra).read

      def test_decompress_size(self):
          """A negative or mismatched ``decomp_len`` must be rejected."""
          good_decomp_len = len(self.decomp)
          self.unpacked.decomp_len = -1
          self.assertRaises(
              ValueError, read_zlib_chunks, self._fresh_read(), self.unpacked
          )
          # Every mismatch case gets its own stream.  Sharing one reader lets
          # the first failing call drain it, and running out of input raises
          # zlib.error as well (see test_decompress_truncated), so the later
          # case would pass without the size check ever being exercised.
          for bad_len in (good_decomp_len - 1, good_decomp_len + 1):
              self.unpacked.decomp_len = bad_len
              self.assertRaises(
                  zlib.error, read_zlib_chunks, self._fresh_read(), self.unpacked
              )

      def test_decompress_truncated(self):
          """Input that ends early, or has no trailing data, raises zlib.error."""
          read = BytesIO(self.comp[:10]).read
          self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
          read = BytesIO(self.comp).read
          self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)

      def test_decompress_empty(self):
          """An empty payload decompresses to nothing and leaves ``extra`` intact."""
          unpacked = UnpackedObject(Tree.type_num, decomp_len=0)
          comp = zlib.compress(b"")
          read = BytesIO(comp + self.extra).read
          unused = read_zlib_chunks(read, unpacked)
          self.assertEqual(b"", b"".join(unpacked.decomp_chunks))
          self.assertNotEqual(b"", unused)
          self.assertEqual(self.extra, unused + read())

      def test_decompress_no_crc32(self):
          """A ``crc32`` of ``None`` stays ``None`` after reading."""
          self.unpacked.crc32 = None
          read_zlib_chunks(self.read, self.unpacked)
          self.assertIsNone(self.unpacked.crc32)

      def _do_decompress_test(self, buffer_size, **kwargs):
          """Decompress with ``buffer_size`` and check data, CRC32 and leftovers.

          Extra keyword arguments are forwarded to ``read_zlib_chunks``.
          """
          unused = read_zlib_chunks(
              self.read, self.unpacked, buffer_size=buffer_size, **kwargs
          )
          self.assertEqual(self.decomp, b"".join(self.unpacked.decomp_chunks))
          self.assertEqual(zlib.crc32(self.comp), self.unpacked.crc32)
          self.assertNotEqual(b"", unused)
          # Whatever was over-read plus the rest of the stream must add up to
          # exactly the trailing bytes.
          self.assertEqual(self.extra, unused + self.read())

      def test_simple_decompress(self):
          """With default options the compressed chunks are not retained."""
          self._do_decompress_test(4096)
          self.assertIsNone(self.unpacked.comp_chunks)

      # These buffer sizes are not intended to be realistic, but rather simulate
      # larger buffer sizes that may end at various places.
      def test_decompress_buffer_size_1(self):
          self._do_decompress_test(1)

      def test_decompress_buffer_size_2(self):
          self._do_decompress_test(2)

      def test_decompress_buffer_size_3(self):
          self._do_decompress_test(3)

      def test_decompress_buffer_size_4(self):
          self._do_decompress_test(4)

      def test_decompress_include_comp(self):
          """With ``include_comp=True`` the compressed chunks equal ``comp``."""
          self._do_decompress_test(4096, include_comp=True)
          self.assertEqual(self.comp, b"".join(self.unpacked.comp_chunks))
  class DeltifyTests(TestCase):
      """Tests for ``deltify_pack_objects``."""

      def test_empty(self):
          """No input objects yields no output objects."""
          self.assertEqual([], list(deltify_pack_objects([])))

      def test_single(self):
          """A lone blob is emitted whole, with no delta base."""
          blob = Blob.from_string(b"foo")
          expected = [
              UnpackedObject(
                  blob.type_num,
                  sha=blob.sha().digest(),
                  delta_base=None,
                  decomp_chunks=blob.as_raw_chunks(),
              )
          ]
          actual = list(deltify_pack_objects([(blob, b"")]))
          self.assertEqual(expected, actual)

      def test_simple_delta(self):
          """The second blob is emitted as a delta against the first."""
          b1 = Blob.from_string(b"a" * 101)
          b2 = Blob.from_string(b"a" * 100)
          delta = list(create_delta(b1.as_raw_chunks(), b2.as_raw_chunks()))
          full_object = UnpackedObject(
              b1.type_num,
              sha=b1.sha().digest(),
              delta_base=None,
              decomp_chunks=b1.as_raw_chunks(),
          )
          delta_object = UnpackedObject(
              b2.type_num,
              sha=b2.sha().digest(),
              delta_base=b1.sha().digest(),
              decomp_chunks=delta,
          )
          actual = list(deltify_pack_objects([(b1, b""), (b2, b"")]))
          self.assertEqual([full_object, delta_object], actual)
  class TestPackStreamReader(TestCase):
      """Tests for ``PackStreamReader.read_objects``."""

      def test_read_objects_emtpy(self):
          """A pack built from an empty object list yields no objects."""
          f = BytesIO()
          build_pack(f, [])
          reader = PackStreamReader(f.read)
          objects = list(reader.read_objects())
          self.assertEqual(0, len(objects))

      def test_read_objects(self):
          """A blob and an OFS_DELTA on it are read back with matching metadata."""
          f = BytesIO()
          spec = [
              (Blob.type_num, b"blob"),
              (OFS_DELTA, (0, b"blob1")),
          ]
          entries = build_pack(f, spec)
          reader = PackStreamReader(f.read)
          objects = list(reader.read_objects(compute_crc32=True))
          self.assertEqual(2, len(objects))

          unpacked_blob, unpacked_delta = objects

          # The full object: entry fields 0 and 4 are compared against the
          # unpacked offset and CRC32.
          self.assertEqual(entries[0][0], unpacked_blob.offset)
          self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num)
          self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
          self.assertEqual(None, unpacked_blob.delta_base)
          self.assertEqual(b"blob", b"".join(unpacked_blob.decomp_chunks))
          self.assertEqual(entries[0][4], unpacked_blob.crc32)

          # The delta: its base is the distance back to the blob's offset.
          self.assertEqual(entries[1][0], unpacked_delta.offset)
          self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num)
          self.assertEqual(None, unpacked_delta.obj_type_num)
          expected_base = unpacked_delta.offset - unpacked_blob.offset
          self.assertEqual(expected_base, unpacked_delta.delta_base)
          delta = create_delta(b"blob", b"blob1")
          self.assertEqual(b"".join(delta), b"".join(unpacked_delta.decomp_chunks))
          self.assertEqual(entries[1][4], unpacked_delta.crc32)

      def test_read_objects_buffered(self):
          """Reading with ``zlib_bufsize=4`` still yields both objects."""
          f = BytesIO()
          spec = [
              (Blob.type_num, b"blob"),
              (OFS_DELTA, (0, b"blob1")),
          ]
          build_pack(f, spec)
          reader = PackStreamReader(f.read, zlib_bufsize=4)
          objects = list(reader.read_objects())
          self.assertEqual(2, len(objects))

      def test_read_objects_empty(self):
          """Consuming objects from a zero-byte stream raises AssertionError."""
          reader = PackStreamReader(BytesIO().read)
          pending = reader.read_objects()
          with self.assertRaises(AssertionError):
              list(pending)
  class TestPackIterator(DeltaChainIterator):
      """``DeltaChainIterator`` used by the tests in this module.

      It yields tuples shaped like ``build_pack`` entries and asserts that no
      offset is passed to ``_resolve_object`` more than once.
      """

      _compute_crc32 = True

      def __init__(self, *args, **kwargs) -> None:
          super().__init__(*args, **kwargs)
          # Offsets that have already gone through _resolve_object.
          self._unpacked_offsets: Set[int] = set()

      def _result(self, unpacked):
          """Return entries in the same format as build_pack."""
          return (
              unpacked.offset,
              unpacked.obj_type_num,
              b"".join(unpacked.obj_chunks),
              unpacked.sha(),
              unpacked.crc32,
          )

      def _resolve_object(self, offset, pack_type_num, base_chunks):
          """Record ``offset`` as seen, then defer to the parent implementation."""
          seen = self._unpacked_offsets
          assert offset not in seen, "Attempted to re-inflate offset %i" % offset
          seen.add(offset)
          return super()._resolve_object(offset, pack_type_num, base_chunks)
  class DeltaChainIteratorTests(TestCase):
      """Tests for delta-chain walking, driven through ``TestPackIterator``.

      Each test builds a pack with ``build_pack`` and checks the order in
      which ``_walk_all_chains`` yields entries, or the external references
      and errors it reports.
      """

      def setUp(self):
          """Create an empty object store and an empty set of fetched SHAs."""
          super().setUp()
          self.store = MemoryObjectStore()
          self.fetched = set()

      def store_blobs(self, blobs_data):
          """Add one blob per item of ``blobs_data`` to the store.

          Returns:
            The created blobs, in the same order as ``blobs_data``.
          """
          blobs = []
          for data in blobs_data:
              blob = make_object(Blob, data=data)
              blobs.append(blob)
              self.store.add_object(blob)
          return blobs

      def get_raw_no_repeat(self, bin_sha):
          """Wrapper around store.get_raw that doesn't allow repeat lookups."""
          hex_sha = sha_to_hex(bin_sha)
          self.assertNotIn(
              hex_sha, self.fetched, f"Attempted to re-fetch object {hex_sha}"
          )
          self.fetched.add(hex_sha)
          return self.store.get_raw(hex_sha)

      def _ext_ref_resolver(self, thin=None):
          """Return the external-reference resolver for a thin pack, else None.

          When ``thin`` is None the pack is treated as thin exactly when the
          store contains at least one object.
          """
          if thin is None:
              thin = bool(list(self.store))
          return self.get_raw_no_repeat if thin else None

      def make_pack_iter(self, f, thin=None):
          """Return a ``TestPackIterator`` over all of the pack stored in ``f``."""
          resolve_ext_ref = self._ext_ref_resolver(thin)
          data = PackData("test.pack", file=f)
          return TestPackIterator.for_pack_data(data, resolve_ext_ref=resolve_ext_ref)

      def make_pack_iter_subset(self, f, subset, thin=None):
          """Return a ``TestPackIterator`` restricted to ``subset``.

          The pack in ``f`` is indexed in memory so that the iterator can be
          built through ``for_pack_subset``.
          """
          resolve_ext_ref = self._ext_ref_resolver(thin)
          data = PackData("test.pack", file=f)
          assert data
          index = MemoryPackIndex.for_pack(data)
          pack = Pack.from_objects(data, index)
          return TestPackIterator.for_pack_subset(
              pack, subset, resolve_ext_ref=resolve_ext_ref
          )

      def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
          """Assert ``pack_iter`` yields ``entries`` in ``expected_indexes`` order."""
          expected = [entries[i] for i in expected_indexes]
          self.assertEqual(expected, list(pack_iter._walk_all_chains()))

      def test_no_deltas(self):
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (Commit.type_num, b"commit"),
                  (Blob.type_num, b"blob"),
                  (Tree.type_num, b"tree"),
              ],
          )
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
          f.seek(0)
          self.assertEntriesMatch([], entries, self.make_pack_iter_subset(f, []))
          f.seek(0)
          # The subset is given once as the raw entry SHAs ...
          self.assertEntriesMatch(
              [1, 0],
              entries,
              self.make_pack_iter_subset(f, [entries[0][3], entries[1][3]]),
          )
          f.seek(0)
          # ... and once converted with sha_to_hex.
          self.assertEntriesMatch(
              [1, 0],
              entries,
              self.make_pack_iter_subset(
                  f, [sha_to_hex(entries[0][3]), sha_to_hex(entries[1][3])]
              ),
          )

      def test_ofs_deltas(self):
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (Blob.type_num, b"blob"),
                  (OFS_DELTA, (0, b"blob1")),
                  (OFS_DELTA, (0, b"blob2")),
              ],
          )
          # Delta resolution changed to DFS
          self.assertEntriesMatch([0, 2, 1], entries, self.make_pack_iter(f))
          f.seek(0)
          self.assertEntriesMatch(
              [0, 2, 1],
              entries,
              self.make_pack_iter_subset(f, [entries[1][3], entries[2][3]]),
          )

      def test_ofs_deltas_chain(self):
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (Blob.type_num, b"blob"),
                  (OFS_DELTA, (0, b"blob1")),
                  (OFS_DELTA, (1, b"blob2")),
              ],
          )
          self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))

      def test_ref_deltas(self):
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (1, b"blob1")),
                  (Blob.type_num, b"blob"),
                  (REF_DELTA, (1, b"blob2")),
              ],
          )
          # Delta resolution changed to DFS
          self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))

      def test_ref_deltas_chain(self):
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (2, b"blob1")),
                  (Blob.type_num, b"blob"),
                  (REF_DELTA, (1, b"blob2")),
              ],
          )
          self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f))

      def test_ofs_and_ref_deltas(self):
          # Deltas pending on this offset are popped before deltas depending on
          # this ref.
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (1, b"blob1")),
                  (Blob.type_num, b"blob"),
                  (OFS_DELTA, (1, b"blob2")),
              ],
          )
          # Delta resolution changed to DFS
          self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f))

      def test_mixed_chain(self):
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (Blob.type_num, b"blob"),
                  (REF_DELTA, (2, b"blob2")),
                  (OFS_DELTA, (0, b"blob1")),
                  (OFS_DELTA, (1, b"blob3")),
                  (OFS_DELTA, (0, b"bob")),
              ],
          )
          # Delta resolution changed to DFS
          self.assertEntriesMatch([0, 4, 2, 1, 3], entries, self.make_pack_iter(f))

      def test_long_chain(self):
          """A chain where every delta is based on the entry just before it."""
          n = 100
          objects_spec = [(Blob.type_num, b"blob")]
          for i in range(n):
              objects_spec.append((OFS_DELTA, (i, b"blob" + str(i).encode("ascii"))))
          f = BytesIO()
          entries = build_pack(f, objects_spec)
          self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))

      def test_branchy_chain(self):
          """Many deltas that are all based directly on the first entry."""
          n = 100
          objects_spec = [(Blob.type_num, b"blob")]
          for i in range(n):
              objects_spec.append((OFS_DELTA, (0, b"blob" + str(i).encode("ascii"))))
          f = BytesIO()
          entries = build_pack(f, objects_spec)
          # Delta resolution changed to DFS
          indices = [0, *range(n, 0, -1)]
          self.assertEntriesMatch(indices, entries, self.make_pack_iter(f))

      def test_ext_ref(self):
          (blob,) = self.store_blobs([b"blob"])
          f = BytesIO()
          entries = build_pack(f, [(REF_DELTA, (blob.id, b"blob1"))], store=self.store)
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_chain(self):
          (blob,) = self.store_blobs([b"blob"])
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (1, b"blob2")),
                  (REF_DELTA, (blob.id, b"blob1")),
              ],
              store=self.store,
          )
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([1, 0], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_chain_degenerate(self):
          # Test a degenerate case where the sender is sending a REF_DELTA
          # object that expands to an object already in the repository.
          (blob,) = self.store_blobs([b"blob"])
          (blob2,) = self.store_blobs([b"blob2"])
          # Precondition of the scenario: the two ids must sort in this order.
          self.assertLess(blob.id, blob2.id)
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (blob.id, b"blob2")),
                  (REF_DELTA, (0, b"blob3")),
              ],
              store=self.store,
          )
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_ext_ref_multiple_times(self):
          (blob,) = self.store_blobs([b"blob"])
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (blob.id, b"blob1")),
                  (REF_DELTA, (blob.id, b"blob2")),
              ],
              store=self.store,
          )
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs())

      def test_multiple_ext_refs(self):
          b1, b2 = self.store_blobs([b"foo", b"bar"])
          f = BytesIO()
          entries = build_pack(
              f,
              [
                  (REF_DELTA, (b1.id, b"foo1")),
                  (REF_DELTA, (b2.id, b"bar2")),
              ],
              store=self.store,
          )
          pack_iter = self.make_pack_iter(f)
          self.assertEntriesMatch([0, 1], entries, pack_iter)
          self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)], pack_iter.ext_refs())

      def test_bad_ext_ref_non_thin_pack(self):
          """Without a resolver, an external base is reported as unresolved."""
          (blob,) = self.store_blobs([b"blob"])
          f = BytesIO()
          build_pack(f, [(REF_DELTA, (blob.id, b"blob1"))], store=self.store)
          pack_iter = self.make_pack_iter(f, thin=False)
          with self.assertRaises(UnresolvedDeltas) as raised:
              list(pack_iter._walk_all_chains())
          self.assertEqual([blob.id], raised.exception.shas)

      def test_bad_ext_ref_thin_pack(self):
          """Bases deleted from the store are reported as unresolved."""
          b1, b2, b3 = self.store_blobs([b"foo", b"bar", b"baz"])
          f = BytesIO()
          build_pack(
              f,
              [
                  (REF_DELTA, (1, b"foo99")),
                  (REF_DELTA, (b1.id, b"foo1")),
                  (REF_DELTA, (b2.id, b"bar2")),
                  (REF_DELTA, (b3.id, b"baz3")),
              ],
              store=self.store,
          )
          # Remove two of the three bases after the pack has been built.
          del self.store[b2.id]
          del self.store[b3.id]
          pack_iter = self.make_pack_iter(f)
          with self.assertRaises(UnresolvedDeltas) as raised:
              list(pack_iter._walk_all_chains())
          self.assertEqual(sorted([b2.id, b3.id]), sorted(raised.exception.shas))
  class DeltaEncodeSizeTests(TestCase):
      """Checks the bytes returned by ``_delta_encode_size``."""

      def test_basic(self):
          """Each size must encode to the listed byte string."""
          cases = (
              (b"\x00", 0),
              (b"\x01", 1),
              (b"\xfa\x01", 250),
              (b"\xe8\x07", 1000),
              (b"\xa0\x8d\x06", 100000),
          )
          for expected, size in cases:
              self.assertEqual(expected, _delta_encode_size(size))
  class EncodeCopyOperationTests(TestCase):
      """Checks the bytes returned by ``_encode_copy_operation``."""

      def test_basic(self):
          """Each argument pair must encode to the listed byte string."""
          cases = (
              (b"\x80", (0, 0)),
              (b"\x91\x01\x0a", (1, 10)),
              (b"\xb1\x64\xe8\x03", (100, 1000)),
              (b"\x93\xe8\x03\x01", (1000, 1)),
          )
          for expected, args in cases:
              self.assertEqual(expected, _encode_copy_operation(*args))