test_missing_obj_finder.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. # test_missing_obj_finder.py -- tests for MissingObjectFinder
  2. # Copyright (C) 2012 syntevo GmbH
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as public by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. from dulwich.object_store import MemoryObjectStore, MissingObjectFinder
  22. from dulwich.objects import Blob
  23. from dulwich.tests.utils import build_commit_graph, make_object, make_tag
  24. from . import TestCase
  25. class MissingObjectFinderTest(TestCase):
  26. def setUp(self) -> None:
  27. super().setUp()
  28. self.store = MemoryObjectStore()
  29. self.commits = []
  30. def cmt(self, n):
  31. return self.commits[n - 1]
  32. def assertMissingMatch(self, haves, wants, expected) -> None:
  33. for sha, path in MissingObjectFinder(self.store, haves, wants, shallow=set()):
  34. self.assertIn(
  35. sha, expected, f"({sha},{path}) erroneously reported as missing"
  36. )
  37. expected.remove(sha)
  38. self.assertEqual(
  39. len(expected),
  40. 0,
  41. f"some objects are not reported as missing: {expected}",
  42. )
  43. class MOFLinearRepoTest(MissingObjectFinderTest):
  44. def setUp(self) -> None:
  45. super().setUp()
  46. # present in 1, removed in 3
  47. f1_1 = make_object(Blob, data=b"f1")
  48. # present in all revisions, changed in 2 and 3
  49. f2_1 = make_object(Blob, data=b"f2")
  50. f2_2 = make_object(Blob, data=b"f2-changed")
  51. f2_3 = make_object(Blob, data=b"f2-changed-again")
  52. # added in 2, left unmodified in 3
  53. f3_2 = make_object(Blob, data=b"f3")
  54. commit_spec = [[1], [2, 1], [3, 2]]
  55. trees = {
  56. 1: [(b"f1", f1_1), (b"f2", f2_1)],
  57. 2: [(b"f1", f1_1), (b"f2", f2_2), (b"f3", f3_2)],
  58. 3: [(b"f2", f2_3), (b"f3", f3_2)],
  59. }
  60. # commit 1: f1 and f2
  61. # commit 2: f3 added, f2 changed. Missing shall report commit id and a
  62. # tree referenced by commit
  63. # commit 3: f1 removed, f2 changed. Commit sha and root tree sha shall
  64. # be reported as modified
  65. self.commits = build_commit_graph(self.store, commit_spec, trees)
  66. self.missing_1_2 = [self.cmt(2).id, self.cmt(2).tree, f2_2.id, f3_2.id]
  67. self.missing_2_3 = [self.cmt(3).id, self.cmt(3).tree, f2_3.id]
  68. self.missing_1_3 = [
  69. self.cmt(2).id,
  70. self.cmt(3).id,
  71. self.cmt(2).tree,
  72. self.cmt(3).tree,
  73. f2_2.id,
  74. f3_2.id,
  75. f2_3.id,
  76. ]
  77. def test_1_to_2(self) -> None:
  78. self.assertMissingMatch([self.cmt(1).id], [self.cmt(2).id], self.missing_1_2)
  79. def test_2_to_3(self) -> None:
  80. self.assertMissingMatch([self.cmt(2).id], [self.cmt(3).id], self.missing_2_3)
  81. def test_1_to_3(self) -> None:
  82. self.assertMissingMatch([self.cmt(1).id], [self.cmt(3).id], self.missing_1_3)
  83. def test_bogus_haves(self) -> None:
  84. """Ensure non-existent SHA in haves are tolerated."""
  85. bogus_sha = self.cmt(2).id[::-1]
  86. haves = [self.cmt(1).id, bogus_sha]
  87. wants = [self.cmt(3).id]
  88. self.assertMissingMatch(haves, wants, self.missing_1_3)
  89. def test_bogus_wants_failure(self) -> None:
  90. """Ensure non-existent SHA in wants are not tolerated."""
  91. bogus_sha = self.cmt(2).id[::-1]
  92. haves = [self.cmt(1).id]
  93. wants = [self.cmt(3).id, bogus_sha]
  94. self.assertRaises(
  95. KeyError, MissingObjectFinder, self.store, haves, wants, shallow=set()
  96. )
  97. def test_no_changes(self) -> None:
  98. self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
  99. class MOFMergeForkRepoTest(MissingObjectFinderTest):
  100. # 1 --- 2 --- 4 --- 6 --- 7
  101. # \ /
  102. # 3 ---
  103. # \
  104. # 5
  105. def setUp(self) -> None:
  106. super().setUp()
  107. f1_1 = make_object(Blob, data=b"f1")
  108. f1_2 = make_object(Blob, data=b"f1-2")
  109. f1_4 = make_object(Blob, data=b"f1-4")
  110. f1_7 = make_object(Blob, data=b"f1-2") # same data as in rev 2
  111. f2_1 = make_object(Blob, data=b"f2")
  112. f2_3 = make_object(Blob, data=b"f2-3")
  113. f3_3 = make_object(Blob, data=b"f3")
  114. f3_5 = make_object(Blob, data=b"f3-5")
  115. commit_spec = [[1], [2, 1], [3, 2], [4, 2], [5, 3], [6, 3, 4], [7, 6]]
  116. trees = {
  117. 1: [(b"f1", f1_1), (b"f2", f2_1)],
  118. 2: [(b"f1", f1_2), (b"f2", f2_1)], # f1 changed
  119. # f3 added, f2 changed
  120. 3: [(b"f1", f1_2), (b"f2", f2_3), (b"f3", f3_3)],
  121. 4: [(b"f1", f1_4), (b"f2", f2_1)], # f1 changed
  122. 5: [(b"f1", f1_2), (b"f3", f3_5)], # f2 removed, f3 changed
  123. # merged 3 and 4
  124. 6: [(b"f1", f1_4), (b"f2", f2_3), (b"f3", f3_3)],
  125. # f1 changed to match rev2. f3 removed
  126. 7: [(b"f1", f1_7), (b"f2", f2_3)],
  127. }
  128. self.commits = build_commit_graph(self.store, commit_spec, trees)
  129. self.f1_2_id = f1_2.id
  130. self.f1_4_id = f1_4.id
  131. self.f1_7_id = f1_7.id
  132. self.f2_3_id = f2_3.id
  133. self.f3_3_id = f3_3.id
  134. self.assertEqual(f1_2.id, f1_7.id, "[sanity]")
  135. def test_have6_want7(self) -> None:
  136. # have 6, want 7. Ideally, shall not report f1_7 as it's the same as
  137. # f1_2, however, to do so, MissingObjectFinder shall not record trees
  138. # of common commits only, but also all parent trees and tree items,
  139. # which is an overkill (i.e. in sha_done it records f1_4 as known, and
  140. # doesn't record f1_2 was known prior to that, hence can't detect f1_7
  141. # is in fact f1_2 and shall not be reported)
  142. self.assertMissingMatch(
  143. [self.cmt(6).id],
  144. [self.cmt(7).id],
  145. [self.cmt(7).id, self.cmt(7).tree, self.f1_7_id],
  146. )
  147. def test_have4_want7(self) -> None:
  148. # have 4, want 7. Shall not include rev5 as it is not in the tree
  149. # between 4 and 7 (well, it is, but its SHA's are irrelevant for 4..7
  150. # commit hierarchy)
  151. self.assertMissingMatch(
  152. [self.cmt(4).id],
  153. [self.cmt(7).id],
  154. [
  155. self.cmt(7).id,
  156. self.cmt(6).id,
  157. self.cmt(3).id,
  158. self.cmt(7).tree,
  159. self.cmt(6).tree,
  160. self.cmt(3).tree,
  161. self.f2_3_id,
  162. self.f3_3_id,
  163. ],
  164. )
  165. def test_have1_want6(self) -> None:
  166. # have 1, want 6. Shall not include rev5
  167. self.assertMissingMatch(
  168. [self.cmt(1).id],
  169. [self.cmt(6).id],
  170. [
  171. self.cmt(6).id,
  172. self.cmt(4).id,
  173. self.cmt(3).id,
  174. self.cmt(2).id,
  175. self.cmt(6).tree,
  176. self.cmt(4).tree,
  177. self.cmt(3).tree,
  178. self.cmt(2).tree,
  179. self.f1_2_id,
  180. self.f1_4_id,
  181. self.f2_3_id,
  182. self.f3_3_id,
  183. ],
  184. )
  185. def test_have3_want6(self) -> None:
  186. # have 3, want 7. Shall not report rev2 and its tree, because
  187. # haves(3) means has parents, i.e. rev2, too
  188. # BUT shall report any changes descending rev2 (excluding rev3)
  189. # Shall NOT report f1_7 as it's technically == f1_2
  190. self.assertMissingMatch(
  191. [self.cmt(3).id],
  192. [self.cmt(7).id],
  193. [
  194. self.cmt(7).id,
  195. self.cmt(6).id,
  196. self.cmt(4).id,
  197. self.cmt(7).tree,
  198. self.cmt(6).tree,
  199. self.cmt(4).tree,
  200. self.f1_4_id,
  201. ],
  202. )
  203. def test_have5_want7(self) -> None:
  204. # have 5, want 7. Common parent is rev2, hence children of rev2 from
  205. # a descent line other than rev5 shall be reported
  206. # expects f1_4 from rev6. f3_5 is known in rev5;
  207. # f1_7 shall be the same as f1_2 (known, too)
  208. self.assertMissingMatch(
  209. [self.cmt(5).id],
  210. [self.cmt(7).id],
  211. [
  212. self.cmt(7).id,
  213. self.cmt(6).id,
  214. self.cmt(4).id,
  215. self.cmt(7).tree,
  216. self.cmt(6).tree,
  217. self.cmt(4).tree,
  218. self.f1_4_id,
  219. ],
  220. )
  221. class MOFTagsTest(MissingObjectFinderTest):
  222. def setUp(self) -> None:
  223. super().setUp()
  224. f1_1 = make_object(Blob, data=b"f1")
  225. commit_spec = [[1]]
  226. trees = {1: [(b"f1", f1_1)]}
  227. self.commits = build_commit_graph(self.store, commit_spec, trees)
  228. self._normal_tag = make_tag(self.cmt(1))
  229. self.store.add_object(self._normal_tag)
  230. self._tag_of_tag = make_tag(self._normal_tag)
  231. self.store.add_object(self._tag_of_tag)
  232. self._tag_of_tree = make_tag(self.store[self.cmt(1).tree])
  233. self.store.add_object(self._tag_of_tree)
  234. self._tag_of_blob = make_tag(f1_1)
  235. self.store.add_object(self._tag_of_blob)
  236. self._tag_of_tag_of_blob = make_tag(self._tag_of_blob)
  237. self.store.add_object(self._tag_of_tag_of_blob)
  238. self.f1_1_id = f1_1.id
  239. def test_tagged_commit(self) -> None:
  240. # The user already has the tagged commit, all they want is the tag,
  241. # so send them only the tag object.
  242. self.assertMissingMatch(
  243. [self.cmt(1).id], [self._normal_tag.id], [self._normal_tag.id]
  244. )
  245. # The remaining cases are unusual, but do happen in the wild.
  246. def test_tagged_tag(self) -> None:
  247. # User already has tagged tag, send only tag of tag
  248. self.assertMissingMatch(
  249. [self._normal_tag.id], [self._tag_of_tag.id], [self._tag_of_tag.id]
  250. )
  251. # User needs both tags, but already has commit
  252. self.assertMissingMatch(
  253. [self.cmt(1).id],
  254. [self._tag_of_tag.id],
  255. [self._normal_tag.id, self._tag_of_tag.id],
  256. )
  257. def test_tagged_tree(self) -> None:
  258. self.assertMissingMatch(
  259. [],
  260. [self._tag_of_tree.id],
  261. [self._tag_of_tree.id, self.cmt(1).tree, self.f1_1_id],
  262. )
  263. def test_tagged_blob(self) -> None:
  264. self.assertMissingMatch(
  265. [], [self._tag_of_blob.id], [self._tag_of_blob.id, self.f1_1_id]
  266. )
  267. def test_tagged_tagged_blob(self) -> None:
  268. self.assertMissingMatch(
  269. [],
  270. [self._tag_of_tag_of_blob.id],
  271. [self._tag_of_tag_of_blob.id, self._tag_of_blob.id, self.f1_1_id],
  272. )