test_diff_tree.py 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for file and tree diff utilities."""
  22. from itertools import permutations
  23. from dulwich.diff_tree import (
  24. CHANGE_COPY,
  25. CHANGE_MODIFY,
  26. CHANGE_RENAME,
  27. CHANGE_UNCHANGED,
  28. RenameDetector,
  29. TreeChange,
  30. _count_blocks,
  31. _count_blocks_py,
  32. _is_tree,
  33. _is_tree_py,
  34. _merge_entries,
  35. _merge_entries_py,
  36. _similarity_score,
  37. _tree_change_key,
  38. tree_changes,
  39. tree_changes_for_merge,
  40. )
  41. from dulwich.index import commit_tree
  42. from dulwich.object_store import MemoryObjectStore
  43. from dulwich.objects import Blob, ShaFile, Tree, TreeEntry
  44. from dulwich.tests.utils import F, ext_functest_builder, functest_builder, make_object
  45. from . import TestCase
  46. class DiffTestCase(TestCase):
  47. def setUp(self) -> None:
  48. super().setUp()
  49. self.store = MemoryObjectStore()
  50. self.empty_tree = self.commit_tree([])
  51. def commit_tree(self, entries):
  52. commit_blobs = []
  53. for entry in entries:
  54. if len(entry) == 2:
  55. path, obj = entry
  56. mode = F
  57. else:
  58. path, obj, mode = entry
  59. if isinstance(obj, Blob):
  60. self.store.add_object(obj)
  61. sha = obj.id
  62. else:
  63. sha = obj
  64. commit_blobs.append((path, sha, mode))
  65. return self.store[commit_tree(self.store, commit_blobs)]
  66. class TreeChangesTest(DiffTestCase):
  67. def setUp(self) -> None:
  68. super().setUp()
  69. self.detector = RenameDetector(self.store)
  70. def assertMergeFails(self, merge_entries, name, mode, sha) -> None:
  71. t = Tree()
  72. t[name] = (mode, sha)
  73. self.assertRaises((TypeError, ValueError), merge_entries, "", t, t)
  74. def _do_test_merge_entries(self, merge_entries) -> None:
  75. blob_a1 = make_object(Blob, data=b"a1")
  76. blob_a2 = make_object(Blob, data=b"a2")
  77. blob_b1 = make_object(Blob, data=b"b1")
  78. blob_c2 = make_object(Blob, data=b"c2")
  79. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644), (b"b", blob_b1, 0o100755)])
  80. tree2 = self.commit_tree([(b"a", blob_a2, 0o100644), (b"c", blob_c2, 0o100755)])
  81. self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
  82. self.assertEqual(
  83. [
  84. ((None, None, None), (b"a", 0o100644, blob_a1.id)),
  85. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  86. ],
  87. merge_entries(b"", self.empty_tree, tree1),
  88. )
  89. self.assertEqual(
  90. [
  91. ((None, None, None), (b"x/a", 0o100644, blob_a1.id)),
  92. ((None, None, None), (b"x/b", 0o100755, blob_b1.id)),
  93. ],
  94. merge_entries(b"x", self.empty_tree, tree1),
  95. )
  96. self.assertEqual(
  97. [
  98. ((b"a", 0o100644, blob_a2.id), (None, None, None)),
  99. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  100. ],
  101. merge_entries(b"", tree2, self.empty_tree),
  102. )
  103. self.assertEqual(
  104. [
  105. ((b"a", 0o100644, blob_a1.id), (b"a", 0o100644, blob_a2.id)),
  106. ((b"b", 0o100755, blob_b1.id), (None, None, None)),
  107. ((None, None, None), (b"c", 0o100755, blob_c2.id)),
  108. ],
  109. merge_entries(b"", tree1, tree2),
  110. )
  111. self.assertEqual(
  112. [
  113. ((b"a", 0o100644, blob_a2.id), (b"a", 0o100644, blob_a1.id)),
  114. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  115. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  116. ],
  117. merge_entries(b"", tree2, tree1),
  118. )
  119. self.assertMergeFails(merge_entries, 0xDEADBEEF, 0o100644, "1" * 40)
  120. self.assertMergeFails(merge_entries, b"a", b"deadbeef", "1" * 40)
  121. self.assertMergeFails(merge_entries, b"a", 0o100644, 0xDEADBEEF)
  122. test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py)
  123. test_merge_entries_extension = ext_functest_builder(
  124. _do_test_merge_entries, _merge_entries
  125. )
  126. def _do_test_is_tree(self, is_tree) -> None:
  127. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  128. self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
  129. self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
  130. self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
  131. self.assertTrue(is_tree(TreeEntry(b"a", 0o040000, b"a" * 40)))
  132. self.assertRaises(TypeError, is_tree, TreeEntry(b"a", b"x", b"a" * 40))
  133. self.assertRaises(AttributeError, is_tree, 1234)
  134. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  135. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  136. def assertChangesEqual(self, expected, tree1, tree2, **kwargs) -> None:
  137. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  138. self.assertEqual(expected, actual)
  139. # For brevity, the following tests use tuples instead of TreeEntry objects.
  140. def test_tree_changes_empty(self) -> None:
  141. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  142. def test_tree_changes_no_changes(self) -> None:
  143. blob = make_object(Blob, data=b"blob")
  144. tree = self.commit_tree([(b"a", blob), (b"b/c", blob)])
  145. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  146. self.assertChangesEqual([], tree, tree)
  147. self.assertChangesEqual(
  148. [
  149. TreeChange(CHANGE_UNCHANGED, (b"a", F, blob.id), (b"a", F, blob.id)),
  150. TreeChange(
  151. CHANGE_UNCHANGED,
  152. (b"b/c", F, blob.id),
  153. (b"b/c", F, blob.id),
  154. ),
  155. ],
  156. tree,
  157. tree,
  158. want_unchanged=True,
  159. )
  160. def test_tree_changes_add_delete(self) -> None:
  161. blob_a = make_object(Blob, data=b"a")
  162. blob_b = make_object(Blob, data=b"b")
  163. tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
  164. self.assertChangesEqual(
  165. [
  166. TreeChange.add(TreeEntry(b"a", 0o100644, blob_a.id)),
  167. TreeChange.add(TreeEntry(b"x/b", 0o100755, blob_b.id)),
  168. ],
  169. self.empty_tree,
  170. tree,
  171. )
  172. self.assertChangesEqual(
  173. [
  174. TreeChange.delete(TreeEntry(b"a", 0o100644, blob_a.id)),
  175. TreeChange.delete(TreeEntry(b"x/b", 0o100755, blob_b.id)),
  176. ],
  177. tree,
  178. self.empty_tree,
  179. )
  180. def test_tree_changes_modify_contents(self) -> None:
  181. blob_a1 = make_object(Blob, data=b"a1")
  182. blob_a2 = make_object(Blob, data=b"a2")
  183. tree1 = self.commit_tree([(b"a", blob_a1)])
  184. tree2 = self.commit_tree([(b"a", blob_a2)])
  185. self.assertChangesEqual(
  186. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  187. tree1,
  188. tree2,
  189. )
  190. def test_tree_changes_modify_mode(self) -> None:
  191. blob_a = make_object(Blob, data=b"a")
  192. tree1 = self.commit_tree([(b"a", blob_a, 0o100644)])
  193. tree2 = self.commit_tree([(b"a", blob_a, 0o100755)])
  194. self.assertChangesEqual(
  195. [
  196. TreeChange(
  197. CHANGE_MODIFY,
  198. (b"a", 0o100644, blob_a.id),
  199. (b"a", 0o100755, blob_a.id),
  200. )
  201. ],
  202. tree1,
  203. tree2,
  204. )
  205. def test_tree_changes_change_type(self) -> None:
  206. blob_a1 = make_object(Blob, data=b"a")
  207. blob_a2 = make_object(Blob, data=b"/foo/bar")
  208. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  209. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  210. self.assertChangesEqual(
  211. [
  212. TreeChange.delete(TreeEntry(b"a", 0o100644, blob_a1.id)),
  213. TreeChange.add(TreeEntry(b"a", 0o120000, blob_a2.id)),
  214. ],
  215. tree1,
  216. tree2,
  217. )
  218. def test_tree_changes_change_type_same(self) -> None:
  219. blob_a1 = make_object(Blob, data=b"a")
  220. blob_a2 = make_object(Blob, data=b"/foo/bar")
  221. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  222. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  223. self.assertChangesEqual(
  224. [
  225. TreeChange(
  226. CHANGE_MODIFY,
  227. (b"a", 0o100644, blob_a1.id),
  228. (b"a", 0o120000, blob_a2.id),
  229. )
  230. ],
  231. tree1,
  232. tree2,
  233. change_type_same=True,
  234. )
  235. def test_tree_changes_to_tree(self) -> None:
  236. blob_a = make_object(Blob, data=b"a")
  237. blob_x = make_object(Blob, data=b"x")
  238. tree1 = self.commit_tree([(b"a", blob_a)])
  239. tree2 = self.commit_tree([(b"a/x", blob_x)])
  240. self.assertChangesEqual(
  241. [
  242. TreeChange.delete(TreeEntry(b"a", F, blob_a.id)),
  243. TreeChange.add(TreeEntry(b"a/x", F, blob_x.id)),
  244. ],
  245. tree1,
  246. tree2,
  247. )
  248. def test_tree_changes_complex(self) -> None:
  249. blob_a_1 = make_object(Blob, data=b"a1_1")
  250. blob_bx1_1 = make_object(Blob, data=b"bx1_1")
  251. blob_bx2_1 = make_object(Blob, data=b"bx2_1")
  252. blob_by1_1 = make_object(Blob, data=b"by1_1")
  253. blob_by2_1 = make_object(Blob, data=b"by2_1")
  254. tree1 = self.commit_tree(
  255. [
  256. (b"a", blob_a_1),
  257. (b"b/x/1", blob_bx1_1),
  258. (b"b/x/2", blob_bx2_1),
  259. (b"b/y/1", blob_by1_1),
  260. (b"b/y/2", blob_by2_1),
  261. ]
  262. )
  263. blob_a_2 = make_object(Blob, data=b"a1_2")
  264. blob_bx1_2 = blob_bx1_1
  265. blob_by_2 = make_object(Blob, data=b"by_2")
  266. blob_c_2 = make_object(Blob, data=b"c_2")
  267. tree2 = self.commit_tree(
  268. [
  269. (b"a", blob_a_2),
  270. (b"b/x/1", blob_bx1_2),
  271. (b"b/y", blob_by_2),
  272. (b"c", blob_c_2),
  273. ]
  274. )
  275. self.assertChangesEqual(
  276. [
  277. TreeChange(
  278. CHANGE_MODIFY,
  279. (b"a", F, blob_a_1.id),
  280. (b"a", F, blob_a_2.id),
  281. ),
  282. TreeChange.delete(TreeEntry(b"b/x/2", F, blob_bx2_1.id)),
  283. TreeChange.add(TreeEntry(b"b/y", F, blob_by_2.id)),
  284. TreeChange.delete(TreeEntry(b"b/y/1", F, blob_by1_1.id)),
  285. TreeChange.delete(TreeEntry(b"b/y/2", F, blob_by2_1.id)),
  286. TreeChange.add(TreeEntry(b"c", F, blob_c_2.id)),
  287. ],
  288. tree1,
  289. tree2,
  290. )
  291. def test_tree_changes_name_order(self) -> None:
  292. blob = make_object(Blob, data=b"a")
  293. tree1 = self.commit_tree([(b"a", blob), (b"a.", blob), (b"a..", blob)])
  294. # Tree order is the reverse of this, so if we used tree order, 'a..'
  295. # would not be merged.
  296. tree2 = self.commit_tree([(b"a/x", blob), (b"a./x", blob), (b"a..", blob)])
  297. self.assertChangesEqual(
  298. [
  299. TreeChange.delete(TreeEntry(b"a", F, blob.id)),
  300. TreeChange.add(TreeEntry(b"a/x", F, blob.id)),
  301. TreeChange.delete(TreeEntry(b"a.", F, blob.id)),
  302. TreeChange.add(TreeEntry(b"a./x", F, blob.id)),
  303. ],
  304. tree1,
  305. tree2,
  306. )
  307. def test_tree_changes_prune(self) -> None:
  308. blob_a1 = make_object(Blob, data=b"a1")
  309. blob_a2 = make_object(Blob, data=b"a2")
  310. blob_x = make_object(Blob, data=b"x")
  311. tree1 = self.commit_tree([(b"a", blob_a1), (b"b/x", blob_x)])
  312. tree2 = self.commit_tree([(b"a", blob_a2), (b"b/x", blob_x)])
  313. # Remove identical items so lookups will fail unless we prune.
  314. subtree = self.store[tree1[b"b"][1]]
  315. for entry in subtree.items():
  316. del self.store[entry.sha]
  317. del self.store[subtree.id]
  318. self.assertChangesEqual(
  319. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  320. tree1,
  321. tree2,
  322. )
  323. def test_tree_changes_rename_detector(self) -> None:
  324. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  325. blob_a2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  326. blob_b = make_object(Blob, data=b"b")
  327. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  328. tree2 = self.commit_tree([(b"c", blob_a2), (b"b", blob_b)])
  329. detector = RenameDetector(self.store)
  330. self.assertChangesEqual(
  331. [
  332. TreeChange.delete(TreeEntry(b"a", F, blob_a1.id)),
  333. TreeChange.add(TreeEntry(b"c", F, blob_a2.id)),
  334. ],
  335. tree1,
  336. tree2,
  337. )
  338. self.assertChangesEqual(
  339. [
  340. TreeChange.delete(TreeEntry(b"a", F, blob_a1.id)),
  341. TreeChange(
  342. CHANGE_UNCHANGED,
  343. (b"b", F, blob_b.id),
  344. (b"b", F, blob_b.id),
  345. ),
  346. TreeChange.add(TreeEntry(b"c", F, blob_a2.id)),
  347. ],
  348. tree1,
  349. tree2,
  350. want_unchanged=True,
  351. )
  352. self.assertChangesEqual(
  353. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id))],
  354. tree1,
  355. tree2,
  356. rename_detector=detector,
  357. )
  358. self.assertChangesEqual(
  359. [
  360. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id)),
  361. TreeChange(
  362. CHANGE_UNCHANGED,
  363. (b"b", F, blob_b.id),
  364. (b"b", F, blob_b.id),
  365. ),
  366. ],
  367. tree1,
  368. tree2,
  369. rename_detector=detector,
  370. want_unchanged=True,
  371. )
  372. def assertChangesForMergeEqual(
  373. self, expected, parent_trees, merge_tree, **kwargs
  374. ) -> None:
  375. parent_tree_ids = [t.id for t in parent_trees]
  376. actual = list(
  377. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  378. )
  379. self.assertEqual(expected, actual)
  380. parent_tree_ids.reverse()
  381. expected = [list(reversed(cs)) for cs in expected]
  382. actual = list(
  383. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  384. )
  385. self.assertEqual(expected, actual)
  386. def test_tree_changes_for_merge_add_no_conflict(self) -> None:
  387. blob = make_object(Blob, data=b"blob")
  388. parent1 = self.commit_tree([])
  389. parent2 = merge = self.commit_tree([(b"a", blob)])
  390. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  391. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  392. def test_tree_changes_for_merge_add_modify_conflict(self) -> None:
  393. blob1 = make_object(Blob, data=b"1")
  394. blob2 = make_object(Blob, data=b"2")
  395. parent1 = self.commit_tree([])
  396. parent2 = self.commit_tree([(b"a", blob1)])
  397. merge = self.commit_tree([(b"a", blob2)])
  398. self.assertChangesForMergeEqual(
  399. [
  400. [
  401. TreeChange.add(TreeEntry(b"a", F, blob2.id)),
  402. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  403. ]
  404. ],
  405. [parent1, parent2],
  406. merge,
  407. )
  408. def test_tree_changes_for_merge_modify_modify_conflict(self) -> None:
  409. blob1 = make_object(Blob, data=b"1")
  410. blob2 = make_object(Blob, data=b"2")
  411. blob3 = make_object(Blob, data=b"3")
  412. parent1 = self.commit_tree([(b"a", blob1)])
  413. parent2 = self.commit_tree([(b"a", blob2)])
  414. merge = self.commit_tree([(b"a", blob3)])
  415. self.assertChangesForMergeEqual(
  416. [
  417. [
  418. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  419. TreeChange(CHANGE_MODIFY, (b"a", F, blob2.id), (b"a", F, blob3.id)),
  420. ]
  421. ],
  422. [parent1, parent2],
  423. merge,
  424. )
  425. def test_tree_changes_for_merge_modify_no_conflict(self) -> None:
  426. blob1 = make_object(Blob, data=b"1")
  427. blob2 = make_object(Blob, data=b"2")
  428. parent1 = self.commit_tree([(b"a", blob1)])
  429. parent2 = merge = self.commit_tree([(b"a", blob2)])
  430. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  431. def test_tree_changes_for_merge_delete_delete_conflict(self) -> None:
  432. blob1 = make_object(Blob, data=b"1")
  433. blob2 = make_object(Blob, data=b"2")
  434. parent1 = self.commit_tree([(b"a", blob1)])
  435. parent2 = self.commit_tree([(b"a", blob2)])
  436. merge = self.commit_tree([])
  437. self.assertChangesForMergeEqual(
  438. [
  439. [
  440. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  441. TreeChange.delete(TreeEntry(b"a", F, blob2.id)),
  442. ]
  443. ],
  444. [parent1, parent2],
  445. merge,
  446. )
  447. def test_tree_changes_for_merge_delete_no_conflict(self) -> None:
  448. blob = make_object(Blob, data=b"blob")
  449. has = self.commit_tree([(b"a", blob)])
  450. doesnt_have = self.commit_tree([])
  451. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  452. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  453. def test_tree_changes_for_merge_octopus_no_conflict(self) -> None:
  454. r = list(range(5))
  455. blobs = [make_object(Blob, data=bytes(i)) for i in r]
  456. parents = [self.commit_tree([(b"a", blobs[i])]) for i in r]
  457. for i in r:
  458. # Take the SHA from each of the parents.
  459. self.assertChangesForMergeEqual([], parents, parents[i])
  460. def test_tree_changes_for_merge_octopus_modify_conflict(self) -> None:
  461. # Because the octopus merge strategy is limited, I doubt it's possible
  462. # to create this with the git command line. But the output is well-
  463. # defined, so test it anyway.
  464. r = list(range(5))
  465. parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
  466. merge_blob = make_object(Blob, data=b"merge")
  467. parents = [self.commit_tree([(b"a", parent_blobs[i])]) for i in r]
  468. merge = self.commit_tree([(b"a", merge_blob)])
  469. expected = [
  470. [
  471. TreeChange(
  472. CHANGE_MODIFY,
  473. (b"a", F, parent_blobs[i].id),
  474. (b"a", F, merge_blob.id),
  475. )
  476. for i in r
  477. ]
  478. ]
  479. self.assertChangesForMergeEqual(expected, parents, merge)
  480. def test_tree_changes_for_merge_octopus_delete(self) -> None:
  481. blob1 = make_object(Blob, data=b"1")
  482. blob2 = make_object(Blob, data=b"3")
  483. parent1 = self.commit_tree([(b"a", blob1)])
  484. parent2 = self.commit_tree([(b"a", blob2)])
  485. parent3 = merge = self.commit_tree([])
  486. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  487. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  488. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  489. self.assertChangesForMergeEqual(
  490. [
  491. [
  492. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  493. TreeChange.delete(TreeEntry(b"a", F, blob2.id)),
  494. None,
  495. ]
  496. ],
  497. [parent1, parent2, parent3],
  498. merge,
  499. )
  500. def test_tree_changes_for_merge_add_add_same_conflict(self) -> None:
  501. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  502. parent1 = self.commit_tree([(b"a", blob)])
  503. parent2 = self.commit_tree([])
  504. merge = self.commit_tree([(b"b", blob)])
  505. add = TreeChange.add(TreeEntry(b"b", F, blob.id))
  506. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  507. def test_tree_changes_for_merge_add_exact_rename_conflict(self) -> None:
  508. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  509. parent1 = self.commit_tree([(b"a", blob)])
  510. parent2 = self.commit_tree([])
  511. merge = self.commit_tree([(b"b", blob)])
  512. self.assertChangesForMergeEqual(
  513. [
  514. [
  515. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  516. TreeChange.add(TreeEntry(b"b", F, blob.id)),
  517. ]
  518. ],
  519. [parent1, parent2],
  520. merge,
  521. rename_detector=self.detector,
  522. )
  523. def test_tree_changes_for_merge_add_content_rename_conflict(self) -> None:
  524. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  525. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  526. parent1 = self.commit_tree([(b"a", blob1)])
  527. parent2 = self.commit_tree([])
  528. merge = self.commit_tree([(b"b", blob2)])
  529. self.assertChangesForMergeEqual(
  530. [
  531. [
  532. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  533. TreeChange.add(TreeEntry(b"b", F, blob2.id)),
  534. ]
  535. ],
  536. [parent1, parent2],
  537. merge,
  538. rename_detector=self.detector,
  539. )
  540. def test_tree_changes_for_merge_modify_rename_conflict(self) -> None:
  541. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  542. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  543. parent1 = self.commit_tree([(b"a", blob1)])
  544. parent2 = self.commit_tree([(b"b", blob1)])
  545. merge = self.commit_tree([(b"b", blob2)])
  546. self.assertChangesForMergeEqual(
  547. [
  548. [
  549. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  550. TreeChange(CHANGE_MODIFY, (b"b", F, blob1.id), (b"b", F, blob2.id)),
  551. ]
  552. ],
  553. [parent1, parent2],
  554. merge,
  555. rename_detector=self.detector,
  556. )
  557. def test_tree_changes_with_paths(self) -> None:
  558. # Test filtering to specific paths
  559. blob_a = make_object(Blob, data=b"a")
  560. blob_b = make_object(Blob, data=b"b")
  561. blob_c = make_object(Blob, data=b"c")
  562. tree1 = self.commit_tree(
  563. [
  564. (b"file_a", blob_a),
  565. (b"file_b", blob_b),
  566. ]
  567. )
  568. tree2 = self.commit_tree(
  569. [
  570. (b"file_a", blob_b), # modified
  571. (b"file_b", blob_b), # unchanged
  572. (b"file_c", blob_c), # added
  573. ]
  574. )
  575. # No filtering - should see all changes
  576. self.assertChangesEqual(
  577. [
  578. TreeChange(
  579. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  580. ),
  581. TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
  582. ],
  583. tree1,
  584. tree2,
  585. )
  586. # Filter to file_a only
  587. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_a"]))
  588. self.assertEqual(
  589. [
  590. TreeChange(
  591. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  592. ),
  593. ],
  594. changes,
  595. )
  596. # Filter to file_c only
  597. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_c"]))
  598. self.assertEqual(
  599. [
  600. TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
  601. ],
  602. changes,
  603. )
  604. # Filter to multiple files
  605. changes = list(
  606. tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_a", b"file_c"])
  607. )
  608. self.assertEqual(
  609. [
  610. TreeChange(
  611. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  612. ),
  613. TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
  614. ],
  615. changes,
  616. )
  617. # Filter to non-existent file
  618. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_d"]))
  619. self.assertEqual([], changes)
  620. def test_tree_changes_with_paths_dirs(self) -> None:
  621. # Test filtering with directories
  622. blob_a = make_object(Blob, data=b"a")
  623. blob_b = make_object(Blob, data=b"b")
  624. blob_c = make_object(Blob, data=b"c")
  625. tree1 = self.commit_tree(
  626. [
  627. (b"dir1/file_a", blob_a),
  628. (b"dir2/file_b", blob_b),
  629. ]
  630. )
  631. tree2 = self.commit_tree(
  632. [
  633. (b"dir1/file_a", blob_b), # modified
  634. (b"dir1/file_c", blob_c), # added
  635. (b"dir2/file_b", blob_b), # unchanged
  636. ]
  637. )
  638. # Filter to dir1 only
  639. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir1"]))
  640. self.assertEqual(
  641. [
  642. TreeChange(
  643. CHANGE_MODIFY,
  644. (b"dir1/file_a", F, blob_a.id),
  645. (b"dir1/file_a", F, blob_b.id),
  646. ),
  647. TreeChange.add(TreeEntry(b"dir1/file_c", F, blob_c.id)),
  648. ],
  649. changes,
  650. )
  651. # Filter to specific file in directory
  652. changes = list(
  653. tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir1/file_a"])
  654. )
  655. self.assertEqual(
  656. [
  657. TreeChange(
  658. CHANGE_MODIFY,
  659. (b"dir1/file_a", F, blob_a.id),
  660. (b"dir1/file_a", F, blob_b.id),
  661. ),
  662. ],
  663. changes,
  664. )
  665. # Filter to dir2 (no changes)
  666. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir2"]))
  667. self.assertEqual([], changes)
  668. def test_tree_changes_with_paths_deep(self) -> None:
  669. # Test filtering with deep directory structure
  670. blob_a = make_object(Blob, data=b"a")
  671. blob_b = make_object(Blob, data=b"b")
  672. tree1 = self.commit_tree(
  673. [
  674. (b"a/b/c/file1", blob_a),
  675. (b"a/b/d/file2", blob_a),
  676. (b"x/y/z/file3", blob_a),
  677. ]
  678. )
  679. tree2 = self.commit_tree(
  680. [
  681. (b"a/b/c/file1", blob_b), # modified
  682. (b"a/b/d/file2", blob_a), # unchanged
  683. (b"x/y/z/file3", blob_b), # modified
  684. ]
  685. )
  686. # Filter to a/b/c
  687. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"a/b/c"]))
  688. self.assertEqual(
  689. [
  690. TreeChange(
  691. CHANGE_MODIFY,
  692. (b"a/b/c/file1", F, blob_a.id),
  693. (b"a/b/c/file1", F, blob_b.id),
  694. ),
  695. ],
  696. changes,
  697. )
  698. # Filter to a/b (should include both subdirs)
  699. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"a/b"]))
  700. self.assertEqual(
  701. [
  702. TreeChange(
  703. CHANGE_MODIFY,
  704. (b"a/b/c/file1", F, blob_a.id),
  705. (b"a/b/c/file1", F, blob_b.id),
  706. ),
  707. ],
  708. changes,
  709. )
  710. # Filter to multiple deep paths
  711. changes = list(
  712. tree_changes(
  713. self.store, tree1.id, tree2.id, paths=[b"a/b/c/file1", b"x/y/z/file3"]
  714. )
  715. )
  716. self.assertEqual(
  717. [
  718. TreeChange(
  719. CHANGE_MODIFY,
  720. (b"a/b/c/file1", F, blob_a.id),
  721. (b"a/b/c/file1", F, blob_b.id),
  722. ),
  723. TreeChange(
  724. CHANGE_MODIFY,
  725. (b"x/y/z/file3", F, blob_a.id),
  726. (b"x/y/z/file3", F, blob_b.id),
  727. ),
  728. ],
  729. changes,
  730. )
  731. def test_tree_changes_with_paths_include_unchanged(self) -> None:
  732. # Test path filtering with want_unchanged=True
  733. blob_a = make_object(Blob, data=b"a")
  734. blob_b = make_object(Blob, data=b"b")
  735. tree1 = self.commit_tree(
  736. [
  737. (b"file_a", blob_a),
  738. (b"file_b", blob_b),
  739. ]
  740. )
  741. tree2 = self.commit_tree(
  742. [
  743. (b"file_a", blob_b), # modified
  744. (b"file_b", blob_b), # unchanged
  745. ]
  746. )
  747. # Filter to file_b with want_unchanged=True
  748. changes = list(
  749. tree_changes(
  750. self.store, tree1.id, tree2.id, paths=[b"file_b"], want_unchanged=True
  751. )
  752. )
  753. self.assertEqual(
  754. [
  755. TreeChange(
  756. CHANGE_UNCHANGED,
  757. (b"file_b", F, blob_b.id),
  758. (b"file_b", F, blob_b.id),
  759. ),
  760. ],
  761. changes,
  762. )
  763. # Filter to both files with want_unchanged=True
  764. changes = list(
  765. tree_changes(
  766. self.store,
  767. tree1.id,
  768. tree2.id,
  769. paths=[b"file_a", b"file_b"],
  770. want_unchanged=True,
  771. )
  772. )
  773. self.assertEqual(
  774. [
  775. TreeChange(
  776. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  777. ),
  778. TreeChange(
  779. CHANGE_UNCHANGED,
  780. (b"file_b", F, blob_b.id),
  781. (b"file_b", F, blob_b.id),
  782. ),
  783. ],
  784. changes,
  785. )
  786. class RenameDetectionTest(DiffTestCase):
  787. def _do_test_count_blocks(self, count_blocks) -> None:
  788. blob = make_object(Blob, data=b"a\nb\na\n")
  789. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, count_blocks(blob))
  790. test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py)
  791. test_count_blocks_extension = ext_functest_builder(
  792. _do_test_count_blocks, _count_blocks
  793. )
  794. def _do_test_count_blocks_no_newline(self, count_blocks) -> None:
  795. blob = make_object(Blob, data=b"a\na")
  796. self.assertBlockCountEqual({b"a\n": 2, b"a": 1}, _count_blocks(blob))
  797. test_count_blocks_no_newline = functest_builder(
  798. _do_test_count_blocks_no_newline, _count_blocks_py
  799. )
  800. test_count_blocks_no_newline_extension = ext_functest_builder(
  801. _do_test_count_blocks_no_newline, _count_blocks
  802. )
  803. def assertBlockCountEqual(self, expected, got) -> None:
  804. self.assertEqual(
  805. {(hash(block) & 0xFFFFFFFF): count for (block, count) in expected.items()},
  806. {(block & 0xFFFFFFFF): count for (block, count) in got.items()},
  807. )
  808. def _do_test_count_blocks_chunks(self, count_blocks) -> None:
  809. blob = ShaFile.from_raw_chunks(Blob.type_num, [b"a\nb", b"\na\n"])
  810. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, _count_blocks(blob))
  811. test_count_blocks_chunks = functest_builder(
  812. _do_test_count_blocks_chunks, _count_blocks_py
  813. )
  814. test_count_blocks_chunks_extension = ext_functest_builder(
  815. _do_test_count_blocks_chunks, _count_blocks
  816. )
  817. def _do_test_count_blocks_long_lines(self, count_blocks) -> None:
  818. a = b"a" * 64
  819. data = a + b"xxx\ny\n" + a + b"zzz\n"
  820. blob = make_object(Blob, data=data)
  821. self.assertBlockCountEqual(
  822. {b"a" * 64: 128, b"xxx\n": 4, b"y\n": 2, b"zzz\n": 4},
  823. _count_blocks(blob),
  824. )
  825. test_count_blocks_long_lines = functest_builder(
  826. _do_test_count_blocks_long_lines, _count_blocks_py
  827. )
  828. test_count_blocks_long_lines_extension = ext_functest_builder(
  829. _do_test_count_blocks_long_lines, _count_blocks
  830. )
  831. def assertSimilar(self, expected_score, blob1, blob2) -> None:
  832. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  833. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  834. def test_similarity_score(self) -> None:
  835. blob0 = make_object(Blob, data=b"")
  836. blob1 = make_object(Blob, data=b"ab\ncd\ncd\n")
  837. blob2 = make_object(Blob, data=b"ab\n")
  838. blob3 = make_object(Blob, data=b"cd\n")
  839. blob4 = make_object(Blob, data=b"cd\ncd\n")
  840. self.assertSimilar(100, blob0, blob0)
  841. self.assertSimilar(0, blob0, blob1)
  842. self.assertSimilar(33, blob1, blob2)
  843. self.assertSimilar(33, blob1, blob3)
  844. self.assertSimilar(66, blob1, blob4)
  845. self.assertSimilar(0, blob2, blob3)
  846. self.assertSimilar(50, blob3, blob4)
  847. def test_similarity_score_cache(self) -> None:
  848. blob1 = make_object(Blob, data=b"ab\ncd\n")
  849. blob2 = make_object(Blob, data=b"ab\n")
  850. block_cache: dict[bytes, dict[int, int]] = {}
  851. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  852. self.assertEqual({blob1.id, blob2.id}, set(block_cache))
  853. def fail_chunks() -> None:
  854. self.fail("Unexpected call to as_raw_chunks()")
  855. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  856. blob1.raw_length = lambda: 6
  857. blob2.raw_length = lambda: 3
  858. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  859. def test_tree_entry_sort(self) -> None:
  860. sha = "abcd" * 10
  861. expected_entries = [
  862. TreeChange.add(TreeEntry(b"aaa", F, sha)),
  863. TreeChange(
  864. CHANGE_COPY,
  865. TreeEntry(b"bbb", F, sha),
  866. TreeEntry(b"aab", F, sha),
  867. ),
  868. TreeChange(
  869. CHANGE_MODIFY,
  870. TreeEntry(b"bbb", F, sha),
  871. TreeEntry(b"bbb", F, b"dabc" * 10),
  872. ),
  873. TreeChange(
  874. CHANGE_RENAME,
  875. TreeEntry(b"bbc", F, sha),
  876. TreeEntry(b"ddd", F, sha),
  877. ),
  878. TreeChange.delete(TreeEntry(b"ccc", F, sha)),
  879. ]
  880. for perm in permutations(expected_entries):
  881. self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key))
  882. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  883. detector = RenameDetector(self.store, **kwargs)
  884. return detector.changes_with_renames(
  885. tree1.id, tree2.id, want_unchanged=want_unchanged
  886. )
  887. def test_no_renames(self) -> None:
  888. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  889. blob2 = make_object(Blob, data=b"a\nb\ne\nf\n")
  890. blob3 = make_object(Blob, data=b"a\nb\ng\nh\n")
  891. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  892. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob3)])
  893. self.assertEqual(
  894. [TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob3.id))],
  895. self.detect_renames(tree1, tree2),
  896. )
  897. def test_exact_rename_one_to_one(self) -> None:
  898. blob1 = make_object(Blob, data=b"1")
  899. blob2 = make_object(Blob, data=b"2")
  900. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  901. tree2 = self.commit_tree([(b"c", blob1), (b"d", blob2)])
  902. self.assertEqual(
  903. [
  904. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob1.id)),
  905. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"d", F, blob2.id)),
  906. ],
  907. self.detect_renames(tree1, tree2),
  908. )
  909. def test_exact_rename_split_different_type(self) -> None:
  910. blob = make_object(Blob, data=b"/foo")
  911. tree1 = self.commit_tree([(b"a", blob, 0o100644)])
  912. tree2 = self.commit_tree([(b"a", blob, 0o120000)])
  913. self.assertEqual(
  914. [
  915. TreeChange.add(TreeEntry(b"a", 0o120000, blob.id)),
  916. TreeChange.delete(TreeEntry(b"a", 0o100644, blob.id)),
  917. ],
  918. self.detect_renames(tree1, tree2),
  919. )
  920. def test_exact_rename_and_different_type(self) -> None:
  921. blob1 = make_object(Blob, data=b"1")
  922. blob2 = make_object(Blob, data=b"2")
  923. tree1 = self.commit_tree([(b"a", blob1)])
  924. tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
  925. self.assertEqual(
  926. [
  927. TreeChange.add(TreeEntry(b"a", 0o120000, blob2.id)),
  928. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  929. ],
  930. self.detect_renames(tree1, tree2),
  931. )
  932. def test_exact_rename_one_to_many(self) -> None:
  933. blob = make_object(Blob, data=b"1")
  934. tree1 = self.commit_tree([(b"a", blob)])
  935. tree2 = self.commit_tree([(b"b", blob), (b"c", blob)])
  936. self.assertEqual(
  937. [
  938. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  939. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"c", F, blob.id)),
  940. ],
  941. self.detect_renames(tree1, tree2),
  942. )
  943. def test_exact_rename_many_to_one(self) -> None:
  944. blob = make_object(Blob, data=b"1")
  945. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  946. tree2 = self.commit_tree([(b"c", blob)])
  947. self.assertEqual(
  948. [
  949. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  950. TreeChange.delete(TreeEntry(b"b", F, blob.id)),
  951. ],
  952. self.detect_renames(tree1, tree2),
  953. )
  954. def test_exact_rename_many_to_many(self) -> None:
  955. blob = make_object(Blob, data=b"1")
  956. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  957. tree2 = self.commit_tree([(b"c", blob), (b"d", blob), (b"e", blob)])
  958. self.assertEqual(
  959. [
  960. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  961. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"e", F, blob.id)),
  962. TreeChange(CHANGE_RENAME, (b"b", F, blob.id), (b"d", F, blob.id)),
  963. ],
  964. self.detect_renames(tree1, tree2),
  965. )
  966. def test_exact_copy_modify(self) -> None:
  967. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  968. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  969. tree1 = self.commit_tree([(b"a", blob1)])
  970. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  971. self.assertEqual(
  972. [
  973. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  974. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  975. ],
  976. self.detect_renames(tree1, tree2),
  977. )
  978. def test_exact_copy_change_mode(self) -> None:
  979. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  980. tree1 = self.commit_tree([(b"a", blob)])
  981. tree2 = self.commit_tree([(b"a", blob, 0o100755), (b"b", blob)])
  982. self.assertEqual(
  983. [
  984. TreeChange(
  985. CHANGE_MODIFY,
  986. (b"a", F, blob.id),
  987. (b"a", 0o100755, blob.id),
  988. ),
  989. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id)),
  990. ],
  991. self.detect_renames(tree1, tree2),
  992. )
  993. def test_rename_threshold(self) -> None:
  994. blob1 = make_object(Blob, data=b"a\nb\nc\n")
  995. blob2 = make_object(Blob, data=b"a\nb\nd\n")
  996. tree1 = self.commit_tree([(b"a", blob1)])
  997. tree2 = self.commit_tree([(b"b", blob2)])
  998. self.assertEqual(
  999. [TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  1000. self.detect_renames(tree1, tree2, rename_threshold=50),
  1001. )
  1002. self.assertEqual(
  1003. [
  1004. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  1005. TreeChange.add(TreeEntry(b"b", F, blob2.id)),
  1006. ],
  1007. self.detect_renames(tree1, tree2, rename_threshold=75),
  1008. )
  1009. def test_content_rename_max_files(self) -> None:
  1010. blob1 = make_object(Blob, data=b"a\nb\nc\nd")
  1011. blob4 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1012. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  1013. blob3 = make_object(Blob, data=b"e\nf\ng\ni\n")
  1014. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1015. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  1016. self.assertEqual(
  1017. [
  1018. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  1019. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  1020. ],
  1021. self.detect_renames(tree1, tree2),
  1022. )
  1023. self.assertEqual(
  1024. [
  1025. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  1026. TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
  1027. TreeChange.add(TreeEntry(b"c", F, blob3.id)),
  1028. TreeChange.add(TreeEntry(b"d", F, blob4.id)),
  1029. ],
  1030. self.detect_renames(tree1, tree2, max_files=1),
  1031. )
  1032. def test_content_rename_one_to_one(self) -> None:
  1033. b11 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1034. b12 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1035. b21 = make_object(Blob, data=b"e\nf\ng\n\nh")
  1036. b22 = make_object(Blob, data=b"e\nf\ng\n\ni")
  1037. tree1 = self.commit_tree([(b"a", b11), (b"b", b21)])
  1038. tree2 = self.commit_tree([(b"c", b12), (b"d", b22)])
  1039. self.assertEqual(
  1040. [
  1041. TreeChange(CHANGE_RENAME, (b"a", F, b11.id), (b"c", F, b12.id)),
  1042. TreeChange(CHANGE_RENAME, (b"b", F, b21.id), (b"d", F, b22.id)),
  1043. ],
  1044. self.detect_renames(tree1, tree2),
  1045. )
  1046. def test_content_rename_one_to_one_ordering(self) -> None:
  1047. blob1 = make_object(Blob, data=b"a\nb\nc\nd\ne\nf\n")
  1048. blob2 = make_object(Blob, data=b"a\nb\nc\nd\ng\nh\n")
  1049. # 6/10 match to blob1, 8/10 match to blob2
  1050. blob3 = make_object(Blob, data=b"a\nb\nc\nd\ng\ni\n")
  1051. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1052. tree2 = self.commit_tree([(b"c", blob3)])
  1053. self.assertEqual(
  1054. [
  1055. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  1056. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  1057. ],
  1058. self.detect_renames(tree1, tree2),
  1059. )
  1060. tree3 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  1061. tree4 = self.commit_tree([(b"c", blob3)])
  1062. self.assertEqual(
  1063. [
  1064. TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
  1065. TreeChange.delete(TreeEntry(b"b", F, blob1.id)),
  1066. ],
  1067. self.detect_renames(tree3, tree4),
  1068. )
  1069. def test_content_rename_one_to_many(self) -> None:
  1070. blob1 = make_object(Blob, data=b"aa\nb\nc\nd\ne\n")
  1071. blob2 = make_object(Blob, data=b"ab\nb\nc\nd\ne\n") # 8/11 match
  1072. blob3 = make_object(Blob, data=b"aa\nb\nc\nd\nf\n") # 9/11 match
  1073. tree1 = self.commit_tree([(b"a", blob1)])
  1074. tree2 = self.commit_tree([(b"b", blob2), (b"c", blob3)])
  1075. self.assertEqual(
  1076. [
  1077. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  1078. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  1079. ],
  1080. self.detect_renames(tree1, tree2),
  1081. )
  1082. def test_content_rename_many_to_one(self) -> None:
  1083. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1084. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1085. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  1086. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1087. tree2 = self.commit_tree([(b"c", blob3)])
  1088. self.assertEqual(
  1089. [
  1090. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  1091. TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
  1092. ],
  1093. self.detect_renames(tree1, tree2),
  1094. )
  1095. def test_content_rename_many_to_many(self) -> None:
  1096. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1097. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1098. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  1099. blob4 = make_object(Blob, data=b"a\nb\nc\ng\n")
  1100. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1101. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  1102. # TODO(dborowitz): Distribute renames rather than greedily choosing
  1103. # copies.
  1104. self.assertEqual(
  1105. [
  1106. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  1107. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  1108. TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
  1109. ],
  1110. self.detect_renames(tree1, tree2),
  1111. )
  1112. def test_content_rename_with_more_deletions(self) -> None:
  1113. blob1 = make_object(Blob, data=b"")
  1114. tree1 = self.commit_tree(
  1115. [(b"a", blob1), (b"b", blob1), (b"c", blob1), (b"d", blob1)]
  1116. )
  1117. tree2 = self.commit_tree([(b"e", blob1), (b"f", blob1), (b"g", blob1)])
  1118. self.maxDiff = None
  1119. self.assertEqual(
  1120. [
  1121. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
  1122. TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
  1123. TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
  1124. TreeChange.delete(TreeEntry(b"d", F, blob1.id)),
  1125. ],
  1126. self.detect_renames(tree1, tree2),
  1127. )
  1128. def test_content_rename_gitlink(self) -> None:
  1129. blob1 = make_object(Blob, data=b"blob1")
  1130. blob2 = make_object(Blob, data=b"blob2")
  1131. link1 = b"1" * 40
  1132. link2 = b"2" * 40
  1133. tree1 = self.commit_tree([(b"a", blob1), (b"b", link1, 0o160000)])
  1134. tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
  1135. self.assertEqual(
  1136. [
  1137. TreeChange.delete(TreeEntry(b"a", 0o100644, blob1.id)),
  1138. TreeChange.delete(TreeEntry(b"b", 0o160000, link1)),
  1139. TreeChange.add(TreeEntry(b"c", 0o100644, blob2.id)),
  1140. TreeChange.add(TreeEntry(b"d", 0o160000, link2)),
  1141. ],
  1142. self.detect_renames(tree1, tree2),
  1143. )
  1144. def test_exact_rename_swap(self) -> None:
  1145. blob1 = make_object(Blob, data=b"1")
  1146. blob2 = make_object(Blob, data=b"2")
  1147. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1148. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  1149. self.assertEqual(
  1150. [
  1151. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  1152. TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob1.id)),
  1153. ],
  1154. self.detect_renames(tree1, tree2),
  1155. )
  1156. self.assertEqual(
  1157. [
  1158. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  1159. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob2.id)),
  1160. ],
  1161. self.detect_renames(tree1, tree2, rewrite_threshold=50),
  1162. )
  1163. def test_content_rename_swap(self) -> None:
  1164. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1165. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  1166. blob3 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1167. blob4 = make_object(Blob, data=b"e\nf\ng\ni\n")
  1168. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1169. tree2 = self.commit_tree([(b"a", blob4), (b"b", blob3)])
  1170. self.assertEqual(
  1171. [
  1172. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob3.id)),
  1173. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob4.id)),
  1174. ],
  1175. self.detect_renames(tree1, tree2, rewrite_threshold=60),
  1176. )
  1177. def test_rewrite_threshold(self) -> None:
  1178. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1179. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1180. blob3 = make_object(Blob, data=b"a\nb\nf\ng\n")
  1181. tree1 = self.commit_tree([(b"a", blob1)])
  1182. tree2 = self.commit_tree([(b"a", blob3), (b"b", blob2)])
  1183. no_renames = [
  1184. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  1185. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  1186. ]
  1187. self.assertEqual(no_renames, self.detect_renames(tree1, tree2))
  1188. self.assertEqual(
  1189. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40)
  1190. )
  1191. self.assertEqual(
  1192. [
  1193. TreeChange.add(TreeEntry(b"a", F, blob3.id)),
  1194. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  1195. ],
  1196. self.detect_renames(tree1, tree2, rewrite_threshold=80),
  1197. )
  1198. def test_find_copies_harder_exact(self) -> None:
  1199. blob = make_object(Blob, data=b"blob")
  1200. tree1 = self.commit_tree([(b"a", blob)])
  1201. tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
  1202. self.assertEqual(
  1203. [TreeChange.add(TreeEntry(b"b", F, blob.id))],
  1204. self.detect_renames(tree1, tree2),
  1205. )
  1206. self.assertEqual(
  1207. [TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id))],
  1208. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1209. )
  1210. def test_find_copies_harder_content(self) -> None:
  1211. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1212. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1213. tree1 = self.commit_tree([(b"a", blob1)])
  1214. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1215. self.assertEqual(
  1216. [TreeChange.add(TreeEntry(b"b", F, blob2.id))],
  1217. self.detect_renames(tree1, tree2),
  1218. )
  1219. self.assertEqual(
  1220. [TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  1221. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1222. )
  1223. def test_find_copies_harder_with_rewrites(self) -> None:
  1224. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1225. blob_a2 = make_object(Blob, data=b"f\ng\nh\ni\n")
  1226. blob_b2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1227. tree1 = self.commit_tree([(b"a", blob_a1)])
  1228. tree2 = self.commit_tree([(b"a", blob_a2), (b"b", blob_b2)])
  1229. self.assertEqual(
  1230. [
  1231. TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id)),
  1232. TreeChange(CHANGE_COPY, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1233. ],
  1234. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1235. )
  1236. self.assertEqual(
  1237. [
  1238. TreeChange.add(TreeEntry(b"a", F, blob_a2.id)),
  1239. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1240. ],
  1241. self.detect_renames(
  1242. tree1, tree2, rewrite_threshold=50, find_copies_harder=True
  1243. ),
  1244. )
  1245. def test_reuse_detector(self) -> None:
  1246. blob = make_object(Blob, data=b"blob")
  1247. tree1 = self.commit_tree([(b"a", blob)])
  1248. tree2 = self.commit_tree([(b"b", blob)])
  1249. detector = RenameDetector(self.store)
  1250. changes = [TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id))]
  1251. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1252. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1253. def test_want_unchanged(self) -> None:
  1254. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1255. blob_b = make_object(Blob, data=b"b")
  1256. blob_c2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1257. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  1258. tree2 = self.commit_tree([(b"c", blob_c2), (b"b", blob_b)])
  1259. self.assertEqual(
  1260. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id))],
  1261. self.detect_renames(tree1, tree2),
  1262. )
  1263. self.assertEqual(
  1264. [
  1265. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id)),
  1266. TreeChange(
  1267. CHANGE_UNCHANGED,
  1268. (b"b", F, blob_b.id),
  1269. (b"b", F, blob_b.id),
  1270. ),
  1271. ],
  1272. self.detect_renames(tree1, tree2, want_unchanged=True),
  1273. )