test_diff_tree.py 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for file and tree diff utilities."""
  22. from itertools import permutations
  23. from dulwich.diff_tree import (
  24. CHANGE_COPY,
  25. CHANGE_MODIFY,
  26. CHANGE_RENAME,
  27. CHANGE_UNCHANGED,
  28. RenameDetector,
  29. TreeChange,
  30. _count_blocks,
  31. _count_blocks_py,
  32. _is_tree,
  33. _is_tree_py,
  34. _merge_entries,
  35. _merge_entries_py,
  36. _similarity_score,
  37. _tree_change_key,
  38. tree_changes,
  39. tree_changes_for_merge,
  40. )
  41. from dulwich.index import commit_tree
  42. from dulwich.object_store import MemoryObjectStore
  43. from dulwich.objects import Blob, ShaFile, Tree, TreeEntry
  44. from dulwich.tests.utils import F, ext_functest_builder, functest_builder, make_object
  45. from . import TestCase
  46. class DiffTestCase(TestCase):
  47. def setUp(self) -> None:
  48. super().setUp()
  49. self.store = MemoryObjectStore()
  50. self.empty_tree = self.commit_tree([])
  51. def commit_tree(self, entries):
  52. commit_blobs = []
  53. for entry in entries:
  54. if len(entry) == 2:
  55. path, obj = entry
  56. mode = F
  57. else:
  58. path, obj, mode = entry
  59. if isinstance(obj, Blob):
  60. self.store.add_object(obj)
  61. sha = obj.id
  62. else:
  63. sha = obj
  64. commit_blobs.append((path, sha, mode))
  65. return self.store[commit_tree(self.store, commit_blobs)]
  66. class TreeChangesTest(DiffTestCase):
  67. def setUp(self) -> None:
  68. super().setUp()
  69. self.detector = RenameDetector(self.store)
  70. def assertMergeFails(self, merge_entries, name, mode, sha) -> None:
  71. t = Tree()
  72. t[name] = (mode, sha)
  73. self.assertRaises((TypeError, ValueError), merge_entries, "", t, t)
  74. def _do_test_merge_entries(self, merge_entries) -> None:
  75. blob_a1 = make_object(Blob, data=b"a1")
  76. blob_a2 = make_object(Blob, data=b"a2")
  77. blob_b1 = make_object(Blob, data=b"b1")
  78. blob_c2 = make_object(Blob, data=b"c2")
  79. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644), (b"b", blob_b1, 0o100755)])
  80. tree2 = self.commit_tree([(b"a", blob_a2, 0o100644), (b"c", blob_c2, 0o100755)])
  81. self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
  82. self.assertEqual(
  83. [
  84. (None, TreeEntry(b"a", 0o100644, blob_a1.id)),
  85. (None, TreeEntry(b"b", 0o100755, blob_b1.id)),
  86. ],
  87. merge_entries(b"", self.empty_tree, tree1),
  88. )
  89. self.assertEqual(
  90. [
  91. (None, TreeEntry(b"x/a", 0o100644, blob_a1.id)),
  92. (None, TreeEntry(b"x/b", 0o100755, blob_b1.id)),
  93. ],
  94. merge_entries(b"x", self.empty_tree, tree1),
  95. )
  96. self.assertEqual(
  97. [
  98. (TreeEntry(b"a", 0o100644, blob_a2.id), None),
  99. (TreeEntry(b"c", 0o100755, blob_c2.id), None),
  100. ],
  101. merge_entries(b"", tree2, self.empty_tree),
  102. )
  103. self.assertEqual(
  104. [
  105. (
  106. TreeEntry(b"a", 0o100644, blob_a1.id),
  107. TreeEntry(b"a", 0o100644, blob_a2.id),
  108. ),
  109. (TreeEntry(b"b", 0o100755, blob_b1.id), None),
  110. (None, TreeEntry(b"c", 0o100755, blob_c2.id)),
  111. ],
  112. merge_entries(b"", tree1, tree2),
  113. )
  114. self.assertEqual(
  115. [
  116. (
  117. TreeEntry(b"a", 0o100644, blob_a2.id),
  118. TreeEntry(b"a", 0o100644, blob_a1.id),
  119. ),
  120. (None, TreeEntry(b"b", 0o100755, blob_b1.id)),
  121. (TreeEntry(b"c", 0o100755, blob_c2.id), None),
  122. ],
  123. merge_entries(b"", tree2, tree1),
  124. )
  125. self.assertMergeFails(merge_entries, 0xDEADBEEF, 0o100644, "1" * 40)
  126. self.assertMergeFails(merge_entries, b"a", b"deadbeef", "1" * 40)
  127. self.assertMergeFails(merge_entries, b"a", 0o100644, 0xDEADBEEF)
  128. test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py)
  129. test_merge_entries_extension = ext_functest_builder(
  130. _do_test_merge_entries, _merge_entries
  131. )
  132. def _do_test_is_tree(self, is_tree) -> None:
  133. self.assertFalse(is_tree(None))
  134. self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
  135. self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
  136. self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
  137. self.assertTrue(is_tree(TreeEntry(b"a", 0o040000, b"a" * 40)))
  138. self.assertRaises(TypeError, is_tree, TreeEntry(b"a", b"x", b"a" * 40))
  139. self.assertRaises(AttributeError, is_tree, 1234)
  140. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  141. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  142. def assertChangesEqual(self, expected, tree1, tree2, **kwargs) -> None:
  143. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  144. self.assertEqual(expected, actual)
  145. # For brevity, the following tests use tuples instead of TreeEntry objects.
  146. def test_tree_changes_empty(self) -> None:
  147. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  148. def test_tree_changes_no_changes(self) -> None:
  149. blob = make_object(Blob, data=b"blob")
  150. tree = self.commit_tree([(b"a", blob), (b"b/c", blob)])
  151. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  152. self.assertChangesEqual([], tree, tree)
  153. self.assertChangesEqual(
  154. [
  155. TreeChange(CHANGE_UNCHANGED, (b"a", F, blob.id), (b"a", F, blob.id)),
  156. TreeChange(
  157. CHANGE_UNCHANGED,
  158. (b"b/c", F, blob.id),
  159. (b"b/c", F, blob.id),
  160. ),
  161. ],
  162. tree,
  163. tree,
  164. want_unchanged=True,
  165. )
  166. def test_tree_changes_add_delete(self) -> None:
  167. blob_a = make_object(Blob, data=b"a")
  168. blob_b = make_object(Blob, data=b"b")
  169. tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
  170. self.assertChangesEqual(
  171. [
  172. TreeChange.add(TreeEntry(b"a", 0o100644, blob_a.id)),
  173. TreeChange.add(TreeEntry(b"x/b", 0o100755, blob_b.id)),
  174. ],
  175. self.empty_tree,
  176. tree,
  177. )
  178. self.assertChangesEqual(
  179. [
  180. TreeChange.delete(TreeEntry(b"a", 0o100644, blob_a.id)),
  181. TreeChange.delete(TreeEntry(b"x/b", 0o100755, blob_b.id)),
  182. ],
  183. tree,
  184. self.empty_tree,
  185. )
  186. def test_tree_changes_modify_contents(self) -> None:
  187. blob_a1 = make_object(Blob, data=b"a1")
  188. blob_a2 = make_object(Blob, data=b"a2")
  189. tree1 = self.commit_tree([(b"a", blob_a1)])
  190. tree2 = self.commit_tree([(b"a", blob_a2)])
  191. self.assertChangesEqual(
  192. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  193. tree1,
  194. tree2,
  195. )
  196. def test_tree_changes_modify_mode(self) -> None:
  197. blob_a = make_object(Blob, data=b"a")
  198. tree1 = self.commit_tree([(b"a", blob_a, 0o100644)])
  199. tree2 = self.commit_tree([(b"a", blob_a, 0o100755)])
  200. self.assertChangesEqual(
  201. [
  202. TreeChange(
  203. CHANGE_MODIFY,
  204. (b"a", 0o100644, blob_a.id),
  205. (b"a", 0o100755, blob_a.id),
  206. )
  207. ],
  208. tree1,
  209. tree2,
  210. )
  211. def test_tree_changes_change_type(self) -> None:
  212. blob_a1 = make_object(Blob, data=b"a")
  213. blob_a2 = make_object(Blob, data=b"/foo/bar")
  214. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  215. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  216. self.assertChangesEqual(
  217. [
  218. TreeChange.delete(TreeEntry(b"a", 0o100644, blob_a1.id)),
  219. TreeChange.add(TreeEntry(b"a", 0o120000, blob_a2.id)),
  220. ],
  221. tree1,
  222. tree2,
  223. )
  224. def test_tree_changes_change_type_same(self) -> None:
  225. blob_a1 = make_object(Blob, data=b"a")
  226. blob_a2 = make_object(Blob, data=b"/foo/bar")
  227. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  228. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  229. self.assertChangesEqual(
  230. [
  231. TreeChange(
  232. CHANGE_MODIFY,
  233. (b"a", 0o100644, blob_a1.id),
  234. (b"a", 0o120000, blob_a2.id),
  235. )
  236. ],
  237. tree1,
  238. tree2,
  239. change_type_same=True,
  240. )
  241. def test_tree_changes_to_tree(self) -> None:
  242. blob_a = make_object(Blob, data=b"a")
  243. blob_x = make_object(Blob, data=b"x")
  244. tree1 = self.commit_tree([(b"a", blob_a)])
  245. tree2 = self.commit_tree([(b"a/x", blob_x)])
  246. self.assertChangesEqual(
  247. [
  248. TreeChange.delete(TreeEntry(b"a", F, blob_a.id)),
  249. TreeChange.add(TreeEntry(b"a/x", F, blob_x.id)),
  250. ],
  251. tree1,
  252. tree2,
  253. )
  254. def test_tree_changes_complex(self) -> None:
  255. blob_a_1 = make_object(Blob, data=b"a1_1")
  256. blob_bx1_1 = make_object(Blob, data=b"bx1_1")
  257. blob_bx2_1 = make_object(Blob, data=b"bx2_1")
  258. blob_by1_1 = make_object(Blob, data=b"by1_1")
  259. blob_by2_1 = make_object(Blob, data=b"by2_1")
  260. tree1 = self.commit_tree(
  261. [
  262. (b"a", blob_a_1),
  263. (b"b/x/1", blob_bx1_1),
  264. (b"b/x/2", blob_bx2_1),
  265. (b"b/y/1", blob_by1_1),
  266. (b"b/y/2", blob_by2_1),
  267. ]
  268. )
  269. blob_a_2 = make_object(Blob, data=b"a1_2")
  270. blob_bx1_2 = blob_bx1_1
  271. blob_by_2 = make_object(Blob, data=b"by_2")
  272. blob_c_2 = make_object(Blob, data=b"c_2")
  273. tree2 = self.commit_tree(
  274. [
  275. (b"a", blob_a_2),
  276. (b"b/x/1", blob_bx1_2),
  277. (b"b/y", blob_by_2),
  278. (b"c", blob_c_2),
  279. ]
  280. )
  281. self.assertChangesEqual(
  282. [
  283. TreeChange(
  284. CHANGE_MODIFY,
  285. (b"a", F, blob_a_1.id),
  286. (b"a", F, blob_a_2.id),
  287. ),
  288. TreeChange.delete(TreeEntry(b"b/x/2", F, blob_bx2_1.id)),
  289. TreeChange.add(TreeEntry(b"b/y", F, blob_by_2.id)),
  290. TreeChange.delete(TreeEntry(b"b/y/1", F, blob_by1_1.id)),
  291. TreeChange.delete(TreeEntry(b"b/y/2", F, blob_by2_1.id)),
  292. TreeChange.add(TreeEntry(b"c", F, blob_c_2.id)),
  293. ],
  294. tree1,
  295. tree2,
  296. )
  297. def test_tree_changes_name_order(self) -> None:
  298. blob = make_object(Blob, data=b"a")
  299. tree1 = self.commit_tree([(b"a", blob), (b"a.", blob), (b"a..", blob)])
  300. # Tree order is the reverse of this, so if we used tree order, 'a..'
  301. # would not be merged.
  302. tree2 = self.commit_tree([(b"a/x", blob), (b"a./x", blob), (b"a..", blob)])
  303. self.assertChangesEqual(
  304. [
  305. TreeChange.delete(TreeEntry(b"a", F, blob.id)),
  306. TreeChange.add(TreeEntry(b"a/x", F, blob.id)),
  307. TreeChange.delete(TreeEntry(b"a.", F, blob.id)),
  308. TreeChange.add(TreeEntry(b"a./x", F, blob.id)),
  309. ],
  310. tree1,
  311. tree2,
  312. )
  313. def test_tree_changes_prune(self) -> None:
  314. blob_a1 = make_object(Blob, data=b"a1")
  315. blob_a2 = make_object(Blob, data=b"a2")
  316. blob_x = make_object(Blob, data=b"x")
  317. tree1 = self.commit_tree([(b"a", blob_a1), (b"b/x", blob_x)])
  318. tree2 = self.commit_tree([(b"a", blob_a2), (b"b/x", blob_x)])
  319. # Remove identical items so lookups will fail unless we prune.
  320. subtree = self.store[tree1[b"b"][1]]
  321. for entry in subtree.items():
  322. del self.store[entry.sha]
  323. del self.store[subtree.id]
  324. self.assertChangesEqual(
  325. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  326. tree1,
  327. tree2,
  328. )
  329. def test_tree_changes_rename_detector(self) -> None:
  330. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  331. blob_a2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  332. blob_b = make_object(Blob, data=b"b")
  333. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  334. tree2 = self.commit_tree([(b"c", blob_a2), (b"b", blob_b)])
  335. detector = RenameDetector(self.store)
  336. self.assertChangesEqual(
  337. [
  338. TreeChange.delete(TreeEntry(b"a", F, blob_a1.id)),
  339. TreeChange.add(TreeEntry(b"c", F, blob_a2.id)),
  340. ],
  341. tree1,
  342. tree2,
  343. )
  344. self.assertChangesEqual(
  345. [
  346. TreeChange.delete(TreeEntry(b"a", F, blob_a1.id)),
  347. TreeChange(
  348. CHANGE_UNCHANGED,
  349. (b"b", F, blob_b.id),
  350. (b"b", F, blob_b.id),
  351. ),
  352. TreeChange.add(TreeEntry(b"c", F, blob_a2.id)),
  353. ],
  354. tree1,
  355. tree2,
  356. want_unchanged=True,
  357. )
  358. self.assertChangesEqual(
  359. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id))],
  360. tree1,
  361. tree2,
  362. rename_detector=detector,
  363. )
  364. self.assertChangesEqual(
  365. [
  366. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id)),
  367. TreeChange(
  368. CHANGE_UNCHANGED,
  369. (b"b", F, blob_b.id),
  370. (b"b", F, blob_b.id),
  371. ),
  372. ],
  373. tree1,
  374. tree2,
  375. rename_detector=detector,
  376. want_unchanged=True,
  377. )
  378. def assertChangesForMergeEqual(
  379. self, expected, parent_trees, merge_tree, **kwargs
  380. ) -> None:
  381. parent_tree_ids = [t.id for t in parent_trees]
  382. actual = list(
  383. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  384. )
  385. self.assertEqual(expected, actual)
  386. parent_tree_ids.reverse()
  387. expected = [list(reversed(cs)) for cs in expected]
  388. actual = list(
  389. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  390. )
  391. self.assertEqual(expected, actual)
  392. def test_tree_changes_for_merge_add_no_conflict(self) -> None:
  393. blob = make_object(Blob, data=b"blob")
  394. parent1 = self.commit_tree([])
  395. parent2 = merge = self.commit_tree([(b"a", blob)])
  396. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  397. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  398. def test_tree_changes_for_merge_add_modify_conflict(self) -> None:
  399. blob1 = make_object(Blob, data=b"1")
  400. blob2 = make_object(Blob, data=b"2")
  401. parent1 = self.commit_tree([])
  402. parent2 = self.commit_tree([(b"a", blob1)])
  403. merge = self.commit_tree([(b"a", blob2)])
  404. self.assertChangesForMergeEqual(
  405. [
  406. [
  407. TreeChange.add(TreeEntry(b"a", F, blob2.id)),
  408. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  409. ]
  410. ],
  411. [parent1, parent2],
  412. merge,
  413. )
  414. def test_tree_changes_for_merge_modify_modify_conflict(self) -> None:
  415. blob1 = make_object(Blob, data=b"1")
  416. blob2 = make_object(Blob, data=b"2")
  417. blob3 = make_object(Blob, data=b"3")
  418. parent1 = self.commit_tree([(b"a", blob1)])
  419. parent2 = self.commit_tree([(b"a", blob2)])
  420. merge = self.commit_tree([(b"a", blob3)])
  421. self.assertChangesForMergeEqual(
  422. [
  423. [
  424. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  425. TreeChange(CHANGE_MODIFY, (b"a", F, blob2.id), (b"a", F, blob3.id)),
  426. ]
  427. ],
  428. [parent1, parent2],
  429. merge,
  430. )
  431. def test_tree_changes_for_merge_modify_no_conflict(self) -> None:
  432. blob1 = make_object(Blob, data=b"1")
  433. blob2 = make_object(Blob, data=b"2")
  434. parent1 = self.commit_tree([(b"a", blob1)])
  435. parent2 = merge = self.commit_tree([(b"a", blob2)])
  436. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  437. def test_tree_changes_for_merge_delete_delete_conflict(self) -> None:
  438. blob1 = make_object(Blob, data=b"1")
  439. blob2 = make_object(Blob, data=b"2")
  440. parent1 = self.commit_tree([(b"a", blob1)])
  441. parent2 = self.commit_tree([(b"a", blob2)])
  442. merge = self.commit_tree([])
  443. self.assertChangesForMergeEqual(
  444. [
  445. [
  446. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  447. TreeChange.delete(TreeEntry(b"a", F, blob2.id)),
  448. ]
  449. ],
  450. [parent1, parent2],
  451. merge,
  452. )
  453. def test_tree_changes_for_merge_delete_no_conflict(self) -> None:
  454. blob = make_object(Blob, data=b"blob")
  455. has = self.commit_tree([(b"a", blob)])
  456. doesnt_have = self.commit_tree([])
  457. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  458. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  459. def test_tree_changes_for_merge_octopus_no_conflict(self) -> None:
  460. r = list(range(5))
  461. blobs = [make_object(Blob, data=bytes(i)) for i in r]
  462. parents = [self.commit_tree([(b"a", blobs[i])]) for i in r]
  463. for i in r:
  464. # Take the SHA from each of the parents.
  465. self.assertChangesForMergeEqual([], parents, parents[i])
  466. def test_tree_changes_for_merge_octopus_modify_conflict(self) -> None:
  467. # Because the octopus merge strategy is limited, I doubt it's possible
  468. # to create this with the git command line. But the output is well-
  469. # defined, so test it anyway.
  470. r = list(range(5))
  471. parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
  472. merge_blob = make_object(Blob, data=b"merge")
  473. parents = [self.commit_tree([(b"a", parent_blobs[i])]) for i in r]
  474. merge = self.commit_tree([(b"a", merge_blob)])
  475. expected = [
  476. [
  477. TreeChange(
  478. CHANGE_MODIFY,
  479. (b"a", F, parent_blobs[i].id),
  480. (b"a", F, merge_blob.id),
  481. )
  482. for i in r
  483. ]
  484. ]
  485. self.assertChangesForMergeEqual(expected, parents, merge)
  486. def test_tree_changes_for_merge_octopus_delete(self) -> None:
  487. blob1 = make_object(Blob, data=b"1")
  488. blob2 = make_object(Blob, data=b"3")
  489. parent1 = self.commit_tree([(b"a", blob1)])
  490. parent2 = self.commit_tree([(b"a", blob2)])
  491. parent3 = merge = self.commit_tree([])
  492. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  493. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  494. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  495. self.assertChangesForMergeEqual(
  496. [
  497. [
  498. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  499. TreeChange.delete(TreeEntry(b"a", F, blob2.id)),
  500. None,
  501. ]
  502. ],
  503. [parent1, parent2, parent3],
  504. merge,
  505. )
  506. def test_tree_changes_for_merge_add_add_same_conflict(self) -> None:
  507. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  508. parent1 = self.commit_tree([(b"a", blob)])
  509. parent2 = self.commit_tree([])
  510. merge = self.commit_tree([(b"b", blob)])
  511. add = TreeChange.add(TreeEntry(b"b", F, blob.id))
  512. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  513. def test_tree_changes_for_merge_add_exact_rename_conflict(self) -> None:
  514. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  515. parent1 = self.commit_tree([(b"a", blob)])
  516. parent2 = self.commit_tree([])
  517. merge = self.commit_tree([(b"b", blob)])
  518. self.assertChangesForMergeEqual(
  519. [
  520. [
  521. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  522. TreeChange.add(TreeEntry(b"b", F, blob.id)),
  523. ]
  524. ],
  525. [parent1, parent2],
  526. merge,
  527. rename_detector=self.detector,
  528. )
  529. def test_tree_changes_for_merge_add_content_rename_conflict(self) -> None:
  530. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  531. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  532. parent1 = self.commit_tree([(b"a", blob1)])
  533. parent2 = self.commit_tree([])
  534. merge = self.commit_tree([(b"b", blob2)])
  535. self.assertChangesForMergeEqual(
  536. [
  537. [
  538. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  539. TreeChange.add(TreeEntry(b"b", F, blob2.id)),
  540. ]
  541. ],
  542. [parent1, parent2],
  543. merge,
  544. rename_detector=self.detector,
  545. )
  546. def test_tree_changes_for_merge_modify_rename_conflict(self) -> None:
  547. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  548. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  549. parent1 = self.commit_tree([(b"a", blob1)])
  550. parent2 = self.commit_tree([(b"b", blob1)])
  551. merge = self.commit_tree([(b"b", blob2)])
  552. self.assertChangesForMergeEqual(
  553. [
  554. [
  555. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  556. TreeChange(CHANGE_MODIFY, (b"b", F, blob1.id), (b"b", F, blob2.id)),
  557. ]
  558. ],
  559. [parent1, parent2],
  560. merge,
  561. rename_detector=self.detector,
  562. )
  563. def test_tree_changes_with_paths(self) -> None:
  564. # Test filtering to specific paths
  565. blob_a = make_object(Blob, data=b"a")
  566. blob_b = make_object(Blob, data=b"b")
  567. blob_c = make_object(Blob, data=b"c")
  568. tree1 = self.commit_tree(
  569. [
  570. (b"file_a", blob_a),
  571. (b"file_b", blob_b),
  572. ]
  573. )
  574. tree2 = self.commit_tree(
  575. [
  576. (b"file_a", blob_b), # modified
  577. (b"file_b", blob_b), # unchanged
  578. (b"file_c", blob_c), # added
  579. ]
  580. )
  581. # No filtering - should see all changes
  582. self.assertChangesEqual(
  583. [
  584. TreeChange(
  585. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  586. ),
  587. TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
  588. ],
  589. tree1,
  590. tree2,
  591. )
  592. # Filter to file_a only
  593. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_a"]))
  594. self.assertEqual(
  595. [
  596. TreeChange(
  597. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  598. ),
  599. ],
  600. changes,
  601. )
  602. # Filter to file_c only
  603. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_c"]))
  604. self.assertEqual(
  605. [
  606. TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
  607. ],
  608. changes,
  609. )
  610. # Filter to multiple files
  611. changes = list(
  612. tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_a", b"file_c"])
  613. )
  614. self.assertEqual(
  615. [
  616. TreeChange(
  617. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  618. ),
  619. TreeChange.add(TreeEntry(b"file_c", F, blob_c.id)),
  620. ],
  621. changes,
  622. )
  623. # Filter to non-existent file
  624. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"file_d"]))
  625. self.assertEqual([], changes)
  626. def test_tree_changes_with_paths_dirs(self) -> None:
  627. # Test filtering with directories
  628. blob_a = make_object(Blob, data=b"a")
  629. blob_b = make_object(Blob, data=b"b")
  630. blob_c = make_object(Blob, data=b"c")
  631. tree1 = self.commit_tree(
  632. [
  633. (b"dir1/file_a", blob_a),
  634. (b"dir2/file_b", blob_b),
  635. ]
  636. )
  637. tree2 = self.commit_tree(
  638. [
  639. (b"dir1/file_a", blob_b), # modified
  640. (b"dir1/file_c", blob_c), # added
  641. (b"dir2/file_b", blob_b), # unchanged
  642. ]
  643. )
  644. # Filter to dir1 only
  645. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir1"]))
  646. self.assertEqual(
  647. [
  648. TreeChange(
  649. CHANGE_MODIFY,
  650. (b"dir1/file_a", F, blob_a.id),
  651. (b"dir1/file_a", F, blob_b.id),
  652. ),
  653. TreeChange.add(TreeEntry(b"dir1/file_c", F, blob_c.id)),
  654. ],
  655. changes,
  656. )
  657. # Filter to specific file in directory
  658. changes = list(
  659. tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir1/file_a"])
  660. )
  661. self.assertEqual(
  662. [
  663. TreeChange(
  664. CHANGE_MODIFY,
  665. (b"dir1/file_a", F, blob_a.id),
  666. (b"dir1/file_a", F, blob_b.id),
  667. ),
  668. ],
  669. changes,
  670. )
  671. # Filter to dir2 (no changes)
  672. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"dir2"]))
  673. self.assertEqual([], changes)
  674. def test_tree_changes_with_paths_deep(self) -> None:
  675. # Test filtering with deep directory structure
  676. blob_a = make_object(Blob, data=b"a")
  677. blob_b = make_object(Blob, data=b"b")
  678. tree1 = self.commit_tree(
  679. [
  680. (b"a/b/c/file1", blob_a),
  681. (b"a/b/d/file2", blob_a),
  682. (b"x/y/z/file3", blob_a),
  683. ]
  684. )
  685. tree2 = self.commit_tree(
  686. [
  687. (b"a/b/c/file1", blob_b), # modified
  688. (b"a/b/d/file2", blob_a), # unchanged
  689. (b"x/y/z/file3", blob_b), # modified
  690. ]
  691. )
  692. # Filter to a/b/c
  693. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"a/b/c"]))
  694. self.assertEqual(
  695. [
  696. TreeChange(
  697. CHANGE_MODIFY,
  698. (b"a/b/c/file1", F, blob_a.id),
  699. (b"a/b/c/file1", F, blob_b.id),
  700. ),
  701. ],
  702. changes,
  703. )
  704. # Filter to a/b (should include both subdirs)
  705. changes = list(tree_changes(self.store, tree1.id, tree2.id, paths=[b"a/b"]))
  706. self.assertEqual(
  707. [
  708. TreeChange(
  709. CHANGE_MODIFY,
  710. (b"a/b/c/file1", F, blob_a.id),
  711. (b"a/b/c/file1", F, blob_b.id),
  712. ),
  713. ],
  714. changes,
  715. )
  716. # Filter to multiple deep paths
  717. changes = list(
  718. tree_changes(
  719. self.store, tree1.id, tree2.id, paths=[b"a/b/c/file1", b"x/y/z/file3"]
  720. )
  721. )
  722. self.assertEqual(
  723. [
  724. TreeChange(
  725. CHANGE_MODIFY,
  726. (b"a/b/c/file1", F, blob_a.id),
  727. (b"a/b/c/file1", F, blob_b.id),
  728. ),
  729. TreeChange(
  730. CHANGE_MODIFY,
  731. (b"x/y/z/file3", F, blob_a.id),
  732. (b"x/y/z/file3", F, blob_b.id),
  733. ),
  734. ],
  735. changes,
  736. )
  737. def test_tree_changes_with_paths_include_unchanged(self) -> None:
  738. # Test path filtering with want_unchanged=True
  739. blob_a = make_object(Blob, data=b"a")
  740. blob_b = make_object(Blob, data=b"b")
  741. tree1 = self.commit_tree(
  742. [
  743. (b"file_a", blob_a),
  744. (b"file_b", blob_b),
  745. ]
  746. )
  747. tree2 = self.commit_tree(
  748. [
  749. (b"file_a", blob_b), # modified
  750. (b"file_b", blob_b), # unchanged
  751. ]
  752. )
  753. # Filter to file_b with want_unchanged=True
  754. changes = list(
  755. tree_changes(
  756. self.store, tree1.id, tree2.id, paths=[b"file_b"], want_unchanged=True
  757. )
  758. )
  759. self.assertEqual(
  760. [
  761. TreeChange(
  762. CHANGE_UNCHANGED,
  763. (b"file_b", F, blob_b.id),
  764. (b"file_b", F, blob_b.id),
  765. ),
  766. ],
  767. changes,
  768. )
  769. # Filter to both files with want_unchanged=True
  770. changes = list(
  771. tree_changes(
  772. self.store,
  773. tree1.id,
  774. tree2.id,
  775. paths=[b"file_a", b"file_b"],
  776. want_unchanged=True,
  777. )
  778. )
  779. self.assertEqual(
  780. [
  781. TreeChange(
  782. CHANGE_MODIFY, (b"file_a", F, blob_a.id), (b"file_a", F, blob_b.id)
  783. ),
  784. TreeChange(
  785. CHANGE_UNCHANGED,
  786. (b"file_b", F, blob_b.id),
  787. (b"file_b", F, blob_b.id),
  788. ),
  789. ],
  790. changes,
  791. )
  792. class RenameDetectionTest(DiffTestCase):
  793. def _do_test_count_blocks(self, count_blocks) -> None:
  794. blob = make_object(Blob, data=b"a\nb\na\n")
  795. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, count_blocks(blob))
  796. test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py)
  797. test_count_blocks_extension = ext_functest_builder(
  798. _do_test_count_blocks, _count_blocks
  799. )
  800. def _do_test_count_blocks_no_newline(self, count_blocks) -> None:
  801. blob = make_object(Blob, data=b"a\na")
  802. self.assertBlockCountEqual({b"a\n": 2, b"a": 1}, _count_blocks(blob))
  803. test_count_blocks_no_newline = functest_builder(
  804. _do_test_count_blocks_no_newline, _count_blocks_py
  805. )
  806. test_count_blocks_no_newline_extension = ext_functest_builder(
  807. _do_test_count_blocks_no_newline, _count_blocks
  808. )
  809. def assertBlockCountEqual(self, expected, got) -> None:
  810. self.assertEqual(
  811. {(hash(block) & 0xFFFFFFFF): count for (block, count) in expected.items()},
  812. {(block & 0xFFFFFFFF): count for (block, count) in got.items()},
  813. )
  814. def _do_test_count_blocks_chunks(self, count_blocks) -> None:
  815. blob = ShaFile.from_raw_chunks(Blob.type_num, [b"a\nb", b"\na\n"])
  816. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, _count_blocks(blob))
  817. test_count_blocks_chunks = functest_builder(
  818. _do_test_count_blocks_chunks, _count_blocks_py
  819. )
  820. test_count_blocks_chunks_extension = ext_functest_builder(
  821. _do_test_count_blocks_chunks, _count_blocks
  822. )
  823. def _do_test_count_blocks_long_lines(self, count_blocks) -> None:
  824. a = b"a" * 64
  825. data = a + b"xxx\ny\n" + a + b"zzz\n"
  826. blob = make_object(Blob, data=data)
  827. self.assertBlockCountEqual(
  828. {b"a" * 64: 128, b"xxx\n": 4, b"y\n": 2, b"zzz\n": 4},
  829. _count_blocks(blob),
  830. )
  831. test_count_blocks_long_lines = functest_builder(
  832. _do_test_count_blocks_long_lines, _count_blocks_py
  833. )
  834. test_count_blocks_long_lines_extension = ext_functest_builder(
  835. _do_test_count_blocks_long_lines, _count_blocks
  836. )
  837. def assertSimilar(self, expected_score, blob1, blob2) -> None:
  838. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  839. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  840. def test_similarity_score(self) -> None:
  841. blob0 = make_object(Blob, data=b"")
  842. blob1 = make_object(Blob, data=b"ab\ncd\ncd\n")
  843. blob2 = make_object(Blob, data=b"ab\n")
  844. blob3 = make_object(Blob, data=b"cd\n")
  845. blob4 = make_object(Blob, data=b"cd\ncd\n")
  846. self.assertSimilar(100, blob0, blob0)
  847. self.assertSimilar(0, blob0, blob1)
  848. self.assertSimilar(33, blob1, blob2)
  849. self.assertSimilar(33, blob1, blob3)
  850. self.assertSimilar(66, blob1, blob4)
  851. self.assertSimilar(0, blob2, blob3)
  852. self.assertSimilar(50, blob3, blob4)
  853. def test_similarity_score_cache(self) -> None:
  854. blob1 = make_object(Blob, data=b"ab\ncd\n")
  855. blob2 = make_object(Blob, data=b"ab\n")
  856. block_cache: dict[bytes, dict[int, int]] = {}
  857. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  858. self.assertEqual({blob1.id, blob2.id}, set(block_cache))
  859. def fail_chunks() -> None:
  860. self.fail("Unexpected call to as_raw_chunks()")
  861. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  862. blob1.raw_length = lambda: 6
  863. blob2.raw_length = lambda: 3
  864. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  865. def test_tree_entry_sort(self) -> None:
  866. sha = "abcd" * 10
  867. expected_entries = [
  868. TreeChange.add(TreeEntry(b"aaa", F, sha)),
  869. TreeChange(
  870. CHANGE_COPY,
  871. TreeEntry(b"bbb", F, sha),
  872. TreeEntry(b"aab", F, sha),
  873. ),
  874. TreeChange(
  875. CHANGE_MODIFY,
  876. TreeEntry(b"bbb", F, sha),
  877. TreeEntry(b"bbb", F, b"dabc" * 10),
  878. ),
  879. TreeChange(
  880. CHANGE_RENAME,
  881. TreeEntry(b"bbc", F, sha),
  882. TreeEntry(b"ddd", F, sha),
  883. ),
  884. TreeChange.delete(TreeEntry(b"ccc", F, sha)),
  885. ]
  886. for perm in permutations(expected_entries):
  887. self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key))
  888. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  889. detector = RenameDetector(self.store, **kwargs)
  890. return detector.changes_with_renames(
  891. tree1.id, tree2.id, want_unchanged=want_unchanged
  892. )
  893. def test_no_renames(self) -> None:
  894. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  895. blob2 = make_object(Blob, data=b"a\nb\ne\nf\n")
  896. blob3 = make_object(Blob, data=b"a\nb\ng\nh\n")
  897. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  898. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob3)])
  899. self.assertEqual(
  900. [TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob3.id))],
  901. self.detect_renames(tree1, tree2),
  902. )
  903. def test_exact_rename_one_to_one(self) -> None:
  904. blob1 = make_object(Blob, data=b"1")
  905. blob2 = make_object(Blob, data=b"2")
  906. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  907. tree2 = self.commit_tree([(b"c", blob1), (b"d", blob2)])
  908. self.assertEqual(
  909. [
  910. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob1.id)),
  911. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"d", F, blob2.id)),
  912. ],
  913. self.detect_renames(tree1, tree2),
  914. )
  915. def test_exact_rename_split_different_type(self) -> None:
  916. blob = make_object(Blob, data=b"/foo")
  917. tree1 = self.commit_tree([(b"a", blob, 0o100644)])
  918. tree2 = self.commit_tree([(b"a", blob, 0o120000)])
  919. self.assertEqual(
  920. [
  921. TreeChange.add(TreeEntry(b"a", 0o120000, blob.id)),
  922. TreeChange.delete(TreeEntry(b"a", 0o100644, blob.id)),
  923. ],
  924. self.detect_renames(tree1, tree2),
  925. )
  926. def test_exact_rename_and_different_type(self) -> None:
  927. blob1 = make_object(Blob, data=b"1")
  928. blob2 = make_object(Blob, data=b"2")
  929. tree1 = self.commit_tree([(b"a", blob1)])
  930. tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
  931. self.assertEqual(
  932. [
  933. TreeChange.add(TreeEntry(b"a", 0o120000, blob2.id)),
  934. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  935. ],
  936. self.detect_renames(tree1, tree2),
  937. )
  938. def test_exact_rename_one_to_many(self) -> None:
  939. blob = make_object(Blob, data=b"1")
  940. tree1 = self.commit_tree([(b"a", blob)])
  941. tree2 = self.commit_tree([(b"b", blob), (b"c", blob)])
  942. self.assertEqual(
  943. [
  944. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  945. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"c", F, blob.id)),
  946. ],
  947. self.detect_renames(tree1, tree2),
  948. )
  949. def test_exact_rename_many_to_one(self) -> None:
  950. blob = make_object(Blob, data=b"1")
  951. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  952. tree2 = self.commit_tree([(b"c", blob)])
  953. self.assertEqual(
  954. [
  955. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  956. TreeChange.delete(TreeEntry(b"b", F, blob.id)),
  957. ],
  958. self.detect_renames(tree1, tree2),
  959. )
  960. def test_exact_rename_many_to_many(self) -> None:
  961. blob = make_object(Blob, data=b"1")
  962. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  963. tree2 = self.commit_tree([(b"c", blob), (b"d", blob), (b"e", blob)])
  964. self.assertEqual(
  965. [
  966. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  967. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"e", F, blob.id)),
  968. TreeChange(CHANGE_RENAME, (b"b", F, blob.id), (b"d", F, blob.id)),
  969. ],
  970. self.detect_renames(tree1, tree2),
  971. )
  972. def test_exact_copy_modify(self) -> None:
  973. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  974. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  975. tree1 = self.commit_tree([(b"a", blob1)])
  976. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  977. self.assertEqual(
  978. [
  979. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  980. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  981. ],
  982. self.detect_renames(tree1, tree2),
  983. )
  984. def test_exact_copy_change_mode(self) -> None:
  985. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  986. tree1 = self.commit_tree([(b"a", blob)])
  987. tree2 = self.commit_tree([(b"a", blob, 0o100755), (b"b", blob)])
  988. self.assertEqual(
  989. [
  990. TreeChange(
  991. CHANGE_MODIFY,
  992. (b"a", F, blob.id),
  993. (b"a", 0o100755, blob.id),
  994. ),
  995. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id)),
  996. ],
  997. self.detect_renames(tree1, tree2),
  998. )
  999. def test_rename_threshold(self) -> None:
  1000. blob1 = make_object(Blob, data=b"a\nb\nc\n")
  1001. blob2 = make_object(Blob, data=b"a\nb\nd\n")
  1002. tree1 = self.commit_tree([(b"a", blob1)])
  1003. tree2 = self.commit_tree([(b"b", blob2)])
  1004. self.assertEqual(
  1005. [TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  1006. self.detect_renames(tree1, tree2, rename_threshold=50),
  1007. )
  1008. self.assertEqual(
  1009. [
  1010. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  1011. TreeChange.add(TreeEntry(b"b", F, blob2.id)),
  1012. ],
  1013. self.detect_renames(tree1, tree2, rename_threshold=75),
  1014. )
  1015. def test_content_rename_max_files(self) -> None:
  1016. blob1 = make_object(Blob, data=b"a\nb\nc\nd")
  1017. blob4 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1018. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  1019. blob3 = make_object(Blob, data=b"e\nf\ng\ni\n")
  1020. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1021. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  1022. self.assertEqual(
  1023. [
  1024. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  1025. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  1026. ],
  1027. self.detect_renames(tree1, tree2),
  1028. )
  1029. self.assertEqual(
  1030. [
  1031. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  1032. TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
  1033. TreeChange.add(TreeEntry(b"c", F, blob3.id)),
  1034. TreeChange.add(TreeEntry(b"d", F, blob4.id)),
  1035. ],
  1036. self.detect_renames(tree1, tree2, max_files=1),
  1037. )
  1038. def test_content_rename_one_to_one(self) -> None:
  1039. b11 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1040. b12 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1041. b21 = make_object(Blob, data=b"e\nf\ng\n\nh")
  1042. b22 = make_object(Blob, data=b"e\nf\ng\n\ni")
  1043. tree1 = self.commit_tree([(b"a", b11), (b"b", b21)])
  1044. tree2 = self.commit_tree([(b"c", b12), (b"d", b22)])
  1045. self.assertEqual(
  1046. [
  1047. TreeChange(CHANGE_RENAME, (b"a", F, b11.id), (b"c", F, b12.id)),
  1048. TreeChange(CHANGE_RENAME, (b"b", F, b21.id), (b"d", F, b22.id)),
  1049. ],
  1050. self.detect_renames(tree1, tree2),
  1051. )
  1052. def test_content_rename_one_to_one_ordering(self) -> None:
  1053. blob1 = make_object(Blob, data=b"a\nb\nc\nd\ne\nf\n")
  1054. blob2 = make_object(Blob, data=b"a\nb\nc\nd\ng\nh\n")
  1055. # 6/10 match to blob1, 8/10 match to blob2
  1056. blob3 = make_object(Blob, data=b"a\nb\nc\nd\ng\ni\n")
  1057. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1058. tree2 = self.commit_tree([(b"c", blob3)])
  1059. self.assertEqual(
  1060. [
  1061. TreeChange.delete(TreeEntry(b"a", F, blob1.id)),
  1062. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  1063. ],
  1064. self.detect_renames(tree1, tree2),
  1065. )
  1066. tree3 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  1067. tree4 = self.commit_tree([(b"c", blob3)])
  1068. self.assertEqual(
  1069. [
  1070. TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
  1071. TreeChange.delete(TreeEntry(b"b", F, blob1.id)),
  1072. ],
  1073. self.detect_renames(tree3, tree4),
  1074. )
  1075. def test_content_rename_one_to_many(self) -> None:
  1076. blob1 = make_object(Blob, data=b"aa\nb\nc\nd\ne\n")
  1077. blob2 = make_object(Blob, data=b"ab\nb\nc\nd\ne\n") # 8/11 match
  1078. blob3 = make_object(Blob, data=b"aa\nb\nc\nd\nf\n") # 9/11 match
  1079. tree1 = self.commit_tree([(b"a", blob1)])
  1080. tree2 = self.commit_tree([(b"b", blob2), (b"c", blob3)])
  1081. self.assertEqual(
  1082. [
  1083. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  1084. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  1085. ],
  1086. self.detect_renames(tree1, tree2),
  1087. )
  1088. def test_content_rename_many_to_one(self) -> None:
  1089. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1090. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1091. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  1092. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1093. tree2 = self.commit_tree([(b"c", blob3)])
  1094. self.assertEqual(
  1095. [
  1096. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  1097. TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
  1098. ],
  1099. self.detect_renames(tree1, tree2),
  1100. )
  1101. def test_content_rename_many_to_many(self) -> None:
  1102. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1103. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1104. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  1105. blob4 = make_object(Blob, data=b"a\nb\nc\ng\n")
  1106. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1107. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  1108. # TODO(dborowitz): Distribute renames rather than greedily choosing
  1109. # copies.
  1110. self.assertEqual(
  1111. [
  1112. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  1113. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  1114. TreeChange.delete(TreeEntry(b"b", F, blob2.id)),
  1115. ],
  1116. self.detect_renames(tree1, tree2),
  1117. )
  1118. def test_content_rename_with_more_deletions(self) -> None:
  1119. blob1 = make_object(Blob, data=b"")
  1120. tree1 = self.commit_tree(
  1121. [(b"a", blob1), (b"b", blob1), (b"c", blob1), (b"d", blob1)]
  1122. )
  1123. tree2 = self.commit_tree([(b"e", blob1), (b"f", blob1), (b"g", blob1)])
  1124. self.maxDiff = None
  1125. self.assertEqual(
  1126. [
  1127. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
  1128. TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
  1129. TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
  1130. TreeChange.delete(TreeEntry(b"d", F, blob1.id)),
  1131. ],
  1132. self.detect_renames(tree1, tree2),
  1133. )
  1134. def test_content_rename_gitlink(self) -> None:
  1135. blob1 = make_object(Blob, data=b"blob1")
  1136. blob2 = make_object(Blob, data=b"blob2")
  1137. link1 = b"1" * 40
  1138. link2 = b"2" * 40
  1139. tree1 = self.commit_tree([(b"a", blob1), (b"b", link1, 0o160000)])
  1140. tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
  1141. self.assertEqual(
  1142. [
  1143. TreeChange.delete(TreeEntry(b"a", 0o100644, blob1.id)),
  1144. TreeChange.delete(TreeEntry(b"b", 0o160000, link1)),
  1145. TreeChange.add(TreeEntry(b"c", 0o100644, blob2.id)),
  1146. TreeChange.add(TreeEntry(b"d", 0o160000, link2)),
  1147. ],
  1148. self.detect_renames(tree1, tree2),
  1149. )
  1150. def test_exact_rename_swap(self) -> None:
  1151. blob1 = make_object(Blob, data=b"1")
  1152. blob2 = make_object(Blob, data=b"2")
  1153. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1154. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  1155. self.assertEqual(
  1156. [
  1157. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  1158. TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob1.id)),
  1159. ],
  1160. self.detect_renames(tree1, tree2),
  1161. )
  1162. self.assertEqual(
  1163. [
  1164. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  1165. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob2.id)),
  1166. ],
  1167. self.detect_renames(tree1, tree2, rewrite_threshold=50),
  1168. )
  1169. def test_content_rename_swap(self) -> None:
  1170. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1171. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  1172. blob3 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1173. blob4 = make_object(Blob, data=b"e\nf\ng\ni\n")
  1174. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1175. tree2 = self.commit_tree([(b"a", blob4), (b"b", blob3)])
  1176. self.assertEqual(
  1177. [
  1178. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob3.id)),
  1179. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob4.id)),
  1180. ],
  1181. self.detect_renames(tree1, tree2, rewrite_threshold=60),
  1182. )
  1183. def test_rewrite_threshold(self) -> None:
  1184. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1185. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1186. blob3 = make_object(Blob, data=b"a\nb\nf\ng\n")
  1187. tree1 = self.commit_tree([(b"a", blob1)])
  1188. tree2 = self.commit_tree([(b"a", blob3), (b"b", blob2)])
  1189. no_renames = [
  1190. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  1191. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  1192. ]
  1193. self.assertEqual(no_renames, self.detect_renames(tree1, tree2))
  1194. self.assertEqual(
  1195. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40)
  1196. )
  1197. self.assertEqual(
  1198. [
  1199. TreeChange.add(TreeEntry(b"a", F, blob3.id)),
  1200. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  1201. ],
  1202. self.detect_renames(tree1, tree2, rewrite_threshold=80),
  1203. )
  1204. def test_find_copies_harder_exact(self) -> None:
  1205. blob = make_object(Blob, data=b"blob")
  1206. tree1 = self.commit_tree([(b"a", blob)])
  1207. tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
  1208. self.assertEqual(
  1209. [TreeChange.add(TreeEntry(b"b", F, blob.id))],
  1210. self.detect_renames(tree1, tree2),
  1211. )
  1212. self.assertEqual(
  1213. [TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id))],
  1214. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1215. )
  1216. def test_find_copies_harder_content(self) -> None:
  1217. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1218. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1219. tree1 = self.commit_tree([(b"a", blob1)])
  1220. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  1221. self.assertEqual(
  1222. [TreeChange.add(TreeEntry(b"b", F, blob2.id))],
  1223. self.detect_renames(tree1, tree2),
  1224. )
  1225. self.assertEqual(
  1226. [TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  1227. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1228. )
  1229. def test_find_copies_harder_with_rewrites(self) -> None:
  1230. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1231. blob_a2 = make_object(Blob, data=b"f\ng\nh\ni\n")
  1232. blob_b2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1233. tree1 = self.commit_tree([(b"a", blob_a1)])
  1234. tree2 = self.commit_tree([(b"a", blob_a2), (b"b", blob_b2)])
  1235. self.assertEqual(
  1236. [
  1237. TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id)),
  1238. TreeChange(CHANGE_COPY, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1239. ],
  1240. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1241. )
  1242. self.assertEqual(
  1243. [
  1244. TreeChange.add(TreeEntry(b"a", F, blob_a2.id)),
  1245. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1246. ],
  1247. self.detect_renames(
  1248. tree1, tree2, rewrite_threshold=50, find_copies_harder=True
  1249. ),
  1250. )
  1251. def test_reuse_detector(self) -> None:
  1252. blob = make_object(Blob, data=b"blob")
  1253. tree1 = self.commit_tree([(b"a", blob)])
  1254. tree2 = self.commit_tree([(b"b", blob)])
  1255. detector = RenameDetector(self.store)
  1256. changes = [TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id))]
  1257. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1258. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1259. def test_want_unchanged(self) -> None:
  1260. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1261. blob_b = make_object(Blob, data=b"b")
  1262. blob_c2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1263. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  1264. tree2 = self.commit_tree([(b"c", blob_c2), (b"b", blob_b)])
  1265. self.assertEqual(
  1266. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id))],
  1267. self.detect_renames(tree1, tree2),
  1268. )
  1269. self.assertEqual(
  1270. [
  1271. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id)),
  1272. TreeChange(
  1273. CHANGE_UNCHANGED,
  1274. (b"b", F, blob_b.id),
  1275. (b"b", F, blob_b.id),
  1276. ),
  1277. ],
  1278. self.detect_renames(tree1, tree2, want_unchanged=True),
  1279. )