test_diff_tree.py 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as public by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for file and tree diff utilities."""
  22. from itertools import permutations
  23. from dulwich.diff_tree import (
  24. CHANGE_COPY,
  25. CHANGE_MODIFY,
  26. CHANGE_RENAME,
  27. CHANGE_UNCHANGED,
  28. RenameDetector,
  29. TreeChange,
  30. _count_blocks,
  31. _count_blocks_py,
  32. _is_tree,
  33. _is_tree_py,
  34. _merge_entries,
  35. _merge_entries_py,
  36. _similarity_score,
  37. _tree_change_key,
  38. tree_changes,
  39. tree_changes_for_merge,
  40. )
  41. from dulwich.index import commit_tree
  42. from dulwich.object_store import MemoryObjectStore
  43. from dulwich.objects import Blob, ShaFile, Tree, TreeEntry
  44. from dulwich.tests.utils import F, ext_functest_builder, functest_builder, make_object
  45. from . import TestCase
  46. class DiffTestCase(TestCase):
  47. def setUp(self) -> None:
  48. super().setUp()
  49. self.store = MemoryObjectStore()
  50. self.empty_tree = self.commit_tree([])
  51. def commit_tree(self, entries):
  52. commit_blobs = []
  53. for entry in entries:
  54. if len(entry) == 2:
  55. path, obj = entry
  56. mode = F
  57. else:
  58. path, obj, mode = entry
  59. if isinstance(obj, Blob):
  60. self.store.add_object(obj)
  61. sha = obj.id
  62. else:
  63. sha = obj
  64. commit_blobs.append((path, sha, mode))
  65. return self.store[commit_tree(self.store, commit_blobs)]
  66. class TreeChangesTest(DiffTestCase):
  67. def setUp(self) -> None:
  68. super().setUp()
  69. self.detector = RenameDetector(self.store)
  70. def assertMergeFails(self, merge_entries, name, mode, sha) -> None:
  71. t = Tree()
  72. t[name] = (mode, sha)
  73. self.assertRaises((TypeError, ValueError), merge_entries, "", t, t)
  74. def _do_test_merge_entries(self, merge_entries) -> None:
  75. blob_a1 = make_object(Blob, data=b"a1")
  76. blob_a2 = make_object(Blob, data=b"a2")
  77. blob_b1 = make_object(Blob, data=b"b1")
  78. blob_c2 = make_object(Blob, data=b"c2")
  79. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644), (b"b", blob_b1, 0o100755)])
  80. tree2 = self.commit_tree([(b"a", blob_a2, 0o100644), (b"c", blob_c2, 0o100755)])
  81. self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
  82. self.assertEqual(
  83. [
  84. ((None, None, None), (b"a", 0o100644, blob_a1.id)),
  85. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  86. ],
  87. merge_entries(b"", self.empty_tree, tree1),
  88. )
  89. self.assertEqual(
  90. [
  91. ((None, None, None), (b"x/a", 0o100644, blob_a1.id)),
  92. ((None, None, None), (b"x/b", 0o100755, blob_b1.id)),
  93. ],
  94. merge_entries(b"x", self.empty_tree, tree1),
  95. )
  96. self.assertEqual(
  97. [
  98. ((b"a", 0o100644, blob_a2.id), (None, None, None)),
  99. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  100. ],
  101. merge_entries(b"", tree2, self.empty_tree),
  102. )
  103. self.assertEqual(
  104. [
  105. ((b"a", 0o100644, blob_a1.id), (b"a", 0o100644, blob_a2.id)),
  106. ((b"b", 0o100755, blob_b1.id), (None, None, None)),
  107. ((None, None, None), (b"c", 0o100755, blob_c2.id)),
  108. ],
  109. merge_entries(b"", tree1, tree2),
  110. )
  111. self.assertEqual(
  112. [
  113. ((b"a", 0o100644, blob_a2.id), (b"a", 0o100644, blob_a1.id)),
  114. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  115. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  116. ],
  117. merge_entries(b"", tree2, tree1),
  118. )
  119. self.assertMergeFails(merge_entries, 0xDEADBEEF, 0o100644, "1" * 40)
  120. self.assertMergeFails(merge_entries, b"a", b"deadbeef", "1" * 40)
  121. self.assertMergeFails(merge_entries, b"a", 0o100644, 0xDEADBEEF)
  122. test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py)
  123. test_merge_entries_extension = ext_functest_builder(
  124. _do_test_merge_entries, _merge_entries
  125. )
  126. def _do_test_is_tree(self, is_tree) -> None:
  127. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  128. self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
  129. self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
  130. self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
  131. self.assertTrue(is_tree(TreeEntry(b"a", 0o040000, b"a" * 40)))
  132. self.assertRaises(TypeError, is_tree, TreeEntry(b"a", b"x", b"a" * 40))
  133. self.assertRaises(AttributeError, is_tree, 1234)
  134. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  135. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  136. def assertChangesEqual(self, expected, tree1, tree2, **kwargs) -> None:
  137. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  138. self.assertEqual(expected, actual)
  139. # For brevity, the following tests use tuples instead of TreeEntry objects.
  140. def test_tree_changes_empty(self) -> None:
  141. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  142. def test_tree_changes_no_changes(self) -> None:
  143. blob = make_object(Blob, data=b"blob")
  144. tree = self.commit_tree([(b"a", blob), (b"b/c", blob)])
  145. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  146. self.assertChangesEqual([], tree, tree)
  147. self.assertChangesEqual(
  148. [
  149. TreeChange(CHANGE_UNCHANGED, (b"a", F, blob.id), (b"a", F, blob.id)),
  150. TreeChange(
  151. CHANGE_UNCHANGED,
  152. (b"b/c", F, blob.id),
  153. (b"b/c", F, blob.id),
  154. ),
  155. ],
  156. tree,
  157. tree,
  158. want_unchanged=True,
  159. )
  160. def test_tree_changes_add_delete(self) -> None:
  161. blob_a = make_object(Blob, data=b"a")
  162. blob_b = make_object(Blob, data=b"b")
  163. tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
  164. self.assertChangesEqual(
  165. [
  166. TreeChange.add((b"a", 0o100644, blob_a.id)),
  167. TreeChange.add((b"x/b", 0o100755, blob_b.id)),
  168. ],
  169. self.empty_tree,
  170. tree,
  171. )
  172. self.assertChangesEqual(
  173. [
  174. TreeChange.delete((b"a", 0o100644, blob_a.id)),
  175. TreeChange.delete((b"x/b", 0o100755, blob_b.id)),
  176. ],
  177. tree,
  178. self.empty_tree,
  179. )
  180. def test_tree_changes_modify_contents(self) -> None:
  181. blob_a1 = make_object(Blob, data=b"a1")
  182. blob_a2 = make_object(Blob, data=b"a2")
  183. tree1 = self.commit_tree([(b"a", blob_a1)])
  184. tree2 = self.commit_tree([(b"a", blob_a2)])
  185. self.assertChangesEqual(
  186. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  187. tree1,
  188. tree2,
  189. )
  190. def test_tree_changes_modify_mode(self) -> None:
  191. blob_a = make_object(Blob, data=b"a")
  192. tree1 = self.commit_tree([(b"a", blob_a, 0o100644)])
  193. tree2 = self.commit_tree([(b"a", blob_a, 0o100755)])
  194. self.assertChangesEqual(
  195. [
  196. TreeChange(
  197. CHANGE_MODIFY,
  198. (b"a", 0o100644, blob_a.id),
  199. (b"a", 0o100755, blob_a.id),
  200. )
  201. ],
  202. tree1,
  203. tree2,
  204. )
  205. def test_tree_changes_change_type(self) -> None:
  206. blob_a1 = make_object(Blob, data=b"a")
  207. blob_a2 = make_object(Blob, data=b"/foo/bar")
  208. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  209. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  210. self.assertChangesEqual(
  211. [
  212. TreeChange.delete((b"a", 0o100644, blob_a1.id)),
  213. TreeChange.add((b"a", 0o120000, blob_a2.id)),
  214. ],
  215. tree1,
  216. tree2,
  217. )
  218. def test_tree_changes_change_type_same(self) -> None:
  219. blob_a1 = make_object(Blob, data=b"a")
  220. blob_a2 = make_object(Blob, data=b"/foo/bar")
  221. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  222. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  223. self.assertChangesEqual(
  224. [
  225. TreeChange(
  226. CHANGE_MODIFY,
  227. (b"a", 0o100644, blob_a1.id),
  228. (b"a", 0o120000, blob_a2.id),
  229. )
  230. ],
  231. tree1,
  232. tree2,
  233. change_type_same=True,
  234. )
  235. def test_tree_changes_to_tree(self) -> None:
  236. blob_a = make_object(Blob, data=b"a")
  237. blob_x = make_object(Blob, data=b"x")
  238. tree1 = self.commit_tree([(b"a", blob_a)])
  239. tree2 = self.commit_tree([(b"a/x", blob_x)])
  240. self.assertChangesEqual(
  241. [
  242. TreeChange.delete((b"a", F, blob_a.id)),
  243. TreeChange.add((b"a/x", F, blob_x.id)),
  244. ],
  245. tree1,
  246. tree2,
  247. )
  248. def test_tree_changes_complex(self) -> None:
  249. blob_a_1 = make_object(Blob, data=b"a1_1")
  250. blob_bx1_1 = make_object(Blob, data=b"bx1_1")
  251. blob_bx2_1 = make_object(Blob, data=b"bx2_1")
  252. blob_by1_1 = make_object(Blob, data=b"by1_1")
  253. blob_by2_1 = make_object(Blob, data=b"by2_1")
  254. tree1 = self.commit_tree(
  255. [
  256. (b"a", blob_a_1),
  257. (b"b/x/1", blob_bx1_1),
  258. (b"b/x/2", blob_bx2_1),
  259. (b"b/y/1", blob_by1_1),
  260. (b"b/y/2", blob_by2_1),
  261. ]
  262. )
  263. blob_a_2 = make_object(Blob, data=b"a1_2")
  264. blob_bx1_2 = blob_bx1_1
  265. blob_by_2 = make_object(Blob, data=b"by_2")
  266. blob_c_2 = make_object(Blob, data=b"c_2")
  267. tree2 = self.commit_tree(
  268. [
  269. (b"a", blob_a_2),
  270. (b"b/x/1", blob_bx1_2),
  271. (b"b/y", blob_by_2),
  272. (b"c", blob_c_2),
  273. ]
  274. )
  275. self.assertChangesEqual(
  276. [
  277. TreeChange(
  278. CHANGE_MODIFY,
  279. (b"a", F, blob_a_1.id),
  280. (b"a", F, blob_a_2.id),
  281. ),
  282. TreeChange.delete((b"b/x/2", F, blob_bx2_1.id)),
  283. TreeChange.add((b"b/y", F, blob_by_2.id)),
  284. TreeChange.delete((b"b/y/1", F, blob_by1_1.id)),
  285. TreeChange.delete((b"b/y/2", F, blob_by2_1.id)),
  286. TreeChange.add((b"c", F, blob_c_2.id)),
  287. ],
  288. tree1,
  289. tree2,
  290. )
  291. def test_tree_changes_name_order(self) -> None:
  292. blob = make_object(Blob, data=b"a")
  293. tree1 = self.commit_tree([(b"a", blob), (b"a.", blob), (b"a..", blob)])
  294. # Tree order is the reverse of this, so if we used tree order, 'a..'
  295. # would not be merged.
  296. tree2 = self.commit_tree([(b"a/x", blob), (b"a./x", blob), (b"a..", blob)])
  297. self.assertChangesEqual(
  298. [
  299. TreeChange.delete((b"a", F, blob.id)),
  300. TreeChange.add((b"a/x", F, blob.id)),
  301. TreeChange.delete((b"a.", F, blob.id)),
  302. TreeChange.add((b"a./x", F, blob.id)),
  303. ],
  304. tree1,
  305. tree2,
  306. )
  307. def test_tree_changes_prune(self) -> None:
  308. blob_a1 = make_object(Blob, data=b"a1")
  309. blob_a2 = make_object(Blob, data=b"a2")
  310. blob_x = make_object(Blob, data=b"x")
  311. tree1 = self.commit_tree([(b"a", blob_a1), (b"b/x", blob_x)])
  312. tree2 = self.commit_tree([(b"a", blob_a2), (b"b/x", blob_x)])
  313. # Remove identical items so lookups will fail unless we prune.
  314. subtree = self.store[tree1[b"b"][1]]
  315. for entry in subtree.items():
  316. del self.store[entry.sha]
  317. del self.store[subtree.id]
  318. self.assertChangesEqual(
  319. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  320. tree1,
  321. tree2,
  322. )
  323. def test_tree_changes_rename_detector(self) -> None:
  324. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  325. blob_a2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  326. blob_b = make_object(Blob, data=b"b")
  327. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  328. tree2 = self.commit_tree([(b"c", blob_a2), (b"b", blob_b)])
  329. detector = RenameDetector(self.store)
  330. self.assertChangesEqual(
  331. [
  332. TreeChange.delete((b"a", F, blob_a1.id)),
  333. TreeChange.add((b"c", F, blob_a2.id)),
  334. ],
  335. tree1,
  336. tree2,
  337. )
  338. self.assertChangesEqual(
  339. [
  340. TreeChange.delete((b"a", F, blob_a1.id)),
  341. TreeChange(
  342. CHANGE_UNCHANGED,
  343. (b"b", F, blob_b.id),
  344. (b"b", F, blob_b.id),
  345. ),
  346. TreeChange.add((b"c", F, blob_a2.id)),
  347. ],
  348. tree1,
  349. tree2,
  350. want_unchanged=True,
  351. )
  352. self.assertChangesEqual(
  353. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id))],
  354. tree1,
  355. tree2,
  356. rename_detector=detector,
  357. )
  358. self.assertChangesEqual(
  359. [
  360. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id)),
  361. TreeChange(
  362. CHANGE_UNCHANGED,
  363. (b"b", F, blob_b.id),
  364. (b"b", F, blob_b.id),
  365. ),
  366. ],
  367. tree1,
  368. tree2,
  369. rename_detector=detector,
  370. want_unchanged=True,
  371. )
  372. def assertChangesForMergeEqual(
  373. self, expected, parent_trees, merge_tree, **kwargs
  374. ) -> None:
  375. parent_tree_ids = [t.id for t in parent_trees]
  376. actual = list(
  377. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  378. )
  379. self.assertEqual(expected, actual)
  380. parent_tree_ids.reverse()
  381. expected = [list(reversed(cs)) for cs in expected]
  382. actual = list(
  383. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  384. )
  385. self.assertEqual(expected, actual)
  386. def test_tree_changes_for_merge_add_no_conflict(self) -> None:
  387. blob = make_object(Blob, data=b"blob")
  388. parent1 = self.commit_tree([])
  389. parent2 = merge = self.commit_tree([(b"a", blob)])
  390. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  391. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  392. def test_tree_changes_for_merge_add_modify_conflict(self) -> None:
  393. blob1 = make_object(Blob, data=b"1")
  394. blob2 = make_object(Blob, data=b"2")
  395. parent1 = self.commit_tree([])
  396. parent2 = self.commit_tree([(b"a", blob1)])
  397. merge = self.commit_tree([(b"a", blob2)])
  398. self.assertChangesForMergeEqual(
  399. [
  400. [
  401. TreeChange.add((b"a", F, blob2.id)),
  402. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  403. ]
  404. ],
  405. [parent1, parent2],
  406. merge,
  407. )
  408. def test_tree_changes_for_merge_modify_modify_conflict(self) -> None:
  409. blob1 = make_object(Blob, data=b"1")
  410. blob2 = make_object(Blob, data=b"2")
  411. blob3 = make_object(Blob, data=b"3")
  412. parent1 = self.commit_tree([(b"a", blob1)])
  413. parent2 = self.commit_tree([(b"a", blob2)])
  414. merge = self.commit_tree([(b"a", blob3)])
  415. self.assertChangesForMergeEqual(
  416. [
  417. [
  418. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  419. TreeChange(CHANGE_MODIFY, (b"a", F, blob2.id), (b"a", F, blob3.id)),
  420. ]
  421. ],
  422. [parent1, parent2],
  423. merge,
  424. )
  425. def test_tree_changes_for_merge_modify_no_conflict(self) -> None:
  426. blob1 = make_object(Blob, data=b"1")
  427. blob2 = make_object(Blob, data=b"2")
  428. parent1 = self.commit_tree([(b"a", blob1)])
  429. parent2 = merge = self.commit_tree([(b"a", blob2)])
  430. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  431. def test_tree_changes_for_merge_delete_delete_conflict(self) -> None:
  432. blob1 = make_object(Blob, data=b"1")
  433. blob2 = make_object(Blob, data=b"2")
  434. parent1 = self.commit_tree([(b"a", blob1)])
  435. parent2 = self.commit_tree([(b"a", blob2)])
  436. merge = self.commit_tree([])
  437. self.assertChangesForMergeEqual(
  438. [
  439. [
  440. TreeChange.delete((b"a", F, blob1.id)),
  441. TreeChange.delete((b"a", F, blob2.id)),
  442. ]
  443. ],
  444. [parent1, parent2],
  445. merge,
  446. )
  447. def test_tree_changes_for_merge_delete_no_conflict(self) -> None:
  448. blob = make_object(Blob, data=b"blob")
  449. has = self.commit_tree([(b"a", blob)])
  450. doesnt_have = self.commit_tree([])
  451. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  452. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  453. def test_tree_changes_for_merge_octopus_no_conflict(self) -> None:
  454. r = list(range(5))
  455. blobs = [make_object(Blob, data=bytes(i)) for i in r]
  456. parents = [self.commit_tree([(b"a", blobs[i])]) for i in r]
  457. for i in r:
  458. # Take the SHA from each of the parents.
  459. self.assertChangesForMergeEqual([], parents, parents[i])
  460. def test_tree_changes_for_merge_octopus_modify_conflict(self) -> None:
  461. # Because the octopus merge strategy is limited, I doubt it's possible
  462. # to create this with the git command line. But the output is well-
  463. # defined, so test it anyway.
  464. r = list(range(5))
  465. parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
  466. merge_blob = make_object(Blob, data=b"merge")
  467. parents = [self.commit_tree([(b"a", parent_blobs[i])]) for i in r]
  468. merge = self.commit_tree([(b"a", merge_blob)])
  469. expected = [
  470. [
  471. TreeChange(
  472. CHANGE_MODIFY,
  473. (b"a", F, parent_blobs[i].id),
  474. (b"a", F, merge_blob.id),
  475. )
  476. for i in r
  477. ]
  478. ]
  479. self.assertChangesForMergeEqual(expected, parents, merge)
  480. def test_tree_changes_for_merge_octopus_delete(self) -> None:
  481. blob1 = make_object(Blob, data=b"1")
  482. blob2 = make_object(Blob, data=b"3")
  483. parent1 = self.commit_tree([(b"a", blob1)])
  484. parent2 = self.commit_tree([(b"a", blob2)])
  485. parent3 = merge = self.commit_tree([])
  486. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  487. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  488. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  489. self.assertChangesForMergeEqual(
  490. [
  491. [
  492. TreeChange.delete((b"a", F, blob1.id)),
  493. TreeChange.delete((b"a", F, blob2.id)),
  494. None,
  495. ]
  496. ],
  497. [parent1, parent2, parent3],
  498. merge,
  499. )
  500. def test_tree_changes_for_merge_add_add_same_conflict(self) -> None:
  501. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  502. parent1 = self.commit_tree([(b"a", blob)])
  503. parent2 = self.commit_tree([])
  504. merge = self.commit_tree([(b"b", blob)])
  505. add = TreeChange.add((b"b", F, blob.id))
  506. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  507. def test_tree_changes_for_merge_add_exact_rename_conflict(self) -> None:
  508. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  509. parent1 = self.commit_tree([(b"a", blob)])
  510. parent2 = self.commit_tree([])
  511. merge = self.commit_tree([(b"b", blob)])
  512. self.assertChangesForMergeEqual(
  513. [
  514. [
  515. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  516. TreeChange.add((b"b", F, blob.id)),
  517. ]
  518. ],
  519. [parent1, parent2],
  520. merge,
  521. rename_detector=self.detector,
  522. )
  523. def test_tree_changes_for_merge_add_content_rename_conflict(self) -> None:
  524. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  525. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  526. parent1 = self.commit_tree([(b"a", blob1)])
  527. parent2 = self.commit_tree([])
  528. merge = self.commit_tree([(b"b", blob2)])
  529. self.assertChangesForMergeEqual(
  530. [
  531. [
  532. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  533. TreeChange.add((b"b", F, blob2.id)),
  534. ]
  535. ],
  536. [parent1, parent2],
  537. merge,
  538. rename_detector=self.detector,
  539. )
  540. def test_tree_changes_for_merge_modify_rename_conflict(self) -> None:
  541. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  542. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  543. parent1 = self.commit_tree([(b"a", blob1)])
  544. parent2 = self.commit_tree([(b"b", blob1)])
  545. merge = self.commit_tree([(b"b", blob2)])
  546. self.assertChangesForMergeEqual(
  547. [
  548. [
  549. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  550. TreeChange(CHANGE_MODIFY, (b"b", F, blob1.id), (b"b", F, blob2.id)),
  551. ]
  552. ],
  553. [parent1, parent2],
  554. merge,
  555. rename_detector=self.detector,
  556. )
  557. class RenameDetectionTest(DiffTestCase):
  558. def _do_test_count_blocks(self, count_blocks) -> None:
  559. blob = make_object(Blob, data=b"a\nb\na\n")
  560. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, count_blocks(blob))
  561. test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py)
  562. test_count_blocks_extension = ext_functest_builder(
  563. _do_test_count_blocks, _count_blocks
  564. )
  565. def _do_test_count_blocks_no_newline(self, count_blocks) -> None:
  566. blob = make_object(Blob, data=b"a\na")
  567. self.assertBlockCountEqual({b"a\n": 2, b"a": 1}, _count_blocks(blob))
  568. test_count_blocks_no_newline = functest_builder(
  569. _do_test_count_blocks_no_newline, _count_blocks_py
  570. )
  571. test_count_blocks_no_newline_extension = ext_functest_builder(
  572. _do_test_count_blocks_no_newline, _count_blocks
  573. )
  574. def assertBlockCountEqual(self, expected, got) -> None:
  575. self.assertEqual(
  576. {(hash(block) & 0xFFFFFFFF): count for (block, count) in expected.items()},
  577. {(block & 0xFFFFFFFF): count for (block, count) in got.items()},
  578. )
  579. def _do_test_count_blocks_chunks(self, count_blocks) -> None:
  580. blob = ShaFile.from_raw_chunks(Blob.type_num, [b"a\nb", b"\na\n"])
  581. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, _count_blocks(blob))
  582. test_count_blocks_chunks = functest_builder(
  583. _do_test_count_blocks_chunks, _count_blocks_py
  584. )
  585. test_count_blocks_chunks_extension = ext_functest_builder(
  586. _do_test_count_blocks_chunks, _count_blocks
  587. )
  588. def _do_test_count_blocks_long_lines(self, count_blocks) -> None:
  589. a = b"a" * 64
  590. data = a + b"xxx\ny\n" + a + b"zzz\n"
  591. blob = make_object(Blob, data=data)
  592. self.assertBlockCountEqual(
  593. {b"a" * 64: 128, b"xxx\n": 4, b"y\n": 2, b"zzz\n": 4},
  594. _count_blocks(blob),
  595. )
  596. test_count_blocks_long_lines = functest_builder(
  597. _do_test_count_blocks_long_lines, _count_blocks_py
  598. )
  599. test_count_blocks_long_lines_extension = ext_functest_builder(
  600. _do_test_count_blocks_long_lines, _count_blocks
  601. )
  602. def assertSimilar(self, expected_score, blob1, blob2) -> None:
  603. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  604. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  605. def test_similarity_score(self) -> None:
  606. blob0 = make_object(Blob, data=b"")
  607. blob1 = make_object(Blob, data=b"ab\ncd\ncd\n")
  608. blob2 = make_object(Blob, data=b"ab\n")
  609. blob3 = make_object(Blob, data=b"cd\n")
  610. blob4 = make_object(Blob, data=b"cd\ncd\n")
  611. self.assertSimilar(100, blob0, blob0)
  612. self.assertSimilar(0, blob0, blob1)
  613. self.assertSimilar(33, blob1, blob2)
  614. self.assertSimilar(33, blob1, blob3)
  615. self.assertSimilar(66, blob1, blob4)
  616. self.assertSimilar(0, blob2, blob3)
  617. self.assertSimilar(50, blob3, blob4)
  618. def test_similarity_score_cache(self) -> None:
  619. blob1 = make_object(Blob, data=b"ab\ncd\n")
  620. blob2 = make_object(Blob, data=b"ab\n")
  621. block_cache = {}
  622. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  623. self.assertEqual({blob1.id, blob2.id}, set(block_cache))
  624. def fail_chunks() -> None:
  625. self.fail("Unexpected call to as_raw_chunks()")
  626. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  627. blob1.raw_length = lambda: 6
  628. blob2.raw_length = lambda: 3
  629. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  630. def test_tree_entry_sort(self) -> None:
  631. sha = "abcd" * 10
  632. expected_entries = [
  633. TreeChange.add(TreeEntry(b"aaa", F, sha)),
  634. TreeChange(
  635. CHANGE_COPY,
  636. TreeEntry(b"bbb", F, sha),
  637. TreeEntry(b"aab", F, sha),
  638. ),
  639. TreeChange(
  640. CHANGE_MODIFY,
  641. TreeEntry(b"bbb", F, sha),
  642. TreeEntry(b"bbb", F, b"dabc" * 10),
  643. ),
  644. TreeChange(
  645. CHANGE_RENAME,
  646. TreeEntry(b"bbc", F, sha),
  647. TreeEntry(b"ddd", F, sha),
  648. ),
  649. TreeChange.delete(TreeEntry(b"ccc", F, sha)),
  650. ]
  651. for perm in permutations(expected_entries):
  652. self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key))
  653. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  654. detector = RenameDetector(self.store, **kwargs)
  655. return detector.changes_with_renames(
  656. tree1.id, tree2.id, want_unchanged=want_unchanged
  657. )
  658. def test_no_renames(self) -> None:
  659. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  660. blob2 = make_object(Blob, data=b"a\nb\ne\nf\n")
  661. blob3 = make_object(Blob, data=b"a\nb\ng\nh\n")
  662. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  663. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob3)])
  664. self.assertEqual(
  665. [TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob3.id))],
  666. self.detect_renames(tree1, tree2),
  667. )
  668. def test_exact_rename_one_to_one(self) -> None:
  669. blob1 = make_object(Blob, data=b"1")
  670. blob2 = make_object(Blob, data=b"2")
  671. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  672. tree2 = self.commit_tree([(b"c", blob1), (b"d", blob2)])
  673. self.assertEqual(
  674. [
  675. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob1.id)),
  676. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"d", F, blob2.id)),
  677. ],
  678. self.detect_renames(tree1, tree2),
  679. )
  680. def test_exact_rename_split_different_type(self) -> None:
  681. blob = make_object(Blob, data=b"/foo")
  682. tree1 = self.commit_tree([(b"a", blob, 0o100644)])
  683. tree2 = self.commit_tree([(b"a", blob, 0o120000)])
  684. self.assertEqual(
  685. [
  686. TreeChange.add((b"a", 0o120000, blob.id)),
  687. TreeChange.delete((b"a", 0o100644, blob.id)),
  688. ],
  689. self.detect_renames(tree1, tree2),
  690. )
  691. def test_exact_rename_and_different_type(self) -> None:
  692. blob1 = make_object(Blob, data=b"1")
  693. blob2 = make_object(Blob, data=b"2")
  694. tree1 = self.commit_tree([(b"a", blob1)])
  695. tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
  696. self.assertEqual(
  697. [
  698. TreeChange.add((b"a", 0o120000, blob2.id)),
  699. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  700. ],
  701. self.detect_renames(tree1, tree2),
  702. )
  703. def test_exact_rename_one_to_many(self) -> None:
  704. blob = make_object(Blob, data=b"1")
  705. tree1 = self.commit_tree([(b"a", blob)])
  706. tree2 = self.commit_tree([(b"b", blob), (b"c", blob)])
  707. self.assertEqual(
  708. [
  709. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  710. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"c", F, blob.id)),
  711. ],
  712. self.detect_renames(tree1, tree2),
  713. )
  714. def test_exact_rename_many_to_one(self) -> None:
  715. blob = make_object(Blob, data=b"1")
  716. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  717. tree2 = self.commit_tree([(b"c", blob)])
  718. self.assertEqual(
  719. [
  720. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  721. TreeChange.delete((b"b", F, blob.id)),
  722. ],
  723. self.detect_renames(tree1, tree2),
  724. )
  725. def test_exact_rename_many_to_many(self) -> None:
  726. blob = make_object(Blob, data=b"1")
  727. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  728. tree2 = self.commit_tree([(b"c", blob), (b"d", blob), (b"e", blob)])
  729. self.assertEqual(
  730. [
  731. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  732. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"e", F, blob.id)),
  733. TreeChange(CHANGE_RENAME, (b"b", F, blob.id), (b"d", F, blob.id)),
  734. ],
  735. self.detect_renames(tree1, tree2),
  736. )
  737. def test_exact_copy_modify(self) -> None:
  738. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  739. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  740. tree1 = self.commit_tree([(b"a", blob1)])
  741. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  742. self.assertEqual(
  743. [
  744. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  745. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  746. ],
  747. self.detect_renames(tree1, tree2),
  748. )
  749. def test_exact_copy_change_mode(self) -> None:
  750. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  751. tree1 = self.commit_tree([(b"a", blob)])
  752. tree2 = self.commit_tree([(b"a", blob, 0o100755), (b"b", blob)])
  753. self.assertEqual(
  754. [
  755. TreeChange(
  756. CHANGE_MODIFY,
  757. (b"a", F, blob.id),
  758. (b"a", 0o100755, blob.id),
  759. ),
  760. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id)),
  761. ],
  762. self.detect_renames(tree1, tree2),
  763. )
  764. def test_rename_threshold(self) -> None:
  765. blob1 = make_object(Blob, data=b"a\nb\nc\n")
  766. blob2 = make_object(Blob, data=b"a\nb\nd\n")
  767. tree1 = self.commit_tree([(b"a", blob1)])
  768. tree2 = self.commit_tree([(b"b", blob2)])
  769. self.assertEqual(
  770. [TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  771. self.detect_renames(tree1, tree2, rename_threshold=50),
  772. )
  773. self.assertEqual(
  774. [
  775. TreeChange.delete((b"a", F, blob1.id)),
  776. TreeChange.add((b"b", F, blob2.id)),
  777. ],
  778. self.detect_renames(tree1, tree2, rename_threshold=75),
  779. )
  780. def test_content_rename_max_files(self) -> None:
  781. blob1 = make_object(Blob, data=b"a\nb\nc\nd")
  782. blob4 = make_object(Blob, data=b"a\nb\nc\ne\n")
  783. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  784. blob3 = make_object(Blob, data=b"e\nf\ng\ni\n")
  785. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  786. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  787. self.assertEqual(
  788. [
  789. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  790. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  791. ],
  792. self.detect_renames(tree1, tree2),
  793. )
  794. self.assertEqual(
  795. [
  796. TreeChange.delete((b"a", F, blob1.id)),
  797. TreeChange.delete((b"b", F, blob2.id)),
  798. TreeChange.add((b"c", F, blob3.id)),
  799. TreeChange.add((b"d", F, blob4.id)),
  800. ],
  801. self.detect_renames(tree1, tree2, max_files=1),
  802. )
  803. def test_content_rename_one_to_one(self) -> None:
  804. b11 = make_object(Blob, data=b"a\nb\nc\nd\n")
  805. b12 = make_object(Blob, data=b"a\nb\nc\ne\n")
  806. b21 = make_object(Blob, data=b"e\nf\ng\n\nh")
  807. b22 = make_object(Blob, data=b"e\nf\ng\n\ni")
  808. tree1 = self.commit_tree([(b"a", b11), (b"b", b21)])
  809. tree2 = self.commit_tree([(b"c", b12), (b"d", b22)])
  810. self.assertEqual(
  811. [
  812. TreeChange(CHANGE_RENAME, (b"a", F, b11.id), (b"c", F, b12.id)),
  813. TreeChange(CHANGE_RENAME, (b"b", F, b21.id), (b"d", F, b22.id)),
  814. ],
  815. self.detect_renames(tree1, tree2),
  816. )
  817. def test_content_rename_one_to_one_ordering(self) -> None:
  818. blob1 = make_object(Blob, data=b"a\nb\nc\nd\ne\nf\n")
  819. blob2 = make_object(Blob, data=b"a\nb\nc\nd\ng\nh\n")
  820. # 6/10 match to blob1, 8/10 match to blob2
  821. blob3 = make_object(Blob, data=b"a\nb\nc\nd\ng\ni\n")
  822. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  823. tree2 = self.commit_tree([(b"c", blob3)])
  824. self.assertEqual(
  825. [
  826. TreeChange.delete((b"a", F, blob1.id)),
  827. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  828. ],
  829. self.detect_renames(tree1, tree2),
  830. )
  831. tree3 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  832. tree4 = self.commit_tree([(b"c", blob3)])
  833. self.assertEqual(
  834. [
  835. TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
  836. TreeChange.delete((b"b", F, blob1.id)),
  837. ],
  838. self.detect_renames(tree3, tree4),
  839. )
  840. def test_content_rename_one_to_many(self) -> None:
  841. blob1 = make_object(Blob, data=b"aa\nb\nc\nd\ne\n")
  842. blob2 = make_object(Blob, data=b"ab\nb\nc\nd\ne\n") # 8/11 match
  843. blob3 = make_object(Blob, data=b"aa\nb\nc\nd\nf\n") # 9/11 match
  844. tree1 = self.commit_tree([(b"a", blob1)])
  845. tree2 = self.commit_tree([(b"b", blob2), (b"c", blob3)])
  846. self.assertEqual(
  847. [
  848. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  849. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  850. ],
  851. self.detect_renames(tree1, tree2),
  852. )
  853. def test_content_rename_many_to_one(self) -> None:
  854. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  855. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  856. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  857. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  858. tree2 = self.commit_tree([(b"c", blob3)])
  859. self.assertEqual(
  860. [
  861. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  862. TreeChange.delete((b"b", F, blob2.id)),
  863. ],
  864. self.detect_renames(tree1, tree2),
  865. )
  866. def test_content_rename_many_to_many(self) -> None:
  867. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  868. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  869. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  870. blob4 = make_object(Blob, data=b"a\nb\nc\ng\n")
  871. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  872. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  873. # TODO(dborowitz): Distribute renames rather than greedily choosing
  874. # copies.
  875. self.assertEqual(
  876. [
  877. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  878. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  879. TreeChange.delete((b"b", F, blob2.id)),
  880. ],
  881. self.detect_renames(tree1, tree2),
  882. )
  883. def test_content_rename_with_more_deletions(self) -> None:
  884. blob1 = make_object(Blob, data=b"")
  885. tree1 = self.commit_tree(
  886. [(b"a", blob1), (b"b", blob1), (b"c", blob1), (b"d", blob1)]
  887. )
  888. tree2 = self.commit_tree([(b"e", blob1), (b"f", blob1), (b"g", blob1)])
  889. self.maxDiff = None
  890. self.assertEqual(
  891. [
  892. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
  893. TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
  894. TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
  895. TreeChange.delete((b"d", F, blob1.id)),
  896. ],
  897. self.detect_renames(tree1, tree2),
  898. )
  899. def test_content_rename_gitlink(self) -> None:
  900. blob1 = make_object(Blob, data=b"blob1")
  901. blob2 = make_object(Blob, data=b"blob2")
  902. link1 = b"1" * 40
  903. link2 = b"2" * 40
  904. tree1 = self.commit_tree([(b"a", blob1), (b"b", link1, 0o160000)])
  905. tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
  906. self.assertEqual(
  907. [
  908. TreeChange.delete((b"a", 0o100644, blob1.id)),
  909. TreeChange.delete((b"b", 0o160000, link1)),
  910. TreeChange.add((b"c", 0o100644, blob2.id)),
  911. TreeChange.add((b"d", 0o160000, link2)),
  912. ],
  913. self.detect_renames(tree1, tree2),
  914. )
  915. def test_exact_rename_swap(self) -> None:
  916. blob1 = make_object(Blob, data=b"1")
  917. blob2 = make_object(Blob, data=b"2")
  918. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  919. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  920. self.assertEqual(
  921. [
  922. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  923. TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob1.id)),
  924. ],
  925. self.detect_renames(tree1, tree2),
  926. )
  927. self.assertEqual(
  928. [
  929. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  930. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob2.id)),
  931. ],
  932. self.detect_renames(tree1, tree2, rewrite_threshold=50),
  933. )
  934. def test_content_rename_swap(self) -> None:
  935. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  936. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  937. blob3 = make_object(Blob, data=b"a\nb\nc\ne\n")
  938. blob4 = make_object(Blob, data=b"e\nf\ng\ni\n")
  939. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  940. tree2 = self.commit_tree([(b"a", blob4), (b"b", blob3)])
  941. self.assertEqual(
  942. [
  943. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob3.id)),
  944. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob4.id)),
  945. ],
  946. self.detect_renames(tree1, tree2, rewrite_threshold=60),
  947. )
  948. def test_rewrite_threshold(self) -> None:
  949. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  950. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  951. blob3 = make_object(Blob, data=b"a\nb\nf\ng\n")
  952. tree1 = self.commit_tree([(b"a", blob1)])
  953. tree2 = self.commit_tree([(b"a", blob3), (b"b", blob2)])
  954. no_renames = [
  955. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  956. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  957. ]
  958. self.assertEqual(no_renames, self.detect_renames(tree1, tree2))
  959. self.assertEqual(
  960. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40)
  961. )
  962. self.assertEqual(
  963. [
  964. TreeChange.add((b"a", F, blob3.id)),
  965. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  966. ],
  967. self.detect_renames(tree1, tree2, rewrite_threshold=80),
  968. )
  969. def test_find_copies_harder_exact(self) -> None:
  970. blob = make_object(Blob, data=b"blob")
  971. tree1 = self.commit_tree([(b"a", blob)])
  972. tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
  973. self.assertEqual(
  974. [TreeChange.add((b"b", F, blob.id))],
  975. self.detect_renames(tree1, tree2),
  976. )
  977. self.assertEqual(
  978. [TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id))],
  979. self.detect_renames(tree1, tree2, find_copies_harder=True),
  980. )
  981. def test_find_copies_harder_content(self) -> None:
  982. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  983. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  984. tree1 = self.commit_tree([(b"a", blob1)])
  985. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  986. self.assertEqual(
  987. [TreeChange.add((b"b", F, blob2.id))],
  988. self.detect_renames(tree1, tree2),
  989. )
  990. self.assertEqual(
  991. [TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  992. self.detect_renames(tree1, tree2, find_copies_harder=True),
  993. )
  994. def test_find_copies_harder_with_rewrites(self) -> None:
  995. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  996. blob_a2 = make_object(Blob, data=b"f\ng\nh\ni\n")
  997. blob_b2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  998. tree1 = self.commit_tree([(b"a", blob_a1)])
  999. tree2 = self.commit_tree([(b"a", blob_a2), (b"b", blob_b2)])
  1000. self.assertEqual(
  1001. [
  1002. TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id)),
  1003. TreeChange(CHANGE_COPY, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1004. ],
  1005. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1006. )
  1007. self.assertEqual(
  1008. [
  1009. TreeChange.add((b"a", F, blob_a2.id)),
  1010. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1011. ],
  1012. self.detect_renames(
  1013. tree1, tree2, rewrite_threshold=50, find_copies_harder=True
  1014. ),
  1015. )
  1016. def test_reuse_detector(self) -> None:
  1017. blob = make_object(Blob, data=b"blob")
  1018. tree1 = self.commit_tree([(b"a", blob)])
  1019. tree2 = self.commit_tree([(b"b", blob)])
  1020. detector = RenameDetector(self.store)
  1021. changes = [TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id))]
  1022. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1023. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1024. def test_want_unchanged(self) -> None:
  1025. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1026. blob_b = make_object(Blob, data=b"b")
  1027. blob_c2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1028. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  1029. tree2 = self.commit_tree([(b"c", blob_c2), (b"b", blob_b)])
  1030. self.assertEqual(
  1031. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id))],
  1032. self.detect_renames(tree1, tree2),
  1033. )
  1034. self.assertEqual(
  1035. [
  1036. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id)),
  1037. TreeChange(
  1038. CHANGE_UNCHANGED,
  1039. (b"b", F, blob_b.id),
  1040. (b"b", F, blob_b.id),
  1041. ),
  1042. ],
  1043. self.detect_renames(tree1, tree2, want_unchanged=True),
  1044. )