test_diff_tree.py 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as public by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Tests for file and tree diff utilities."""
  21. from itertools import permutations
  22. from dulwich.diff_tree import (
  23. CHANGE_COPY,
  24. CHANGE_MODIFY,
  25. CHANGE_RENAME,
  26. CHANGE_UNCHANGED,
  27. RenameDetector,
  28. TreeChange,
  29. _count_blocks,
  30. _count_blocks_py,
  31. _is_tree,
  32. _is_tree_py,
  33. _merge_entries,
  34. _merge_entries_py,
  35. _similarity_score,
  36. _tree_change_key,
  37. tree_changes,
  38. tree_changes_for_merge,
  39. )
  40. from dulwich.index import commit_tree
  41. from dulwich.object_store import MemoryObjectStore
  42. from dulwich.objects import Blob, ShaFile, Tree, TreeEntry
  43. from dulwich.tests.utils import F, ext_functest_builder, functest_builder, make_object
  44. from . import TestCase
  45. class DiffTestCase(TestCase):
  46. def setUp(self):
  47. super().setUp()
  48. self.store = MemoryObjectStore()
  49. self.empty_tree = self.commit_tree([])
  50. def commit_tree(self, entries):
  51. commit_blobs = []
  52. for entry in entries:
  53. if len(entry) == 2:
  54. path, obj = entry
  55. mode = F
  56. else:
  57. path, obj, mode = entry
  58. if isinstance(obj, Blob):
  59. self.store.add_object(obj)
  60. sha = obj.id
  61. else:
  62. sha = obj
  63. commit_blobs.append((path, sha, mode))
  64. return self.store[commit_tree(self.store, commit_blobs)]
  65. class TreeChangesTest(DiffTestCase):
  66. def setUp(self):
  67. super().setUp()
  68. self.detector = RenameDetector(self.store)
  69. def assertMergeFails(self, merge_entries, name, mode, sha):
  70. t = Tree()
  71. t[name] = (mode, sha)
  72. self.assertRaises((TypeError, ValueError), merge_entries, "", t, t)
  73. def _do_test_merge_entries(self, merge_entries):
  74. blob_a1 = make_object(Blob, data=b"a1")
  75. blob_a2 = make_object(Blob, data=b"a2")
  76. blob_b1 = make_object(Blob, data=b"b1")
  77. blob_c2 = make_object(Blob, data=b"c2")
  78. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644), (b"b", blob_b1, 0o100755)])
  79. tree2 = self.commit_tree([(b"a", blob_a2, 0o100644), (b"c", blob_c2, 0o100755)])
  80. self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
  81. self.assertEqual(
  82. [
  83. ((None, None, None), (b"a", 0o100644, blob_a1.id)),
  84. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  85. ],
  86. merge_entries(b"", self.empty_tree, tree1),
  87. )
  88. self.assertEqual(
  89. [
  90. ((None, None, None), (b"x/a", 0o100644, blob_a1.id)),
  91. ((None, None, None), (b"x/b", 0o100755, blob_b1.id)),
  92. ],
  93. merge_entries(b"x", self.empty_tree, tree1),
  94. )
  95. self.assertEqual(
  96. [
  97. ((b"a", 0o100644, blob_a2.id), (None, None, None)),
  98. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  99. ],
  100. merge_entries(b"", tree2, self.empty_tree),
  101. )
  102. self.assertEqual(
  103. [
  104. ((b"a", 0o100644, blob_a1.id), (b"a", 0o100644, blob_a2.id)),
  105. ((b"b", 0o100755, blob_b1.id), (None, None, None)),
  106. ((None, None, None), (b"c", 0o100755, blob_c2.id)),
  107. ],
  108. merge_entries(b"", tree1, tree2),
  109. )
  110. self.assertEqual(
  111. [
  112. ((b"a", 0o100644, blob_a2.id), (b"a", 0o100644, blob_a1.id)),
  113. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  114. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  115. ],
  116. merge_entries(b"", tree2, tree1),
  117. )
  118. self.assertMergeFails(merge_entries, 0xDEADBEEF, 0o100644, "1" * 40)
  119. self.assertMergeFails(merge_entries, b"a", b"deadbeef", "1" * 40)
  120. self.assertMergeFails(merge_entries, b"a", 0o100644, 0xDEADBEEF)
  121. test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py)
  122. test_merge_entries_extension = ext_functest_builder(
  123. _do_test_merge_entries, _merge_entries
  124. )
  125. def _do_test_is_tree(self, is_tree):
  126. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  127. self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
  128. self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
  129. self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
  130. self.assertTrue(is_tree(TreeEntry(b"a", 0o040000, b"a" * 40)))
  131. self.assertRaises(TypeError, is_tree, TreeEntry(b"a", b"x", b"a" * 40))
  132. self.assertRaises(AttributeError, is_tree, 1234)
  133. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  134. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  135. def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
  136. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  137. self.assertEqual(expected, actual)
  138. # For brevity, the following tests use tuples instead of TreeEntry objects.
  139. def test_tree_changes_empty(self):
  140. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  141. def test_tree_changes_no_changes(self):
  142. blob = make_object(Blob, data=b"blob")
  143. tree = self.commit_tree([(b"a", blob), (b"b/c", blob)])
  144. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  145. self.assertChangesEqual([], tree, tree)
  146. self.assertChangesEqual(
  147. [
  148. TreeChange(CHANGE_UNCHANGED, (b"a", F, blob.id), (b"a", F, blob.id)),
  149. TreeChange(
  150. CHANGE_UNCHANGED,
  151. (b"b/c", F, blob.id),
  152. (b"b/c", F, blob.id),
  153. ),
  154. ],
  155. tree,
  156. tree,
  157. want_unchanged=True,
  158. )
  159. def test_tree_changes_add_delete(self):
  160. blob_a = make_object(Blob, data=b"a")
  161. blob_b = make_object(Blob, data=b"b")
  162. tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
  163. self.assertChangesEqual(
  164. [
  165. TreeChange.add((b"a", 0o100644, blob_a.id)),
  166. TreeChange.add((b"x/b", 0o100755, blob_b.id)),
  167. ],
  168. self.empty_tree,
  169. tree,
  170. )
  171. self.assertChangesEqual(
  172. [
  173. TreeChange.delete((b"a", 0o100644, blob_a.id)),
  174. TreeChange.delete((b"x/b", 0o100755, blob_b.id)),
  175. ],
  176. tree,
  177. self.empty_tree,
  178. )
  179. def test_tree_changes_modify_contents(self):
  180. blob_a1 = make_object(Blob, data=b"a1")
  181. blob_a2 = make_object(Blob, data=b"a2")
  182. tree1 = self.commit_tree([(b"a", blob_a1)])
  183. tree2 = self.commit_tree([(b"a", blob_a2)])
  184. self.assertChangesEqual(
  185. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  186. tree1,
  187. tree2,
  188. )
  189. def test_tree_changes_modify_mode(self):
  190. blob_a = make_object(Blob, data=b"a")
  191. tree1 = self.commit_tree([(b"a", blob_a, 0o100644)])
  192. tree2 = self.commit_tree([(b"a", blob_a, 0o100755)])
  193. self.assertChangesEqual(
  194. [
  195. TreeChange(
  196. CHANGE_MODIFY,
  197. (b"a", 0o100644, blob_a.id),
  198. (b"a", 0o100755, blob_a.id),
  199. )
  200. ],
  201. tree1,
  202. tree2,
  203. )
  204. def test_tree_changes_change_type(self):
  205. blob_a1 = make_object(Blob, data=b"a")
  206. blob_a2 = make_object(Blob, data=b"/foo/bar")
  207. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  208. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  209. self.assertChangesEqual(
  210. [
  211. TreeChange.delete((b"a", 0o100644, blob_a1.id)),
  212. TreeChange.add((b"a", 0o120000, blob_a2.id)),
  213. ],
  214. tree1,
  215. tree2,
  216. )
  217. def test_tree_changes_change_type_same(self):
  218. blob_a1 = make_object(Blob, data=b"a")
  219. blob_a2 = make_object(Blob, data=b"/foo/bar")
  220. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  221. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  222. self.assertChangesEqual(
  223. [
  224. TreeChange(
  225. CHANGE_MODIFY,
  226. (b"a", 0o100644, blob_a1.id),
  227. (b"a", 0o120000, blob_a2.id),
  228. )
  229. ],
  230. tree1,
  231. tree2,
  232. change_type_same=True,
  233. )
  234. def test_tree_changes_to_tree(self):
  235. blob_a = make_object(Blob, data=b"a")
  236. blob_x = make_object(Blob, data=b"x")
  237. tree1 = self.commit_tree([(b"a", blob_a)])
  238. tree2 = self.commit_tree([(b"a/x", blob_x)])
  239. self.assertChangesEqual(
  240. [
  241. TreeChange.delete((b"a", F, blob_a.id)),
  242. TreeChange.add((b"a/x", F, blob_x.id)),
  243. ],
  244. tree1,
  245. tree2,
  246. )
  247. def test_tree_changes_complex(self):
  248. blob_a_1 = make_object(Blob, data=b"a1_1")
  249. blob_bx1_1 = make_object(Blob, data=b"bx1_1")
  250. blob_bx2_1 = make_object(Blob, data=b"bx2_1")
  251. blob_by1_1 = make_object(Blob, data=b"by1_1")
  252. blob_by2_1 = make_object(Blob, data=b"by2_1")
  253. tree1 = self.commit_tree(
  254. [
  255. (b"a", blob_a_1),
  256. (b"b/x/1", blob_bx1_1),
  257. (b"b/x/2", blob_bx2_1),
  258. (b"b/y/1", blob_by1_1),
  259. (b"b/y/2", blob_by2_1),
  260. ]
  261. )
  262. blob_a_2 = make_object(Blob, data=b"a1_2")
  263. blob_bx1_2 = blob_bx1_1
  264. blob_by_2 = make_object(Blob, data=b"by_2")
  265. blob_c_2 = make_object(Blob, data=b"c_2")
  266. tree2 = self.commit_tree(
  267. [
  268. (b"a", blob_a_2),
  269. (b"b/x/1", blob_bx1_2),
  270. (b"b/y", blob_by_2),
  271. (b"c", blob_c_2),
  272. ]
  273. )
  274. self.assertChangesEqual(
  275. [
  276. TreeChange(
  277. CHANGE_MODIFY,
  278. (b"a", F, blob_a_1.id),
  279. (b"a", F, blob_a_2.id),
  280. ),
  281. TreeChange.delete((b"b/x/2", F, blob_bx2_1.id)),
  282. TreeChange.add((b"b/y", F, blob_by_2.id)),
  283. TreeChange.delete((b"b/y/1", F, blob_by1_1.id)),
  284. TreeChange.delete((b"b/y/2", F, blob_by2_1.id)),
  285. TreeChange.add((b"c", F, blob_c_2.id)),
  286. ],
  287. tree1,
  288. tree2,
  289. )
  290. def test_tree_changes_name_order(self):
  291. blob = make_object(Blob, data=b"a")
  292. tree1 = self.commit_tree([(b"a", blob), (b"a.", blob), (b"a..", blob)])
  293. # Tree order is the reverse of this, so if we used tree order, 'a..'
  294. # would not be merged.
  295. tree2 = self.commit_tree([(b"a/x", blob), (b"a./x", blob), (b"a..", blob)])
  296. self.assertChangesEqual(
  297. [
  298. TreeChange.delete((b"a", F, blob.id)),
  299. TreeChange.add((b"a/x", F, blob.id)),
  300. TreeChange.delete((b"a.", F, blob.id)),
  301. TreeChange.add((b"a./x", F, blob.id)),
  302. ],
  303. tree1,
  304. tree2,
  305. )
  306. def test_tree_changes_prune(self):
  307. blob_a1 = make_object(Blob, data=b"a1")
  308. blob_a2 = make_object(Blob, data=b"a2")
  309. blob_x = make_object(Blob, data=b"x")
  310. tree1 = self.commit_tree([(b"a", blob_a1), (b"b/x", blob_x)])
  311. tree2 = self.commit_tree([(b"a", blob_a2), (b"b/x", blob_x)])
  312. # Remove identical items so lookups will fail unless we prune.
  313. subtree = self.store[tree1[b"b"][1]]
  314. for entry in subtree.items():
  315. del self.store[entry.sha]
  316. del self.store[subtree.id]
  317. self.assertChangesEqual(
  318. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  319. tree1,
  320. tree2,
  321. )
  322. def test_tree_changes_rename_detector(self):
  323. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  324. blob_a2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  325. blob_b = make_object(Blob, data=b"b")
  326. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  327. tree2 = self.commit_tree([(b"c", blob_a2), (b"b", blob_b)])
  328. detector = RenameDetector(self.store)
  329. self.assertChangesEqual(
  330. [
  331. TreeChange.delete((b"a", F, blob_a1.id)),
  332. TreeChange.add((b"c", F, blob_a2.id)),
  333. ],
  334. tree1,
  335. tree2,
  336. )
  337. self.assertChangesEqual(
  338. [
  339. TreeChange.delete((b"a", F, blob_a1.id)),
  340. TreeChange(
  341. CHANGE_UNCHANGED,
  342. (b"b", F, blob_b.id),
  343. (b"b", F, blob_b.id),
  344. ),
  345. TreeChange.add((b"c", F, blob_a2.id)),
  346. ],
  347. tree1,
  348. tree2,
  349. want_unchanged=True,
  350. )
  351. self.assertChangesEqual(
  352. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id))],
  353. tree1,
  354. tree2,
  355. rename_detector=detector,
  356. )
  357. self.assertChangesEqual(
  358. [
  359. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id)),
  360. TreeChange(
  361. CHANGE_UNCHANGED,
  362. (b"b", F, blob_b.id),
  363. (b"b", F, blob_b.id),
  364. ),
  365. ],
  366. tree1,
  367. tree2,
  368. rename_detector=detector,
  369. want_unchanged=True,
  370. )
  371. def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree, **kwargs):
  372. parent_tree_ids = [t.id for t in parent_trees]
  373. actual = list(
  374. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  375. )
  376. self.assertEqual(expected, actual)
  377. parent_tree_ids.reverse()
  378. expected = [list(reversed(cs)) for cs in expected]
  379. actual = list(
  380. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  381. )
  382. self.assertEqual(expected, actual)
  383. def test_tree_changes_for_merge_add_no_conflict(self):
  384. blob = make_object(Blob, data=b"blob")
  385. parent1 = self.commit_tree([])
  386. parent2 = merge = self.commit_tree([(b"a", blob)])
  387. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  388. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  389. def test_tree_changes_for_merge_add_modify_conflict(self):
  390. blob1 = make_object(Blob, data=b"1")
  391. blob2 = make_object(Blob, data=b"2")
  392. parent1 = self.commit_tree([])
  393. parent2 = self.commit_tree([(b"a", blob1)])
  394. merge = self.commit_tree([(b"a", blob2)])
  395. self.assertChangesForMergeEqual(
  396. [
  397. [
  398. TreeChange.add((b"a", F, blob2.id)),
  399. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  400. ]
  401. ],
  402. [parent1, parent2],
  403. merge,
  404. )
  405. def test_tree_changes_for_merge_modify_modify_conflict(self):
  406. blob1 = make_object(Blob, data=b"1")
  407. blob2 = make_object(Blob, data=b"2")
  408. blob3 = make_object(Blob, data=b"3")
  409. parent1 = self.commit_tree([(b"a", blob1)])
  410. parent2 = self.commit_tree([(b"a", blob2)])
  411. merge = self.commit_tree([(b"a", blob3)])
  412. self.assertChangesForMergeEqual(
  413. [
  414. [
  415. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  416. TreeChange(CHANGE_MODIFY, (b"a", F, blob2.id), (b"a", F, blob3.id)),
  417. ]
  418. ],
  419. [parent1, parent2],
  420. merge,
  421. )
  422. def test_tree_changes_for_merge_modify_no_conflict(self):
  423. blob1 = make_object(Blob, data=b"1")
  424. blob2 = make_object(Blob, data=b"2")
  425. parent1 = self.commit_tree([(b"a", blob1)])
  426. parent2 = merge = self.commit_tree([(b"a", blob2)])
  427. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  428. def test_tree_changes_for_merge_delete_delete_conflict(self):
  429. blob1 = make_object(Blob, data=b"1")
  430. blob2 = make_object(Blob, data=b"2")
  431. parent1 = self.commit_tree([(b"a", blob1)])
  432. parent2 = self.commit_tree([(b"a", blob2)])
  433. merge = self.commit_tree([])
  434. self.assertChangesForMergeEqual(
  435. [
  436. [
  437. TreeChange.delete((b"a", F, blob1.id)),
  438. TreeChange.delete((b"a", F, blob2.id)),
  439. ]
  440. ],
  441. [parent1, parent2],
  442. merge,
  443. )
  444. def test_tree_changes_for_merge_delete_no_conflict(self):
  445. blob = make_object(Blob, data=b"blob")
  446. has = self.commit_tree([(b"a", blob)])
  447. doesnt_have = self.commit_tree([])
  448. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  449. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  450. def test_tree_changes_for_merge_octopus_no_conflict(self):
  451. r = list(range(5))
  452. blobs = [make_object(Blob, data=bytes(i)) for i in r]
  453. parents = [self.commit_tree([(b"a", blobs[i])]) for i in r]
  454. for i in r:
  455. # Take the SHA from each of the parents.
  456. self.assertChangesForMergeEqual([], parents, parents[i])
  457. def test_tree_changes_for_merge_octopus_modify_conflict(self):
  458. # Because the octopus merge strategy is limited, I doubt it's possible
  459. # to create this with the git command line. But the output is well-
  460. # defined, so test it anyway.
  461. r = list(range(5))
  462. parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
  463. merge_blob = make_object(Blob, data=b"merge")
  464. parents = [self.commit_tree([(b"a", parent_blobs[i])]) for i in r]
  465. merge = self.commit_tree([(b"a", merge_blob)])
  466. expected = [
  467. [
  468. TreeChange(
  469. CHANGE_MODIFY,
  470. (b"a", F, parent_blobs[i].id),
  471. (b"a", F, merge_blob.id),
  472. )
  473. for i in r
  474. ]
  475. ]
  476. self.assertChangesForMergeEqual(expected, parents, merge)
  477. def test_tree_changes_for_merge_octopus_delete(self):
  478. blob1 = make_object(Blob, data=b"1")
  479. blob2 = make_object(Blob, data=b"3")
  480. parent1 = self.commit_tree([(b"a", blob1)])
  481. parent2 = self.commit_tree([(b"a", blob2)])
  482. parent3 = merge = self.commit_tree([])
  483. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  484. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  485. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  486. self.assertChangesForMergeEqual(
  487. [
  488. [
  489. TreeChange.delete((b"a", F, blob1.id)),
  490. TreeChange.delete((b"a", F, blob2.id)),
  491. None,
  492. ]
  493. ],
  494. [parent1, parent2, parent3],
  495. merge,
  496. )
  497. def test_tree_changes_for_merge_add_add_same_conflict(self):
  498. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  499. parent1 = self.commit_tree([(b"a", blob)])
  500. parent2 = self.commit_tree([])
  501. merge = self.commit_tree([(b"b", blob)])
  502. add = TreeChange.add((b"b", F, blob.id))
  503. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  504. def test_tree_changes_for_merge_add_exact_rename_conflict(self):
  505. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  506. parent1 = self.commit_tree([(b"a", blob)])
  507. parent2 = self.commit_tree([])
  508. merge = self.commit_tree([(b"b", blob)])
  509. self.assertChangesForMergeEqual(
  510. [
  511. [
  512. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  513. TreeChange.add((b"b", F, blob.id)),
  514. ]
  515. ],
  516. [parent1, parent2],
  517. merge,
  518. rename_detector=self.detector,
  519. )
  520. def test_tree_changes_for_merge_add_content_rename_conflict(self):
  521. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  522. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  523. parent1 = self.commit_tree([(b"a", blob1)])
  524. parent2 = self.commit_tree([])
  525. merge = self.commit_tree([(b"b", blob2)])
  526. self.assertChangesForMergeEqual(
  527. [
  528. [
  529. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  530. TreeChange.add((b"b", F, blob2.id)),
  531. ]
  532. ],
  533. [parent1, parent2],
  534. merge,
  535. rename_detector=self.detector,
  536. )
  537. def test_tree_changes_for_merge_modify_rename_conflict(self):
  538. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  539. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  540. parent1 = self.commit_tree([(b"a", blob1)])
  541. parent2 = self.commit_tree([(b"b", blob1)])
  542. merge = self.commit_tree([(b"b", blob2)])
  543. self.assertChangesForMergeEqual(
  544. [
  545. [
  546. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  547. TreeChange(CHANGE_MODIFY, (b"b", F, blob1.id), (b"b", F, blob2.id)),
  548. ]
  549. ],
  550. [parent1, parent2],
  551. merge,
  552. rename_detector=self.detector,
  553. )
  554. class RenameDetectionTest(DiffTestCase):
  555. def _do_test_count_blocks(self, count_blocks):
  556. blob = make_object(Blob, data=b"a\nb\na\n")
  557. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, count_blocks(blob))
  558. test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py)
  559. test_count_blocks_extension = ext_functest_builder(
  560. _do_test_count_blocks, _count_blocks
  561. )
  562. def _do_test_count_blocks_no_newline(self, count_blocks):
  563. blob = make_object(Blob, data=b"a\na")
  564. self.assertBlockCountEqual({b"a\n": 2, b"a": 1}, _count_blocks(blob))
  565. test_count_blocks_no_newline = functest_builder(
  566. _do_test_count_blocks_no_newline, _count_blocks_py
  567. )
  568. test_count_blocks_no_newline_extension = ext_functest_builder(
  569. _do_test_count_blocks_no_newline, _count_blocks
  570. )
  571. def assertBlockCountEqual(self, expected, got):
  572. self.assertEqual(
  573. {(hash(l) & 0xFFFFFFFF): c for (l, c) in expected.items()},
  574. {(h & 0xFFFFFFFF): c for (h, c) in got.items()},
  575. )
  576. def _do_test_count_blocks_chunks(self, count_blocks):
  577. blob = ShaFile.from_raw_chunks(Blob.type_num, [b"a\nb", b"\na\n"])
  578. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, _count_blocks(blob))
  579. test_count_blocks_chunks = functest_builder(
  580. _do_test_count_blocks_chunks, _count_blocks_py
  581. )
  582. test_count_blocks_chunks_extension = ext_functest_builder(
  583. _do_test_count_blocks_chunks, _count_blocks
  584. )
  585. def _do_test_count_blocks_long_lines(self, count_blocks):
  586. a = b"a" * 64
  587. data = a + b"xxx\ny\n" + a + b"zzz\n"
  588. blob = make_object(Blob, data=data)
  589. self.assertBlockCountEqual(
  590. {b"a" * 64: 128, b"xxx\n": 4, b"y\n": 2, b"zzz\n": 4},
  591. _count_blocks(blob),
  592. )
  593. test_count_blocks_long_lines = functest_builder(
  594. _do_test_count_blocks_long_lines, _count_blocks_py
  595. )
  596. test_count_blocks_long_lines_extension = ext_functest_builder(
  597. _do_test_count_blocks_long_lines, _count_blocks
  598. )
  599. def assertSimilar(self, expected_score, blob1, blob2):
  600. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  601. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  602. def test_similarity_score(self):
  603. blob0 = make_object(Blob, data=b"")
  604. blob1 = make_object(Blob, data=b"ab\ncd\ncd\n")
  605. blob2 = make_object(Blob, data=b"ab\n")
  606. blob3 = make_object(Blob, data=b"cd\n")
  607. blob4 = make_object(Blob, data=b"cd\ncd\n")
  608. self.assertSimilar(100, blob0, blob0)
  609. self.assertSimilar(0, blob0, blob1)
  610. self.assertSimilar(33, blob1, blob2)
  611. self.assertSimilar(33, blob1, blob3)
  612. self.assertSimilar(66, blob1, blob4)
  613. self.assertSimilar(0, blob2, blob3)
  614. self.assertSimilar(50, blob3, blob4)
  615. def test_similarity_score_cache(self):
  616. blob1 = make_object(Blob, data=b"ab\ncd\n")
  617. blob2 = make_object(Blob, data=b"ab\n")
  618. block_cache = {}
  619. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  620. self.assertEqual({blob1.id, blob2.id}, set(block_cache))
  621. def fail_chunks():
  622. self.fail("Unexpected call to as_raw_chunks()")
  623. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  624. blob1.raw_length = lambda: 6
  625. blob2.raw_length = lambda: 3
  626. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  627. def test_tree_entry_sort(self):
  628. sha = "abcd" * 10
  629. expected_entries = [
  630. TreeChange.add(TreeEntry(b"aaa", F, sha)),
  631. TreeChange(
  632. CHANGE_COPY,
  633. TreeEntry(b"bbb", F, sha),
  634. TreeEntry(b"aab", F, sha),
  635. ),
  636. TreeChange(
  637. CHANGE_MODIFY,
  638. TreeEntry(b"bbb", F, sha),
  639. TreeEntry(b"bbb", F, b"dabc" * 10),
  640. ),
  641. TreeChange(
  642. CHANGE_RENAME,
  643. TreeEntry(b"bbc", F, sha),
  644. TreeEntry(b"ddd", F, sha),
  645. ),
  646. TreeChange.delete(TreeEntry(b"ccc", F, sha)),
  647. ]
  648. for perm in permutations(expected_entries):
  649. self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key))
  650. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  651. detector = RenameDetector(self.store, **kwargs)
  652. return detector.changes_with_renames(
  653. tree1.id, tree2.id, want_unchanged=want_unchanged
  654. )
  655. def test_no_renames(self):
  656. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  657. blob2 = make_object(Blob, data=b"a\nb\ne\nf\n")
  658. blob3 = make_object(Blob, data=b"a\nb\ng\nh\n")
  659. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  660. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob3)])
  661. self.assertEqual(
  662. [TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob3.id))],
  663. self.detect_renames(tree1, tree2),
  664. )
  665. def test_exact_rename_one_to_one(self):
  666. blob1 = make_object(Blob, data=b"1")
  667. blob2 = make_object(Blob, data=b"2")
  668. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  669. tree2 = self.commit_tree([(b"c", blob1), (b"d", blob2)])
  670. self.assertEqual(
  671. [
  672. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob1.id)),
  673. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"d", F, blob2.id)),
  674. ],
  675. self.detect_renames(tree1, tree2),
  676. )
  677. def test_exact_rename_split_different_type(self):
  678. blob = make_object(Blob, data=b"/foo")
  679. tree1 = self.commit_tree([(b"a", blob, 0o100644)])
  680. tree2 = self.commit_tree([(b"a", blob, 0o120000)])
  681. self.assertEqual(
  682. [
  683. TreeChange.add((b"a", 0o120000, blob.id)),
  684. TreeChange.delete((b"a", 0o100644, blob.id)),
  685. ],
  686. self.detect_renames(tree1, tree2),
  687. )
  688. def test_exact_rename_and_different_type(self):
  689. blob1 = make_object(Blob, data=b"1")
  690. blob2 = make_object(Blob, data=b"2")
  691. tree1 = self.commit_tree([(b"a", blob1)])
  692. tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
  693. self.assertEqual(
  694. [
  695. TreeChange.add((b"a", 0o120000, blob2.id)),
  696. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  697. ],
  698. self.detect_renames(tree1, tree2),
  699. )
  700. def test_exact_rename_one_to_many(self):
  701. blob = make_object(Blob, data=b"1")
  702. tree1 = self.commit_tree([(b"a", blob)])
  703. tree2 = self.commit_tree([(b"b", blob), (b"c", blob)])
  704. self.assertEqual(
  705. [
  706. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  707. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"c", F, blob.id)),
  708. ],
  709. self.detect_renames(tree1, tree2),
  710. )
  711. def test_exact_rename_many_to_one(self):
  712. blob = make_object(Blob, data=b"1")
  713. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  714. tree2 = self.commit_tree([(b"c", blob)])
  715. self.assertEqual(
  716. [
  717. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  718. TreeChange.delete((b"b", F, blob.id)),
  719. ],
  720. self.detect_renames(tree1, tree2),
  721. )
  722. def test_exact_rename_many_to_many(self):
  723. blob = make_object(Blob, data=b"1")
  724. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  725. tree2 = self.commit_tree([(b"c", blob), (b"d", blob), (b"e", blob)])
  726. self.assertEqual(
  727. [
  728. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  729. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"e", F, blob.id)),
  730. TreeChange(CHANGE_RENAME, (b"b", F, blob.id), (b"d", F, blob.id)),
  731. ],
  732. self.detect_renames(tree1, tree2),
  733. )
  734. def test_exact_copy_modify(self):
  735. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  736. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  737. tree1 = self.commit_tree([(b"a", blob1)])
  738. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  739. self.assertEqual(
  740. [
  741. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  742. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  743. ],
  744. self.detect_renames(tree1, tree2),
  745. )
  746. def test_exact_copy_change_mode(self):
  747. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  748. tree1 = self.commit_tree([(b"a", blob)])
  749. tree2 = self.commit_tree([(b"a", blob, 0o100755), (b"b", blob)])
  750. self.assertEqual(
  751. [
  752. TreeChange(
  753. CHANGE_MODIFY,
  754. (b"a", F, blob.id),
  755. (b"a", 0o100755, blob.id),
  756. ),
  757. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id)),
  758. ],
  759. self.detect_renames(tree1, tree2),
  760. )
  761. def test_rename_threshold(self):
  762. blob1 = make_object(Blob, data=b"a\nb\nc\n")
  763. blob2 = make_object(Blob, data=b"a\nb\nd\n")
  764. tree1 = self.commit_tree([(b"a", blob1)])
  765. tree2 = self.commit_tree([(b"b", blob2)])
  766. self.assertEqual(
  767. [TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  768. self.detect_renames(tree1, tree2, rename_threshold=50),
  769. )
  770. self.assertEqual(
  771. [
  772. TreeChange.delete((b"a", F, blob1.id)),
  773. TreeChange.add((b"b", F, blob2.id)),
  774. ],
  775. self.detect_renames(tree1, tree2, rename_threshold=75),
  776. )
  777. def test_content_rename_max_files(self):
  778. blob1 = make_object(Blob, data=b"a\nb\nc\nd")
  779. blob4 = make_object(Blob, data=b"a\nb\nc\ne\n")
  780. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  781. blob3 = make_object(Blob, data=b"e\nf\ng\ni\n")
  782. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  783. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  784. self.assertEqual(
  785. [
  786. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  787. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  788. ],
  789. self.detect_renames(tree1, tree2),
  790. )
  791. self.assertEqual(
  792. [
  793. TreeChange.delete((b"a", F, blob1.id)),
  794. TreeChange.delete((b"b", F, blob2.id)),
  795. TreeChange.add((b"c", F, blob3.id)),
  796. TreeChange.add((b"d", F, blob4.id)),
  797. ],
  798. self.detect_renames(tree1, tree2, max_files=1),
  799. )
  800. def test_content_rename_one_to_one(self):
  801. b11 = make_object(Blob, data=b"a\nb\nc\nd\n")
  802. b12 = make_object(Blob, data=b"a\nb\nc\ne\n")
  803. b21 = make_object(Blob, data=b"e\nf\ng\n\nh")
  804. b22 = make_object(Blob, data=b"e\nf\ng\n\ni")
  805. tree1 = self.commit_tree([(b"a", b11), (b"b", b21)])
  806. tree2 = self.commit_tree([(b"c", b12), (b"d", b22)])
  807. self.assertEqual(
  808. [
  809. TreeChange(CHANGE_RENAME, (b"a", F, b11.id), (b"c", F, b12.id)),
  810. TreeChange(CHANGE_RENAME, (b"b", F, b21.id), (b"d", F, b22.id)),
  811. ],
  812. self.detect_renames(tree1, tree2),
  813. )
  814. def test_content_rename_one_to_one_ordering(self):
  815. blob1 = make_object(Blob, data=b"a\nb\nc\nd\ne\nf\n")
  816. blob2 = make_object(Blob, data=b"a\nb\nc\nd\ng\nh\n")
  817. # 6/10 match to blob1, 8/10 match to blob2
  818. blob3 = make_object(Blob, data=b"a\nb\nc\nd\ng\ni\n")
  819. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  820. tree2 = self.commit_tree([(b"c", blob3)])
  821. self.assertEqual(
  822. [
  823. TreeChange.delete((b"a", F, blob1.id)),
  824. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  825. ],
  826. self.detect_renames(tree1, tree2),
  827. )
  828. tree3 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  829. tree4 = self.commit_tree([(b"c", blob3)])
  830. self.assertEqual(
  831. [
  832. TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
  833. TreeChange.delete((b"b", F, blob1.id)),
  834. ],
  835. self.detect_renames(tree3, tree4),
  836. )
  837. def test_content_rename_one_to_many(self):
  838. blob1 = make_object(Blob, data=b"aa\nb\nc\nd\ne\n")
  839. blob2 = make_object(Blob, data=b"ab\nb\nc\nd\ne\n") # 8/11 match
  840. blob3 = make_object(Blob, data=b"aa\nb\nc\nd\nf\n") # 9/11 match
  841. tree1 = self.commit_tree([(b"a", blob1)])
  842. tree2 = self.commit_tree([(b"b", blob2), (b"c", blob3)])
  843. self.assertEqual(
  844. [
  845. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  846. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  847. ],
  848. self.detect_renames(tree1, tree2),
  849. )
  850. def test_content_rename_many_to_one(self):
  851. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  852. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  853. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  854. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  855. tree2 = self.commit_tree([(b"c", blob3)])
  856. self.assertEqual(
  857. [
  858. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  859. TreeChange.delete((b"b", F, blob2.id)),
  860. ],
  861. self.detect_renames(tree1, tree2),
  862. )
  863. def test_content_rename_many_to_many(self):
  864. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  865. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  866. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  867. blob4 = make_object(Blob, data=b"a\nb\nc\ng\n")
  868. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  869. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  870. # TODO(dborowitz): Distribute renames rather than greedily choosing
  871. # copies.
  872. self.assertEqual(
  873. [
  874. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  875. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  876. TreeChange.delete((b"b", F, blob2.id)),
  877. ],
  878. self.detect_renames(tree1, tree2),
  879. )
  880. def test_content_rename_with_more_deletions(self):
  881. blob1 = make_object(Blob, data=b"")
  882. tree1 = self.commit_tree(
  883. [(b"a", blob1), (b"b", blob1), (b"c", blob1), (b"d", blob1)]
  884. )
  885. tree2 = self.commit_tree([(b"e", blob1), (b"f", blob1), (b"g", blob1)])
  886. self.maxDiff = None
  887. self.assertEqual(
  888. [
  889. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
  890. TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
  891. TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
  892. TreeChange.delete((b"d", F, blob1.id)),
  893. ],
  894. self.detect_renames(tree1, tree2),
  895. )
  896. def test_content_rename_gitlink(self):
  897. blob1 = make_object(Blob, data=b"blob1")
  898. blob2 = make_object(Blob, data=b"blob2")
  899. link1 = b"1" * 40
  900. link2 = b"2" * 40
  901. tree1 = self.commit_tree([(b"a", blob1), (b"b", link1, 0o160000)])
  902. tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
  903. self.assertEqual(
  904. [
  905. TreeChange.delete((b"a", 0o100644, blob1.id)),
  906. TreeChange.delete((b"b", 0o160000, link1)),
  907. TreeChange.add((b"c", 0o100644, blob2.id)),
  908. TreeChange.add((b"d", 0o160000, link2)),
  909. ],
  910. self.detect_renames(tree1, tree2),
  911. )
  912. def test_exact_rename_swap(self):
  913. blob1 = make_object(Blob, data=b"1")
  914. blob2 = make_object(Blob, data=b"2")
  915. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  916. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  917. self.assertEqual(
  918. [
  919. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  920. TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob1.id)),
  921. ],
  922. self.detect_renames(tree1, tree2),
  923. )
  924. self.assertEqual(
  925. [
  926. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  927. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob2.id)),
  928. ],
  929. self.detect_renames(tree1, tree2, rewrite_threshold=50),
  930. )
  931. def test_content_rename_swap(self):
  932. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  933. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  934. blob3 = make_object(Blob, data=b"a\nb\nc\ne\n")
  935. blob4 = make_object(Blob, data=b"e\nf\ng\ni\n")
  936. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  937. tree2 = self.commit_tree([(b"a", blob4), (b"b", blob3)])
  938. self.assertEqual(
  939. [
  940. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob3.id)),
  941. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob4.id)),
  942. ],
  943. self.detect_renames(tree1, tree2, rewrite_threshold=60),
  944. )
  945. def test_rewrite_threshold(self):
  946. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  947. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  948. blob3 = make_object(Blob, data=b"a\nb\nf\ng\n")
  949. tree1 = self.commit_tree([(b"a", blob1)])
  950. tree2 = self.commit_tree([(b"a", blob3), (b"b", blob2)])
  951. no_renames = [
  952. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  953. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  954. ]
  955. self.assertEqual(no_renames, self.detect_renames(tree1, tree2))
  956. self.assertEqual(
  957. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40)
  958. )
  959. self.assertEqual(
  960. [
  961. TreeChange.add((b"a", F, blob3.id)),
  962. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  963. ],
  964. self.detect_renames(tree1, tree2, rewrite_threshold=80),
  965. )
  966. def test_find_copies_harder_exact(self):
  967. blob = make_object(Blob, data=b"blob")
  968. tree1 = self.commit_tree([(b"a", blob)])
  969. tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
  970. self.assertEqual(
  971. [TreeChange.add((b"b", F, blob.id))],
  972. self.detect_renames(tree1, tree2),
  973. )
  974. self.assertEqual(
  975. [TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id))],
  976. self.detect_renames(tree1, tree2, find_copies_harder=True),
  977. )
  978. def test_find_copies_harder_content(self):
  979. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  980. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  981. tree1 = self.commit_tree([(b"a", blob1)])
  982. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  983. self.assertEqual(
  984. [TreeChange.add((b"b", F, blob2.id))],
  985. self.detect_renames(tree1, tree2),
  986. )
  987. self.assertEqual(
  988. [TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  989. self.detect_renames(tree1, tree2, find_copies_harder=True),
  990. )
  991. def test_find_copies_harder_with_rewrites(self):
  992. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  993. blob_a2 = make_object(Blob, data=b"f\ng\nh\ni\n")
  994. blob_b2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  995. tree1 = self.commit_tree([(b"a", blob_a1)])
  996. tree2 = self.commit_tree([(b"a", blob_a2), (b"b", blob_b2)])
  997. self.assertEqual(
  998. [
  999. TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id)),
  1000. TreeChange(CHANGE_COPY, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1001. ],
  1002. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1003. )
  1004. self.assertEqual(
  1005. [
  1006. TreeChange.add((b"a", F, blob_a2.id)),
  1007. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1008. ],
  1009. self.detect_renames(
  1010. tree1, tree2, rewrite_threshold=50, find_copies_harder=True
  1011. ),
  1012. )
  1013. def test_reuse_detector(self):
  1014. blob = make_object(Blob, data=b"blob")
  1015. tree1 = self.commit_tree([(b"a", blob)])
  1016. tree2 = self.commit_tree([(b"b", blob)])
  1017. detector = RenameDetector(self.store)
  1018. changes = [TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id))]
  1019. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1020. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1021. def test_want_unchanged(self):
  1022. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1023. blob_b = make_object(Blob, data=b"b")
  1024. blob_c2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1025. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  1026. tree2 = self.commit_tree([(b"c", blob_c2), (b"b", blob_b)])
  1027. self.assertEqual(
  1028. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id))],
  1029. self.detect_renames(tree1, tree2),
  1030. )
  1031. self.assertEqual(
  1032. [
  1033. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id)),
  1034. TreeChange(
  1035. CHANGE_UNCHANGED,
  1036. (b"b", F, blob_b.id),
  1037. (b"b", F, blob_b.id),
  1038. ),
  1039. ],
  1040. self.detect_renames(tree1, tree2, want_unchanged=True),
  1041. )