test_diff_tree.py 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Tests for file and tree diff utilities."""
  19. from itertools import permutations
  20. from dulwich.diff_tree import (
  21. CHANGE_MODIFY,
  22. CHANGE_RENAME,
  23. CHANGE_COPY,
  24. CHANGE_UNCHANGED,
  25. TreeChange,
  26. _merge_entries,
  27. _merge_entries_py,
  28. tree_changes,
  29. tree_changes_for_merge,
  30. _count_blocks,
  31. _count_blocks_py,
  32. _similarity_score,
  33. _tree_change_key,
  34. RenameDetector,
  35. _is_tree,
  36. _is_tree_py
  37. )
  38. from dulwich.index import (
  39. commit_tree,
  40. )
  41. from dulwich.object_store import (
  42. MemoryObjectStore,
  43. )
  44. from dulwich.objects import (
  45. ShaFile,
  46. Blob,
  47. TreeEntry,
  48. Tree,
  49. )
  50. from dulwich.tests import (
  51. TestCase,
  52. )
  53. from dulwich.tests.utils import (
  54. F,
  55. make_object,
  56. functest_builder,
  57. ext_functest_builder,
  58. )
  59. class DiffTestCase(TestCase):
  60. def setUp(self):
  61. super(DiffTestCase, self).setUp()
  62. self.store = MemoryObjectStore()
  63. self.empty_tree = self.commit_tree([])
  64. def commit_tree(self, entries):
  65. commit_blobs = []
  66. for entry in entries:
  67. if len(entry) == 2:
  68. path, obj = entry
  69. mode = F
  70. else:
  71. path, obj, mode = entry
  72. if isinstance(obj, Blob):
  73. self.store.add_object(obj)
  74. sha = obj.id
  75. else:
  76. sha = obj
  77. commit_blobs.append((path, sha, mode))
  78. return self.store[commit_tree(self.store, commit_blobs)]
  79. class TreeChangesTest(DiffTestCase):
  80. def setUp(self):
  81. super(TreeChangesTest, self).setUp()
  82. self.detector = RenameDetector(self.store)
  83. def assertMergeFails(self, merge_entries, name, mode, sha):
  84. t = Tree()
  85. t[name] = (mode, sha)
  86. self.assertRaises(TypeError, merge_entries, '', t, t)
  87. def _do_test_merge_entries(self, merge_entries):
  88. blob_a1 = make_object(Blob, data='a1')
  89. blob_a2 = make_object(Blob, data='a2')
  90. blob_b1 = make_object(Blob, data='b1')
  91. blob_c2 = make_object(Blob, data='c2')
  92. tree1 = self.commit_tree([('a', blob_a1, 0o100644),
  93. ('b', blob_b1, 0o100755)])
  94. tree2 = self.commit_tree([('a', blob_a2, 0o100644),
  95. ('c', blob_c2, 0o100755)])
  96. self.assertEqual([], merge_entries('', self.empty_tree,
  97. self.empty_tree))
  98. self.assertEqual([
  99. ((None, None, None), ('a', 0o100644, blob_a1.id)),
  100. ((None, None, None), ('b', 0o100755, blob_b1.id)),
  101. ], merge_entries('', self.empty_tree, tree1))
  102. self.assertEqual([
  103. ((None, None, None), ('x/a', 0o100644, blob_a1.id)),
  104. ((None, None, None), ('x/b', 0o100755, blob_b1.id)),
  105. ], merge_entries('x', self.empty_tree, tree1))
  106. self.assertEqual([
  107. (('a', 0o100644, blob_a2.id), (None, None, None)),
  108. (('c', 0o100755, blob_c2.id), (None, None, None)),
  109. ], merge_entries('', tree2, self.empty_tree))
  110. self.assertEqual([
  111. (('a', 0o100644, blob_a1.id), ('a', 0o100644, blob_a2.id)),
  112. (('b', 0o100755, blob_b1.id), (None, None, None)),
  113. ((None, None, None), ('c', 0o100755, blob_c2.id)),
  114. ], merge_entries('', tree1, tree2))
  115. self.assertEqual([
  116. (('a', 0o100644, blob_a2.id), ('a', 0o100644, blob_a1.id)),
  117. ((None, None, None), ('b', 0o100755, blob_b1.id)),
  118. (('c', 0o100755, blob_c2.id), (None, None, None)),
  119. ], merge_entries('', tree2, tree1))
  120. self.assertMergeFails(merge_entries, 0xdeadbeef, 0o100644, '1' * 40)
  121. self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
  122. self.assertMergeFails(merge_entries, 'a', 0o100644, 0xdeadbeef)
  123. test_merge_entries = functest_builder(_do_test_merge_entries,
  124. _merge_entries_py)
  125. test_merge_entries_extension = ext_functest_builder(_do_test_merge_entries,
  126. _merge_entries)
  127. def _do_test_is_tree(self, is_tree):
  128. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  129. self.assertFalse(is_tree(TreeEntry('a', 0o100644, 'a' * 40)))
  130. self.assertFalse(is_tree(TreeEntry('a', 0o100755, 'a' * 40)))
  131. self.assertFalse(is_tree(TreeEntry('a', 0o120000, 'a' * 40)))
  132. self.assertTrue(is_tree(TreeEntry('a', 0o040000, 'a' * 40)))
  133. self.assertRaises(TypeError, is_tree, TreeEntry('a', 'x', 'a' * 40))
  134. self.assertRaises(AttributeError, is_tree, 1234)
  135. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  136. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  137. def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
  138. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  139. self.assertEqual(expected, actual)
  140. # For brevity, the following tests use tuples instead of TreeEntry objects.
  141. def test_tree_changes_empty(self):
  142. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  143. def test_tree_changes_no_changes(self):
  144. blob = make_object(Blob, data='blob')
  145. tree = self.commit_tree([('a', blob), ('b/c', blob)])
  146. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  147. self.assertChangesEqual([], tree, tree)
  148. self.assertChangesEqual(
  149. [TreeChange(CHANGE_UNCHANGED, ('a', F, blob.id), ('a', F, blob.id)),
  150. TreeChange(CHANGE_UNCHANGED, ('b/c', F, blob.id),
  151. ('b/c', F, blob.id))],
  152. tree, tree, want_unchanged=True)
  153. def test_tree_changes_add_delete(self):
  154. blob_a = make_object(Blob, data='a')
  155. blob_b = make_object(Blob, data='b')
  156. tree = self.commit_tree([('a', blob_a, 0o100644),
  157. ('x/b', blob_b, 0o100755)])
  158. self.assertChangesEqual(
  159. [TreeChange.add(('a', 0o100644, blob_a.id)),
  160. TreeChange.add(('x/b', 0o100755, blob_b.id))],
  161. self.empty_tree, tree)
  162. self.assertChangesEqual(
  163. [TreeChange.delete(('a', 0o100644, blob_a.id)),
  164. TreeChange.delete(('x/b', 0o100755, blob_b.id))],
  165. tree, self.empty_tree)
  166. def test_tree_changes_modify_contents(self):
  167. blob_a1 = make_object(Blob, data='a1')
  168. blob_a2 = make_object(Blob, data='a2')
  169. tree1 = self.commit_tree([('a', blob_a1)])
  170. tree2 = self.commit_tree([('a', blob_a2)])
  171. self.assertChangesEqual(
  172. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  173. ('a', F, blob_a2.id))], tree1, tree2)
  174. def test_tree_changes_modify_mode(self):
  175. blob_a = make_object(Blob, data='a')
  176. tree1 = self.commit_tree([('a', blob_a, 0o100644)])
  177. tree2 = self.commit_tree([('a', blob_a, 0o100755)])
  178. self.assertChangesEqual(
  179. [TreeChange(CHANGE_MODIFY, ('a', 0o100644, blob_a.id),
  180. ('a', 0o100755, blob_a.id))], tree1, tree2)
  181. def test_tree_changes_change_type(self):
  182. blob_a1 = make_object(Blob, data='a')
  183. blob_a2 = make_object(Blob, data='/foo/bar')
  184. tree1 = self.commit_tree([('a', blob_a1, 0o100644)])
  185. tree2 = self.commit_tree([('a', blob_a2, 0o120000)])
  186. self.assertChangesEqual(
  187. [TreeChange.delete(('a', 0o100644, blob_a1.id)),
  188. TreeChange.add(('a', 0o120000, blob_a2.id))],
  189. tree1, tree2)
  190. def test_tree_changes_to_tree(self):
  191. blob_a = make_object(Blob, data='a')
  192. blob_x = make_object(Blob, data='x')
  193. tree1 = self.commit_tree([('a', blob_a)])
  194. tree2 = self.commit_tree([('a/x', blob_x)])
  195. self.assertChangesEqual(
  196. [TreeChange.delete(('a', F, blob_a.id)),
  197. TreeChange.add(('a/x', F, blob_x.id))],
  198. tree1, tree2)
  199. def test_tree_changes_complex(self):
  200. blob_a_1 = make_object(Blob, data='a1_1')
  201. blob_bx1_1 = make_object(Blob, data='bx1_1')
  202. blob_bx2_1 = make_object(Blob, data='bx2_1')
  203. blob_by1_1 = make_object(Blob, data='by1_1')
  204. blob_by2_1 = make_object(Blob, data='by2_1')
  205. tree1 = self.commit_tree([
  206. ('a', blob_a_1),
  207. ('b/x/1', blob_bx1_1),
  208. ('b/x/2', blob_bx2_1),
  209. ('b/y/1', blob_by1_1),
  210. ('b/y/2', blob_by2_1),
  211. ])
  212. blob_a_2 = make_object(Blob, data='a1_2')
  213. blob_bx1_2 = blob_bx1_1
  214. blob_by_2 = make_object(Blob, data='by_2')
  215. blob_c_2 = make_object(Blob, data='c_2')
  216. tree2 = self.commit_tree([
  217. ('a', blob_a_2),
  218. ('b/x/1', blob_bx1_2),
  219. ('b/y', blob_by_2),
  220. ('c', blob_c_2),
  221. ])
  222. self.assertChangesEqual(
  223. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a_1.id),
  224. ('a', F, blob_a_2.id)),
  225. TreeChange.delete(('b/x/2', F, blob_bx2_1.id)),
  226. TreeChange.add(('b/y', F, blob_by_2.id)),
  227. TreeChange.delete(('b/y/1', F, blob_by1_1.id)),
  228. TreeChange.delete(('b/y/2', F, blob_by2_1.id)),
  229. TreeChange.add(('c', F, blob_c_2.id))],
  230. tree1, tree2)
  231. def test_tree_changes_name_order(self):
  232. blob = make_object(Blob, data='a')
  233. tree1 = self.commit_tree([('a', blob), ('a.', blob), ('a..', blob)])
  234. # Tree order is the reverse of this, so if we used tree order, 'a..'
  235. # would not be merged.
  236. tree2 = self.commit_tree([('a/x', blob), ('a./x', blob), ('a..', blob)])
  237. self.assertChangesEqual(
  238. [TreeChange.delete(('a', F, blob.id)),
  239. TreeChange.add(('a/x', F, blob.id)),
  240. TreeChange.delete(('a.', F, blob.id)),
  241. TreeChange.add(('a./x', F, blob.id))],
  242. tree1, tree2)
  243. def test_tree_changes_prune(self):
  244. blob_a1 = make_object(Blob, data='a1')
  245. blob_a2 = make_object(Blob, data='a2')
  246. blob_x = make_object(Blob, data='x')
  247. tree1 = self.commit_tree([('a', blob_a1), ('b/x', blob_x)])
  248. tree2 = self.commit_tree([('a', blob_a2), ('b/x', blob_x)])
  249. # Remove identical items so lookups will fail unless we prune.
  250. subtree = self.store[tree1['b'][1]]
  251. for entry in subtree.iteritems():
  252. del self.store[entry.sha]
  253. del self.store[subtree.id]
  254. self.assertChangesEqual(
  255. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  256. ('a', F, blob_a2.id))],
  257. tree1, tree2)
  258. def test_tree_changes_rename_detector(self):
  259. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  260. blob_a2 = make_object(Blob, data='a\nb\nc\ne\n')
  261. blob_b = make_object(Blob, data='b')
  262. tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
  263. tree2 = self.commit_tree([('c', blob_a2), ('b', blob_b)])
  264. detector = RenameDetector(self.store)
  265. self.assertChangesEqual(
  266. [TreeChange.delete(('a', F, blob_a1.id)),
  267. TreeChange.add(('c', F, blob_a2.id))],
  268. tree1, tree2)
  269. self.assertChangesEqual(
  270. [TreeChange.delete(('a', F, blob_a1.id)),
  271. TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
  272. ('b', F, blob_b.id)),
  273. TreeChange.add(('c', F, blob_a2.id))],
  274. tree1, tree2, want_unchanged=True)
  275. self.assertChangesEqual(
  276. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  277. ('c', F, blob_a2.id))],
  278. tree1, tree2, rename_detector=detector)
  279. self.assertChangesEqual(
  280. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  281. ('c', F, blob_a2.id)),
  282. TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
  283. ('b', F, blob_b.id))],
  284. tree1, tree2, rename_detector=detector, want_unchanged=True)
  285. def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
  286. **kwargs):
  287. parent_tree_ids = [t.id for t in parent_trees]
  288. actual = list(tree_changes_for_merge(
  289. self.store, parent_tree_ids, merge_tree.id, **kwargs))
  290. self.assertEqual(expected, actual)
  291. parent_tree_ids.reverse()
  292. expected = [list(reversed(cs)) for cs in expected]
  293. actual = list(tree_changes_for_merge(
  294. self.store, parent_tree_ids, merge_tree.id, **kwargs))
  295. self.assertEqual(expected, actual)
  296. def test_tree_changes_for_merge_add_no_conflict(self):
  297. blob = make_object(Blob, data='blob')
  298. parent1 = self.commit_tree([])
  299. parent2 = merge = self.commit_tree([('a', blob)])
  300. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  301. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  302. def test_tree_changes_for_merge_add_modify_conflict(self):
  303. blob1 = make_object(Blob, data='1')
  304. blob2 = make_object(Blob, data='2')
  305. parent1 = self.commit_tree([])
  306. parent2 = self.commit_tree([('a', blob1)])
  307. merge = self.commit_tree([('a', blob2)])
  308. self.assertChangesForMergeEqual(
  309. [[TreeChange.add(('a', F, blob2.id)),
  310. TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]],
  311. [parent1, parent2], merge)
  312. def test_tree_changes_for_merge_modify_modify_conflict(self):
  313. blob1 = make_object(Blob, data='1')
  314. blob2 = make_object(Blob, data='2')
  315. blob3 = make_object(Blob, data='3')
  316. parent1 = self.commit_tree([('a', blob1)])
  317. parent2 = self.commit_tree([('a', blob2)])
  318. merge = self.commit_tree([('a', blob3)])
  319. self.assertChangesForMergeEqual(
  320. [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
  321. TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]],
  322. [parent1, parent2], merge)
  323. def test_tree_changes_for_merge_modify_no_conflict(self):
  324. blob1 = make_object(Blob, data='1')
  325. blob2 = make_object(Blob, data='2')
  326. parent1 = self.commit_tree([('a', blob1)])
  327. parent2 = merge = self.commit_tree([('a', blob2)])
  328. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  329. def test_tree_changes_for_merge_delete_delete_conflict(self):
  330. blob1 = make_object(Blob, data='1')
  331. blob2 = make_object(Blob, data='2')
  332. parent1 = self.commit_tree([('a', blob1)])
  333. parent2 = self.commit_tree([('a', blob2)])
  334. merge = self.commit_tree([])
  335. self.assertChangesForMergeEqual(
  336. [[TreeChange.delete(('a', F, blob1.id)),
  337. TreeChange.delete(('a', F, blob2.id))]],
  338. [parent1, parent2], merge)
  339. def test_tree_changes_for_merge_delete_no_conflict(self):
  340. blob = make_object(Blob, data='blob')
  341. has = self.commit_tree([('a', blob)])
  342. doesnt_have = self.commit_tree([])
  343. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  344. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  345. def test_tree_changes_for_merge_octopus_no_conflict(self):
  346. r = list(range(5))
  347. blobs = [make_object(Blob, data=str(i)) for i in r]
  348. parents = [self.commit_tree([('a', blobs[i])]) for i in r]
  349. for i in r:
  350. # Take the SHA from each of the parents.
  351. self.assertChangesForMergeEqual([], parents, parents[i])
  352. def test_tree_changes_for_merge_octopus_modify_conflict(self):
  353. # Because the octopus merge strategy is limited, I doubt it's possible
  354. # to create this with the git command line. But the output is well-
  355. # defined, so test it anyway.
  356. r = list(range(5))
  357. parent_blobs = [make_object(Blob, data=str(i)) for i in r]
  358. merge_blob = make_object(Blob, data='merge')
  359. parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r]
  360. merge = self.commit_tree([('a', merge_blob)])
  361. expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id),
  362. ('a', F, merge_blob.id)) for i in r]]
  363. self.assertChangesForMergeEqual(expected, parents, merge)
  364. def test_tree_changes_for_merge_octopus_delete(self):
  365. blob1 = make_object(Blob, data='1')
  366. blob2 = make_object(Blob, data='3')
  367. parent1 = self.commit_tree([('a', blob1)])
  368. parent2 = self.commit_tree([('a', blob2)])
  369. parent3 = merge = self.commit_tree([])
  370. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  371. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  372. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  373. self.assertChangesForMergeEqual(
  374. [[TreeChange.delete(('a', F, blob1.id)),
  375. TreeChange.delete(('a', F, blob2.id)),
  376. None]],
  377. [parent1, parent2, parent3], merge)
  378. def test_tree_changes_for_merge_add_add_same_conflict(self):
  379. blob = make_object(Blob, data='a\nb\nc\nd\n')
  380. parent1 = self.commit_tree([('a', blob)])
  381. parent2 = self.commit_tree([])
  382. merge = self.commit_tree([('b', blob)])
  383. add = TreeChange.add(('b', F, blob.id))
  384. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  385. def test_tree_changes_for_merge_add_exact_rename_conflict(self):
  386. blob = make_object(Blob, data='a\nb\nc\nd\n')
  387. parent1 = self.commit_tree([('a', blob)])
  388. parent2 = self.commit_tree([])
  389. merge = self.commit_tree([('b', blob)])
  390. self.assertChangesForMergeEqual(
  391. [[TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
  392. TreeChange.add(('b', F, blob.id))]],
  393. [parent1, parent2], merge, rename_detector=self.detector)
  394. def test_tree_changes_for_merge_add_content_rename_conflict(self):
  395. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  396. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  397. parent1 = self.commit_tree([('a', blob1)])
  398. parent2 = self.commit_tree([])
  399. merge = self.commit_tree([('b', blob2)])
  400. self.assertChangesForMergeEqual(
  401. [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
  402. TreeChange.add(('b', F, blob2.id))]],
  403. [parent1, parent2], merge, rename_detector=self.detector)
  404. def test_tree_changes_for_merge_modify_rename_conflict(self):
  405. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  406. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  407. parent1 = self.commit_tree([('a', blob1)])
  408. parent2 = self.commit_tree([('b', blob1)])
  409. merge = self.commit_tree([('b', blob2)])
  410. self.assertChangesForMergeEqual(
  411. [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
  412. TreeChange(CHANGE_MODIFY, ('b', F, blob1.id), ('b', F, blob2.id))]],
  413. [parent1, parent2], merge, rename_detector=self.detector)
  414. class RenameDetectionTest(DiffTestCase):
  415. def _do_test_count_blocks(self, count_blocks):
  416. blob = make_object(Blob, data='a\nb\na\n')
  417. self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, count_blocks(blob))
  418. test_count_blocks = functest_builder(_do_test_count_blocks,
  419. _count_blocks_py)
  420. test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks,
  421. _count_blocks)
  422. def _do_test_count_blocks_no_newline(self, count_blocks):
  423. blob = make_object(Blob, data='a\na')
  424. self.assertEqual({hash('a\n'): 2, hash('a'): 1}, _count_blocks(blob))
  425. test_count_blocks_no_newline = functest_builder(
  426. _do_test_count_blocks_no_newline, _count_blocks_py)
  427. test_count_blocks_no_newline_extension = ext_functest_builder(
  428. _do_test_count_blocks_no_newline, _count_blocks)
  429. def _do_test_count_blocks_chunks(self, count_blocks):
  430. blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
  431. self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, _count_blocks(blob))
  432. test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
  433. _count_blocks_py)
  434. test_count_blocks_chunks_extension = ext_functest_builder(
  435. _do_test_count_blocks_chunks, _count_blocks)
  436. def _do_test_count_blocks_long_lines(self, count_blocks):
  437. a = 'a' * 64
  438. data = a + 'xxx\ny\n' + a + 'zzz\n'
  439. blob = make_object(Blob, data=data)
  440. self.assertEqual({hash('a' * 64): 128, hash('xxx\n'): 4, hash('y\n'): 2,
  441. hash('zzz\n'): 4},
  442. _count_blocks(blob))
  443. test_count_blocks_long_lines = functest_builder(
  444. _do_test_count_blocks_long_lines, _count_blocks_py)
  445. test_count_blocks_long_lines_extension = ext_functest_builder(
  446. _do_test_count_blocks_long_lines, _count_blocks)
  447. def assertSimilar(self, expected_score, blob1, blob2):
  448. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  449. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  450. def test_similarity_score(self):
  451. blob0 = make_object(Blob, data='')
  452. blob1 = make_object(Blob, data='ab\ncd\ncd\n')
  453. blob2 = make_object(Blob, data='ab\n')
  454. blob3 = make_object(Blob, data='cd\n')
  455. blob4 = make_object(Blob, data='cd\ncd\n')
  456. self.assertSimilar(100, blob0, blob0)
  457. self.assertSimilar(0, blob0, blob1)
  458. self.assertSimilar(33, blob1, blob2)
  459. self.assertSimilar(33, blob1, blob3)
  460. self.assertSimilar(66, blob1, blob4)
  461. self.assertSimilar(0, blob2, blob3)
  462. self.assertSimilar(50, blob3, blob4)
  463. def test_similarity_score_cache(self):
  464. blob1 = make_object(Blob, data='ab\ncd\n')
  465. blob2 = make_object(Blob, data='ab\n')
  466. block_cache = {}
  467. self.assertEqual(
  468. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  469. self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
  470. def fail_chunks():
  471. self.fail('Unexpected call to as_raw_chunks()')
  472. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  473. blob1.raw_length = lambda: 6
  474. blob2.raw_length = lambda: 3
  475. self.assertEqual(
  476. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  477. def test_tree_entry_sort(self):
  478. sha = 'abcd' * 10
  479. expected_entries = [
  480. TreeChange.add(TreeEntry('aaa', F, sha)),
  481. TreeChange(CHANGE_COPY, TreeEntry('bbb', F, sha),
  482. TreeEntry('aab', F, sha)),
  483. TreeChange(CHANGE_MODIFY, TreeEntry('bbb', F, sha),
  484. TreeEntry('bbb', F, 'dabc' * 10)),
  485. TreeChange(CHANGE_RENAME, TreeEntry('bbc', F, sha),
  486. TreeEntry('ddd', F, sha)),
  487. TreeChange.delete(TreeEntry('ccc', F, sha)),
  488. ]
  489. for perm in permutations(expected_entries):
  490. self.assertEqual(expected_entries,
  491. sorted(perm, key=_tree_change_key))
  492. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  493. detector = RenameDetector(self.store, **kwargs)
  494. return detector.changes_with_renames(tree1.id, tree2.id,
  495. want_unchanged=want_unchanged)
  496. def test_no_renames(self):
  497. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  498. blob2 = make_object(Blob, data='a\nb\ne\nf\n')
  499. blob3 = make_object(Blob, data='a\nb\ng\nh\n')
  500. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  501. tree2 = self.commit_tree([('a', blob1), ('b', blob3)])
  502. self.assertEqual(
  503. [TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob3.id))],
  504. self.detect_renames(tree1, tree2))
  505. def test_exact_rename_one_to_one(self):
  506. blob1 = make_object(Blob, data='1')
  507. blob2 = make_object(Blob, data='2')
  508. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  509. tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
  510. self.assertEqual(
  511. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
  512. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
  513. self.detect_renames(tree1, tree2))
  514. def test_exact_rename_split_different_type(self):
  515. blob = make_object(Blob, data='/foo')
  516. tree1 = self.commit_tree([('a', blob, 0o100644)])
  517. tree2 = self.commit_tree([('a', blob, 0o120000)])
  518. self.assertEqual(
  519. [TreeChange.add(('a', 0o120000, blob.id)),
  520. TreeChange.delete(('a', 0o100644, blob.id))],
  521. self.detect_renames(tree1, tree2))
  522. def test_exact_rename_and_different_type(self):
  523. blob1 = make_object(Blob, data='1')
  524. blob2 = make_object(Blob, data='2')
  525. tree1 = self.commit_tree([('a', blob1)])
  526. tree2 = self.commit_tree([('a', blob2, 0o120000), ('b', blob1)])
  527. self.assertEqual(
  528. [TreeChange.add(('a', 0o120000, blob2.id)),
  529. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
  530. self.detect_renames(tree1, tree2))
  531. def test_exact_rename_one_to_many(self):
  532. blob = make_object(Blob, data='1')
  533. tree1 = self.commit_tree([('a', blob)])
  534. tree2 = self.commit_tree([('b', blob), ('c', blob)])
  535. self.assertEqual(
  536. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
  537. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
  538. self.detect_renames(tree1, tree2))
  539. def test_exact_rename_many_to_one(self):
  540. blob = make_object(Blob, data='1')
  541. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  542. tree2 = self.commit_tree([('c', blob)])
  543. self.assertEqual(
  544. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  545. TreeChange.delete(('b', F, blob.id))],
  546. self.detect_renames(tree1, tree2))
  547. def test_exact_rename_many_to_many(self):
  548. blob = make_object(Blob, data='1')
  549. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  550. tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
  551. self.assertEqual(
  552. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  553. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
  554. TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
  555. self.detect_renames(tree1, tree2))
  556. def test_exact_copy_modify(self):
  557. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  558. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  559. tree1 = self.commit_tree([('a', blob1)])
  560. tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
  561. self.assertEqual(
  562. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  563. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))],
  564. self.detect_renames(tree1, tree2))
  565. def test_exact_copy_change_mode(self):
  566. blob = make_object(Blob, data='a\nb\nc\nd\n')
  567. tree1 = self.commit_tree([('a', blob)])
  568. tree2 = self.commit_tree([('a', blob, 0o100755), ('b', blob)])
  569. self.assertEqual(
  570. [TreeChange(CHANGE_MODIFY, ('a', F, blob.id),
  571. ('a', 0o100755, blob.id)),
  572. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
  573. self.detect_renames(tree1, tree2))
  574. def test_rename_threshold(self):
  575. blob1 = make_object(Blob, data='a\nb\nc\n')
  576. blob2 = make_object(Blob, data='a\nb\nd\n')
  577. tree1 = self.commit_tree([('a', blob1)])
  578. tree2 = self.commit_tree([('b', blob2)])
  579. self.assertEqual(
  580. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  581. self.detect_renames(tree1, tree2, rename_threshold=50))
  582. self.assertEqual(
  583. [TreeChange.delete(('a', F, blob1.id)),
  584. TreeChange.add(('b', F, blob2.id))],
  585. self.detect_renames(tree1, tree2, rename_threshold=75))
  586. def test_content_rename_max_files(self):
  587. blob1 = make_object(Blob, data='a\nb\nc\nd')
  588. blob4 = make_object(Blob, data='a\nb\nc\ne\n')
  589. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  590. blob3 = make_object(Blob, data='e\nf\ng\ni\n')
  591. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  592. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  593. self.assertEqual(
  594. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('d', F, blob4.id)),
  595. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  596. self.detect_renames(tree1, tree2))
  597. self.assertEqual(
  598. [TreeChange.delete(('a', F, blob1.id)),
  599. TreeChange.delete(('b', F, blob2.id)),
  600. TreeChange.add(('c', F, blob3.id)),
  601. TreeChange.add(('d', F, blob4.id))],
  602. self.detect_renames(tree1, tree2, max_files=1))
  603. def test_content_rename_one_to_one(self):
  604. b11 = make_object(Blob, data='a\nb\nc\nd\n')
  605. b12 = make_object(Blob, data='a\nb\nc\ne\n')
  606. b21 = make_object(Blob, data='e\nf\ng\n\h')
  607. b22 = make_object(Blob, data='e\nf\ng\n\i')
  608. tree1 = self.commit_tree([('a', b11), ('b', b21)])
  609. tree2 = self.commit_tree([('c', b12), ('d', b22)])
  610. self.assertEqual(
  611. [TreeChange(CHANGE_RENAME, ('a', F, b11.id), ('c', F, b12.id)),
  612. TreeChange(CHANGE_RENAME, ('b', F, b21.id), ('d', F, b22.id))],
  613. self.detect_renames(tree1, tree2))
  614. def test_content_rename_one_to_one_ordering(self):
  615. blob1 = make_object(Blob, data='a\nb\nc\nd\ne\nf\n')
  616. blob2 = make_object(Blob, data='a\nb\nc\nd\ng\nh\n')
  617. # 6/10 match to blob1, 8/10 match to blob2
  618. blob3 = make_object(Blob, data='a\nb\nc\nd\ng\ni\n')
  619. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  620. tree2 = self.commit_tree([('c', blob3)])
  621. self.assertEqual(
  622. [TreeChange.delete(('a', F, blob1.id)),
  623. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  624. self.detect_renames(tree1, tree2))
  625. tree3 = self.commit_tree([('a', blob2), ('b', blob1)])
  626. tree4 = self.commit_tree([('c', blob3)])
  627. self.assertEqual(
  628. [TreeChange(CHANGE_RENAME, ('a', F, blob2.id), ('c', F, blob3.id)),
  629. TreeChange.delete(('b', F, blob1.id))],
  630. self.detect_renames(tree3, tree4))
  631. def test_content_rename_one_to_many(self):
  632. blob1 = make_object(Blob, data='aa\nb\nc\nd\ne\n')
  633. blob2 = make_object(Blob, data='ab\nb\nc\nd\ne\n') # 8/11 match
  634. blob3 = make_object(Blob, data='aa\nb\nc\nd\nf\n') # 9/11 match
  635. tree1 = self.commit_tree([('a', blob1)])
  636. tree2 = self.commit_tree([('b', blob2), ('c', blob3)])
  637. self.assertEqual(
  638. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id)),
  639. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id))],
  640. self.detect_renames(tree1, tree2))
  641. def test_content_rename_many_to_one(self):
  642. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  643. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  644. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  645. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  646. tree2 = self.commit_tree([('c', blob3)])
  647. self.assertEqual(
  648. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  649. TreeChange.delete(('b', F, blob2.id))],
  650. self.detect_renames(tree1, tree2))
  651. def test_content_rename_many_to_many(self):
  652. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  653. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  654. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  655. blob4 = make_object(Blob, data='a\nb\nc\ng\n')
  656. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  657. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  658. # TODO(dborowitz): Distribute renames rather than greedily choosing
  659. # copies.
  660. self.assertEqual(
  661. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  662. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('d', F, blob4.id)),
  663. TreeChange.delete(('b', F, blob2.id))],
  664. self.detect_renames(tree1, tree2))
  665. def test_content_rename_gitlink(self):
  666. blob1 = make_object(Blob, data='blob1')
  667. blob2 = make_object(Blob, data='blob2')
  668. link1 = '1' * 40
  669. link2 = '2' * 40
  670. tree1 = self.commit_tree([('a', blob1), ('b', link1, 0o160000)])
  671. tree2 = self.commit_tree([('c', blob2), ('d', link2, 0o160000)])
  672. self.assertEqual(
  673. [TreeChange.delete(('a', 0o100644, blob1.id)),
  674. TreeChange.delete(('b', 0o160000, link1)),
  675. TreeChange.add(('c', 0o100644, blob2.id)),
  676. TreeChange.add(('d', 0o160000, link2))],
  677. self.detect_renames(tree1, tree2))
  678. def test_exact_rename_swap(self):
  679. blob1 = make_object(Blob, data='1')
  680. blob2 = make_object(Blob, data='2')
  681. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  682. tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
  683. self.assertEqual(
  684. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  685. TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob1.id))],
  686. self.detect_renames(tree1, tree2))
  687. self.assertEqual(
  688. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id)),
  689. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob2.id))],
  690. self.detect_renames(tree1, tree2, rewrite_threshold=50))
  691. def test_content_rename_swap(self):
  692. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  693. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  694. blob3 = make_object(Blob, data='a\nb\nc\ne\n')
  695. blob4 = make_object(Blob, data='e\nf\ng\ni\n')
  696. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  697. tree2 = self.commit_tree([('a', blob4), ('b', blob3)])
  698. self.assertEqual(
  699. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob3.id)),
  700. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob4.id))],
  701. self.detect_renames(tree1, tree2, rewrite_threshold=60))
  702. def test_rewrite_threshold(self):
  703. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  704. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  705. blob3 = make_object(Blob, data='a\nb\nf\ng\n')
  706. tree1 = self.commit_tree([('a', blob1)])
  707. tree2 = self.commit_tree([('a', blob3), ('b', blob2)])
  708. no_renames = [
  709. TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
  710. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))]
  711. self.assertEqual(
  712. no_renames, self.detect_renames(tree1, tree2))
  713. self.assertEqual(
  714. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40))
  715. self.assertEqual(
  716. [TreeChange.add(('a', F, blob3.id)),
  717. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  718. self.detect_renames(tree1, tree2, rewrite_threshold=80))
  719. def test_find_copies_harder_exact(self):
  720. blob = make_object(Blob, data='blob')
  721. tree1 = self.commit_tree([('a', blob)])
  722. tree2 = self.commit_tree([('a', blob), ('b', blob)])
  723. self.assertEqual([TreeChange.add(('b', F, blob.id))],
  724. self.detect_renames(tree1, tree2))
  725. self.assertEqual(
  726. [TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
  727. self.detect_renames(tree1, tree2, find_copies_harder=True))
  728. def test_find_copies_harder_content(self):
  729. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  730. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  731. tree1 = self.commit_tree([('a', blob1)])
  732. tree2 = self.commit_tree([('a', blob1), ('b', blob2)])
  733. self.assertEqual([TreeChange.add(('b', F, blob2.id))],
  734. self.detect_renames(tree1, tree2))
  735. self.assertEqual(
  736. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
  737. self.detect_renames(tree1, tree2, find_copies_harder=True))
  738. def test_find_copies_harder_with_rewrites(self):
  739. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  740. blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
  741. blob_b2 = make_object(Blob, data='a\nb\nc\ne\n')
  742. tree1 = self.commit_tree([('a', blob_a1)])
  743. tree2 = self.commit_tree([('a', blob_a2), ('b', blob_b2)])
  744. self.assertEqual(
  745. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  746. ('a', F, blob_a2.id)),
  747. TreeChange(CHANGE_COPY, ('a', F, blob_a1.id), ('b', F, blob_b2.id))],
  748. self.detect_renames(tree1, tree2, find_copies_harder=True))
  749. self.assertEqual(
  750. [TreeChange.add(('a', F, blob_a2.id)),
  751. TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  752. ('b', F, blob_b2.id))],
  753. self.detect_renames(tree1, tree2, rewrite_threshold=50,
  754. find_copies_harder=True))
  755. def test_reuse_detector(self):
  756. blob = make_object(Blob, data='blob')
  757. tree1 = self.commit_tree([('a', blob)])
  758. tree2 = self.commit_tree([('b', blob)])
  759. detector = RenameDetector(self.store)
  760. changes = [TreeChange(CHANGE_RENAME, ('a', F, blob.id),
  761. ('b', F, blob.id))]
  762. self.assertEqual(changes,
  763. detector.changes_with_renames(tree1.id, tree2.id))
  764. self.assertEqual(changes,
  765. detector.changes_with_renames(tree1.id, tree2.id))
  766. def test_want_unchanged(self):
  767. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  768. blob_b = make_object(Blob, data='b')
  769. blob_c2 = make_object(Blob, data='a\nb\nc\ne\n')
  770. tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
  771. tree2 = self.commit_tree([('c', blob_c2), ('b', blob_b)])
  772. detector = RenameDetector(self.store)
  773. self.assertEqual(
  774. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  775. ('c', F, blob_c2.id))],
  776. self.detect_renames(tree1, tree2))
  777. self.assertEqual(
  778. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  779. ('c', F, blob_c2.id)),
  780. TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
  781. ('b', F, blob_b.id))],
  782. self.detect_renames(tree1, tree2, want_unchanged=True))