test_diff_tree.py 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Tests for file and tree diff utilities."""
  19. from itertools import (
  20. permutations,
  21. )
  22. from dulwich.diff_tree import (
  23. CHANGE_MODIFY,
  24. CHANGE_RENAME,
  25. CHANGE_COPY,
  26. CHANGE_UNCHANGED,
  27. TreeChange,
  28. _merge_entries,
  29. _merge_entries_py,
  30. tree_changes,
  31. tree_changes_for_merge,
  32. _count_blocks,
  33. _count_blocks_py,
  34. _similarity_score,
  35. _tree_change_key,
  36. RenameDetector,
  37. _is_tree,
  38. _is_tree_py
  39. )
  40. from dulwich.index import (
  41. commit_tree,
  42. )
  43. from dulwich.object_store import (
  44. MemoryObjectStore,
  45. )
  46. from dulwich.objects import (
  47. ShaFile,
  48. Blob,
  49. TreeEntry,
  50. Tree,
  51. )
  52. from dulwich.tests import (
  53. TestCase,
  54. )
  55. from dulwich.tests.utils import (
  56. F,
  57. make_object,
  58. functest_builder,
  59. ext_functest_builder,
  60. )
  61. class DiffTestCase(TestCase):
  62. def setUp(self):
  63. super(DiffTestCase, self).setUp()
  64. self.store = MemoryObjectStore()
  65. self.empty_tree = self.commit_tree([])
  66. def commit_tree(self, entries):
  67. commit_blobs = []
  68. for entry in entries:
  69. if len(entry) == 2:
  70. path, obj = entry
  71. mode = F
  72. else:
  73. path, obj, mode = entry
  74. if isinstance(obj, Blob):
  75. self.store.add_object(obj)
  76. sha = obj.id
  77. else:
  78. sha = obj
  79. commit_blobs.append((path, sha, mode))
  80. return self.store[commit_tree(self.store, commit_blobs)]
  81. class TreeChangesTest(DiffTestCase):
  82. def setUp(self):
  83. super(TreeChangesTest, self).setUp()
  84. self.detector = RenameDetector(self.store)
  85. def assertMergeFails(self, merge_entries, name, mode, sha):
  86. t = Tree()
  87. t[name] = (mode, sha)
  88. self.assertRaises(TypeError, merge_entries, '', t, t)
  89. def _do_test_merge_entries(self, merge_entries):
  90. blob_a1 = make_object(Blob, data='a1')
  91. blob_a2 = make_object(Blob, data='a2')
  92. blob_b1 = make_object(Blob, data='b1')
  93. blob_c2 = make_object(Blob, data='c2')
  94. tree1 = self.commit_tree([('a', blob_a1, 0100644),
  95. ('b', blob_b1, 0100755)])
  96. tree2 = self.commit_tree([('a', blob_a2, 0100644),
  97. ('c', blob_c2, 0100755)])
  98. self.assertEqual([], merge_entries('', self.empty_tree,
  99. self.empty_tree))
  100. self.assertEqual([
  101. ((None, None, None), ('a', 0100644, blob_a1.id)),
  102. ((None, None, None), ('b', 0100755, blob_b1.id)),
  103. ], merge_entries('', self.empty_tree, tree1))
  104. self.assertEqual([
  105. ((None, None, None), ('x/a', 0100644, blob_a1.id)),
  106. ((None, None, None), ('x/b', 0100755, blob_b1.id)),
  107. ], merge_entries('x', self.empty_tree, tree1))
  108. self.assertEqual([
  109. (('a', 0100644, blob_a2.id), (None, None, None)),
  110. (('c', 0100755, blob_c2.id), (None, None, None)),
  111. ], merge_entries('', tree2, self.empty_tree))
  112. self.assertEqual([
  113. (('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
  114. (('b', 0100755, blob_b1.id), (None, None, None)),
  115. ((None, None, None), ('c', 0100755, blob_c2.id)),
  116. ], merge_entries('', tree1, tree2))
  117. self.assertEqual([
  118. (('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
  119. ((None, None, None), ('b', 0100755, blob_b1.id)),
  120. (('c', 0100755, blob_c2.id), (None, None, None)),
  121. ], merge_entries('', tree2, tree1))
  122. self.assertMergeFails(merge_entries, 0xdeadbeef, 0100644, '1' * 40)
  123. self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
  124. self.assertMergeFails(merge_entries, 'a', 0100644, 0xdeadbeef)
  125. test_merge_entries = functest_builder(_do_test_merge_entries,
  126. _merge_entries_py)
  127. test_merge_entries_extension = ext_functest_builder(_do_test_merge_entries,
  128. _merge_entries)
  129. def _do_test_is_tree(self, is_tree):
  130. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  131. self.assertFalse(is_tree(TreeEntry('a', 0100644, 'a' * 40)))
  132. self.assertFalse(is_tree(TreeEntry('a', 0100755, 'a' * 40)))
  133. self.assertFalse(is_tree(TreeEntry('a', 0120000, 'a' * 40)))
  134. self.assertTrue(is_tree(TreeEntry('a', 0040000, 'a' * 40)))
  135. self.assertRaises(TypeError, is_tree, TreeEntry('a', 'x', 'a' * 40))
  136. self.assertRaises(AttributeError, is_tree, 1234)
  137. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  138. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  139. def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
  140. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  141. self.assertEqual(expected, actual)
  142. # For brevity, the following tests use tuples instead of TreeEntry objects.
  143. def test_tree_changes_empty(self):
  144. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  145. def test_tree_changes_no_changes(self):
  146. blob = make_object(Blob, data='blob')
  147. tree = self.commit_tree([('a', blob), ('b/c', blob)])
  148. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  149. self.assertChangesEqual([], tree, tree)
  150. self.assertChangesEqual(
  151. [TreeChange(CHANGE_UNCHANGED, ('a', F, blob.id), ('a', F, blob.id)),
  152. TreeChange(CHANGE_UNCHANGED, ('b/c', F, blob.id),
  153. ('b/c', F, blob.id))],
  154. tree, tree, want_unchanged=True)
  155. def test_tree_changes_add_delete(self):
  156. blob_a = make_object(Blob, data='a')
  157. blob_b = make_object(Blob, data='b')
  158. tree = self.commit_tree([('a', blob_a, 0100644),
  159. ('x/b', blob_b, 0100755)])
  160. self.assertChangesEqual(
  161. [TreeChange.add(('a', 0100644, blob_a.id)),
  162. TreeChange.add(('x/b', 0100755, blob_b.id))],
  163. self.empty_tree, tree)
  164. self.assertChangesEqual(
  165. [TreeChange.delete(('a', 0100644, blob_a.id)),
  166. TreeChange.delete(('x/b', 0100755, blob_b.id))],
  167. tree, self.empty_tree)
  168. def test_tree_changes_modify_contents(self):
  169. blob_a1 = make_object(Blob, data='a1')
  170. blob_a2 = make_object(Blob, data='a2')
  171. tree1 = self.commit_tree([('a', blob_a1)])
  172. tree2 = self.commit_tree([('a', blob_a2)])
  173. self.assertChangesEqual(
  174. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  175. ('a', F, blob_a2.id))], tree1, tree2)
  176. def test_tree_changes_modify_mode(self):
  177. blob_a = make_object(Blob, data='a')
  178. tree1 = self.commit_tree([('a', blob_a, 0100644)])
  179. tree2 = self.commit_tree([('a', blob_a, 0100755)])
  180. self.assertChangesEqual(
  181. [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a.id),
  182. ('a', 0100755, blob_a.id))], tree1, tree2)
  183. def test_tree_changes_change_type(self):
  184. blob_a1 = make_object(Blob, data='a')
  185. blob_a2 = make_object(Blob, data='/foo/bar')
  186. tree1 = self.commit_tree([('a', blob_a1, 0100644)])
  187. tree2 = self.commit_tree([('a', blob_a2, 0120000)])
  188. self.assertChangesEqual(
  189. [TreeChange.delete(('a', 0100644, blob_a1.id)),
  190. TreeChange.add(('a', 0120000, blob_a2.id))],
  191. tree1, tree2)
  192. def test_tree_changes_to_tree(self):
  193. blob_a = make_object(Blob, data='a')
  194. blob_x = make_object(Blob, data='x')
  195. tree1 = self.commit_tree([('a', blob_a)])
  196. tree2 = self.commit_tree([('a/x', blob_x)])
  197. self.assertChangesEqual(
  198. [TreeChange.delete(('a', F, blob_a.id)),
  199. TreeChange.add(('a/x', F, blob_x.id))],
  200. tree1, tree2)
  201. def test_tree_changes_complex(self):
  202. blob_a_1 = make_object(Blob, data='a1_1')
  203. blob_bx1_1 = make_object(Blob, data='bx1_1')
  204. blob_bx2_1 = make_object(Blob, data='bx2_1')
  205. blob_by1_1 = make_object(Blob, data='by1_1')
  206. blob_by2_1 = make_object(Blob, data='by2_1')
  207. tree1 = self.commit_tree([
  208. ('a', blob_a_1),
  209. ('b/x/1', blob_bx1_1),
  210. ('b/x/2', blob_bx2_1),
  211. ('b/y/1', blob_by1_1),
  212. ('b/y/2', blob_by2_1),
  213. ])
  214. blob_a_2 = make_object(Blob, data='a1_2')
  215. blob_bx1_2 = blob_bx1_1
  216. blob_by_2 = make_object(Blob, data='by_2')
  217. blob_c_2 = make_object(Blob, data='c_2')
  218. tree2 = self.commit_tree([
  219. ('a', blob_a_2),
  220. ('b/x/1', blob_bx1_2),
  221. ('b/y', blob_by_2),
  222. ('c', blob_c_2),
  223. ])
  224. self.assertChangesEqual(
  225. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a_1.id),
  226. ('a', F, blob_a_2.id)),
  227. TreeChange.delete(('b/x/2', F, blob_bx2_1.id)),
  228. TreeChange.add(('b/y', F, blob_by_2.id)),
  229. TreeChange.delete(('b/y/1', F, blob_by1_1.id)),
  230. TreeChange.delete(('b/y/2', F, blob_by2_1.id)),
  231. TreeChange.add(('c', F, blob_c_2.id))],
  232. tree1, tree2)
  233. def test_tree_changes_name_order(self):
  234. blob = make_object(Blob, data='a')
  235. tree1 = self.commit_tree([('a', blob), ('a.', blob), ('a..', blob)])
  236. # Tree order is the reverse of this, so if we used tree order, 'a..'
  237. # would not be merged.
  238. tree2 = self.commit_tree([('a/x', blob), ('a./x', blob), ('a..', blob)])
  239. self.assertChangesEqual(
  240. [TreeChange.delete(('a', F, blob.id)),
  241. TreeChange.add(('a/x', F, blob.id)),
  242. TreeChange.delete(('a.', F, blob.id)),
  243. TreeChange.add(('a./x', F, blob.id))],
  244. tree1, tree2)
  245. def test_tree_changes_prune(self):
  246. blob_a1 = make_object(Blob, data='a1')
  247. blob_a2 = make_object(Blob, data='a2')
  248. blob_x = make_object(Blob, data='x')
  249. tree1 = self.commit_tree([('a', blob_a1), ('b/x', blob_x)])
  250. tree2 = self.commit_tree([('a', blob_a2), ('b/x', blob_x)])
  251. # Remove identical items so lookups will fail unless we prune.
  252. subtree = self.store[tree1['b'][1]]
  253. for entry in subtree.iteritems():
  254. del self.store[entry.sha]
  255. del self.store[subtree.id]
  256. self.assertChangesEqual(
  257. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  258. ('a', F, blob_a2.id))],
  259. tree1, tree2)
  260. def test_tree_changes_rename_detector(self):
  261. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  262. blob_a2 = make_object(Blob, data='a\nb\nc\ne\n')
  263. blob_b = make_object(Blob, data='b')
  264. tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
  265. tree2 = self.commit_tree([('c', blob_a2), ('b', blob_b)])
  266. detector = RenameDetector(self.store)
  267. self.assertChangesEqual(
  268. [TreeChange.delete(('a', F, blob_a1.id)),
  269. TreeChange.add(('c', F, blob_a2.id))],
  270. tree1, tree2)
  271. self.assertChangesEqual(
  272. [TreeChange.delete(('a', F, blob_a1.id)),
  273. TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
  274. ('b', F, blob_b.id)),
  275. TreeChange.add(('c', F, blob_a2.id))],
  276. tree1, tree2, want_unchanged=True)
  277. self.assertChangesEqual(
  278. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  279. ('c', F, blob_a2.id))],
  280. tree1, tree2, rename_detector=detector)
  281. self.assertChangesEqual(
  282. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  283. ('c', F, blob_a2.id)),
  284. TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
  285. ('b', F, blob_b.id))],
  286. tree1, tree2, rename_detector=detector, want_unchanged=True)
  287. def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
  288. **kwargs):
  289. parent_tree_ids = [t.id for t in parent_trees]
  290. actual = list(tree_changes_for_merge(
  291. self.store, parent_tree_ids, merge_tree.id, **kwargs))
  292. self.assertEqual(expected, actual)
  293. parent_tree_ids.reverse()
  294. expected = [list(reversed(cs)) for cs in expected]
  295. actual = list(tree_changes_for_merge(
  296. self.store, parent_tree_ids, merge_tree.id, **kwargs))
  297. self.assertEqual(expected, actual)
  298. def test_tree_changes_for_merge_add_no_conflict(self):
  299. blob = make_object(Blob, data='blob')
  300. parent1 = self.commit_tree([])
  301. parent2 = merge = self.commit_tree([('a', blob)])
  302. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  303. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  304. def test_tree_changes_for_merge_add_modify_conflict(self):
  305. blob1 = make_object(Blob, data='1')
  306. blob2 = make_object(Blob, data='2')
  307. parent1 = self.commit_tree([])
  308. parent2 = self.commit_tree([('a', blob1)])
  309. merge = self.commit_tree([('a', blob2)])
  310. self.assertChangesForMergeEqual(
  311. [[TreeChange.add(('a', F, blob2.id)),
  312. TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]],
  313. [parent1, parent2], merge)
  314. def test_tree_changes_for_merge_modify_modify_conflict(self):
  315. blob1 = make_object(Blob, data='1')
  316. blob2 = make_object(Blob, data='2')
  317. blob3 = make_object(Blob, data='3')
  318. parent1 = self.commit_tree([('a', blob1)])
  319. parent2 = self.commit_tree([('a', blob2)])
  320. merge = self.commit_tree([('a', blob3)])
  321. self.assertChangesForMergeEqual(
  322. [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
  323. TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]],
  324. [parent1, parent2], merge)
  325. def test_tree_changes_for_merge_modify_no_conflict(self):
  326. blob1 = make_object(Blob, data='1')
  327. blob2 = make_object(Blob, data='2')
  328. parent1 = self.commit_tree([('a', blob1)])
  329. parent2 = merge = self.commit_tree([('a', blob2)])
  330. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  331. def test_tree_changes_for_merge_delete_delete_conflict(self):
  332. blob1 = make_object(Blob, data='1')
  333. blob2 = make_object(Blob, data='2')
  334. parent1 = self.commit_tree([('a', blob1)])
  335. parent2 = self.commit_tree([('a', blob2)])
  336. merge = self.commit_tree([])
  337. self.assertChangesForMergeEqual(
  338. [[TreeChange.delete(('a', F, blob1.id)),
  339. TreeChange.delete(('a', F, blob2.id))]],
  340. [parent1, parent2], merge)
  341. def test_tree_changes_for_merge_delete_no_conflict(self):
  342. blob = make_object(Blob, data='blob')
  343. has = self.commit_tree([('a', blob)])
  344. doesnt_have = self.commit_tree([])
  345. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  346. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  347. def test_tree_changes_for_merge_octopus_no_conflict(self):
  348. r = range(5)
  349. blobs = [make_object(Blob, data=str(i)) for i in r]
  350. parents = [self.commit_tree([('a', blobs[i])]) for i in r]
  351. for i in r:
  352. # Take the SHA from each of the parents.
  353. self.assertChangesForMergeEqual([], parents, parents[i])
  354. def test_tree_changes_for_merge_octopus_modify_conflict(self):
  355. # Because the octopus merge strategy is limited, I doubt it's possible
  356. # to create this with the git command line. But the output is well-
  357. # defined, so test it anyway.
  358. r = range(5)
  359. parent_blobs = [make_object(Blob, data=str(i)) for i in r]
  360. merge_blob = make_object(Blob, data='merge')
  361. parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r]
  362. merge = self.commit_tree([('a', merge_blob)])
  363. expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id),
  364. ('a', F, merge_blob.id)) for i in r]]
  365. self.assertChangesForMergeEqual(expected, parents, merge)
  366. def test_tree_changes_for_merge_octopus_delete(self):
  367. blob1 = make_object(Blob, data='1')
  368. blob2 = make_object(Blob, data='3')
  369. parent1 = self.commit_tree([('a', blob1)])
  370. parent2 = self.commit_tree([('a', blob2)])
  371. parent3 = merge = self.commit_tree([])
  372. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  373. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  374. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  375. self.assertChangesForMergeEqual(
  376. [[TreeChange.delete(('a', F, blob1.id)),
  377. TreeChange.delete(('a', F, blob2.id)),
  378. None]],
  379. [parent1, parent2, parent3], merge)
  380. def test_tree_changes_for_merge_add_add_same_conflict(self):
  381. blob = make_object(Blob, data='a\nb\nc\nd\n')
  382. parent1 = self.commit_tree([('a', blob)])
  383. parent2 = self.commit_tree([])
  384. merge = self.commit_tree([('b', blob)])
  385. add = TreeChange.add(('b', F, blob.id))
  386. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  387. def test_tree_changes_for_merge_add_exact_rename_conflict(self):
  388. blob = make_object(Blob, data='a\nb\nc\nd\n')
  389. parent1 = self.commit_tree([('a', blob)])
  390. parent2 = self.commit_tree([])
  391. merge = self.commit_tree([('b', blob)])
  392. self.assertChangesForMergeEqual(
  393. [[TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
  394. TreeChange.add(('b', F, blob.id))]],
  395. [parent1, parent2], merge, rename_detector=self.detector)
  396. def test_tree_changes_for_merge_add_content_rename_conflict(self):
  397. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  398. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  399. parent1 = self.commit_tree([('a', blob1)])
  400. parent2 = self.commit_tree([])
  401. merge = self.commit_tree([('b', blob2)])
  402. self.assertChangesForMergeEqual(
  403. [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
  404. TreeChange.add(('b', F, blob2.id))]],
  405. [parent1, parent2], merge, rename_detector=self.detector)
  406. def test_tree_changes_for_merge_modify_rename_conflict(self):
  407. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  408. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  409. parent1 = self.commit_tree([('a', blob1)])
  410. parent2 = self.commit_tree([('b', blob1)])
  411. merge = self.commit_tree([('b', blob2)])
  412. self.assertChangesForMergeEqual(
  413. [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
  414. TreeChange(CHANGE_MODIFY, ('b', F, blob1.id), ('b', F, blob2.id))]],
  415. [parent1, parent2], merge, rename_detector=self.detector)
  416. class RenameDetectionTest(DiffTestCase):
  417. def _do_test_count_blocks(self, count_blocks):
  418. blob = make_object(Blob, data='a\nb\na\n')
  419. self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, count_blocks(blob))
  420. test_count_blocks = functest_builder(_do_test_count_blocks,
  421. _count_blocks_py)
  422. test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks,
  423. _count_blocks)
  424. def _do_test_count_blocks_no_newline(self, count_blocks):
  425. blob = make_object(Blob, data='a\na')
  426. self.assertEqual({hash('a\n'): 2, hash('a'): 1}, _count_blocks(blob))
  427. test_count_blocks_no_newline = functest_builder(
  428. _do_test_count_blocks_no_newline, _count_blocks_py)
  429. test_count_blocks_no_newline_extension = ext_functest_builder(
  430. _do_test_count_blocks_no_newline, _count_blocks)
  431. def _do_test_count_blocks_chunks(self, count_blocks):
  432. blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
  433. self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, _count_blocks(blob))
  434. test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
  435. _count_blocks_py)
  436. test_count_blocks_chunks_extension = ext_functest_builder(
  437. _do_test_count_blocks_chunks, _count_blocks)
  438. def _do_test_count_blocks_long_lines(self, count_blocks):
  439. a = 'a' * 64
  440. data = a + 'xxx\ny\n' + a + 'zzz\n'
  441. blob = make_object(Blob, data=data)
  442. self.assertEqual({hash('a' * 64): 128, hash('xxx\n'): 4, hash('y\n'): 2,
  443. hash('zzz\n'): 4},
  444. _count_blocks(blob))
  445. test_count_blocks_long_lines = functest_builder(
  446. _do_test_count_blocks_long_lines, _count_blocks_py)
  447. test_count_blocks_long_lines_extension = ext_functest_builder(
  448. _do_test_count_blocks_long_lines, _count_blocks)
  449. def assertSimilar(self, expected_score, blob1, blob2):
  450. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  451. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  452. def test_similarity_score(self):
  453. blob0 = make_object(Blob, data='')
  454. blob1 = make_object(Blob, data='ab\ncd\ncd\n')
  455. blob2 = make_object(Blob, data='ab\n')
  456. blob3 = make_object(Blob, data='cd\n')
  457. blob4 = make_object(Blob, data='cd\ncd\n')
  458. self.assertSimilar(100, blob0, blob0)
  459. self.assertSimilar(0, blob0, blob1)
  460. self.assertSimilar(33, blob1, blob2)
  461. self.assertSimilar(33, blob1, blob3)
  462. self.assertSimilar(66, blob1, blob4)
  463. self.assertSimilar(0, blob2, blob3)
  464. self.assertSimilar(50, blob3, blob4)
  465. def test_similarity_score_cache(self):
  466. blob1 = make_object(Blob, data='ab\ncd\n')
  467. blob2 = make_object(Blob, data='ab\n')
  468. block_cache = {}
  469. self.assertEqual(
  470. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  471. self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
  472. def fail_chunks():
  473. self.fail('Unexpected call to as_raw_chunks()')
  474. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  475. blob1.raw_length = lambda: 6
  476. blob2.raw_length = lambda: 3
  477. self.assertEqual(
  478. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  479. def test_tree_entry_sort(self):
  480. sha = 'abcd' * 10
  481. expected_entries = [
  482. TreeChange.add(TreeEntry('aaa', F, sha)),
  483. TreeChange(CHANGE_COPY, TreeEntry('bbb', F, sha),
  484. TreeEntry('aab', F, sha)),
  485. TreeChange(CHANGE_MODIFY, TreeEntry('bbb', F, sha),
  486. TreeEntry('bbb', F, 'dabc' * 10)),
  487. TreeChange(CHANGE_RENAME, TreeEntry('bbc', F, sha),
  488. TreeEntry('ddd', F, sha)),
  489. TreeChange.delete(TreeEntry('ccc', F, sha)),
  490. ]
  491. for perm in permutations(expected_entries):
  492. self.assertEqual(expected_entries,
  493. sorted(perm, key=_tree_change_key))
  494. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  495. detector = RenameDetector(self.store, **kwargs)
  496. return detector.changes_with_renames(tree1.id, tree2.id,
  497. want_unchanged=want_unchanged)
  498. def test_no_renames(self):
  499. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  500. blob2 = make_object(Blob, data='a\nb\ne\nf\n')
  501. blob3 = make_object(Blob, data='a\nb\ng\nh\n')
  502. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  503. tree2 = self.commit_tree([('a', blob1), ('b', blob3)])
  504. self.assertEqual(
  505. [TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob3.id))],
  506. self.detect_renames(tree1, tree2))
  507. def test_exact_rename_one_to_one(self):
  508. blob1 = make_object(Blob, data='1')
  509. blob2 = make_object(Blob, data='2')
  510. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  511. tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
  512. self.assertEqual(
  513. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
  514. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
  515. self.detect_renames(tree1, tree2))
  516. def test_exact_rename_split_different_type(self):
  517. blob = make_object(Blob, data='/foo')
  518. tree1 = self.commit_tree([('a', blob, 0100644)])
  519. tree2 = self.commit_tree([('a', blob, 0120000)])
  520. self.assertEqual(
  521. [TreeChange.add(('a', 0120000, blob.id)),
  522. TreeChange.delete(('a', 0100644, blob.id))],
  523. self.detect_renames(tree1, tree2))
  524. def test_exact_rename_and_different_type(self):
  525. blob1 = make_object(Blob, data='1')
  526. blob2 = make_object(Blob, data='2')
  527. tree1 = self.commit_tree([('a', blob1)])
  528. tree2 = self.commit_tree([('a', blob2, 0120000), ('b', blob1)])
  529. self.assertEqual(
  530. [TreeChange.add(('a', 0120000, blob2.id)),
  531. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
  532. self.detect_renames(tree1, tree2))
  533. def test_exact_rename_one_to_many(self):
  534. blob = make_object(Blob, data='1')
  535. tree1 = self.commit_tree([('a', blob)])
  536. tree2 = self.commit_tree([('b', blob), ('c', blob)])
  537. self.assertEqual(
  538. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
  539. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
  540. self.detect_renames(tree1, tree2))
  541. def test_exact_rename_many_to_one(self):
  542. blob = make_object(Blob, data='1')
  543. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  544. tree2 = self.commit_tree([('c', blob)])
  545. self.assertEqual(
  546. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  547. TreeChange.delete(('b', F, blob.id))],
  548. self.detect_renames(tree1, tree2))
  549. def test_exact_rename_many_to_many(self):
  550. blob = make_object(Blob, data='1')
  551. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  552. tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
  553. self.assertEqual(
  554. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  555. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
  556. TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
  557. self.detect_renames(tree1, tree2))
  558. def test_exact_copy_modify(self):
  559. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  560. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  561. tree1 = self.commit_tree([('a', blob1)])
  562. tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
  563. self.assertEqual(
  564. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  565. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))],
  566. self.detect_renames(tree1, tree2))
  567. def test_exact_copy_change_mode(self):
  568. blob = make_object(Blob, data='a\nb\nc\nd\n')
  569. tree1 = self.commit_tree([('a', blob)])
  570. tree2 = self.commit_tree([('a', blob, 0100755), ('b', blob)])
  571. self.assertEqual(
  572. [TreeChange(CHANGE_MODIFY, ('a', F, blob.id),
  573. ('a', 0100755, blob.id)),
  574. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
  575. self.detect_renames(tree1, tree2))
  576. def test_rename_threshold(self):
  577. blob1 = make_object(Blob, data='a\nb\nc\n')
  578. blob2 = make_object(Blob, data='a\nb\nd\n')
  579. tree1 = self.commit_tree([('a', blob1)])
  580. tree2 = self.commit_tree([('b', blob2)])
  581. self.assertEqual(
  582. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  583. self.detect_renames(tree1, tree2, rename_threshold=50))
  584. self.assertEqual(
  585. [TreeChange.delete(('a', F, blob1.id)),
  586. TreeChange.add(('b', F, blob2.id))],
  587. self.detect_renames(tree1, tree2, rename_threshold=75))
  588. def test_content_rename_max_files(self):
  589. blob1 = make_object(Blob, data='a\nb\nc\nd')
  590. blob4 = make_object(Blob, data='a\nb\nc\ne\n')
  591. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  592. blob3 = make_object(Blob, data='e\nf\ng\ni\n')
  593. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  594. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  595. self.assertEqual(
  596. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('d', F, blob4.id)),
  597. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  598. self.detect_renames(tree1, tree2))
  599. self.assertEqual(
  600. [TreeChange.delete(('a', F, blob1.id)),
  601. TreeChange.delete(('b', F, blob2.id)),
  602. TreeChange.add(('c', F, blob3.id)),
  603. TreeChange.add(('d', F, blob4.id))],
  604. self.detect_renames(tree1, tree2, max_files=1))
  605. def test_content_rename_one_to_one(self):
  606. b11 = make_object(Blob, data='a\nb\nc\nd\n')
  607. b12 = make_object(Blob, data='a\nb\nc\ne\n')
  608. b21 = make_object(Blob, data='e\nf\ng\n\h')
  609. b22 = make_object(Blob, data='e\nf\ng\n\i')
  610. tree1 = self.commit_tree([('a', b11), ('b', b21)])
  611. tree2 = self.commit_tree([('c', b12), ('d', b22)])
  612. self.assertEqual(
  613. [TreeChange(CHANGE_RENAME, ('a', F, b11.id), ('c', F, b12.id)),
  614. TreeChange(CHANGE_RENAME, ('b', F, b21.id), ('d', F, b22.id))],
  615. self.detect_renames(tree1, tree2))
  616. def test_content_rename_one_to_one_ordering(self):
  617. blob1 = make_object(Blob, data='a\nb\nc\nd\ne\nf\n')
  618. blob2 = make_object(Blob, data='a\nb\nc\nd\ng\nh\n')
  619. # 6/10 match to blob1, 8/10 match to blob2
  620. blob3 = make_object(Blob, data='a\nb\nc\nd\ng\ni\n')
  621. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  622. tree2 = self.commit_tree([('c', blob3)])
  623. self.assertEqual(
  624. [TreeChange.delete(('a', F, blob1.id)),
  625. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  626. self.detect_renames(tree1, tree2))
  627. tree3 = self.commit_tree([('a', blob2), ('b', blob1)])
  628. tree4 = self.commit_tree([('c', blob3)])
  629. self.assertEqual(
  630. [TreeChange(CHANGE_RENAME, ('a', F, blob2.id), ('c', F, blob3.id)),
  631. TreeChange.delete(('b', F, blob1.id))],
  632. self.detect_renames(tree3, tree4))
  633. def test_content_rename_one_to_many(self):
  634. blob1 = make_object(Blob, data='aa\nb\nc\nd\ne\n')
  635. blob2 = make_object(Blob, data='ab\nb\nc\nd\ne\n') # 8/11 match
  636. blob3 = make_object(Blob, data='aa\nb\nc\nd\nf\n') # 9/11 match
  637. tree1 = self.commit_tree([('a', blob1)])
  638. tree2 = self.commit_tree([('b', blob2), ('c', blob3)])
  639. self.assertEqual(
  640. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id)),
  641. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id))],
  642. self.detect_renames(tree1, tree2))
  643. def test_content_rename_many_to_one(self):
  644. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  645. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  646. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  647. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  648. tree2 = self.commit_tree([('c', blob3)])
  649. self.assertEqual(
  650. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  651. TreeChange.delete(('b', F, blob2.id))],
  652. self.detect_renames(tree1, tree2))
  653. def test_content_rename_many_to_many(self):
  654. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  655. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  656. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  657. blob4 = make_object(Blob, data='a\nb\nc\ng\n')
  658. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  659. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  660. # TODO(dborowitz): Distribute renames rather than greedily choosing
  661. # copies.
  662. self.assertEqual(
  663. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  664. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('d', F, blob4.id)),
  665. TreeChange.delete(('b', F, blob2.id))],
  666. self.detect_renames(tree1, tree2))
  667. def test_content_rename_gitlink(self):
  668. blob1 = make_object(Blob, data='blob1')
  669. blob2 = make_object(Blob, data='blob2')
  670. link1 = '1' * 40
  671. link2 = '2' * 40
  672. tree1 = self.commit_tree([('a', blob1), ('b', link1, 0160000)])
  673. tree2 = self.commit_tree([('c', blob2), ('d', link2, 0160000)])
  674. self.assertEqual(
  675. [TreeChange.delete(('a', 0100644, blob1.id)),
  676. TreeChange.delete(('b', 0160000, link1)),
  677. TreeChange.add(('c', 0100644, blob2.id)),
  678. TreeChange.add(('d', 0160000, link2))],
  679. self.detect_renames(tree1, tree2))
  680. def test_exact_rename_swap(self):
  681. blob1 = make_object(Blob, data='1')
  682. blob2 = make_object(Blob, data='2')
  683. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  684. tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
  685. self.assertEqual(
  686. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  687. TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob1.id))],
  688. self.detect_renames(tree1, tree2))
  689. self.assertEqual(
  690. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id)),
  691. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob2.id))],
  692. self.detect_renames(tree1, tree2, rewrite_threshold=50))
  693. def test_content_rename_swap(self):
  694. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  695. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  696. blob3 = make_object(Blob, data='a\nb\nc\ne\n')
  697. blob4 = make_object(Blob, data='e\nf\ng\ni\n')
  698. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  699. tree2 = self.commit_tree([('a', blob4), ('b', blob3)])
  700. self.assertEqual(
  701. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob3.id)),
  702. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob4.id))],
  703. self.detect_renames(tree1, tree2, rewrite_threshold=60))
  704. def test_rewrite_threshold(self):
  705. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  706. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  707. blob3 = make_object(Blob, data='a\nb\nf\ng\n')
  708. tree1 = self.commit_tree([('a', blob1)])
  709. tree2 = self.commit_tree([('a', blob3), ('b', blob2)])
  710. no_renames = [
  711. TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
  712. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))]
  713. self.assertEqual(
  714. no_renames, self.detect_renames(tree1, tree2))
  715. self.assertEqual(
  716. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40))
  717. self.assertEqual(
  718. [TreeChange.add(('a', F, blob3.id)),
  719. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  720. self.detect_renames(tree1, tree2, rewrite_threshold=80))
  721. def test_find_copies_harder_exact(self):
  722. blob = make_object(Blob, data='blob')
  723. tree1 = self.commit_tree([('a', blob)])
  724. tree2 = self.commit_tree([('a', blob), ('b', blob)])
  725. self.assertEqual([TreeChange.add(('b', F, blob.id))],
  726. self.detect_renames(tree1, tree2))
  727. self.assertEqual(
  728. [TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
  729. self.detect_renames(tree1, tree2, find_copies_harder=True))
  730. def test_find_copies_harder_content(self):
  731. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  732. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  733. tree1 = self.commit_tree([('a', blob1)])
  734. tree2 = self.commit_tree([('a', blob1), ('b', blob2)])
  735. self.assertEqual([TreeChange.add(('b', F, blob2.id))],
  736. self.detect_renames(tree1, tree2))
  737. self.assertEqual(
  738. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
  739. self.detect_renames(tree1, tree2, find_copies_harder=True))
  740. def test_find_copies_harder_with_rewrites(self):
  741. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  742. blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
  743. blob_b2 = make_object(Blob, data='a\nb\nc\ne\n')
  744. tree1 = self.commit_tree([('a', blob_a1)])
  745. tree2 = self.commit_tree([('a', blob_a2), ('b', blob_b2)])
  746. self.assertEqual(
  747. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  748. ('a', F, blob_a2.id)),
  749. TreeChange(CHANGE_COPY, ('a', F, blob_a1.id), ('b', F, blob_b2.id))],
  750. self.detect_renames(tree1, tree2, find_copies_harder=True))
  751. self.assertEqual(
  752. [TreeChange.add(('a', F, blob_a2.id)),
  753. TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  754. ('b', F, blob_b2.id))],
  755. self.detect_renames(tree1, tree2, rewrite_threshold=50,
  756. find_copies_harder=True))
  757. def test_reuse_detector(self):
  758. blob = make_object(Blob, data='blob')
  759. tree1 = self.commit_tree([('a', blob)])
  760. tree2 = self.commit_tree([('b', blob)])
  761. detector = RenameDetector(self.store)
  762. changes = [TreeChange(CHANGE_RENAME, ('a', F, blob.id),
  763. ('b', F, blob.id))]
  764. self.assertEqual(changes,
  765. detector.changes_with_renames(tree1.id, tree2.id))
  766. self.assertEqual(changes,
  767. detector.changes_with_renames(tree1.id, tree2.id))
  768. def test_want_unchanged(self):
  769. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  770. blob_b = make_object(Blob, data='b')
  771. blob_c2 = make_object(Blob, data='a\nb\nc\ne\n')
  772. tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
  773. tree2 = self.commit_tree([('c', blob_c2), ('b', blob_b)])
  774. detector = RenameDetector(self.store)
  775. self.assertEqual(
  776. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  777. ('c', F, blob_c2.id))],
  778. self.detect_renames(tree1, tree2))
  779. self.assertEqual(
  780. [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  781. ('c', F, blob_c2.id)),
  782. TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
  783. ('b', F, blob_b.id))],
  784. self.detect_renames(tree1, tree2, want_unchanged=True))