test_diff_tree.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Tests for file and tree diff utilities."""
  19. from dulwich.diff_tree import (
  20. CHANGE_MODIFY,
  21. CHANGE_RENAME,
  22. CHANGE_COPY,
  23. CHANGE_UNCHANGED,
  24. TreeChange,
  25. _merge_entries,
  26. _merge_entries_py,
  27. tree_changes,
  28. _count_blocks,
  29. _count_blocks_py,
  30. _similarity_score,
  31. _tree_change_key,
  32. RenameDetector,
  33. _is_tree,
  34. _is_tree_py
  35. )
  36. from dulwich.index import (
  37. commit_tree,
  38. )
  39. from dulwich.misc import (
  40. permutations,
  41. )
  42. from dulwich.object_store import (
  43. MemoryObjectStore,
  44. )
  45. from dulwich.objects import (
  46. ShaFile,
  47. Blob,
  48. TreeEntry,
  49. Tree,
  50. )
  51. from dulwich.tests import (
  52. TestCase,
  53. )
  54. from dulwich.tests.utils import (
  55. make_object,
  56. functest_builder,
  57. ext_functest_builder,
  58. )
  59. # Shorthand mode for Files.
  60. F = 0100644
  61. class DiffTestCase(TestCase):
  62. def setUp(self):
  63. super(DiffTestCase, self).setUp()
  64. self.store = MemoryObjectStore()
  65. self.empty_tree = self.commit_tree([])
  66. def commit_tree(self, entries):
  67. commit_blobs = []
  68. for entry in entries:
  69. if len(entry) == 2:
  70. path, obj = entry
  71. mode = F
  72. else:
  73. path, obj, mode = entry
  74. if isinstance(obj, Blob):
  75. self.store.add_object(obj)
  76. sha = obj.id
  77. else:
  78. sha = obj
  79. commit_blobs.append((path, sha, mode))
  80. return self.store[commit_tree(self.store, commit_blobs)]
  81. class TreeChangesTest(DiffTestCase):
  82. def assertMergeFails(self, merge_entries, name, mode, sha):
  83. t = Tree()
  84. t[name] = (mode, sha)
  85. self.assertRaises(TypeError, merge_entries, '', t, t)
  86. def _do_test_merge_entries(self, merge_entries):
  87. blob_a1 = make_object(Blob, data='a1')
  88. blob_a2 = make_object(Blob, data='a2')
  89. blob_b1 = make_object(Blob, data='b1')
  90. blob_c2 = make_object(Blob, data='c2')
  91. tree1 = self.commit_tree([('a', blob_a1, 0100644),
  92. ('b', blob_b1, 0100755)])
  93. tree2 = self.commit_tree([('a', blob_a2, 0100644),
  94. ('c', blob_c2, 0100755)])
  95. self.assertEqual([], merge_entries('', self.empty_tree,
  96. self.empty_tree))
  97. self.assertEqual([
  98. ((None, None, None), ('a', 0100644, blob_a1.id)),
  99. ((None, None, None), ('b', 0100755, blob_b1.id)),
  100. ], merge_entries('', self.empty_tree, tree1))
  101. self.assertEqual([
  102. ((None, None, None), ('x/a', 0100644, blob_a1.id)),
  103. ((None, None, None), ('x/b', 0100755, blob_b1.id)),
  104. ], merge_entries('x', self.empty_tree, tree1))
  105. self.assertEqual([
  106. (('a', 0100644, blob_a2.id), (None, None, None)),
  107. (('c', 0100755, blob_c2.id), (None, None, None)),
  108. ], merge_entries('', tree2, self.empty_tree))
  109. self.assertEqual([
  110. (('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
  111. (('b', 0100755, blob_b1.id), (None, None, None)),
  112. ((None, None, None), ('c', 0100755, blob_c2.id)),
  113. ], merge_entries('', tree1, tree2))
  114. self.assertEqual([
  115. (('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
  116. ((None, None, None), ('b', 0100755, blob_b1.id)),
  117. (('c', 0100755, blob_c2.id), (None, None, None)),
  118. ], merge_entries('', tree2, tree1))
  119. self.assertMergeFails(merge_entries, 0xdeadbeef, 0100644, '1' * 40)
  120. self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
  121. self.assertMergeFails(merge_entries, 'a', 0100644, 0xdeadbeef)
  122. test_merge_entries = functest_builder(_do_test_merge_entries,
  123. _merge_entries_py)
  124. test_merge_entries_extension = ext_functest_builder(_do_test_merge_entries,
  125. _merge_entries)
  126. def _do_test_is_tree(self, is_tree):
  127. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  128. self.assertFalse(is_tree(TreeEntry('a', 0100644, 'a' * 40)))
  129. self.assertFalse(is_tree(TreeEntry('a', 0100755, 'a' * 40)))
  130. self.assertFalse(is_tree(TreeEntry('a', 0120000, 'a' * 40)))
  131. self.assertTrue(is_tree(TreeEntry('a', 0040000, 'a' * 40)))
  132. self.assertRaises(TypeError, is_tree, TreeEntry('a', 'x', 'a' * 40))
  133. self.assertRaises(AttributeError, is_tree, 1234)
  134. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  135. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  136. def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
  137. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  138. self.assertEqual(expected, actual)
  139. # For brevity, the following tests use tuples instead of TreeEntry objects.
  140. def test_tree_changes_empty(self):
  141. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  142. def test_tree_changes_no_changes(self):
  143. blob = make_object(Blob, data='blob')
  144. tree = self.commit_tree([('a', blob), ('b/c', blob)])
  145. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  146. self.assertChangesEqual([], tree, tree)
  147. self.assertChangesEqual(
  148. [TreeChange(CHANGE_UNCHANGED, ('a', F, blob.id), ('a', F, blob.id)),
  149. TreeChange(CHANGE_UNCHANGED, ('b/c', F, blob.id),
  150. ('b/c', F, blob.id))],
  151. tree, tree, want_unchanged=True)
  152. def test_tree_changes_add_delete(self):
  153. blob_a = make_object(Blob, data='a')
  154. blob_b = make_object(Blob, data='b')
  155. tree = self.commit_tree([('a', blob_a, 0100644),
  156. ('x/b', blob_b, 0100755)])
  157. self.assertChangesEqual(
  158. [TreeChange.add(('a', 0100644, blob_a.id)),
  159. TreeChange.add(('x/b', 0100755, blob_b.id))],
  160. self.empty_tree, tree)
  161. self.assertChangesEqual(
  162. [TreeChange.delete(('a', 0100644, blob_a.id)),
  163. TreeChange.delete(('x/b', 0100755, blob_b.id))],
  164. tree, self.empty_tree)
  165. def test_tree_changes_modify_contents(self):
  166. blob_a1 = make_object(Blob, data='a1')
  167. blob_a2 = make_object(Blob, data='a2')
  168. tree1 = self.commit_tree([('a', blob_a1)])
  169. tree2 = self.commit_tree([('a', blob_a2)])
  170. self.assertChangesEqual(
  171. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  172. ('a', F, blob_a2.id))], tree1, tree2)
  173. def test_tree_changes_modify_mode(self):
  174. blob_a = make_object(Blob, data='a')
  175. tree1 = self.commit_tree([('a', blob_a, 0100644)])
  176. tree2 = self.commit_tree([('a', blob_a, 0100755)])
  177. self.assertChangesEqual(
  178. [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a.id),
  179. ('a', 0100755, blob_a.id))], tree1, tree2)
  180. def test_tree_changes_change_type(self):
  181. blob_a1 = make_object(Blob, data='a')
  182. blob_a2 = make_object(Blob, data='/foo/bar')
  183. tree1 = self.commit_tree([('a', blob_a1, 0100644)])
  184. tree2 = self.commit_tree([('a', blob_a2, 0120000)])
  185. self.assertChangesEqual(
  186. [TreeChange.delete(('a', 0100644, blob_a1.id)),
  187. TreeChange.add(('a', 0120000, blob_a2.id))],
  188. tree1, tree2)
  189. def test_tree_changes_to_tree(self):
  190. blob_a = make_object(Blob, data='a')
  191. blob_x = make_object(Blob, data='x')
  192. tree1 = self.commit_tree([('a', blob_a)])
  193. tree2 = self.commit_tree([('a/x', blob_x)])
  194. self.assertChangesEqual(
  195. [TreeChange.delete(('a', F, blob_a.id)),
  196. TreeChange.add(('a/x', F, blob_x.id))],
  197. tree1, tree2)
  198. def test_tree_changes_complex(self):
  199. blob_a_1 = make_object(Blob, data='a1_1')
  200. blob_bx1_1 = make_object(Blob, data='bx1_1')
  201. blob_bx2_1 = make_object(Blob, data='bx2_1')
  202. blob_by1_1 = make_object(Blob, data='by1_1')
  203. blob_by2_1 = make_object(Blob, data='by2_1')
  204. tree1 = self.commit_tree([
  205. ('a', blob_a_1),
  206. ('b/x/1', blob_bx1_1),
  207. ('b/x/2', blob_bx2_1),
  208. ('b/y/1', blob_by1_1),
  209. ('b/y/2', blob_by2_1),
  210. ])
  211. blob_a_2 = make_object(Blob, data='a1_2')
  212. blob_bx1_2 = blob_bx1_1
  213. blob_by_2 = make_object(Blob, data='by_2')
  214. blob_c_2 = make_object(Blob, data='c_2')
  215. tree2 = self.commit_tree([
  216. ('a', blob_a_2),
  217. ('b/x/1', blob_bx1_2),
  218. ('b/y', blob_by_2),
  219. ('c', blob_c_2),
  220. ])
  221. self.assertChangesEqual(
  222. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a_1.id),
  223. ('a', F, blob_a_2.id)),
  224. TreeChange.delete(('b/x/2', F, blob_bx2_1.id)),
  225. TreeChange.add(('b/y', F, blob_by_2.id)),
  226. TreeChange.delete(('b/y/1', F, blob_by1_1.id)),
  227. TreeChange.delete(('b/y/2', F, blob_by2_1.id)),
  228. TreeChange.add(('c', F, blob_c_2.id))],
  229. tree1, tree2)
  230. def test_tree_changes_name_order(self):
  231. blob = make_object(Blob, data='a')
  232. tree1 = self.commit_tree([('a', blob), ('a.', blob), ('a..', blob)])
  233. # Tree order is the reverse of this, so if we used tree order, 'a..'
  234. # would not be merged.
  235. tree2 = self.commit_tree([('a/x', blob), ('a./x', blob), ('a..', blob)])
  236. self.assertChangesEqual(
  237. [TreeChange.delete(('a', F, blob.id)),
  238. TreeChange.add(('a/x', F, blob.id)),
  239. TreeChange.delete(('a.', F, blob.id)),
  240. TreeChange.add(('a./x', F, blob.id))],
  241. tree1, tree2)
  242. def test_tree_changes_prune(self):
  243. blob_a1 = make_object(Blob, data='a1')
  244. blob_a2 = make_object(Blob, data='a2')
  245. blob_x = make_object(Blob, data='x')
  246. tree1 = self.commit_tree([('a', blob_a1), ('b/x', blob_x)])
  247. tree2 = self.commit_tree([('a', blob_a2), ('b/x', blob_x)])
  248. # Remove identical items so lookups will fail unless we prune.
  249. subtree = self.store[tree1['b'][1]]
  250. for entry in subtree.iteritems():
  251. del self.store[entry.sha]
  252. del self.store[subtree.id]
  253. self.assertChangesEqual(
  254. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  255. ('a', F, blob_a2.id))],
  256. tree1, tree2)
  257. class RenameDetectionTest(DiffTestCase):
  258. def _do_test_count_blocks(self, count_blocks):
  259. blob = make_object(Blob, data='a\nb\na\n')
  260. self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, count_blocks(blob))
  261. test_count_blocks = functest_builder(_do_test_count_blocks,
  262. _count_blocks_py)
  263. test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks,
  264. _count_blocks)
  265. def _do_test_count_blocks_no_newline(self, count_blocks):
  266. blob = make_object(Blob, data='a\na')
  267. self.assertEqual({hash('a\n'): 2, hash('a'): 1}, _count_blocks(blob))
  268. test_count_blocks_no_newline = functest_builder(
  269. _do_test_count_blocks_no_newline, _count_blocks_py)
  270. test_count_blocks_no_newline_extension = ext_functest_builder(
  271. _do_test_count_blocks_no_newline, _count_blocks)
  272. def _do_test_count_blocks_chunks(self, count_blocks):
  273. blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
  274. self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, _count_blocks(blob))
  275. test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
  276. _count_blocks_py)
  277. test_count_blocks_chunks_extension = ext_functest_builder(
  278. _do_test_count_blocks_chunks, _count_blocks)
  279. def _do_test_count_blocks_long_lines(self, count_blocks):
  280. a = 'a' * 64
  281. data = a + 'xxx\ny\n' + a + 'zzz\n'
  282. blob = make_object(Blob, data=data)
  283. self.assertEqual({hash('a' * 64): 128, hash('xxx\n'): 4, hash('y\n'): 2,
  284. hash('zzz\n'): 4},
  285. _count_blocks(blob))
  286. test_count_blocks_long_lines = functest_builder(
  287. _do_test_count_blocks_long_lines, _count_blocks_py)
  288. test_count_blocks_long_lines_extension = ext_functest_builder(
  289. _do_test_count_blocks_long_lines, _count_blocks)
  290. def assertSimilar(self, expected_score, blob1, blob2):
  291. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  292. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  293. def test_similarity_score(self):
  294. blob0 = make_object(Blob, data='')
  295. blob1 = make_object(Blob, data='ab\ncd\ncd\n')
  296. blob2 = make_object(Blob, data='ab\n')
  297. blob3 = make_object(Blob, data='cd\n')
  298. blob4 = make_object(Blob, data='cd\ncd\n')
  299. self.assertSimilar(100, blob0, blob0)
  300. self.assertSimilar(0, blob0, blob1)
  301. self.assertSimilar(33, blob1, blob2)
  302. self.assertSimilar(33, blob1, blob3)
  303. self.assertSimilar(66, blob1, blob4)
  304. self.assertSimilar(0, blob2, blob3)
  305. self.assertSimilar(50, blob3, blob4)
  306. def test_similarity_score_cache(self):
  307. blob1 = make_object(Blob, data='ab\ncd\n')
  308. blob2 = make_object(Blob, data='ab\n')
  309. block_cache = {}
  310. self.assertEqual(
  311. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  312. self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
  313. def fail_chunks():
  314. self.fail('Unexpected call to as_raw_chunks()')
  315. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  316. blob1.raw_length = lambda: 6
  317. blob2.raw_length = lambda: 3
  318. self.assertEqual(
  319. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  320. def test_tree_entry_sort(self):
  321. sha = 'abcd' * 10
  322. expected_entries = [
  323. TreeChange.add(TreeEntry('aaa', F, sha)),
  324. TreeChange(CHANGE_COPY, TreeEntry('bbb', F, sha),
  325. TreeEntry('aab', F, sha)),
  326. TreeChange(CHANGE_MODIFY, TreeEntry('bbb', F, sha),
  327. TreeEntry('bbb', F, 'dabc' * 10)),
  328. TreeChange(CHANGE_RENAME, TreeEntry('bbc', F, sha),
  329. TreeEntry('ddd', F, sha)),
  330. TreeChange.delete(TreeEntry('ccc', F, sha)),
  331. ]
  332. for perm in permutations(expected_entries):
  333. self.assertEqual(expected_entries,
  334. sorted(perm, key=_tree_change_key))
  335. def detect_renames(self, tree1, tree2, **kwargs):
  336. detector = RenameDetector(self.store, tree1.id, tree2.id, **kwargs)
  337. return detector.changes_with_renames()
  338. def test_no_renames(self):
  339. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  340. blob2 = make_object(Blob, data='a\nb\ne\nf\n')
  341. blob3 = make_object(Blob, data='a\nb\ng\nh\n')
  342. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  343. tree2 = self.commit_tree([('a', blob1), ('b', blob3)])
  344. self.assertEqual(
  345. [TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob3.id))],
  346. self.detect_renames(tree1, tree2))
  347. def test_exact_rename_one_to_one(self):
  348. blob1 = make_object(Blob, data='1')
  349. blob2 = make_object(Blob, data='2')
  350. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  351. tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
  352. self.assertEqual(
  353. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
  354. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
  355. self.detect_renames(tree1, tree2))
  356. def test_exact_rename_split_different_type(self):
  357. blob = make_object(Blob, data='/foo')
  358. tree1 = self.commit_tree([('a', blob, 0100644)])
  359. tree2 = self.commit_tree([('a', blob, 0120000)])
  360. self.assertEqual(
  361. [TreeChange.add(('a', 0120000, blob.id)),
  362. TreeChange.delete(('a', 0100644, blob.id))],
  363. self.detect_renames(tree1, tree2))
  364. def test_exact_rename_and_different_type(self):
  365. blob1 = make_object(Blob, data='1')
  366. blob2 = make_object(Blob, data='2')
  367. tree1 = self.commit_tree([('a', blob1)])
  368. tree2 = self.commit_tree([('a', blob2, 0120000), ('b', blob1)])
  369. self.assertEqual(
  370. [TreeChange.add(('a', 0120000, blob2.id)),
  371. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
  372. self.detect_renames(tree1, tree2))
  373. def test_exact_rename_one_to_many(self):
  374. blob = make_object(Blob, data='1')
  375. tree1 = self.commit_tree([('a', blob)])
  376. tree2 = self.commit_tree([('b', blob), ('c', blob)])
  377. self.assertEqual(
  378. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
  379. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
  380. self.detect_renames(tree1, tree2))
  381. def test_exact_rename_many_to_one(self):
  382. blob = make_object(Blob, data='1')
  383. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  384. tree2 = self.commit_tree([('c', blob)])
  385. self.assertEqual(
  386. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  387. TreeChange.delete(('b', F, blob.id))],
  388. self.detect_renames(tree1, tree2))
  389. def test_exact_rename_many_to_many(self):
  390. blob = make_object(Blob, data='1')
  391. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  392. tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
  393. self.assertEqual(
  394. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  395. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
  396. TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
  397. self.detect_renames(tree1, tree2))
  398. def test_rename_threshold(self):
  399. blob1 = make_object(Blob, data='a\nb\nc\n')
  400. blob2 = make_object(Blob, data='a\nb\nd\n')
  401. tree1 = self.commit_tree([('a', blob1)])
  402. tree2 = self.commit_tree([('b', blob2)])
  403. self.assertEqual(
  404. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  405. self.detect_renames(tree1, tree2, rename_threshold=50))
  406. self.assertEqual(
  407. [TreeChange.delete(('a', F, blob1.id)),
  408. TreeChange.add(('b', F, blob2.id))],
  409. self.detect_renames(tree1, tree2, rename_threshold=75))
  410. def test_content_rename_max_files(self):
  411. blob1 = make_object(Blob, data='a\nb\nc\nd')
  412. blob4 = make_object(Blob, data='a\nb\nc\ne\n')
  413. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  414. blob3 = make_object(Blob, data='e\nf\ng\ni\n')
  415. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  416. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  417. self.assertEqual(
  418. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('d', F, blob4.id)),
  419. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  420. self.detect_renames(tree1, tree2))
  421. self.assertEqual(
  422. [TreeChange.delete(('a', F, blob1.id)),
  423. TreeChange.delete(('b', F, blob2.id)),
  424. TreeChange.add(('c', F, blob3.id)),
  425. TreeChange.add(('d', F, blob4.id))],
  426. self.detect_renames(tree1, tree2, max_files=1))
  427. def test_content_rename_one_to_one(self):
  428. b11 = make_object(Blob, data='a\nb\nc\nd\n')
  429. b12 = make_object(Blob, data='a\nb\nc\ne\n')
  430. b21 = make_object(Blob, data='e\nf\ng\n\h')
  431. b22 = make_object(Blob, data='e\nf\ng\n\i')
  432. tree1 = self.commit_tree([('a', b11), ('b', b21)])
  433. tree2 = self.commit_tree([('c', b12), ('d', b22)])
  434. self.assertEqual(
  435. [TreeChange(CHANGE_RENAME, ('a', F, b11.id), ('c', F, b12.id)),
  436. TreeChange(CHANGE_RENAME, ('b', F, b21.id), ('d', F, b22.id))],
  437. self.detect_renames(tree1, tree2))
  438. def test_content_rename_one_to_one_ordering(self):
  439. blob1 = make_object(Blob, data='a\nb\nc\nd\ne\nf\n')
  440. blob2 = make_object(Blob, data='a\nb\nc\nd\ng\nh\n')
  441. # 6/10 match to blob1, 8/10 match to blob2
  442. blob3 = make_object(Blob, data='a\nb\nc\nd\ng\ni\n')
  443. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  444. tree2 = self.commit_tree([('c', blob3)])
  445. self.assertEqual(
  446. [TreeChange.delete(('a', F, blob1.id)),
  447. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  448. self.detect_renames(tree1, tree2))
  449. tree3 = self.commit_tree([('a', blob2), ('b', blob1)])
  450. tree4 = self.commit_tree([('c', blob3)])
  451. self.assertEqual(
  452. [TreeChange(CHANGE_RENAME, ('a', F, blob2.id), ('c', F, blob3.id)),
  453. TreeChange.delete(('b', F, blob1.id))],
  454. self.detect_renames(tree3, tree4))
  455. def test_content_rename_one_to_many(self):
  456. blob1 = make_object(Blob, data='aa\nb\nc\nd\ne\n')
  457. blob2 = make_object(Blob, data='ab\nb\nc\nd\ne\n') # 8/11 match
  458. blob3 = make_object(Blob, data='aa\nb\nc\nd\nf\n') # 9/11 match
  459. tree1 = self.commit_tree([('a', blob1)])
  460. tree2 = self.commit_tree([('b', blob2), ('c', blob3)])
  461. self.assertEqual(
  462. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id)),
  463. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id))],
  464. self.detect_renames(tree1, tree2))
  465. def test_content_rename_many_to_one(self):
  466. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  467. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  468. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  469. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  470. tree2 = self.commit_tree([('c', blob3)])
  471. self.assertEqual(
  472. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  473. TreeChange.delete(('b', F, blob2.id))],
  474. self.detect_renames(tree1, tree2))
  475. def test_content_rename_many_to_many(self):
  476. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  477. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  478. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  479. blob4 = make_object(Blob, data='a\nb\nc\ng\n')
  480. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  481. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  482. # TODO(dborowitz): Distribute renames rather than greedily choosing
  483. # copies.
  484. self.assertEqual(
  485. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  486. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('d', F, blob4.id)),
  487. TreeChange.delete(('b', F, blob2.id))],
  488. self.detect_renames(tree1, tree2))
  489. def test_content_rename_gitlink(self):
  490. blob1 = make_object(Blob, data='blob1')
  491. blob2 = make_object(Blob, data='blob2')
  492. link1 = '1' * 40
  493. link2 = '2' * 40
  494. tree1 = self.commit_tree([('a', blob1), ('b', link1, 0160000)])
  495. tree2 = self.commit_tree([('c', blob2), ('d', link2, 0160000)])
  496. self.assertEqual(
  497. [TreeChange.delete(('a', 0100644, blob1.id)),
  498. TreeChange.delete(('b', 0160000, link1)),
  499. TreeChange.add(('c', 0100644, blob2.id)),
  500. TreeChange.add(('d', 0160000, link2))],
  501. self.detect_renames(tree1, tree2))
  502. def test_exact_rename_swap(self):
  503. blob1 = make_object(Blob, data='1')
  504. blob2 = make_object(Blob, data='2')
  505. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  506. tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
  507. self.assertEqual(
  508. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  509. TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob1.id))],
  510. self.detect_renames(tree1, tree2))
  511. self.assertEqual(
  512. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id)),
  513. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob2.id))],
  514. self.detect_renames(tree1, tree2, rewrite_threshold=50))
  515. def test_content_rename_swap(self):
  516. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  517. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  518. blob3 = make_object(Blob, data='a\nb\nc\ne\n')
  519. blob4 = make_object(Blob, data='e\nf\ng\ni\n')
  520. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  521. tree2 = self.commit_tree([('a', blob4), ('b', blob3)])
  522. self.assertEqual(
  523. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob3.id)),
  524. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob4.id))],
  525. self.detect_renames(tree1, tree2, rewrite_threshold=60))
  526. def test_rewrite_threshold(self):
  527. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  528. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  529. blob3 = make_object(Blob, data='a\nb\nf\ng\n')
  530. tree1 = self.commit_tree([('a', blob1)])
  531. tree2 = self.commit_tree([('a', blob3), ('b', blob2)])
  532. no_renames = [
  533. TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
  534. TreeChange.add(('b', F, blob2.id))]
  535. self.assertEqual(
  536. no_renames, self.detect_renames(tree1, tree2))
  537. self.assertEqual(
  538. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40))
  539. self.assertEqual(
  540. [TreeChange.add(('a', F, blob3.id)),
  541. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  542. self.detect_renames(tree1, tree2, rewrite_threshold=80))
  543. def test_find_copies_harder_exact(self):
  544. blob = make_object(Blob, data='blob')
  545. tree1 = self.commit_tree([('a', blob)])
  546. tree2 = self.commit_tree([('a', blob), ('b', blob)])
  547. self.assertEqual([TreeChange.add(('b', F, blob.id))],
  548. self.detect_renames(tree1, tree2))
  549. self.assertEqual(
  550. [TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
  551. self.detect_renames(tree1, tree2, find_copies_harder=True))
  552. def test_find_copies_harder_content(self):
  553. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  554. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  555. tree1 = self.commit_tree([('a', blob1)])
  556. tree2 = self.commit_tree([('a', blob1), ('b', blob2)])
  557. self.assertEqual([TreeChange.add(('b', F, blob2.id))],
  558. self.detect_renames(tree1, tree2))
  559. self.assertEqual(
  560. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
  561. self.detect_renames(tree1, tree2, find_copies_harder=True))
  562. def test_find_copies_harder_modify(self):
  563. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  564. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  565. tree1 = self.commit_tree([('a', blob1)])
  566. tree2 = self.commit_tree([('a', blob2), ('b', blob2)])
  567. self.assertEqual(
  568. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  569. TreeChange.add(('b', F, blob2.id))],
  570. self.detect_renames(tree1, tree2))
  571. self.assertEqual(
  572. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  573. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
  574. self.detect_renames(tree1, tree2, find_copies_harder=True))
  575. def test_find_copies_harder_with_rewrites(self):
  576. blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
  577. blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
  578. blob_b2 = make_object(Blob, data='a\nb\nc\ne\n')
  579. tree1 = self.commit_tree([('a', blob_a1)])
  580. tree2 = self.commit_tree([('a', blob_a2), ('b', blob_b2)])
  581. self.assertEqual(
  582. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  583. ('a', F, blob_a2.id)),
  584. TreeChange(CHANGE_COPY, ('a', F, blob_a1.id), ('b', F, blob_b2.id))],
  585. self.detect_renames(tree1, tree2, find_copies_harder=True))
  586. self.assertEqual(
  587. [TreeChange.add(('a', F, blob_a2.id)),
  588. TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
  589. ('b', F, blob_b2.id))],
  590. self.detect_renames(tree1, tree2, rewrite_threshold=50,
  591. find_copies_harder=True))