test_diff_tree.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Tests for file and tree diff utilities."""
  19. from dulwich.diff_tree import (
  20. CHANGE_MODIFY,
  21. CHANGE_RENAME,
  22. CHANGE_COPY,
  23. CHANGE_UNCHANGED,
  24. TreeChange,
  25. _merge_entries,
  26. _merge_entries_py,
  27. tree_changes,
  28. _count_blocks,
  29. _similarity_score,
  30. _tree_change_key,
  31. RenameDetector,
  32. _is_tree,
  33. _is_tree_py
  34. )
  35. from dulwich.index import (
  36. commit_tree,
  37. )
  38. from dulwich.misc import (
  39. permutations,
  40. )
  41. from dulwich.object_store import (
  42. MemoryObjectStore,
  43. )
  44. from dulwich.objects import (
  45. ShaFile,
  46. Blob,
  47. TreeEntry,
  48. Tree,
  49. )
  50. from dulwich.tests import (
  51. TestCase,
  52. TestSkipped,
  53. )
  54. from dulwich.tests.utils import (
  55. make_object,
  56. )
# Shorthand for the file mode used throughout these tests; it is also the
# mode DiffTestCase.commit_tree() assigns when an entry does not give one.
F = 0100644
  59. class DiffTestCase(TestCase):
  60. def setUp(self):
  61. super(DiffTestCase, self).setUp()
  62. self.store = MemoryObjectStore()
  63. self.empty_tree = self.commit_tree([])
  64. def commit_tree(self, entries):
  65. commit_blobs = []
  66. for entry in entries:
  67. if len(entry) == 2:
  68. path, obj = entry
  69. mode = F
  70. else:
  71. path, obj, mode = entry
  72. if isinstance(obj, Blob):
  73. self.store.add_object(obj)
  74. sha = obj.id
  75. else:
  76. sha = obj
  77. commit_blobs.append((path, sha, mode))
  78. return self.store[commit_tree(self.store, commit_blobs)]
  79. class TreeChangesTest(DiffTestCase):
  80. def assertMergeFails(self, merge_entries, name, mode, sha):
  81. t = Tree()
  82. t[name] = (mode, sha)
  83. self.assertRaises(TypeError, merge_entries, '', t, t)
  84. def _do_test_merge_entries(self, merge_entries):
  85. blob_a1 = make_object(Blob, data='a1')
  86. blob_a2 = make_object(Blob, data='a2')
  87. blob_b1 = make_object(Blob, data='b1')
  88. blob_c2 = make_object(Blob, data='c2')
  89. tree1 = self.commit_tree([('a', blob_a1, 0100644),
  90. ('b', blob_b1, 0100755)])
  91. tree2 = self.commit_tree([('a', blob_a2, 0100644),
  92. ('c', blob_c2, 0100755)])
  93. self.assertEqual([], merge_entries('', self.empty_tree,
  94. self.empty_tree))
  95. self.assertEqual([
  96. ((None, None, None), ('a', 0100644, blob_a1.id)),
  97. ((None, None, None), ('b', 0100755, blob_b1.id)),
  98. ], merge_entries('', self.empty_tree, tree1))
  99. self.assertEqual([
  100. ((None, None, None), ('x/a', 0100644, blob_a1.id)),
  101. ((None, None, None), ('x/b', 0100755, blob_b1.id)),
  102. ], merge_entries('x', self.empty_tree, tree1))
  103. self.assertEqual([
  104. (('a', 0100644, blob_a2.id), (None, None, None)),
  105. (('c', 0100755, blob_c2.id), (None, None, None)),
  106. ], merge_entries('', tree2, self.empty_tree))
  107. self.assertEqual([
  108. (('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
  109. (('b', 0100755, blob_b1.id), (None, None, None)),
  110. ((None, None, None), ('c', 0100755, blob_c2.id)),
  111. ], merge_entries('', tree1, tree2))
  112. self.assertEqual([
  113. (('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
  114. ((None, None, None), ('b', 0100755, blob_b1.id)),
  115. (('c', 0100755, blob_c2.id), (None, None, None)),
  116. ], merge_entries('', tree2, tree1))
  117. self.assertMergeFails(merge_entries, 0xdeadbeef, 0100644, '1' * 40)
  118. self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
  119. self.assertMergeFails(merge_entries, 'a', 0100644, 0xdeadbeef)
  120. def test_merge_entries(self):
  121. self._do_test_merge_entries(_merge_entries_py)
  122. def test_merge_entries_extension(self):
  123. if _merge_entries is _merge_entries_py:
  124. raise TestSkipped('merge_entries extension not found')
  125. self._do_test_merge_entries(_merge_entries)
  126. def _do_test_is_tree(self, is_tree):
  127. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  128. self.assertFalse(is_tree(TreeEntry('a', 0100644, 'a' * 40)))
  129. self.assertFalse(is_tree(TreeEntry('a', 0100755, 'a' * 40)))
  130. self.assertFalse(is_tree(TreeEntry('a', 0120000, 'a' * 40)))
  131. self.assertTrue(is_tree(TreeEntry('a', 0040000, 'a' * 40)))
  132. self.assertRaises(TypeError, is_tree, TreeEntry('a', 'x', 'a' * 40))
  133. self.assertRaises(AttributeError, is_tree, 1234)
  134. def test_is_tree(self):
  135. self._do_test_is_tree(_is_tree_py)
  136. def test_is_tree_extension(self):
  137. if _is_tree is _is_tree_py:
  138. raise TestSkipped('is_tree extension not found')
  139. self._do_test_is_tree(_is_tree)
  140. def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
  141. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  142. self.assertEqual(expected, actual)
  143. # For brevity, the following tests use tuples instead of TreeEntry objects.
  144. def test_tree_changes_empty(self):
  145. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  146. def test_tree_changes_no_changes(self):
  147. blob = make_object(Blob, data='blob')
  148. tree = self.commit_tree([('a', blob), ('b/c', blob)])
  149. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  150. self.assertChangesEqual([], tree, tree)
  151. self.assertChangesEqual(
  152. [TreeChange(CHANGE_UNCHANGED, ('a', F, blob.id), ('a', F, blob.id)),
  153. TreeChange(CHANGE_UNCHANGED, ('b/c', F, blob.id),
  154. ('b/c', F, blob.id))],
  155. tree, tree, want_unchanged=True)
  156. def test_tree_changes_add_delete(self):
  157. blob_a = make_object(Blob, data='a')
  158. blob_b = make_object(Blob, data='b')
  159. tree = self.commit_tree([('a', blob_a, 0100644),
  160. ('x/b', blob_b, 0100755)])
  161. self.assertChangesEqual(
  162. [TreeChange.add(('a', 0100644, blob_a.id)),
  163. TreeChange.add(('x/b', 0100755, blob_b.id))],
  164. self.empty_tree, tree)
  165. self.assertChangesEqual(
  166. [TreeChange.delete(('a', 0100644, blob_a.id)),
  167. TreeChange.delete(('x/b', 0100755, blob_b.id))],
  168. tree, self.empty_tree)
  169. def test_tree_changes_modify_contents(self):
  170. blob_a1 = make_object(Blob, data='a1')
  171. blob_a2 = make_object(Blob, data='a2')
  172. tree1 = self.commit_tree([('a', blob_a1)])
  173. tree2 = self.commit_tree([('a', blob_a2)])
  174. self.assertChangesEqual(
  175. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  176. ('a', F, blob_a2.id))], tree1, tree2)
  177. def test_tree_changes_modify_mode(self):
  178. blob_a = make_object(Blob, data='a')
  179. tree1 = self.commit_tree([('a', blob_a, 0100644)])
  180. tree2 = self.commit_tree([('a', blob_a, 0100755)])
  181. self.assertChangesEqual(
  182. [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a.id),
  183. ('a', 0100755, blob_a.id))], tree1, tree2)
  184. def test_tree_changes_change_type(self):
  185. blob_a1 = make_object(Blob, data='a')
  186. blob_a2 = make_object(Blob, data='/foo/bar')
  187. tree1 = self.commit_tree([('a', blob_a1, 0100644)])
  188. tree2 = self.commit_tree([('a', blob_a2, 0120000)])
  189. self.assertChangesEqual(
  190. [TreeChange.delete(('a', 0100644, blob_a1.id)),
  191. TreeChange.add(('a', 0120000, blob_a2.id))],
  192. tree1, tree2)
  193. def test_tree_changes_to_tree(self):
  194. blob_a = make_object(Blob, data='a')
  195. blob_x = make_object(Blob, data='x')
  196. tree1 = self.commit_tree([('a', blob_a)])
  197. tree2 = self.commit_tree([('a/x', blob_x)])
  198. self.assertChangesEqual(
  199. [TreeChange.delete(('a', F, blob_a.id)),
  200. TreeChange.add(('a/x', F, blob_x.id))],
  201. tree1, tree2)
  202. def test_tree_changes_complex(self):
  203. blob_a_1 = make_object(Blob, data='a1_1')
  204. blob_bx1_1 = make_object(Blob, data='bx1_1')
  205. blob_bx2_1 = make_object(Blob, data='bx2_1')
  206. blob_by1_1 = make_object(Blob, data='by1_1')
  207. blob_by2_1 = make_object(Blob, data='by2_1')
  208. tree1 = self.commit_tree([
  209. ('a', blob_a_1),
  210. ('b/x/1', blob_bx1_1),
  211. ('b/x/2', blob_bx2_1),
  212. ('b/y/1', blob_by1_1),
  213. ('b/y/2', blob_by2_1),
  214. ])
  215. blob_a_2 = make_object(Blob, data='a1_2')
  216. blob_bx1_2 = blob_bx1_1
  217. blob_by_2 = make_object(Blob, data='by_2')
  218. blob_c_2 = make_object(Blob, data='c_2')
  219. tree2 = self.commit_tree([
  220. ('a', blob_a_2),
  221. ('b/x/1', blob_bx1_2),
  222. ('b/y', blob_by_2),
  223. ('c', blob_c_2),
  224. ])
  225. self.assertChangesEqual(
  226. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a_1.id),
  227. ('a', F, blob_a_2.id)),
  228. TreeChange.delete(('b/x/2', F, blob_bx2_1.id)),
  229. TreeChange.add(('b/y', F, blob_by_2.id)),
  230. TreeChange.delete(('b/y/1', F, blob_by1_1.id)),
  231. TreeChange.delete(('b/y/2', F, blob_by2_1.id)),
  232. TreeChange.add(('c', F, blob_c_2.id))],
  233. tree1, tree2)
  234. def test_tree_changes_name_order(self):
  235. blob = make_object(Blob, data='a')
  236. tree1 = self.commit_tree([('a', blob), ('a.', blob), ('a..', blob)])
  237. # Tree order is the reverse of this, so if we used tree order, 'a..'
  238. # would not be merged.
  239. tree2 = self.commit_tree([('a/x', blob), ('a./x', blob), ('a..', blob)])
  240. self.assertChangesEqual(
  241. [TreeChange.delete(('a', F, blob.id)),
  242. TreeChange.add(('a/x', F, blob.id)),
  243. TreeChange.delete(('a.', F, blob.id)),
  244. TreeChange.add(('a./x', F, blob.id))],
  245. tree1, tree2)
  246. def test_tree_changes_prune(self):
  247. blob_a1 = make_object(Blob, data='a1')
  248. blob_a2 = make_object(Blob, data='a2')
  249. blob_x = make_object(Blob, data='x')
  250. tree1 = self.commit_tree([('a', blob_a1), ('b/x', blob_x)])
  251. tree2 = self.commit_tree([('a', blob_a2), ('b/x', blob_x)])
  252. # Remove identical items so lookups will fail unless we prune.
  253. subtree = self.store[tree1['b'][1]]
  254. for entry in subtree.iteritems():
  255. del self.store[entry.sha]
  256. del self.store[subtree.id]
  257. self.assertChangesEqual(
  258. [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
  259. ('a', F, blob_a2.id))],
  260. tree1, tree2)
  261. class RenameDetectionTest(DiffTestCase):
  262. def test_count_blocks(self):
  263. blob = make_object(Blob, data='a\nb\na\n')
  264. self.assertEqual({'a\n': 2, 'b\n': 1}, _count_blocks(blob))
  265. def test_count_blocks_no_newline(self):
  266. blob = make_object(Blob, data='a\na')
  267. self.assertEqual({'a\n': 1, 'a': 1}, _count_blocks(blob))
  268. def test_count_blocks_chunks(self):
  269. blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
  270. self.assertEqual({'a\n': 2, 'b\n': 1}, _count_blocks(blob))
  271. def test_count_blocks_long_lines(self):
  272. a = 'a' * 64
  273. data = a + 'xxx\ny\n' + a + 'zzz\n'
  274. blob = make_object(Blob, data=data)
  275. self.assertEqual({'a' * 64: 2, 'xxx\n': 1, 'y\n': 1, 'zzz\n': 1},
  276. _count_blocks(blob))
  277. def assertSimilar(self, expected_score, blob1, blob2):
  278. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  279. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  280. def test_similarity_score(self):
  281. blob0 = make_object(Blob, data='')
  282. blob1 = make_object(Blob, data='ab\ncd\ncd\n')
  283. blob2 = make_object(Blob, data='ab\n')
  284. blob3 = make_object(Blob, data='cd\n')
  285. blob4 = make_object(Blob, data='cd\ncd\n')
  286. self.assertSimilar(100, blob0, blob0)
  287. self.assertSimilar(0, blob0, blob1)
  288. self.assertSimilar(33, blob1, blob2)
  289. self.assertSimilar(33, blob1, blob3)
  290. self.assertSimilar(66, blob1, blob4)
  291. self.assertSimilar(0, blob2, blob3)
  292. self.assertSimilar(50, blob3, blob4)
  293. def test_similarity_score_cache(self):
  294. blob1 = make_object(Blob, data='ab\ncd\n')
  295. blob2 = make_object(Blob, data='ab\n')
  296. block_cache = {}
  297. self.assertEqual(
  298. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  299. self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
  300. def fail_chunks():
  301. self.fail('Unexpected call to as_raw_chunks()')
  302. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  303. blob1.raw_length = lambda: 6
  304. blob2.raw_length = lambda: 3
  305. self.assertEqual(
  306. 50, _similarity_score(blob1, blob2, block_cache=block_cache))
  307. def test_tree_entry_sort(self):
  308. sha = 'abcd' * 10
  309. expected_entries = [
  310. TreeChange.add(TreeEntry('aaa', F, sha)),
  311. TreeChange(CHANGE_COPY, TreeEntry('bbb', F, sha),
  312. TreeEntry('aab', F, sha)),
  313. TreeChange(CHANGE_MODIFY, TreeEntry('bbb', F, sha),
  314. TreeEntry('bbb', F, 'dabc' * 10)),
  315. TreeChange(CHANGE_RENAME, TreeEntry('bbc', F, sha),
  316. TreeEntry('ddd', F, sha)),
  317. TreeChange.delete(TreeEntry('ccc', F, sha)),
  318. ]
  319. for perm in permutations(expected_entries):
  320. self.assertEqual(expected_entries,
  321. sorted(perm, key=_tree_change_key))
  322. def detect_renames(self, tree1, tree2, **kwargs):
  323. detector = RenameDetector(self.store, tree1.id, tree2.id, **kwargs)
  324. return detector.changes_with_renames()
  325. def test_no_renames(self):
  326. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  327. blob2 = make_object(Blob, data='a\nb\ne\nf\n')
  328. blob3 = make_object(Blob, data='a\nb\ng\nh\n')
  329. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  330. tree2 = self.commit_tree([('a', blob1), ('b', blob3)])
  331. self.assertEqual(
  332. [TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob3.id))],
  333. self.detect_renames(tree1, tree2))
  334. def test_exact_rename_one_to_one(self):
  335. blob1 = make_object(Blob, data='1')
  336. blob2 = make_object(Blob, data='2')
  337. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  338. tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
  339. self.assertEqual(
  340. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
  341. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
  342. self.detect_renames(tree1, tree2))
  343. def test_exact_rename_split_different_type(self):
  344. blob = make_object(Blob, data='/foo')
  345. tree1 = self.commit_tree([('a', blob, 0100644)])
  346. tree2 = self.commit_tree([('a', blob, 0120000)])
  347. self.assertEqual(
  348. [TreeChange.add(('a', 0120000, blob.id)),
  349. TreeChange.delete(('a', 0100644, blob.id))],
  350. self.detect_renames(tree1, tree2))
  351. def test_exact_rename_and_different_type(self):
  352. blob1 = make_object(Blob, data='1')
  353. blob2 = make_object(Blob, data='2')
  354. tree1 = self.commit_tree([('a', blob1)])
  355. tree2 = self.commit_tree([('a', blob2, 0120000), ('b', blob1)])
  356. self.assertEqual(
  357. [TreeChange.add(('a', 0120000, blob2.id)),
  358. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
  359. self.detect_renames(tree1, tree2))
  360. def test_exact_rename_one_to_many(self):
  361. blob = make_object(Blob, data='1')
  362. tree1 = self.commit_tree([('a', blob)])
  363. tree2 = self.commit_tree([('b', blob), ('c', blob)])
  364. self.assertEqual(
  365. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
  366. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
  367. self.detect_renames(tree1, tree2))
  368. def test_exact_rename_many_to_one(self):
  369. blob = make_object(Blob, data='1')
  370. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  371. tree2 = self.commit_tree([('c', blob)])
  372. self.assertEqual(
  373. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  374. TreeChange.delete(('b', F, blob.id))],
  375. self.detect_renames(tree1, tree2))
  376. def test_exact_rename_many_to_many(self):
  377. blob = make_object(Blob, data='1')
  378. tree1 = self.commit_tree([('a', blob), ('b', blob)])
  379. tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
  380. self.assertEqual(
  381. [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
  382. TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
  383. TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
  384. self.detect_renames(tree1, tree2))
  385. def test_rename_threshold(self):
  386. blob1 = make_object(Blob, data='a\nb\nc\n')
  387. blob2 = make_object(Blob, data='a\nb\nd\n')
  388. tree1 = self.commit_tree([('a', blob1)])
  389. tree2 = self.commit_tree([('b', blob2)])
  390. self.assertEqual(
  391. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  392. self.detect_renames(tree1, tree2, rename_threshold=50))
  393. self.assertEqual(
  394. [TreeChange.delete(('a', F, blob1.id)),
  395. TreeChange.add(('b', F, blob2.id))],
  396. self.detect_renames(tree1, tree2, rename_threshold=75))
  397. def test_content_rename_max_files(self):
  398. blob1 = make_object(Blob, data='a\nb\nc\nd')
  399. blob4 = make_object(Blob, data='a\nb\nc\ne\n')
  400. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  401. blob3 = make_object(Blob, data='e\nf\ng\ni\n')
  402. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  403. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  404. self.assertEqual(
  405. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('d', F, blob4.id)),
  406. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  407. self.detect_renames(tree1, tree2))
  408. self.assertEqual(
  409. [TreeChange.delete(('a', F, blob1.id)),
  410. TreeChange.delete(('b', F, blob2.id)),
  411. TreeChange.add(('c', F, blob3.id)),
  412. TreeChange.add(('d', F, blob4.id))],
  413. self.detect_renames(tree1, tree2, max_files=1))
  414. def test_content_rename_one_to_one(self):
  415. b11 = make_object(Blob, data='a\nb\nc\nd\n')
  416. b12 = make_object(Blob, data='a\nb\nc\ne\n')
  417. b21 = make_object(Blob, data='e\nf\ng\n\h')
  418. b22 = make_object(Blob, data='e\nf\ng\n\i')
  419. tree1 = self.commit_tree([('a', b11), ('b', b21)])
  420. tree2 = self.commit_tree([('c', b12), ('d', b22)])
  421. self.assertEqual(
  422. [TreeChange(CHANGE_RENAME, ('a', F, b11.id), ('c', F, b12.id)),
  423. TreeChange(CHANGE_RENAME, ('b', F, b21.id), ('d', F, b22.id))],
  424. self.detect_renames(tree1, tree2))
  425. def test_content_rename_one_to_one_ordering(self):
  426. blob1 = make_object(Blob, data='a\nb\nc\nd\ne\nf\n')
  427. blob2 = make_object(Blob, data='a\nb\nc\nd\ng\nh\n')
  428. # 6/10 match to blob1, 8/10 match to blob2
  429. blob3 = make_object(Blob, data='a\nb\nc\nd\ng\ni\n')
  430. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  431. tree2 = self.commit_tree([('c', blob3)])
  432. self.assertEqual(
  433. [TreeChange.delete(('a', F, blob1.id)),
  434. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
  435. self.detect_renames(tree1, tree2))
  436. tree3 = self.commit_tree([('a', blob2), ('b', blob1)])
  437. tree4 = self.commit_tree([('c', blob3)])
  438. self.assertEqual(
  439. [TreeChange(CHANGE_RENAME, ('a', F, blob2.id), ('c', F, blob3.id)),
  440. TreeChange.delete(('b', F, blob1.id))],
  441. self.detect_renames(tree3, tree4))
  442. def test_content_rename_one_to_many(self):
  443. blob1 = make_object(Blob, data='aa\nb\nc\nd\ne\n')
  444. blob2 = make_object(Blob, data='ab\nb\nc\nd\ne\n') # 8/11 match
  445. blob3 = make_object(Blob, data='aa\nb\nc\nd\nf\n') # 9/11 match
  446. tree1 = self.commit_tree([('a', blob1)])
  447. tree2 = self.commit_tree([('b', blob2), ('c', blob3)])
  448. self.assertEqual(
  449. [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id)),
  450. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id))],
  451. self.detect_renames(tree1, tree2))
  452. def test_content_rename_many_to_one(self):
  453. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  454. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  455. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  456. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  457. tree2 = self.commit_tree([('c', blob3)])
  458. self.assertEqual(
  459. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  460. TreeChange.delete(('b', F, blob2.id))],
  461. self.detect_renames(tree1, tree2))
  462. def test_content_rename_many_to_many(self):
  463. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  464. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  465. blob3 = make_object(Blob, data='a\nb\nc\nf\n')
  466. blob4 = make_object(Blob, data='a\nb\nc\ng\n')
  467. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  468. tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
  469. # TODO(dborowitz): Distribute renames rather than greedily choosing
  470. # copies.
  471. self.assertEqual(
  472. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
  473. TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('d', F, blob4.id)),
  474. TreeChange.delete(('b', F, blob2.id))],
  475. self.detect_renames(tree1, tree2))
  476. def test_content_rename_gitlink(self):
  477. blob1 = make_object(Blob, data='blob1')
  478. blob2 = make_object(Blob, data='blob2')
  479. link1 = '1' * 40
  480. link2 = '2' * 40
  481. tree1 = self.commit_tree([('a', blob1), ('b', link1, 0160000)])
  482. tree2 = self.commit_tree([('c', blob2), ('d', link2, 0160000)])
  483. self.assertEqual(
  484. [TreeChange.delete(('a', 0100644, blob1.id)),
  485. TreeChange.delete(('b', 0160000, link1)),
  486. TreeChange.add(('c', 0100644, blob2.id)),
  487. TreeChange.add(('d', 0160000, link2))],
  488. self.detect_renames(tree1, tree2))
  489. def test_exact_rename_swap(self):
  490. blob1 = make_object(Blob, data='1')
  491. blob2 = make_object(Blob, data='2')
  492. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  493. tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
  494. self.assertEqual(
  495. [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
  496. TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob1.id))],
  497. self.detect_renames(tree1, tree2))
  498. self.assertEqual(
  499. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id)),
  500. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob2.id))],
  501. self.detect_renames(tree1, tree2, rewrite_threshold=50))
  502. def test_content_rename_swap(self):
  503. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  504. blob2 = make_object(Blob, data='e\nf\ng\nh\n')
  505. blob3 = make_object(Blob, data='a\nb\nc\ne\n')
  506. blob4 = make_object(Blob, data='e\nf\ng\ni\n')
  507. tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
  508. tree2 = self.commit_tree([('a', blob4), ('b', blob3)])
  509. self.assertEqual(
  510. [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob3.id)),
  511. TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob4.id))],
  512. self.detect_renames(tree1, tree2, rewrite_threshold=60))
  513. def test_rewrite_threshold(self):
  514. blob1 = make_object(Blob, data='a\nb\nc\nd\n')
  515. blob2 = make_object(Blob, data='a\nb\nc\ne\n')
  516. blob3 = make_object(Blob, data='a\nb\nf\ng\n')
  517. tree1 = self.commit_tree([('a', blob1)])
  518. tree2 = self.commit_tree([('a', blob3), ('b', blob2)])
  519. no_renames = [
  520. TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
  521. TreeChange.add(('b', F, blob2.id))]
  522. self.assertEqual(
  523. no_renames, self.detect_renames(tree1, tree2))
  524. self.assertEqual(
  525. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40))
  526. self.assertEqual(
  527. [TreeChange.add(('a', F, blob3.id)),
  528. TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
  529. self.detect_renames(tree1, tree2, rewrite_threshold=80))