test_diff_tree.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as public by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Tests for file and tree diff utilities."""
  21. from itertools import permutations
  22. from dulwich.diff_tree import (
  23. CHANGE_MODIFY,
  24. CHANGE_RENAME,
  25. CHANGE_COPY,
  26. CHANGE_UNCHANGED,
  27. TreeChange,
  28. _merge_entries,
  29. _merge_entries_py,
  30. tree_changes,
  31. tree_changes_for_merge,
  32. _count_blocks,
  33. _count_blocks_py,
  34. _similarity_score,
  35. _tree_change_key,
  36. RenameDetector,
  37. _is_tree,
  38. _is_tree_py,
  39. )
  40. from dulwich.index import (
  41. commit_tree,
  42. )
  43. from dulwich.object_store import (
  44. MemoryObjectStore,
  45. )
  46. from dulwich.objects import (
  47. ShaFile,
  48. Blob,
  49. TreeEntry,
  50. Tree,
  51. )
  52. from dulwich.tests import (
  53. TestCase,
  54. )
  55. from dulwich.tests.utils import (
  56. F,
  57. make_object,
  58. functest_builder,
  59. ext_functest_builder,
  60. )
  61. class DiffTestCase(TestCase):
  62. def setUp(self):
  63. super(DiffTestCase, self).setUp()
  64. self.store = MemoryObjectStore()
  65. self.empty_tree = self.commit_tree([])
  66. def commit_tree(self, entries):
  67. commit_blobs = []
  68. for entry in entries:
  69. if len(entry) == 2:
  70. path, obj = entry
  71. mode = F
  72. else:
  73. path, obj, mode = entry
  74. if isinstance(obj, Blob):
  75. self.store.add_object(obj)
  76. sha = obj.id
  77. else:
  78. sha = obj
  79. commit_blobs.append((path, sha, mode))
  80. return self.store[commit_tree(self.store, commit_blobs)]
  81. class TreeChangesTest(DiffTestCase):
  82. def setUp(self):
  83. super(TreeChangesTest, self).setUp()
  84. self.detector = RenameDetector(self.store)
  85. def assertMergeFails(self, merge_entries, name, mode, sha):
  86. t = Tree()
  87. t[name] = (mode, sha)
  88. self.assertRaises((TypeError, ValueError), merge_entries, "", t, t)
  89. def _do_test_merge_entries(self, merge_entries):
  90. blob_a1 = make_object(Blob, data=b"a1")
  91. blob_a2 = make_object(Blob, data=b"a2")
  92. blob_b1 = make_object(Blob, data=b"b1")
  93. blob_c2 = make_object(Blob, data=b"c2")
  94. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644), (b"b", blob_b1, 0o100755)])
  95. tree2 = self.commit_tree([(b"a", blob_a2, 0o100644), (b"c", blob_c2, 0o100755)])
  96. self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
  97. self.assertEqual(
  98. [
  99. ((None, None, None), (b"a", 0o100644, blob_a1.id)),
  100. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  101. ],
  102. merge_entries(b"", self.empty_tree, tree1),
  103. )
  104. self.assertEqual(
  105. [
  106. ((None, None, None), (b"x/a", 0o100644, blob_a1.id)),
  107. ((None, None, None), (b"x/b", 0o100755, blob_b1.id)),
  108. ],
  109. merge_entries(b"x", self.empty_tree, tree1),
  110. )
  111. self.assertEqual(
  112. [
  113. ((b"a", 0o100644, blob_a2.id), (None, None, None)),
  114. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  115. ],
  116. merge_entries(b"", tree2, self.empty_tree),
  117. )
  118. self.assertEqual(
  119. [
  120. ((b"a", 0o100644, blob_a1.id), (b"a", 0o100644, blob_a2.id)),
  121. ((b"b", 0o100755, blob_b1.id), (None, None, None)),
  122. ((None, None, None), (b"c", 0o100755, blob_c2.id)),
  123. ],
  124. merge_entries(b"", tree1, tree2),
  125. )
  126. self.assertEqual(
  127. [
  128. ((b"a", 0o100644, blob_a2.id), (b"a", 0o100644, blob_a1.id)),
  129. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  130. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  131. ],
  132. merge_entries(b"", tree2, tree1),
  133. )
  134. self.assertMergeFails(merge_entries, 0xDEADBEEF, 0o100644, "1" * 40)
  135. self.assertMergeFails(merge_entries, b"a", b"deadbeef", "1" * 40)
  136. self.assertMergeFails(merge_entries, b"a", 0o100644, 0xDEADBEEF)
  137. test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py)
  138. test_merge_entries_extension = ext_functest_builder(
  139. _do_test_merge_entries, _merge_entries
  140. )
  141. def _do_test_is_tree(self, is_tree):
  142. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  143. self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
  144. self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
  145. self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
  146. self.assertTrue(is_tree(TreeEntry(b"a", 0o040000, b"a" * 40)))
  147. self.assertRaises(TypeError, is_tree, TreeEntry(b"a", b"x", b"a" * 40))
  148. self.assertRaises(AttributeError, is_tree, 1234)
  149. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  150. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  151. def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
  152. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  153. self.assertEqual(expected, actual)
  154. # For brevity, the following tests use tuples instead of TreeEntry objects.
  155. def test_tree_changes_empty(self):
  156. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  157. def test_tree_changes_no_changes(self):
  158. blob = make_object(Blob, data=b"blob")
  159. tree = self.commit_tree([(b"a", blob), (b"b/c", blob)])
  160. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  161. self.assertChangesEqual([], tree, tree)
  162. self.assertChangesEqual(
  163. [
  164. TreeChange(CHANGE_UNCHANGED, (b"a", F, blob.id), (b"a", F, blob.id)),
  165. TreeChange(
  166. CHANGE_UNCHANGED,
  167. (b"b/c", F, blob.id),
  168. (b"b/c", F, blob.id),
  169. ),
  170. ],
  171. tree,
  172. tree,
  173. want_unchanged=True,
  174. )
  175. def test_tree_changes_add_delete(self):
  176. blob_a = make_object(Blob, data=b"a")
  177. blob_b = make_object(Blob, data=b"b")
  178. tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
  179. self.assertChangesEqual(
  180. [
  181. TreeChange.add((b"a", 0o100644, blob_a.id)),
  182. TreeChange.add((b"x/b", 0o100755, blob_b.id)),
  183. ],
  184. self.empty_tree,
  185. tree,
  186. )
  187. self.assertChangesEqual(
  188. [
  189. TreeChange.delete((b"a", 0o100644, blob_a.id)),
  190. TreeChange.delete((b"x/b", 0o100755, blob_b.id)),
  191. ],
  192. tree,
  193. self.empty_tree,
  194. )
  195. def test_tree_changes_modify_contents(self):
  196. blob_a1 = make_object(Blob, data=b"a1")
  197. blob_a2 = make_object(Blob, data=b"a2")
  198. tree1 = self.commit_tree([(b"a", blob_a1)])
  199. tree2 = self.commit_tree([(b"a", blob_a2)])
  200. self.assertChangesEqual(
  201. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  202. tree1,
  203. tree2,
  204. )
  205. def test_tree_changes_modify_mode(self):
  206. blob_a = make_object(Blob, data=b"a")
  207. tree1 = self.commit_tree([(b"a", blob_a, 0o100644)])
  208. tree2 = self.commit_tree([(b"a", blob_a, 0o100755)])
  209. self.assertChangesEqual(
  210. [
  211. TreeChange(
  212. CHANGE_MODIFY,
  213. (b"a", 0o100644, blob_a.id),
  214. (b"a", 0o100755, blob_a.id),
  215. )
  216. ],
  217. tree1,
  218. tree2,
  219. )
  220. def test_tree_changes_change_type(self):
  221. blob_a1 = make_object(Blob, data=b"a")
  222. blob_a2 = make_object(Blob, data=b"/foo/bar")
  223. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  224. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  225. self.assertChangesEqual(
  226. [
  227. TreeChange.delete((b"a", 0o100644, blob_a1.id)),
  228. TreeChange.add((b"a", 0o120000, blob_a2.id)),
  229. ],
  230. tree1,
  231. tree2,
  232. )
  233. def test_tree_changes_change_type_same(self):
  234. blob_a1 = make_object(Blob, data=b"a")
  235. blob_a2 = make_object(Blob, data=b"/foo/bar")
  236. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  237. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  238. self.assertChangesEqual(
  239. [
  240. TreeChange(
  241. CHANGE_MODIFY,
  242. (b"a", 0o100644, blob_a1.id),
  243. (b"a", 0o120000, blob_a2.id),
  244. )
  245. ],
  246. tree1,
  247. tree2,
  248. change_type_same=True,
  249. )
  250. def test_tree_changes_to_tree(self):
  251. blob_a = make_object(Blob, data=b"a")
  252. blob_x = make_object(Blob, data=b"x")
  253. tree1 = self.commit_tree([(b"a", blob_a)])
  254. tree2 = self.commit_tree([(b"a/x", blob_x)])
  255. self.assertChangesEqual(
  256. [
  257. TreeChange.delete((b"a", F, blob_a.id)),
  258. TreeChange.add((b"a/x", F, blob_x.id)),
  259. ],
  260. tree1,
  261. tree2,
  262. )
  263. def test_tree_changes_complex(self):
  264. blob_a_1 = make_object(Blob, data=b"a1_1")
  265. blob_bx1_1 = make_object(Blob, data=b"bx1_1")
  266. blob_bx2_1 = make_object(Blob, data=b"bx2_1")
  267. blob_by1_1 = make_object(Blob, data=b"by1_1")
  268. blob_by2_1 = make_object(Blob, data=b"by2_1")
  269. tree1 = self.commit_tree(
  270. [
  271. (b"a", blob_a_1),
  272. (b"b/x/1", blob_bx1_1),
  273. (b"b/x/2", blob_bx2_1),
  274. (b"b/y/1", blob_by1_1),
  275. (b"b/y/2", blob_by2_1),
  276. ]
  277. )
  278. blob_a_2 = make_object(Blob, data=b"a1_2")
  279. blob_bx1_2 = blob_bx1_1
  280. blob_by_2 = make_object(Blob, data=b"by_2")
  281. blob_c_2 = make_object(Blob, data=b"c_2")
  282. tree2 = self.commit_tree(
  283. [
  284. (b"a", blob_a_2),
  285. (b"b/x/1", blob_bx1_2),
  286. (b"b/y", blob_by_2),
  287. (b"c", blob_c_2),
  288. ]
  289. )
  290. self.assertChangesEqual(
  291. [
  292. TreeChange(
  293. CHANGE_MODIFY,
  294. (b"a", F, blob_a_1.id),
  295. (b"a", F, blob_a_2.id),
  296. ),
  297. TreeChange.delete((b"b/x/2", F, blob_bx2_1.id)),
  298. TreeChange.add((b"b/y", F, blob_by_2.id)),
  299. TreeChange.delete((b"b/y/1", F, blob_by1_1.id)),
  300. TreeChange.delete((b"b/y/2", F, blob_by2_1.id)),
  301. TreeChange.add((b"c", F, blob_c_2.id)),
  302. ],
  303. tree1,
  304. tree2,
  305. )
  306. def test_tree_changes_name_order(self):
  307. blob = make_object(Blob, data=b"a")
  308. tree1 = self.commit_tree([(b"a", blob), (b"a.", blob), (b"a..", blob)])
  309. # Tree order is the reverse of this, so if we used tree order, 'a..'
  310. # would not be merged.
  311. tree2 = self.commit_tree([(b"a/x", blob), (b"a./x", blob), (b"a..", blob)])
  312. self.assertChangesEqual(
  313. [
  314. TreeChange.delete((b"a", F, blob.id)),
  315. TreeChange.add((b"a/x", F, blob.id)),
  316. TreeChange.delete((b"a.", F, blob.id)),
  317. TreeChange.add((b"a./x", F, blob.id)),
  318. ],
  319. tree1,
  320. tree2,
  321. )
  322. def test_tree_changes_prune(self):
  323. blob_a1 = make_object(Blob, data=b"a1")
  324. blob_a2 = make_object(Blob, data=b"a2")
  325. blob_x = make_object(Blob, data=b"x")
  326. tree1 = self.commit_tree([(b"a", blob_a1), (b"b/x", blob_x)])
  327. tree2 = self.commit_tree([(b"a", blob_a2), (b"b/x", blob_x)])
  328. # Remove identical items so lookups will fail unless we prune.
  329. subtree = self.store[tree1[b"b"][1]]
  330. for entry in subtree.items():
  331. del self.store[entry.sha]
  332. del self.store[subtree.id]
  333. self.assertChangesEqual(
  334. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  335. tree1,
  336. tree2,
  337. )
  338. def test_tree_changes_rename_detector(self):
  339. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  340. blob_a2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  341. blob_b = make_object(Blob, data=b"b")
  342. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  343. tree2 = self.commit_tree([(b"c", blob_a2), (b"b", blob_b)])
  344. detector = RenameDetector(self.store)
  345. self.assertChangesEqual(
  346. [
  347. TreeChange.delete((b"a", F, blob_a1.id)),
  348. TreeChange.add((b"c", F, blob_a2.id)),
  349. ],
  350. tree1,
  351. tree2,
  352. )
  353. self.assertChangesEqual(
  354. [
  355. TreeChange.delete((b"a", F, blob_a1.id)),
  356. TreeChange(
  357. CHANGE_UNCHANGED,
  358. (b"b", F, blob_b.id),
  359. (b"b", F, blob_b.id),
  360. ),
  361. TreeChange.add((b"c", F, blob_a2.id)),
  362. ],
  363. tree1,
  364. tree2,
  365. want_unchanged=True,
  366. )
  367. self.assertChangesEqual(
  368. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id))],
  369. tree1,
  370. tree2,
  371. rename_detector=detector,
  372. )
  373. self.assertChangesEqual(
  374. [
  375. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id)),
  376. TreeChange(
  377. CHANGE_UNCHANGED,
  378. (b"b", F, blob_b.id),
  379. (b"b", F, blob_b.id),
  380. ),
  381. ],
  382. tree1,
  383. tree2,
  384. rename_detector=detector,
  385. want_unchanged=True,
  386. )
  387. def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree, **kwargs):
  388. parent_tree_ids = [t.id for t in parent_trees]
  389. actual = list(
  390. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  391. )
  392. self.assertEqual(expected, actual)
  393. parent_tree_ids.reverse()
  394. expected = [list(reversed(cs)) for cs in expected]
  395. actual = list(
  396. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  397. )
  398. self.assertEqual(expected, actual)
  399. def test_tree_changes_for_merge_add_no_conflict(self):
  400. blob = make_object(Blob, data=b"blob")
  401. parent1 = self.commit_tree([])
  402. parent2 = merge = self.commit_tree([(b"a", blob)])
  403. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  404. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  405. def test_tree_changes_for_merge_add_modify_conflict(self):
  406. blob1 = make_object(Blob, data=b"1")
  407. blob2 = make_object(Blob, data=b"2")
  408. parent1 = self.commit_tree([])
  409. parent2 = self.commit_tree([(b"a", blob1)])
  410. merge = self.commit_tree([(b"a", blob2)])
  411. self.assertChangesForMergeEqual(
  412. [
  413. [
  414. TreeChange.add((b"a", F, blob2.id)),
  415. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  416. ]
  417. ],
  418. [parent1, parent2],
  419. merge,
  420. )
  421. def test_tree_changes_for_merge_modify_modify_conflict(self):
  422. blob1 = make_object(Blob, data=b"1")
  423. blob2 = make_object(Blob, data=b"2")
  424. blob3 = make_object(Blob, data=b"3")
  425. parent1 = self.commit_tree([(b"a", blob1)])
  426. parent2 = self.commit_tree([(b"a", blob2)])
  427. merge = self.commit_tree([(b"a", blob3)])
  428. self.assertChangesForMergeEqual(
  429. [
  430. [
  431. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  432. TreeChange(CHANGE_MODIFY, (b"a", F, blob2.id), (b"a", F, blob3.id)),
  433. ]
  434. ],
  435. [parent1, parent2],
  436. merge,
  437. )
  438. def test_tree_changes_for_merge_modify_no_conflict(self):
  439. blob1 = make_object(Blob, data=b"1")
  440. blob2 = make_object(Blob, data=b"2")
  441. parent1 = self.commit_tree([(b"a", blob1)])
  442. parent2 = merge = self.commit_tree([(b"a", blob2)])
  443. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  444. def test_tree_changes_for_merge_delete_delete_conflict(self):
  445. blob1 = make_object(Blob, data=b"1")
  446. blob2 = make_object(Blob, data=b"2")
  447. parent1 = self.commit_tree([(b"a", blob1)])
  448. parent2 = self.commit_tree([(b"a", blob2)])
  449. merge = self.commit_tree([])
  450. self.assertChangesForMergeEqual(
  451. [
  452. [
  453. TreeChange.delete((b"a", F, blob1.id)),
  454. TreeChange.delete((b"a", F, blob2.id)),
  455. ]
  456. ],
  457. [parent1, parent2],
  458. merge,
  459. )
  460. def test_tree_changes_for_merge_delete_no_conflict(self):
  461. blob = make_object(Blob, data=b"blob")
  462. has = self.commit_tree([(b"a", blob)])
  463. doesnt_have = self.commit_tree([])
  464. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  465. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  466. def test_tree_changes_for_merge_octopus_no_conflict(self):
  467. r = list(range(5))
  468. blobs = [make_object(Blob, data=bytes(i)) for i in r]
  469. parents = [self.commit_tree([(b"a", blobs[i])]) for i in r]
  470. for i in r:
  471. # Take the SHA from each of the parents.
  472. self.assertChangesForMergeEqual([], parents, parents[i])
  473. def test_tree_changes_for_merge_octopus_modify_conflict(self):
  474. # Because the octopus merge strategy is limited, I doubt it's possible
  475. # to create this with the git command line. But the output is well-
  476. # defined, so test it anyway.
  477. r = list(range(5))
  478. parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
  479. merge_blob = make_object(Blob, data=b"merge")
  480. parents = [self.commit_tree([(b"a", parent_blobs[i])]) for i in r]
  481. merge = self.commit_tree([(b"a", merge_blob)])
  482. expected = [
  483. [
  484. TreeChange(
  485. CHANGE_MODIFY,
  486. (b"a", F, parent_blobs[i].id),
  487. (b"a", F, merge_blob.id),
  488. )
  489. for i in r
  490. ]
  491. ]
  492. self.assertChangesForMergeEqual(expected, parents, merge)
  493. def test_tree_changes_for_merge_octopus_delete(self):
  494. blob1 = make_object(Blob, data=b"1")
  495. blob2 = make_object(Blob, data=b"3")
  496. parent1 = self.commit_tree([(b"a", blob1)])
  497. parent2 = self.commit_tree([(b"a", blob2)])
  498. parent3 = merge = self.commit_tree([])
  499. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  500. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  501. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  502. self.assertChangesForMergeEqual(
  503. [
  504. [
  505. TreeChange.delete((b"a", F, blob1.id)),
  506. TreeChange.delete((b"a", F, blob2.id)),
  507. None,
  508. ]
  509. ],
  510. [parent1, parent2, parent3],
  511. merge,
  512. )
  513. def test_tree_changes_for_merge_add_add_same_conflict(self):
  514. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  515. parent1 = self.commit_tree([(b"a", blob)])
  516. parent2 = self.commit_tree([])
  517. merge = self.commit_tree([(b"b", blob)])
  518. add = TreeChange.add((b"b", F, blob.id))
  519. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  520. def test_tree_changes_for_merge_add_exact_rename_conflict(self):
  521. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  522. parent1 = self.commit_tree([(b"a", blob)])
  523. parent2 = self.commit_tree([])
  524. merge = self.commit_tree([(b"b", blob)])
  525. self.assertChangesForMergeEqual(
  526. [
  527. [
  528. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  529. TreeChange.add((b"b", F, blob.id)),
  530. ]
  531. ],
  532. [parent1, parent2],
  533. merge,
  534. rename_detector=self.detector,
  535. )
  536. def test_tree_changes_for_merge_add_content_rename_conflict(self):
  537. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  538. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  539. parent1 = self.commit_tree([(b"a", blob1)])
  540. parent2 = self.commit_tree([])
  541. merge = self.commit_tree([(b"b", blob2)])
  542. self.assertChangesForMergeEqual(
  543. [
  544. [
  545. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  546. TreeChange.add((b"b", F, blob2.id)),
  547. ]
  548. ],
  549. [parent1, parent2],
  550. merge,
  551. rename_detector=self.detector,
  552. )
  553. def test_tree_changes_for_merge_modify_rename_conflict(self):
  554. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  555. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  556. parent1 = self.commit_tree([(b"a", blob1)])
  557. parent2 = self.commit_tree([(b"b", blob1)])
  558. merge = self.commit_tree([(b"b", blob2)])
  559. self.assertChangesForMergeEqual(
  560. [
  561. [
  562. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  563. TreeChange(CHANGE_MODIFY, (b"b", F, blob1.id), (b"b", F, blob2.id)),
  564. ]
  565. ],
  566. [parent1, parent2],
  567. merge,
  568. rename_detector=self.detector,
  569. )
  570. class RenameDetectionTest(DiffTestCase):
  571. def _do_test_count_blocks(self, count_blocks):
  572. blob = make_object(Blob, data=b"a\nb\na\n")
  573. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, count_blocks(blob))
  574. test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py)
  575. test_count_blocks_extension = ext_functest_builder(
  576. _do_test_count_blocks, _count_blocks
  577. )
  578. def _do_test_count_blocks_no_newline(self, count_blocks):
  579. blob = make_object(Blob, data=b"a\na")
  580. self.assertBlockCountEqual({b"a\n": 2, b"a": 1}, _count_blocks(blob))
  581. test_count_blocks_no_newline = functest_builder(
  582. _do_test_count_blocks_no_newline, _count_blocks_py
  583. )
  584. test_count_blocks_no_newline_extension = ext_functest_builder(
  585. _do_test_count_blocks_no_newline, _count_blocks
  586. )
  587. def assertBlockCountEqual(self, expected, got):
  588. self.assertEqual(
  589. {(hash(l) & 0xFFFFFFFF): c for (l, c) in expected.items()},
  590. {(h & 0xFFFFFFFF): c for (h, c) in got.items()},
  591. )
  592. def _do_test_count_blocks_chunks(self, count_blocks):
  593. blob = ShaFile.from_raw_chunks(Blob.type_num, [b"a\nb", b"\na\n"])
  594. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, _count_blocks(blob))
  595. test_count_blocks_chunks = functest_builder(
  596. _do_test_count_blocks_chunks, _count_blocks_py
  597. )
  598. test_count_blocks_chunks_extension = ext_functest_builder(
  599. _do_test_count_blocks_chunks, _count_blocks
  600. )
  601. def _do_test_count_blocks_long_lines(self, count_blocks):
  602. a = b"a" * 64
  603. data = a + b"xxx\ny\n" + a + b"zzz\n"
  604. blob = make_object(Blob, data=data)
  605. self.assertBlockCountEqual(
  606. {b"a" * 64: 128, b"xxx\n": 4, b"y\n": 2, b"zzz\n": 4},
  607. _count_blocks(blob),
  608. )
  609. test_count_blocks_long_lines = functest_builder(
  610. _do_test_count_blocks_long_lines, _count_blocks_py
  611. )
  612. test_count_blocks_long_lines_extension = ext_functest_builder(
  613. _do_test_count_blocks_long_lines, _count_blocks
  614. )
  615. def assertSimilar(self, expected_score, blob1, blob2):
  616. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  617. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  618. def test_similarity_score(self):
  619. blob0 = make_object(Blob, data=b"")
  620. blob1 = make_object(Blob, data=b"ab\ncd\ncd\n")
  621. blob2 = make_object(Blob, data=b"ab\n")
  622. blob3 = make_object(Blob, data=b"cd\n")
  623. blob4 = make_object(Blob, data=b"cd\ncd\n")
  624. self.assertSimilar(100, blob0, blob0)
  625. self.assertSimilar(0, blob0, blob1)
  626. self.assertSimilar(33, blob1, blob2)
  627. self.assertSimilar(33, blob1, blob3)
  628. self.assertSimilar(66, blob1, blob4)
  629. self.assertSimilar(0, blob2, blob3)
  630. self.assertSimilar(50, blob3, blob4)
  631. def test_similarity_score_cache(self):
  632. blob1 = make_object(Blob, data=b"ab\ncd\n")
  633. blob2 = make_object(Blob, data=b"ab\n")
  634. block_cache = {}
  635. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  636. self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
  637. def fail_chunks():
  638. self.fail("Unexpected call to as_raw_chunks()")
  639. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  640. blob1.raw_length = lambda: 6
  641. blob2.raw_length = lambda: 3
  642. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  643. def test_tree_entry_sort(self):
  644. sha = "abcd" * 10
  645. expected_entries = [
  646. TreeChange.add(TreeEntry(b"aaa", F, sha)),
  647. TreeChange(
  648. CHANGE_COPY,
  649. TreeEntry(b"bbb", F, sha),
  650. TreeEntry(b"aab", F, sha),
  651. ),
  652. TreeChange(
  653. CHANGE_MODIFY,
  654. TreeEntry(b"bbb", F, sha),
  655. TreeEntry(b"bbb", F, b"dabc" * 10),
  656. ),
  657. TreeChange(
  658. CHANGE_RENAME,
  659. TreeEntry(b"bbc", F, sha),
  660. TreeEntry(b"ddd", F, sha),
  661. ),
  662. TreeChange.delete(TreeEntry(b"ccc", F, sha)),
  663. ]
  664. for perm in permutations(expected_entries):
  665. self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key))
  666. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  667. detector = RenameDetector(self.store, **kwargs)
  668. return detector.changes_with_renames(
  669. tree1.id, tree2.id, want_unchanged=want_unchanged
  670. )
  671. def test_no_renames(self):
  672. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  673. blob2 = make_object(Blob, data=b"a\nb\ne\nf\n")
  674. blob3 = make_object(Blob, data=b"a\nb\ng\nh\n")
  675. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  676. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob3)])
  677. self.assertEqual(
  678. [TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob3.id))],
  679. self.detect_renames(tree1, tree2),
  680. )
  681. def test_exact_rename_one_to_one(self):
  682. blob1 = make_object(Blob, data=b"1")
  683. blob2 = make_object(Blob, data=b"2")
  684. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  685. tree2 = self.commit_tree([(b"c", blob1), (b"d", blob2)])
  686. self.assertEqual(
  687. [
  688. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob1.id)),
  689. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"d", F, blob2.id)),
  690. ],
  691. self.detect_renames(tree1, tree2),
  692. )
  693. def test_exact_rename_split_different_type(self):
  694. blob = make_object(Blob, data=b"/foo")
  695. tree1 = self.commit_tree([(b"a", blob, 0o100644)])
  696. tree2 = self.commit_tree([(b"a", blob, 0o120000)])
  697. self.assertEqual(
  698. [
  699. TreeChange.add((b"a", 0o120000, blob.id)),
  700. TreeChange.delete((b"a", 0o100644, blob.id)),
  701. ],
  702. self.detect_renames(tree1, tree2),
  703. )
  704. def test_exact_rename_and_different_type(self):
  705. blob1 = make_object(Blob, data=b"1")
  706. blob2 = make_object(Blob, data=b"2")
  707. tree1 = self.commit_tree([(b"a", blob1)])
  708. tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
  709. self.assertEqual(
  710. [
  711. TreeChange.add((b"a", 0o120000, blob2.id)),
  712. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  713. ],
  714. self.detect_renames(tree1, tree2),
  715. )
  716. def test_exact_rename_one_to_many(self):
  717. blob = make_object(Blob, data=b"1")
  718. tree1 = self.commit_tree([(b"a", blob)])
  719. tree2 = self.commit_tree([(b"b", blob), (b"c", blob)])
  720. self.assertEqual(
  721. [
  722. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  723. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"c", F, blob.id)),
  724. ],
  725. self.detect_renames(tree1, tree2),
  726. )
  727. def test_exact_rename_many_to_one(self):
  728. blob = make_object(Blob, data=b"1")
  729. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  730. tree2 = self.commit_tree([(b"c", blob)])
  731. self.assertEqual(
  732. [
  733. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  734. TreeChange.delete((b"b", F, blob.id)),
  735. ],
  736. self.detect_renames(tree1, tree2),
  737. )
  738. def test_exact_rename_many_to_many(self):
  739. blob = make_object(Blob, data=b"1")
  740. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  741. tree2 = self.commit_tree([(b"c", blob), (b"d", blob), (b"e", blob)])
  742. self.assertEqual(
  743. [
  744. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  745. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"e", F, blob.id)),
  746. TreeChange(CHANGE_RENAME, (b"b", F, blob.id), (b"d", F, blob.id)),
  747. ],
  748. self.detect_renames(tree1, tree2),
  749. )
  750. def test_exact_copy_modify(self):
  751. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  752. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  753. tree1 = self.commit_tree([(b"a", blob1)])
  754. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  755. self.assertEqual(
  756. [
  757. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  758. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  759. ],
  760. self.detect_renames(tree1, tree2),
  761. )
  762. def test_exact_copy_change_mode(self):
  763. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  764. tree1 = self.commit_tree([(b"a", blob)])
  765. tree2 = self.commit_tree([(b"a", blob, 0o100755), (b"b", blob)])
  766. self.assertEqual(
  767. [
  768. TreeChange(
  769. CHANGE_MODIFY,
  770. (b"a", F, blob.id),
  771. (b"a", 0o100755, blob.id),
  772. ),
  773. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id)),
  774. ],
  775. self.detect_renames(tree1, tree2),
  776. )
  777. def test_rename_threshold(self):
  778. blob1 = make_object(Blob, data=b"a\nb\nc\n")
  779. blob2 = make_object(Blob, data=b"a\nb\nd\n")
  780. tree1 = self.commit_tree([(b"a", blob1)])
  781. tree2 = self.commit_tree([(b"b", blob2)])
  782. self.assertEqual(
  783. [TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  784. self.detect_renames(tree1, tree2, rename_threshold=50),
  785. )
  786. self.assertEqual(
  787. [
  788. TreeChange.delete((b"a", F, blob1.id)),
  789. TreeChange.add((b"b", F, blob2.id)),
  790. ],
  791. self.detect_renames(tree1, tree2, rename_threshold=75),
  792. )
  793. def test_content_rename_max_files(self):
  794. blob1 = make_object(Blob, data=b"a\nb\nc\nd")
  795. blob4 = make_object(Blob, data=b"a\nb\nc\ne\n")
  796. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  797. blob3 = make_object(Blob, data=b"e\nf\ng\ni\n")
  798. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  799. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  800. self.assertEqual(
  801. [
  802. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  803. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  804. ],
  805. self.detect_renames(tree1, tree2),
  806. )
  807. self.assertEqual(
  808. [
  809. TreeChange.delete((b"a", F, blob1.id)),
  810. TreeChange.delete((b"b", F, blob2.id)),
  811. TreeChange.add((b"c", F, blob3.id)),
  812. TreeChange.add((b"d", F, blob4.id)),
  813. ],
  814. self.detect_renames(tree1, tree2, max_files=1),
  815. )
  816. def test_content_rename_one_to_one(self):
  817. b11 = make_object(Blob, data=b"a\nb\nc\nd\n")
  818. b12 = make_object(Blob, data=b"a\nb\nc\ne\n")
  819. b21 = make_object(Blob, data=b"e\nf\ng\n\nh")
  820. b22 = make_object(Blob, data=b"e\nf\ng\n\ni")
  821. tree1 = self.commit_tree([(b"a", b11), (b"b", b21)])
  822. tree2 = self.commit_tree([(b"c", b12), (b"d", b22)])
  823. self.assertEqual(
  824. [
  825. TreeChange(CHANGE_RENAME, (b"a", F, b11.id), (b"c", F, b12.id)),
  826. TreeChange(CHANGE_RENAME, (b"b", F, b21.id), (b"d", F, b22.id)),
  827. ],
  828. self.detect_renames(tree1, tree2),
  829. )
  830. def test_content_rename_one_to_one_ordering(self):
  831. blob1 = make_object(Blob, data=b"a\nb\nc\nd\ne\nf\n")
  832. blob2 = make_object(Blob, data=b"a\nb\nc\nd\ng\nh\n")
  833. # 6/10 match to blob1, 8/10 match to blob2
  834. blob3 = make_object(Blob, data=b"a\nb\nc\nd\ng\ni\n")
  835. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  836. tree2 = self.commit_tree([(b"c", blob3)])
  837. self.assertEqual(
  838. [
  839. TreeChange.delete((b"a", F, blob1.id)),
  840. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  841. ],
  842. self.detect_renames(tree1, tree2),
  843. )
  844. tree3 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  845. tree4 = self.commit_tree([(b"c", blob3)])
  846. self.assertEqual(
  847. [
  848. TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
  849. TreeChange.delete((b"b", F, blob1.id)),
  850. ],
  851. self.detect_renames(tree3, tree4),
  852. )
  853. def test_content_rename_one_to_many(self):
  854. blob1 = make_object(Blob, data=b"aa\nb\nc\nd\ne\n")
  855. blob2 = make_object(Blob, data=b"ab\nb\nc\nd\ne\n") # 8/11 match
  856. blob3 = make_object(Blob, data=b"aa\nb\nc\nd\nf\n") # 9/11 match
  857. tree1 = self.commit_tree([(b"a", blob1)])
  858. tree2 = self.commit_tree([(b"b", blob2), (b"c", blob3)])
  859. self.assertEqual(
  860. [
  861. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  862. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  863. ],
  864. self.detect_renames(tree1, tree2),
  865. )
  866. def test_content_rename_many_to_one(self):
  867. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  868. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  869. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  870. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  871. tree2 = self.commit_tree([(b"c", blob3)])
  872. self.assertEqual(
  873. [
  874. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  875. TreeChange.delete((b"b", F, blob2.id)),
  876. ],
  877. self.detect_renames(tree1, tree2),
  878. )
  879. def test_content_rename_many_to_many(self):
  880. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  881. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  882. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  883. blob4 = make_object(Blob, data=b"a\nb\nc\ng\n")
  884. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  885. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  886. # TODO(dborowitz): Distribute renames rather than greedily choosing
  887. # copies.
  888. self.assertEqual(
  889. [
  890. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  891. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  892. TreeChange.delete((b"b", F, blob2.id)),
  893. ],
  894. self.detect_renames(tree1, tree2),
  895. )
  896. def test_content_rename_with_more_deletions(self):
  897. blob1 = make_object(Blob, data=b"")
  898. tree1 = self.commit_tree(
  899. [(b"a", blob1), (b"b", blob1), (b"c", blob1), (b"d", blob1)]
  900. )
  901. tree2 = self.commit_tree([(b"e", blob1), (b"f", blob1), (b"g", blob1)])
  902. self.maxDiff = None
  903. self.assertEqual(
  904. [
  905. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
  906. TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
  907. TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
  908. TreeChange.delete((b"d", F, blob1.id)),
  909. ],
  910. self.detect_renames(tree1, tree2),
  911. )
  912. def test_content_rename_gitlink(self):
  913. blob1 = make_object(Blob, data=b"blob1")
  914. blob2 = make_object(Blob, data=b"blob2")
  915. link1 = b"1" * 40
  916. link2 = b"2" * 40
  917. tree1 = self.commit_tree([(b"a", blob1), (b"b", link1, 0o160000)])
  918. tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
  919. self.assertEqual(
  920. [
  921. TreeChange.delete((b"a", 0o100644, blob1.id)),
  922. TreeChange.delete((b"b", 0o160000, link1)),
  923. TreeChange.add((b"c", 0o100644, blob2.id)),
  924. TreeChange.add((b"d", 0o160000, link2)),
  925. ],
  926. self.detect_renames(tree1, tree2),
  927. )
  928. def test_exact_rename_swap(self):
  929. blob1 = make_object(Blob, data=b"1")
  930. blob2 = make_object(Blob, data=b"2")
  931. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  932. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  933. self.assertEqual(
  934. [
  935. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  936. TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob1.id)),
  937. ],
  938. self.detect_renames(tree1, tree2),
  939. )
  940. self.assertEqual(
  941. [
  942. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  943. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob2.id)),
  944. ],
  945. self.detect_renames(tree1, tree2, rewrite_threshold=50),
  946. )
  947. def test_content_rename_swap(self):
  948. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  949. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  950. blob3 = make_object(Blob, data=b"a\nb\nc\ne\n")
  951. blob4 = make_object(Blob, data=b"e\nf\ng\ni\n")
  952. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  953. tree2 = self.commit_tree([(b"a", blob4), (b"b", blob3)])
  954. self.assertEqual(
  955. [
  956. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob3.id)),
  957. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob4.id)),
  958. ],
  959. self.detect_renames(tree1, tree2, rewrite_threshold=60),
  960. )
  961. def test_rewrite_threshold(self):
  962. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  963. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  964. blob3 = make_object(Blob, data=b"a\nb\nf\ng\n")
  965. tree1 = self.commit_tree([(b"a", blob1)])
  966. tree2 = self.commit_tree([(b"a", blob3), (b"b", blob2)])
  967. no_renames = [
  968. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  969. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  970. ]
  971. self.assertEqual(no_renames, self.detect_renames(tree1, tree2))
  972. self.assertEqual(
  973. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40)
  974. )
  975. self.assertEqual(
  976. [
  977. TreeChange.add((b"a", F, blob3.id)),
  978. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  979. ],
  980. self.detect_renames(tree1, tree2, rewrite_threshold=80),
  981. )
  982. def test_find_copies_harder_exact(self):
  983. blob = make_object(Blob, data=b"blob")
  984. tree1 = self.commit_tree([(b"a", blob)])
  985. tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
  986. self.assertEqual(
  987. [TreeChange.add((b"b", F, blob.id))],
  988. self.detect_renames(tree1, tree2),
  989. )
  990. self.assertEqual(
  991. [TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id))],
  992. self.detect_renames(tree1, tree2, find_copies_harder=True),
  993. )
  994. def test_find_copies_harder_content(self):
  995. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  996. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  997. tree1 = self.commit_tree([(b"a", blob1)])
  998. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  999. self.assertEqual(
  1000. [TreeChange.add((b"b", F, blob2.id))],
  1001. self.detect_renames(tree1, tree2),
  1002. )
  1003. self.assertEqual(
  1004. [TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  1005. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1006. )
  1007. def test_find_copies_harder_with_rewrites(self):
  1008. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1009. blob_a2 = make_object(Blob, data=b"f\ng\nh\ni\n")
  1010. blob_b2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1011. tree1 = self.commit_tree([(b"a", blob_a1)])
  1012. tree2 = self.commit_tree([(b"a", blob_a2), (b"b", blob_b2)])
  1013. self.assertEqual(
  1014. [
  1015. TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id)),
  1016. TreeChange(CHANGE_COPY, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1017. ],
  1018. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1019. )
  1020. self.assertEqual(
  1021. [
  1022. TreeChange.add((b"a", F, blob_a2.id)),
  1023. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1024. ],
  1025. self.detect_renames(
  1026. tree1, tree2, rewrite_threshold=50, find_copies_harder=True
  1027. ),
  1028. )
  1029. def test_reuse_detector(self):
  1030. blob = make_object(Blob, data=b"blob")
  1031. tree1 = self.commit_tree([(b"a", blob)])
  1032. tree2 = self.commit_tree([(b"b", blob)])
  1033. detector = RenameDetector(self.store)
  1034. changes = [TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id))]
  1035. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1036. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1037. def test_want_unchanged(self):
  1038. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1039. blob_b = make_object(Blob, data=b"b")
  1040. blob_c2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1041. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  1042. tree2 = self.commit_tree([(b"c", blob_c2), (b"b", blob_b)])
  1043. self.assertEqual(
  1044. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id))],
  1045. self.detect_renames(tree1, tree2),
  1046. )
  1047. self.assertEqual(
  1048. [
  1049. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id)),
  1050. TreeChange(
  1051. CHANGE_UNCHANGED,
  1052. (b"b", F, blob_b.id),
  1053. (b"b", F, blob_b.id),
  1054. ),
  1055. ],
  1056. self.detect_renames(tree1, tree2, want_unchanged=True),
  1057. )