test_diff_tree.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. # test_diff_tree.py -- Tests for file and tree diff utilities.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. # General Public License as public by the Free Software Foundation; version 2.0
  6. # or (at your option) any later version. You can redistribute it and/or
  7. # modify it under the terms of either of these two licenses.
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # You should have received a copy of the licenses; if not, see
  16. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. # License, Version 2.0.
  19. #
  20. """Tests for file and tree diff utilities."""
  21. from itertools import permutations
  22. from dulwich.diff_tree import (
  23. CHANGE_COPY,
  24. CHANGE_MODIFY,
  25. CHANGE_RENAME,
  26. CHANGE_UNCHANGED,
  27. RenameDetector,
  28. TreeChange,
  29. _count_blocks,
  30. _count_blocks_py,
  31. _is_tree,
  32. _is_tree_py,
  33. _merge_entries,
  34. _merge_entries_py,
  35. _similarity_score,
  36. _tree_change_key,
  37. tree_changes,
  38. tree_changes_for_merge,
  39. )
  40. from dulwich.index import commit_tree
  41. from dulwich.object_store import MemoryObjectStore
  42. from dulwich.objects import Blob, ShaFile, Tree, TreeEntry
  43. from dulwich.tests.utils import F, ext_functest_builder, functest_builder, make_object
  44. from . import TestCase
  45. class DiffTestCase(TestCase):
  46. def setUp(self) -> None:
  47. super().setUp()
  48. self.store = MemoryObjectStore()
  49. self.empty_tree = self.commit_tree([])
  50. def commit_tree(self, entries):
  51. commit_blobs = []
  52. for entry in entries:
  53. if len(entry) == 2:
  54. path, obj = entry
  55. mode = F
  56. else:
  57. path, obj, mode = entry
  58. if isinstance(obj, Blob):
  59. self.store.add_object(obj)
  60. sha = obj.id
  61. else:
  62. sha = obj
  63. commit_blobs.append((path, sha, mode))
  64. return self.store[commit_tree(self.store, commit_blobs)]
  65. class TreeChangesTest(DiffTestCase):
  66. def setUp(self) -> None:
  67. super().setUp()
  68. self.detector = RenameDetector(self.store)
  69. def assertMergeFails(self, merge_entries, name, mode, sha) -> None:
  70. t = Tree()
  71. t[name] = (mode, sha)
  72. self.assertRaises((TypeError, ValueError), merge_entries, "", t, t)
  73. def _do_test_merge_entries(self, merge_entries) -> None:
  74. blob_a1 = make_object(Blob, data=b"a1")
  75. blob_a2 = make_object(Blob, data=b"a2")
  76. blob_b1 = make_object(Blob, data=b"b1")
  77. blob_c2 = make_object(Blob, data=b"c2")
  78. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644), (b"b", blob_b1, 0o100755)])
  79. tree2 = self.commit_tree([(b"a", blob_a2, 0o100644), (b"c", blob_c2, 0o100755)])
  80. self.assertEqual([], merge_entries(b"", self.empty_tree, self.empty_tree))
  81. self.assertEqual(
  82. [
  83. ((None, None, None), (b"a", 0o100644, blob_a1.id)),
  84. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  85. ],
  86. merge_entries(b"", self.empty_tree, tree1),
  87. )
  88. self.assertEqual(
  89. [
  90. ((None, None, None), (b"x/a", 0o100644, blob_a1.id)),
  91. ((None, None, None), (b"x/b", 0o100755, blob_b1.id)),
  92. ],
  93. merge_entries(b"x", self.empty_tree, tree1),
  94. )
  95. self.assertEqual(
  96. [
  97. ((b"a", 0o100644, blob_a2.id), (None, None, None)),
  98. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  99. ],
  100. merge_entries(b"", tree2, self.empty_tree),
  101. )
  102. self.assertEqual(
  103. [
  104. ((b"a", 0o100644, blob_a1.id), (b"a", 0o100644, blob_a2.id)),
  105. ((b"b", 0o100755, blob_b1.id), (None, None, None)),
  106. ((None, None, None), (b"c", 0o100755, blob_c2.id)),
  107. ],
  108. merge_entries(b"", tree1, tree2),
  109. )
  110. self.assertEqual(
  111. [
  112. ((b"a", 0o100644, blob_a2.id), (b"a", 0o100644, blob_a1.id)),
  113. ((None, None, None), (b"b", 0o100755, blob_b1.id)),
  114. ((b"c", 0o100755, blob_c2.id), (None, None, None)),
  115. ],
  116. merge_entries(b"", tree2, tree1),
  117. )
  118. self.assertMergeFails(merge_entries, 0xDEADBEEF, 0o100644, "1" * 40)
  119. self.assertMergeFails(merge_entries, b"a", b"deadbeef", "1" * 40)
  120. self.assertMergeFails(merge_entries, b"a", 0o100644, 0xDEADBEEF)
  121. test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py)
  122. test_merge_entries_extension = ext_functest_builder(
  123. _do_test_merge_entries, _merge_entries
  124. )
  125. def _do_test_is_tree(self, is_tree) -> None:
  126. self.assertFalse(is_tree(TreeEntry(None, None, None)))
  127. self.assertFalse(is_tree(TreeEntry(b"a", 0o100644, b"a" * 40)))
  128. self.assertFalse(is_tree(TreeEntry(b"a", 0o100755, b"a" * 40)))
  129. self.assertFalse(is_tree(TreeEntry(b"a", 0o120000, b"a" * 40)))
  130. self.assertTrue(is_tree(TreeEntry(b"a", 0o040000, b"a" * 40)))
  131. self.assertRaises(TypeError, is_tree, TreeEntry(b"a", b"x", b"a" * 40))
  132. self.assertRaises(AttributeError, is_tree, 1234)
  133. test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
  134. test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
  135. def assertChangesEqual(self, expected, tree1, tree2, **kwargs) -> None:
  136. actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
  137. self.assertEqual(expected, actual)
  138. # For brevity, the following tests use tuples instead of TreeEntry objects.
  139. def test_tree_changes_empty(self) -> None:
  140. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  141. def test_tree_changes_no_changes(self) -> None:
  142. blob = make_object(Blob, data=b"blob")
  143. tree = self.commit_tree([(b"a", blob), (b"b/c", blob)])
  144. self.assertChangesEqual([], self.empty_tree, self.empty_tree)
  145. self.assertChangesEqual([], tree, tree)
  146. self.assertChangesEqual(
  147. [
  148. TreeChange(CHANGE_UNCHANGED, (b"a", F, blob.id), (b"a", F, blob.id)),
  149. TreeChange(
  150. CHANGE_UNCHANGED,
  151. (b"b/c", F, blob.id),
  152. (b"b/c", F, blob.id),
  153. ),
  154. ],
  155. tree,
  156. tree,
  157. want_unchanged=True,
  158. )
  159. def test_tree_changes_add_delete(self) -> None:
  160. blob_a = make_object(Blob, data=b"a")
  161. blob_b = make_object(Blob, data=b"b")
  162. tree = self.commit_tree([(b"a", blob_a, 0o100644), (b"x/b", blob_b, 0o100755)])
  163. self.assertChangesEqual(
  164. [
  165. TreeChange.add((b"a", 0o100644, blob_a.id)),
  166. TreeChange.add((b"x/b", 0o100755, blob_b.id)),
  167. ],
  168. self.empty_tree,
  169. tree,
  170. )
  171. self.assertChangesEqual(
  172. [
  173. TreeChange.delete((b"a", 0o100644, blob_a.id)),
  174. TreeChange.delete((b"x/b", 0o100755, blob_b.id)),
  175. ],
  176. tree,
  177. self.empty_tree,
  178. )
  179. def test_tree_changes_modify_contents(self) -> None:
  180. blob_a1 = make_object(Blob, data=b"a1")
  181. blob_a2 = make_object(Blob, data=b"a2")
  182. tree1 = self.commit_tree([(b"a", blob_a1)])
  183. tree2 = self.commit_tree([(b"a", blob_a2)])
  184. self.assertChangesEqual(
  185. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  186. tree1,
  187. tree2,
  188. )
  189. def test_tree_changes_modify_mode(self) -> None:
  190. blob_a = make_object(Blob, data=b"a")
  191. tree1 = self.commit_tree([(b"a", blob_a, 0o100644)])
  192. tree2 = self.commit_tree([(b"a", blob_a, 0o100755)])
  193. self.assertChangesEqual(
  194. [
  195. TreeChange(
  196. CHANGE_MODIFY,
  197. (b"a", 0o100644, blob_a.id),
  198. (b"a", 0o100755, blob_a.id),
  199. )
  200. ],
  201. tree1,
  202. tree2,
  203. )
  204. def test_tree_changes_change_type(self) -> None:
  205. blob_a1 = make_object(Blob, data=b"a")
  206. blob_a2 = make_object(Blob, data=b"/foo/bar")
  207. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  208. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  209. self.assertChangesEqual(
  210. [
  211. TreeChange.delete((b"a", 0o100644, blob_a1.id)),
  212. TreeChange.add((b"a", 0o120000, blob_a2.id)),
  213. ],
  214. tree1,
  215. tree2,
  216. )
  217. def test_tree_changes_change_type_same(self) -> None:
  218. blob_a1 = make_object(Blob, data=b"a")
  219. blob_a2 = make_object(Blob, data=b"/foo/bar")
  220. tree1 = self.commit_tree([(b"a", blob_a1, 0o100644)])
  221. tree2 = self.commit_tree([(b"a", blob_a2, 0o120000)])
  222. self.assertChangesEqual(
  223. [
  224. TreeChange(
  225. CHANGE_MODIFY,
  226. (b"a", 0o100644, blob_a1.id),
  227. (b"a", 0o120000, blob_a2.id),
  228. )
  229. ],
  230. tree1,
  231. tree2,
  232. change_type_same=True,
  233. )
  234. def test_tree_changes_to_tree(self) -> None:
  235. blob_a = make_object(Blob, data=b"a")
  236. blob_x = make_object(Blob, data=b"x")
  237. tree1 = self.commit_tree([(b"a", blob_a)])
  238. tree2 = self.commit_tree([(b"a/x", blob_x)])
  239. self.assertChangesEqual(
  240. [
  241. TreeChange.delete((b"a", F, blob_a.id)),
  242. TreeChange.add((b"a/x", F, blob_x.id)),
  243. ],
  244. tree1,
  245. tree2,
  246. )
  247. def test_tree_changes_complex(self) -> None:
  248. blob_a_1 = make_object(Blob, data=b"a1_1")
  249. blob_bx1_1 = make_object(Blob, data=b"bx1_1")
  250. blob_bx2_1 = make_object(Blob, data=b"bx2_1")
  251. blob_by1_1 = make_object(Blob, data=b"by1_1")
  252. blob_by2_1 = make_object(Blob, data=b"by2_1")
  253. tree1 = self.commit_tree(
  254. [
  255. (b"a", blob_a_1),
  256. (b"b/x/1", blob_bx1_1),
  257. (b"b/x/2", blob_bx2_1),
  258. (b"b/y/1", blob_by1_1),
  259. (b"b/y/2", blob_by2_1),
  260. ]
  261. )
  262. blob_a_2 = make_object(Blob, data=b"a1_2")
  263. blob_bx1_2 = blob_bx1_1
  264. blob_by_2 = make_object(Blob, data=b"by_2")
  265. blob_c_2 = make_object(Blob, data=b"c_2")
  266. tree2 = self.commit_tree(
  267. [
  268. (b"a", blob_a_2),
  269. (b"b/x/1", blob_bx1_2),
  270. (b"b/y", blob_by_2),
  271. (b"c", blob_c_2),
  272. ]
  273. )
  274. self.assertChangesEqual(
  275. [
  276. TreeChange(
  277. CHANGE_MODIFY,
  278. (b"a", F, blob_a_1.id),
  279. (b"a", F, blob_a_2.id),
  280. ),
  281. TreeChange.delete((b"b/x/2", F, blob_bx2_1.id)),
  282. TreeChange.add((b"b/y", F, blob_by_2.id)),
  283. TreeChange.delete((b"b/y/1", F, blob_by1_1.id)),
  284. TreeChange.delete((b"b/y/2", F, blob_by2_1.id)),
  285. TreeChange.add((b"c", F, blob_c_2.id)),
  286. ],
  287. tree1,
  288. tree2,
  289. )
  290. def test_tree_changes_name_order(self) -> None:
  291. blob = make_object(Blob, data=b"a")
  292. tree1 = self.commit_tree([(b"a", blob), (b"a.", blob), (b"a..", blob)])
  293. # Tree order is the reverse of this, so if we used tree order, 'a..'
  294. # would not be merged.
  295. tree2 = self.commit_tree([(b"a/x", blob), (b"a./x", blob), (b"a..", blob)])
  296. self.assertChangesEqual(
  297. [
  298. TreeChange.delete((b"a", F, blob.id)),
  299. TreeChange.add((b"a/x", F, blob.id)),
  300. TreeChange.delete((b"a.", F, blob.id)),
  301. TreeChange.add((b"a./x", F, blob.id)),
  302. ],
  303. tree1,
  304. tree2,
  305. )
  306. def test_tree_changes_prune(self) -> None:
  307. blob_a1 = make_object(Blob, data=b"a1")
  308. blob_a2 = make_object(Blob, data=b"a2")
  309. blob_x = make_object(Blob, data=b"x")
  310. tree1 = self.commit_tree([(b"a", blob_a1), (b"b/x", blob_x)])
  311. tree2 = self.commit_tree([(b"a", blob_a2), (b"b/x", blob_x)])
  312. # Remove identical items so lookups will fail unless we prune.
  313. subtree = self.store[tree1[b"b"][1]]
  314. for entry in subtree.items():
  315. del self.store[entry.sha]
  316. del self.store[subtree.id]
  317. self.assertChangesEqual(
  318. [TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id))],
  319. tree1,
  320. tree2,
  321. )
  322. def test_tree_changes_rename_detector(self) -> None:
  323. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  324. blob_a2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  325. blob_b = make_object(Blob, data=b"b")
  326. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  327. tree2 = self.commit_tree([(b"c", blob_a2), (b"b", blob_b)])
  328. detector = RenameDetector(self.store)
  329. self.assertChangesEqual(
  330. [
  331. TreeChange.delete((b"a", F, blob_a1.id)),
  332. TreeChange.add((b"c", F, blob_a2.id)),
  333. ],
  334. tree1,
  335. tree2,
  336. )
  337. self.assertChangesEqual(
  338. [
  339. TreeChange.delete((b"a", F, blob_a1.id)),
  340. TreeChange(
  341. CHANGE_UNCHANGED,
  342. (b"b", F, blob_b.id),
  343. (b"b", F, blob_b.id),
  344. ),
  345. TreeChange.add((b"c", F, blob_a2.id)),
  346. ],
  347. tree1,
  348. tree2,
  349. want_unchanged=True,
  350. )
  351. self.assertChangesEqual(
  352. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id))],
  353. tree1,
  354. tree2,
  355. rename_detector=detector,
  356. )
  357. self.assertChangesEqual(
  358. [
  359. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_a2.id)),
  360. TreeChange(
  361. CHANGE_UNCHANGED,
  362. (b"b", F, blob_b.id),
  363. (b"b", F, blob_b.id),
  364. ),
  365. ],
  366. tree1,
  367. tree2,
  368. rename_detector=detector,
  369. want_unchanged=True,
  370. )
  371. def assertChangesForMergeEqual(
  372. self, expected, parent_trees, merge_tree, **kwargs
  373. ) -> None:
  374. parent_tree_ids = [t.id for t in parent_trees]
  375. actual = list(
  376. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  377. )
  378. self.assertEqual(expected, actual)
  379. parent_tree_ids.reverse()
  380. expected = [list(reversed(cs)) for cs in expected]
  381. actual = list(
  382. tree_changes_for_merge(self.store, parent_tree_ids, merge_tree.id, **kwargs)
  383. )
  384. self.assertEqual(expected, actual)
  385. def test_tree_changes_for_merge_add_no_conflict(self) -> None:
  386. blob = make_object(Blob, data=b"blob")
  387. parent1 = self.commit_tree([])
  388. parent2 = merge = self.commit_tree([(b"a", blob)])
  389. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  390. self.assertChangesForMergeEqual([], [parent2, parent2], merge)
  391. def test_tree_changes_for_merge_add_modify_conflict(self) -> None:
  392. blob1 = make_object(Blob, data=b"1")
  393. blob2 = make_object(Blob, data=b"2")
  394. parent1 = self.commit_tree([])
  395. parent2 = self.commit_tree([(b"a", blob1)])
  396. merge = self.commit_tree([(b"a", blob2)])
  397. self.assertChangesForMergeEqual(
  398. [
  399. [
  400. TreeChange.add((b"a", F, blob2.id)),
  401. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  402. ]
  403. ],
  404. [parent1, parent2],
  405. merge,
  406. )
  407. def test_tree_changes_for_merge_modify_modify_conflict(self) -> None:
  408. blob1 = make_object(Blob, data=b"1")
  409. blob2 = make_object(Blob, data=b"2")
  410. blob3 = make_object(Blob, data=b"3")
  411. parent1 = self.commit_tree([(b"a", blob1)])
  412. parent2 = self.commit_tree([(b"a", blob2)])
  413. merge = self.commit_tree([(b"a", blob3)])
  414. self.assertChangesForMergeEqual(
  415. [
  416. [
  417. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  418. TreeChange(CHANGE_MODIFY, (b"a", F, blob2.id), (b"a", F, blob3.id)),
  419. ]
  420. ],
  421. [parent1, parent2],
  422. merge,
  423. )
  424. def test_tree_changes_for_merge_modify_no_conflict(self) -> None:
  425. blob1 = make_object(Blob, data=b"1")
  426. blob2 = make_object(Blob, data=b"2")
  427. parent1 = self.commit_tree([(b"a", blob1)])
  428. parent2 = merge = self.commit_tree([(b"a", blob2)])
  429. self.assertChangesForMergeEqual([], [parent1, parent2], merge)
  430. def test_tree_changes_for_merge_delete_delete_conflict(self) -> None:
  431. blob1 = make_object(Blob, data=b"1")
  432. blob2 = make_object(Blob, data=b"2")
  433. parent1 = self.commit_tree([(b"a", blob1)])
  434. parent2 = self.commit_tree([(b"a", blob2)])
  435. merge = self.commit_tree([])
  436. self.assertChangesForMergeEqual(
  437. [
  438. [
  439. TreeChange.delete((b"a", F, blob1.id)),
  440. TreeChange.delete((b"a", F, blob2.id)),
  441. ]
  442. ],
  443. [parent1, parent2],
  444. merge,
  445. )
  446. def test_tree_changes_for_merge_delete_no_conflict(self) -> None:
  447. blob = make_object(Blob, data=b"blob")
  448. has = self.commit_tree([(b"a", blob)])
  449. doesnt_have = self.commit_tree([])
  450. self.assertChangesForMergeEqual([], [has, has], doesnt_have)
  451. self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
  452. def test_tree_changes_for_merge_octopus_no_conflict(self) -> None:
  453. r = list(range(5))
  454. blobs = [make_object(Blob, data=bytes(i)) for i in r]
  455. parents = [self.commit_tree([(b"a", blobs[i])]) for i in r]
  456. for i in r:
  457. # Take the SHA from each of the parents.
  458. self.assertChangesForMergeEqual([], parents, parents[i])
  459. def test_tree_changes_for_merge_octopus_modify_conflict(self) -> None:
  460. # Because the octopus merge strategy is limited, I doubt it's possible
  461. # to create this with the git command line. But the output is well-
  462. # defined, so test it anyway.
  463. r = list(range(5))
  464. parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
  465. merge_blob = make_object(Blob, data=b"merge")
  466. parents = [self.commit_tree([(b"a", parent_blobs[i])]) for i in r]
  467. merge = self.commit_tree([(b"a", merge_blob)])
  468. expected = [
  469. [
  470. TreeChange(
  471. CHANGE_MODIFY,
  472. (b"a", F, parent_blobs[i].id),
  473. (b"a", F, merge_blob.id),
  474. )
  475. for i in r
  476. ]
  477. ]
  478. self.assertChangesForMergeEqual(expected, parents, merge)
  479. def test_tree_changes_for_merge_octopus_delete(self) -> None:
  480. blob1 = make_object(Blob, data=b"1")
  481. blob2 = make_object(Blob, data=b"3")
  482. parent1 = self.commit_tree([(b"a", blob1)])
  483. parent2 = self.commit_tree([(b"a", blob2)])
  484. parent3 = merge = self.commit_tree([])
  485. self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
  486. self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
  487. self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
  488. self.assertChangesForMergeEqual(
  489. [
  490. [
  491. TreeChange.delete((b"a", F, blob1.id)),
  492. TreeChange.delete((b"a", F, blob2.id)),
  493. None,
  494. ]
  495. ],
  496. [parent1, parent2, parent3],
  497. merge,
  498. )
  499. def test_tree_changes_for_merge_add_add_same_conflict(self) -> None:
  500. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  501. parent1 = self.commit_tree([(b"a", blob)])
  502. parent2 = self.commit_tree([])
  503. merge = self.commit_tree([(b"b", blob)])
  504. add = TreeChange.add((b"b", F, blob.id))
  505. self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
  506. def test_tree_changes_for_merge_add_exact_rename_conflict(self) -> None:
  507. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  508. parent1 = self.commit_tree([(b"a", blob)])
  509. parent2 = self.commit_tree([])
  510. merge = self.commit_tree([(b"b", blob)])
  511. self.assertChangesForMergeEqual(
  512. [
  513. [
  514. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  515. TreeChange.add((b"b", F, blob.id)),
  516. ]
  517. ],
  518. [parent1, parent2],
  519. merge,
  520. rename_detector=self.detector,
  521. )
  522. def test_tree_changes_for_merge_add_content_rename_conflict(self) -> None:
  523. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  524. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  525. parent1 = self.commit_tree([(b"a", blob1)])
  526. parent2 = self.commit_tree([])
  527. merge = self.commit_tree([(b"b", blob2)])
  528. self.assertChangesForMergeEqual(
  529. [
  530. [
  531. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  532. TreeChange.add((b"b", F, blob2.id)),
  533. ]
  534. ],
  535. [parent1, parent2],
  536. merge,
  537. rename_detector=self.detector,
  538. )
  539. def test_tree_changes_for_merge_modify_rename_conflict(self) -> None:
  540. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  541. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  542. parent1 = self.commit_tree([(b"a", blob1)])
  543. parent2 = self.commit_tree([(b"b", blob1)])
  544. merge = self.commit_tree([(b"b", blob2)])
  545. self.assertChangesForMergeEqual(
  546. [
  547. [
  548. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  549. TreeChange(CHANGE_MODIFY, (b"b", F, blob1.id), (b"b", F, blob2.id)),
  550. ]
  551. ],
  552. [parent1, parent2],
  553. merge,
  554. rename_detector=self.detector,
  555. )
  556. class RenameDetectionTest(DiffTestCase):
  557. def _do_test_count_blocks(self, count_blocks) -> None:
  558. blob = make_object(Blob, data=b"a\nb\na\n")
  559. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, count_blocks(blob))
  560. test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py)
  561. test_count_blocks_extension = ext_functest_builder(
  562. _do_test_count_blocks, _count_blocks
  563. )
  564. def _do_test_count_blocks_no_newline(self, count_blocks) -> None:
  565. blob = make_object(Blob, data=b"a\na")
  566. self.assertBlockCountEqual({b"a\n": 2, b"a": 1}, _count_blocks(blob))
  567. test_count_blocks_no_newline = functest_builder(
  568. _do_test_count_blocks_no_newline, _count_blocks_py
  569. )
  570. test_count_blocks_no_newline_extension = ext_functest_builder(
  571. _do_test_count_blocks_no_newline, _count_blocks
  572. )
  573. def assertBlockCountEqual(self, expected, got) -> None:
  574. self.assertEqual(
  575. {(hash(block) & 0xFFFFFFFF): count for (block, count) in expected.items()},
  576. {(block & 0xFFFFFFFF): count for (block, count) in got.items()},
  577. )
  578. def _do_test_count_blocks_chunks(self, count_blocks) -> None:
  579. blob = ShaFile.from_raw_chunks(Blob.type_num, [b"a\nb", b"\na\n"])
  580. self.assertBlockCountEqual({b"a\n": 4, b"b\n": 2}, _count_blocks(blob))
  581. test_count_blocks_chunks = functest_builder(
  582. _do_test_count_blocks_chunks, _count_blocks_py
  583. )
  584. test_count_blocks_chunks_extension = ext_functest_builder(
  585. _do_test_count_blocks_chunks, _count_blocks
  586. )
  587. def _do_test_count_blocks_long_lines(self, count_blocks) -> None:
  588. a = b"a" * 64
  589. data = a + b"xxx\ny\n" + a + b"zzz\n"
  590. blob = make_object(Blob, data=data)
  591. self.assertBlockCountEqual(
  592. {b"a" * 64: 128, b"xxx\n": 4, b"y\n": 2, b"zzz\n": 4},
  593. _count_blocks(blob),
  594. )
  595. test_count_blocks_long_lines = functest_builder(
  596. _do_test_count_blocks_long_lines, _count_blocks_py
  597. )
  598. test_count_blocks_long_lines_extension = ext_functest_builder(
  599. _do_test_count_blocks_long_lines, _count_blocks
  600. )
  601. def assertSimilar(self, expected_score, blob1, blob2) -> None:
  602. self.assertEqual(expected_score, _similarity_score(blob1, blob2))
  603. self.assertEqual(expected_score, _similarity_score(blob2, blob1))
  604. def test_similarity_score(self) -> None:
  605. blob0 = make_object(Blob, data=b"")
  606. blob1 = make_object(Blob, data=b"ab\ncd\ncd\n")
  607. blob2 = make_object(Blob, data=b"ab\n")
  608. blob3 = make_object(Blob, data=b"cd\n")
  609. blob4 = make_object(Blob, data=b"cd\ncd\n")
  610. self.assertSimilar(100, blob0, blob0)
  611. self.assertSimilar(0, blob0, blob1)
  612. self.assertSimilar(33, blob1, blob2)
  613. self.assertSimilar(33, blob1, blob3)
  614. self.assertSimilar(66, blob1, blob4)
  615. self.assertSimilar(0, blob2, blob3)
  616. self.assertSimilar(50, blob3, blob4)
  617. def test_similarity_score_cache(self) -> None:
  618. blob1 = make_object(Blob, data=b"ab\ncd\n")
  619. blob2 = make_object(Blob, data=b"ab\n")
  620. block_cache = {}
  621. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  622. self.assertEqual({blob1.id, blob2.id}, set(block_cache))
  623. def fail_chunks() -> None:
  624. self.fail("Unexpected call to as_raw_chunks()")
  625. blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
  626. blob1.raw_length = lambda: 6
  627. blob2.raw_length = lambda: 3
  628. self.assertEqual(50, _similarity_score(blob1, blob2, block_cache=block_cache))
  629. def test_tree_entry_sort(self) -> None:
  630. sha = "abcd" * 10
  631. expected_entries = [
  632. TreeChange.add(TreeEntry(b"aaa", F, sha)),
  633. TreeChange(
  634. CHANGE_COPY,
  635. TreeEntry(b"bbb", F, sha),
  636. TreeEntry(b"aab", F, sha),
  637. ),
  638. TreeChange(
  639. CHANGE_MODIFY,
  640. TreeEntry(b"bbb", F, sha),
  641. TreeEntry(b"bbb", F, b"dabc" * 10),
  642. ),
  643. TreeChange(
  644. CHANGE_RENAME,
  645. TreeEntry(b"bbc", F, sha),
  646. TreeEntry(b"ddd", F, sha),
  647. ),
  648. TreeChange.delete(TreeEntry(b"ccc", F, sha)),
  649. ]
  650. for perm in permutations(expected_entries):
  651. self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key))
  652. def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
  653. detector = RenameDetector(self.store, **kwargs)
  654. return detector.changes_with_renames(
  655. tree1.id, tree2.id, want_unchanged=want_unchanged
  656. )
  657. def test_no_renames(self) -> None:
  658. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  659. blob2 = make_object(Blob, data=b"a\nb\ne\nf\n")
  660. blob3 = make_object(Blob, data=b"a\nb\ng\nh\n")
  661. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  662. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob3)])
  663. self.assertEqual(
  664. [TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob3.id))],
  665. self.detect_renames(tree1, tree2),
  666. )
  667. def test_exact_rename_one_to_one(self) -> None:
  668. blob1 = make_object(Blob, data=b"1")
  669. blob2 = make_object(Blob, data=b"2")
  670. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  671. tree2 = self.commit_tree([(b"c", blob1), (b"d", blob2)])
  672. self.assertEqual(
  673. [
  674. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob1.id)),
  675. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"d", F, blob2.id)),
  676. ],
  677. self.detect_renames(tree1, tree2),
  678. )
  679. def test_exact_rename_split_different_type(self) -> None:
  680. blob = make_object(Blob, data=b"/foo")
  681. tree1 = self.commit_tree([(b"a", blob, 0o100644)])
  682. tree2 = self.commit_tree([(b"a", blob, 0o120000)])
  683. self.assertEqual(
  684. [
  685. TreeChange.add((b"a", 0o120000, blob.id)),
  686. TreeChange.delete((b"a", 0o100644, blob.id)),
  687. ],
  688. self.detect_renames(tree1, tree2),
  689. )
  690. def test_exact_rename_and_different_type(self) -> None:
  691. blob1 = make_object(Blob, data=b"1")
  692. blob2 = make_object(Blob, data=b"2")
  693. tree1 = self.commit_tree([(b"a", blob1)])
  694. tree2 = self.commit_tree([(b"a", blob2, 0o120000), (b"b", blob1)])
  695. self.assertEqual(
  696. [
  697. TreeChange.add((b"a", 0o120000, blob2.id)),
  698. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  699. ],
  700. self.detect_renames(tree1, tree2),
  701. )
  702. def test_exact_rename_one_to_many(self) -> None:
  703. blob = make_object(Blob, data=b"1")
  704. tree1 = self.commit_tree([(b"a", blob)])
  705. tree2 = self.commit_tree([(b"b", blob), (b"c", blob)])
  706. self.assertEqual(
  707. [
  708. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id)),
  709. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"c", F, blob.id)),
  710. ],
  711. self.detect_renames(tree1, tree2),
  712. )
  713. def test_exact_rename_many_to_one(self) -> None:
  714. blob = make_object(Blob, data=b"1")
  715. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  716. tree2 = self.commit_tree([(b"c", blob)])
  717. self.assertEqual(
  718. [
  719. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  720. TreeChange.delete((b"b", F, blob.id)),
  721. ],
  722. self.detect_renames(tree1, tree2),
  723. )
  724. def test_exact_rename_many_to_many(self) -> None:
  725. blob = make_object(Blob, data=b"1")
  726. tree1 = self.commit_tree([(b"a", blob), (b"b", blob)])
  727. tree2 = self.commit_tree([(b"c", blob), (b"d", blob), (b"e", blob)])
  728. self.assertEqual(
  729. [
  730. TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"c", F, blob.id)),
  731. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"e", F, blob.id)),
  732. TreeChange(CHANGE_RENAME, (b"b", F, blob.id), (b"d", F, blob.id)),
  733. ],
  734. self.detect_renames(tree1, tree2),
  735. )
  736. def test_exact_copy_modify(self) -> None:
  737. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  738. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  739. tree1 = self.commit_tree([(b"a", blob1)])
  740. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  741. self.assertEqual(
  742. [
  743. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  744. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  745. ],
  746. self.detect_renames(tree1, tree2),
  747. )
  748. def test_exact_copy_change_mode(self) -> None:
  749. blob = make_object(Blob, data=b"a\nb\nc\nd\n")
  750. tree1 = self.commit_tree([(b"a", blob)])
  751. tree2 = self.commit_tree([(b"a", blob, 0o100755), (b"b", blob)])
  752. self.assertEqual(
  753. [
  754. TreeChange(
  755. CHANGE_MODIFY,
  756. (b"a", F, blob.id),
  757. (b"a", 0o100755, blob.id),
  758. ),
  759. TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id)),
  760. ],
  761. self.detect_renames(tree1, tree2),
  762. )
  763. def test_rename_threshold(self) -> None:
  764. blob1 = make_object(Blob, data=b"a\nb\nc\n")
  765. blob2 = make_object(Blob, data=b"a\nb\nd\n")
  766. tree1 = self.commit_tree([(b"a", blob1)])
  767. tree2 = self.commit_tree([(b"b", blob2)])
  768. self.assertEqual(
  769. [TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  770. self.detect_renames(tree1, tree2, rename_threshold=50),
  771. )
  772. self.assertEqual(
  773. [
  774. TreeChange.delete((b"a", F, blob1.id)),
  775. TreeChange.add((b"b", F, blob2.id)),
  776. ],
  777. self.detect_renames(tree1, tree2, rename_threshold=75),
  778. )
  779. def test_content_rename_max_files(self) -> None:
  780. blob1 = make_object(Blob, data=b"a\nb\nc\nd")
  781. blob4 = make_object(Blob, data=b"a\nb\nc\ne\n")
  782. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  783. blob3 = make_object(Blob, data=b"e\nf\ng\ni\n")
  784. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  785. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  786. self.assertEqual(
  787. [
  788. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  789. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  790. ],
  791. self.detect_renames(tree1, tree2),
  792. )
  793. self.assertEqual(
  794. [
  795. TreeChange.delete((b"a", F, blob1.id)),
  796. TreeChange.delete((b"b", F, blob2.id)),
  797. TreeChange.add((b"c", F, blob3.id)),
  798. TreeChange.add((b"d", F, blob4.id)),
  799. ],
  800. self.detect_renames(tree1, tree2, max_files=1),
  801. )
  802. def test_content_rename_one_to_one(self) -> None:
  803. b11 = make_object(Blob, data=b"a\nb\nc\nd\n")
  804. b12 = make_object(Blob, data=b"a\nb\nc\ne\n")
  805. b21 = make_object(Blob, data=b"e\nf\ng\n\nh")
  806. b22 = make_object(Blob, data=b"e\nf\ng\n\ni")
  807. tree1 = self.commit_tree([(b"a", b11), (b"b", b21)])
  808. tree2 = self.commit_tree([(b"c", b12), (b"d", b22)])
  809. self.assertEqual(
  810. [
  811. TreeChange(CHANGE_RENAME, (b"a", F, b11.id), (b"c", F, b12.id)),
  812. TreeChange(CHANGE_RENAME, (b"b", F, b21.id), (b"d", F, b22.id)),
  813. ],
  814. self.detect_renames(tree1, tree2),
  815. )
  816. def test_content_rename_one_to_one_ordering(self) -> None:
  817. blob1 = make_object(Blob, data=b"a\nb\nc\nd\ne\nf\n")
  818. blob2 = make_object(Blob, data=b"a\nb\nc\nd\ng\nh\n")
  819. # 6/10 match to blob1, 8/10 match to blob2
  820. blob3 = make_object(Blob, data=b"a\nb\nc\nd\ng\ni\n")
  821. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  822. tree2 = self.commit_tree([(b"c", blob3)])
  823. self.assertEqual(
  824. [
  825. TreeChange.delete((b"a", F, blob1.id)),
  826. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"c", F, blob3.id)),
  827. ],
  828. self.detect_renames(tree1, tree2),
  829. )
  830. tree3 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  831. tree4 = self.commit_tree([(b"c", blob3)])
  832. self.assertEqual(
  833. [
  834. TreeChange(CHANGE_RENAME, (b"a", F, blob2.id), (b"c", F, blob3.id)),
  835. TreeChange.delete((b"b", F, blob1.id)),
  836. ],
  837. self.detect_renames(tree3, tree4),
  838. )
  839. def test_content_rename_one_to_many(self) -> None:
  840. blob1 = make_object(Blob, data=b"aa\nb\nc\nd\ne\n")
  841. blob2 = make_object(Blob, data=b"ab\nb\nc\nd\ne\n") # 8/11 match
  842. blob3 = make_object(Blob, data=b"aa\nb\nc\nd\nf\n") # 9/11 match
  843. tree1 = self.commit_tree([(b"a", blob1)])
  844. tree2 = self.commit_tree([(b"b", blob2), (b"c", blob3)])
  845. self.assertEqual(
  846. [
  847. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  848. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  849. ],
  850. self.detect_renames(tree1, tree2),
  851. )
  852. def test_content_rename_many_to_one(self) -> None:
  853. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  854. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  855. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  856. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  857. tree2 = self.commit_tree([(b"c", blob3)])
  858. self.assertEqual(
  859. [
  860. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  861. TreeChange.delete((b"b", F, blob2.id)),
  862. ],
  863. self.detect_renames(tree1, tree2),
  864. )
  865. def test_content_rename_many_to_many(self) -> None:
  866. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  867. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  868. blob3 = make_object(Blob, data=b"a\nb\nc\nf\n")
  869. blob4 = make_object(Blob, data=b"a\nb\nc\ng\n")
  870. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  871. tree2 = self.commit_tree([(b"c", blob3), (b"d", blob4)])
  872. # TODO(dborowitz): Distribute renames rather than greedily choosing
  873. # copies.
  874. self.assertEqual(
  875. [
  876. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"c", F, blob3.id)),
  877. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"d", F, blob4.id)),
  878. TreeChange.delete((b"b", F, blob2.id)),
  879. ],
  880. self.detect_renames(tree1, tree2),
  881. )
  882. def test_content_rename_with_more_deletions(self) -> None:
  883. blob1 = make_object(Blob, data=b"")
  884. tree1 = self.commit_tree(
  885. [(b"a", blob1), (b"b", blob1), (b"c", blob1), (b"d", blob1)]
  886. )
  887. tree2 = self.commit_tree([(b"e", blob1), (b"f", blob1), (b"g", blob1)])
  888. self.maxDiff = None
  889. self.assertEqual(
  890. [
  891. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"e", F, blob1.id)),
  892. TreeChange(CHANGE_RENAME, (b"b", F, blob1.id), (b"f", F, blob1.id)),
  893. TreeChange(CHANGE_RENAME, (b"c", F, blob1.id), (b"g", F, blob1.id)),
  894. TreeChange.delete((b"d", F, blob1.id)),
  895. ],
  896. self.detect_renames(tree1, tree2),
  897. )
  898. def test_content_rename_gitlink(self) -> None:
  899. blob1 = make_object(Blob, data=b"blob1")
  900. blob2 = make_object(Blob, data=b"blob2")
  901. link1 = b"1" * 40
  902. link2 = b"2" * 40
  903. tree1 = self.commit_tree([(b"a", blob1), (b"b", link1, 0o160000)])
  904. tree2 = self.commit_tree([(b"c", blob2), (b"d", link2, 0o160000)])
  905. self.assertEqual(
  906. [
  907. TreeChange.delete((b"a", 0o100644, blob1.id)),
  908. TreeChange.delete((b"b", 0o160000, link1)),
  909. TreeChange.add((b"c", 0o100644, blob2.id)),
  910. TreeChange.add((b"d", 0o160000, link2)),
  911. ],
  912. self.detect_renames(tree1, tree2),
  913. )
  914. def test_exact_rename_swap(self) -> None:
  915. blob1 = make_object(Blob, data=b"1")
  916. blob2 = make_object(Blob, data=b"2")
  917. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  918. tree2 = self.commit_tree([(b"a", blob2), (b"b", blob1)])
  919. self.assertEqual(
  920. [
  921. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob2.id)),
  922. TreeChange(CHANGE_MODIFY, (b"b", F, blob2.id), (b"b", F, blob1.id)),
  923. ],
  924. self.detect_renames(tree1, tree2),
  925. )
  926. self.assertEqual(
  927. [
  928. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob1.id)),
  929. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob2.id)),
  930. ],
  931. self.detect_renames(tree1, tree2, rewrite_threshold=50),
  932. )
  933. def test_content_rename_swap(self) -> None:
  934. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  935. blob2 = make_object(Blob, data=b"e\nf\ng\nh\n")
  936. blob3 = make_object(Blob, data=b"a\nb\nc\ne\n")
  937. blob4 = make_object(Blob, data=b"e\nf\ng\ni\n")
  938. tree1 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  939. tree2 = self.commit_tree([(b"a", blob4), (b"b", blob3)])
  940. self.assertEqual(
  941. [
  942. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob3.id)),
  943. TreeChange(CHANGE_RENAME, (b"b", F, blob2.id), (b"a", F, blob4.id)),
  944. ],
  945. self.detect_renames(tree1, tree2, rewrite_threshold=60),
  946. )
  947. def test_rewrite_threshold(self) -> None:
  948. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  949. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  950. blob3 = make_object(Blob, data=b"a\nb\nf\ng\n")
  951. tree1 = self.commit_tree([(b"a", blob1)])
  952. tree2 = self.commit_tree([(b"a", blob3), (b"b", blob2)])
  953. no_renames = [
  954. TreeChange(CHANGE_MODIFY, (b"a", F, blob1.id), (b"a", F, blob3.id)),
  955. TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  956. ]
  957. self.assertEqual(no_renames, self.detect_renames(tree1, tree2))
  958. self.assertEqual(
  959. no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40)
  960. )
  961. self.assertEqual(
  962. [
  963. TreeChange.add((b"a", F, blob3.id)),
  964. TreeChange(CHANGE_RENAME, (b"a", F, blob1.id), (b"b", F, blob2.id)),
  965. ],
  966. self.detect_renames(tree1, tree2, rewrite_threshold=80),
  967. )
  968. def test_find_copies_harder_exact(self) -> None:
  969. blob = make_object(Blob, data=b"blob")
  970. tree1 = self.commit_tree([(b"a", blob)])
  971. tree2 = self.commit_tree([(b"a", blob), (b"b", blob)])
  972. self.assertEqual(
  973. [TreeChange.add((b"b", F, blob.id))],
  974. self.detect_renames(tree1, tree2),
  975. )
  976. self.assertEqual(
  977. [TreeChange(CHANGE_COPY, (b"a", F, blob.id), (b"b", F, blob.id))],
  978. self.detect_renames(tree1, tree2, find_copies_harder=True),
  979. )
  980. def test_find_copies_harder_content(self) -> None:
  981. blob1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  982. blob2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  983. tree1 = self.commit_tree([(b"a", blob1)])
  984. tree2 = self.commit_tree([(b"a", blob1), (b"b", blob2)])
  985. self.assertEqual(
  986. [TreeChange.add((b"b", F, blob2.id))],
  987. self.detect_renames(tree1, tree2),
  988. )
  989. self.assertEqual(
  990. [TreeChange(CHANGE_COPY, (b"a", F, blob1.id), (b"b", F, blob2.id))],
  991. self.detect_renames(tree1, tree2, find_copies_harder=True),
  992. )
  993. def test_find_copies_harder_with_rewrites(self) -> None:
  994. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  995. blob_a2 = make_object(Blob, data=b"f\ng\nh\ni\n")
  996. blob_b2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  997. tree1 = self.commit_tree([(b"a", blob_a1)])
  998. tree2 = self.commit_tree([(b"a", blob_a2), (b"b", blob_b2)])
  999. self.assertEqual(
  1000. [
  1001. TreeChange(CHANGE_MODIFY, (b"a", F, blob_a1.id), (b"a", F, blob_a2.id)),
  1002. TreeChange(CHANGE_COPY, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1003. ],
  1004. self.detect_renames(tree1, tree2, find_copies_harder=True),
  1005. )
  1006. self.assertEqual(
  1007. [
  1008. TreeChange.add((b"a", F, blob_a2.id)),
  1009. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"b", F, blob_b2.id)),
  1010. ],
  1011. self.detect_renames(
  1012. tree1, tree2, rewrite_threshold=50, find_copies_harder=True
  1013. ),
  1014. )
  1015. def test_reuse_detector(self) -> None:
  1016. blob = make_object(Blob, data=b"blob")
  1017. tree1 = self.commit_tree([(b"a", blob)])
  1018. tree2 = self.commit_tree([(b"b", blob)])
  1019. detector = RenameDetector(self.store)
  1020. changes = [TreeChange(CHANGE_RENAME, (b"a", F, blob.id), (b"b", F, blob.id))]
  1021. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1022. self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id))
  1023. def test_want_unchanged(self) -> None:
  1024. blob_a1 = make_object(Blob, data=b"a\nb\nc\nd\n")
  1025. blob_b = make_object(Blob, data=b"b")
  1026. blob_c2 = make_object(Blob, data=b"a\nb\nc\ne\n")
  1027. tree1 = self.commit_tree([(b"a", blob_a1), (b"b", blob_b)])
  1028. tree2 = self.commit_tree([(b"c", blob_c2), (b"b", blob_b)])
  1029. self.assertEqual(
  1030. [TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id))],
  1031. self.detect_renames(tree1, tree2),
  1032. )
  1033. self.assertEqual(
  1034. [
  1035. TreeChange(CHANGE_RENAME, (b"a", F, blob_a1.id), (b"c", F, blob_c2.id)),
  1036. TreeChange(
  1037. CHANGE_UNCHANGED,
  1038. (b"b", F, blob_b.id),
  1039. (b"b", F, blob_b.id),
  1040. ),
  1041. ],
  1042. self.detect_renames(tree1, tree2, want_unchanged=True),
  1043. )