diff_tree.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. # diff_tree.py -- Utilities for diffing files and trees.
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Utilities for diffing files and trees."""
  19. import stat
  20. from dulwich.misc import (
  21. TreeChangeTuple,
  22. )
  23. from dulwich.objects import (
  24. TreeEntry,
  25. )
  26. # TreeChange type constants.
  27. CHANGE_ADD = 'add'
  28. CHANGE_MODIFY = 'modify'
  29. CHANGE_DELETE = 'delete'
  30. CHANGE_RENAME = 'rename'
  31. CHANGE_COPY = 'copy'
  32. CHANGE_UNCHANGED = 'unchanged'
  33. _NULL_ENTRY = TreeEntry(None, None, None)
  34. class TreeChange(TreeChangeTuple):
  35. """Class encapsulating a single change between two trees."""
  36. def _tree_entries(path, tree):
  37. result = []
  38. if not tree:
  39. return result
  40. for entry in tree.iteritems(name_order=True):
  41. result.append(entry.in_path(path))
  42. return result
  43. def _merge_entries(path, tree1, tree2):
  44. """Merge the entries of two trees.
  45. :param path: A path to prepend to all tree entry names.
  46. :param tree1: The first Tree object to iterate, or None.
  47. :param tree2: The second Tree object to iterate, or None.
  48. :return: A list of pairs of TreeEntry objects for each pair of entries in
  49. the trees. If an entry exists in one tree but not the other, the other
  50. entry will have all attributes set to None. If neither entry's path is
  51. None, they are guaranteed to match.
  52. """
  53. entries1 = _tree_entries(path, tree1)
  54. entries2 = _tree_entries(path, tree2)
  55. i1 = i2 = 0
  56. len1 = len(entries1)
  57. len2 = len(entries2)
  58. result = []
  59. while i1 < len1 and i2 < len2:
  60. entry1 = entries1[i1]
  61. entry2 = entries2[i2]
  62. if entry1.path < entry2.path:
  63. result.append((entry1, _NULL_ENTRY))
  64. i1 += 1
  65. elif entry1.path > entry2.path:
  66. result.append((_NULL_ENTRY, entry2))
  67. i2 += 1
  68. else:
  69. result.append((entry1, entry2))
  70. i1 += 1
  71. i2 += 1
  72. for i in xrange(i1, len1):
  73. result.append((entries1[i], _NULL_ENTRY))
  74. for i in xrange(i2, len2):
  75. result.append((_NULL_ENTRY, entries2[i]))
  76. return result
  77. def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
  78. """Recursively walk all the entries of two trees.
  79. Iteration is depth-first pre-order, as in e.g. os.walk.
  80. :param store: An ObjectStore for looking up objects.
  81. :param tree1_id: The SHA of the first Tree object to iterate, or None.
  82. :param tree2_id: The SHA of the second Tree object to iterate, or None.
  83. :param prune_identical: If True, identical subtrees will not be walked.
  84. :yield: Pairs of TreeEntry objects for each pair of entries in the trees and
  85. their subtrees recursively. If an entry exists in one tree but not the
  86. other, the other entry will have all attributes set to None. If neither
  87. entry's path is None, they are guaranteed to match.
  88. """
  89. # This could be fairly easily generalized to >2 trees if we find a use case.
  90. mode1 = tree1_id and stat.S_IFDIR or None
  91. mode2 = tree2_id and stat.S_IFDIR or None
  92. todo = [(TreeEntry('', mode1, tree1_id), TreeEntry('', mode2, tree2_id))]
  93. while todo:
  94. entry1, entry2 = todo.pop()
  95. is_tree1 = entry1.mode and stat.S_ISDIR(entry1.mode)
  96. is_tree2 = entry2.mode and stat.S_ISDIR(entry2.mode)
  97. if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
  98. continue
  99. tree1 = is_tree1 and store[entry1.sha] or None
  100. tree2 = is_tree2 and store[entry2.sha] or None
  101. path = entry1.path or entry2.path
  102. todo.extend(reversed(_merge_entries(path, tree1, tree2)))
  103. yield entry1, entry2
  104. def _skip_tree(entry):
  105. if entry.mode is None or stat.S_ISDIR(entry.mode):
  106. return _NULL_ENTRY
  107. return entry
  108. def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
  109. """Find the differences between the contents of two trees.
  110. :param store: An ObjectStore for looking up objects.
  111. :param tree1_id: The SHA of the source tree.
  112. :param tree2_id: The SHA of the target tree.
  113. :param want_unchanged: If True, include TreeChanges for unmodified entries
  114. as well.
  115. :yield: TreeChange instances for each change between the source and target
  116. tree.
  117. """
  118. entries = walk_trees(store, tree1_id, tree2_id,
  119. prune_identical=(not want_unchanged))
  120. for entry1, entry2 in entries:
  121. if entry1 == entry2 and not want_unchanged:
  122. continue
  123. # Treat entries for trees as missing.
  124. entry1 = _skip_tree(entry1)
  125. entry2 = _skip_tree(entry2)
  126. if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
  127. if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
  128. # File type changed: report as delete/add.
  129. yield TreeChange(CHANGE_DELETE, entry1, _NULL_ENTRY)
  130. entry1 = _NULL_ENTRY
  131. change_type = CHANGE_ADD
  132. elif entry1 == entry2:
  133. change_type = CHANGE_UNCHANGED
  134. else:
  135. change_type = CHANGE_MODIFY
  136. elif entry1 != _NULL_ENTRY:
  137. change_type = CHANGE_DELETE
  138. elif entry2 != _NULL_ENTRY:
  139. change_type = CHANGE_ADD
  140. else:
  141. # Both were None because at least one was a tree.
  142. continue
  143. yield TreeChange(change_type, entry1, entry2)