annotate.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. # annotate.py -- Annotate files with last changed revision
  2. # Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Annotate file contents indicating when they were last changed.
  19. Annotated lines are represented as tuples with last modified revision SHA1
  20. and contents.
  21. Please note that this is a very naive annotate implementation. It works,
  22. but its speed could be improved - in particular because it uses
  23. Python's difflib.
  24. """
  25. import difflib
  26. from dulwich.walk import (
  27. ORDER_DATE,
  28. Walker,
  29. )
  30. # Walk over ancestry graph breadth-first
  31. # When checking each revision, find lines that according to
  32. # difflib.SequenceMatcher are common between versions.
  33. # Any lines that are not in common were introduced by the newer revision.
  34. # If there were no lines kept from the older version, stop going deeper in the
  35. # graph.
  36. def update_lines(annotated_lines, new_history_data, new_blob):
  37. """Update annotation lines with old blob lines.
  38. """
  39. ret = []
  40. new_lines = new_blob.splitlines()
  41. matcher = difflib.SequenceMatcher(
  42. a=[l for (h, l) in annotated_lines],
  43. b=new_lines)
  44. for tag, i1, i2, j1, j2 in matcher.get_opcodes():
  45. if tag == 'equal':
  46. ret.extend(annotated_lines[i1:i2])
  47. elif tag in ('insert', 'replace'):
  48. ret.extend([(new_history_data, l) for l in new_lines[j1:j2]])
  49. elif tag == 'delete':
  50. pass # don't care
  51. else:
  52. raise RuntimeError('Unknown tag %s returned in diff' % tag)
  53. return ret
  54. def annotate_lines(store, commit_id, path, order=ORDER_DATE, lines=None,
  55. follow=True):
  56. """Annotate the lines of a blob.
  57. :param store: Object store to retrieve objects from
  58. :param commit_id: Commit id in which to annotate path
  59. :param path: Path to annotate
  60. :param order: Order in which to process history (defaults to ORDER_DATE)
  61. :param lines: Initial lines to compare to (defaults to specified)
  62. :param follow: Wether to follow changes across renames/copies
  63. :return: List of (commit, line) entries where
  64. commit is the oldest commit that changed a line
  65. """
  66. walker = Walker(store, include=[commit_id], paths=[path], order=order,
  67. follow=follow)
  68. revs = []
  69. for log_entry in walker:
  70. for tree_change in log_entry.changes():
  71. if type(tree_change) is not list:
  72. tree_change = [tree_change]
  73. for change in tree_change:
  74. if change.new.path == path:
  75. path = change.old.path
  76. revs.append((log_entry.commit, change.new))
  77. break
  78. lines = []
  79. for (commit, entry) in reversed(revs):
  80. lines = update_lines(lines, (commit, entry), store[entry.sha])
  81. return lines