Browse Source

Split authorship lines from the right instead of from the left

Git authorship lines have the form 'author Name <em@i.l> timestamp timezone'.
Some clients badly mangle the 'Name <em@i.l>' part, for instance by setting two
email addresses. Splitting the identity from the timestamp at the last '> '
rather than the first lets us parse some of those mangled commits.

Such commits still fail the check() method (and they raise a warning in git fsck
upstream as well), but we can at least work with them.

(This edge case brought to you by https://forge.softwareheritage.org/T1280)
Nicolas Dandrimont 6 years ago
parent
commit
eac04520d2
2 changed files with 22 additions and 1 deletion
  1. 1 1
      dulwich/objects.py
  2. 21 0
      dulwich/tests/test_objects.py

+ 1 - 1
dulwich/objects.py

@@ -1112,7 +1112,7 @@ def parse_time_entry(value):
     :return: Tuple of (author, time, (timezone, timezone_neg_utc))
     """
     try:
-        sep = value.index(b'> ')
+        sep = value.rindex(b'> ')
     except ValueError:
         return (value, None, (None, False))
     try:

+ 21 - 0
dulwich/tests/test_objects.py

@@ -673,6 +673,27 @@ class CommitParseTests(ShaFileCheckTests):
             with self.assertRaises(ObjectFormatException):
                 commit.check()
 
+    def test_mangled_author_line(self):
+        """Mangled author line should successfully parse"""
+        author_line = (
+            b'Karl MacMillan <kmacmill@redhat.com> <"Karl MacMillan '
+            b'<kmacmill@redhat.com>"> 1197475547 -0500'
+        )
+        expected_identity = (
+            b'Karl MacMillan <kmacmill@redhat.com> <"Karl MacMillan '
+            b'<kmacmill@redhat.com>">'
+        )
+        commit = Commit.from_string(
+            self.make_commit_text(author=author_line)
+        )
+
+        # The commit parses properly
+        self.assertEqual(commit.author, expected_identity)
+
+        # But the check fails because the author identity is bogus
+        with self.assertRaises(ObjectFormatException):
+            commit.check()
+
     def test_parse_gpgsig(self):
         c = Commit.from_string(b"""tree aaff74984cccd156a469afa7d9ab10e4777beb24
 author Jelmer Vernooij <jelmer@samba.org> 1412179807 +0200