瀏覽代碼

objects: Add checks on date fields (tag, commit alike)

This checks for overflow date errors in objects's check()
methods (tag, commit).  If such a situation occurs, an
ObjectFormatException is raised.

Related #567
Antoine R. Dumont (@ardumont) 7 年之前
父節點
當前提交
694d27c2b0
共有 3 個文件被更改,包括 116 次插入25 次删除
  1. 6 0
      NEWS
  2. 54 25
      dulwich/objects.py
  3. 56 0
      dulwich/tests/test_objects.py

+ 6 - 0
NEWS

@@ -8,6 +8,12 @@
   * Raise an error when attempting to add paths that are not under the
     repository. (Jelmer Vernooij)
 
+  IMPROVEMENTS
+
+  * Enforce date field parsing consistency. This also add checks on
+    those date fields for potential overflow.
+    (Antoine R. Dumont, #567)
+
 0.18.5	2017-10-29
 
  BUG FIXES

+ 54 - 25
dulwich/objects.py

@@ -63,6 +63,9 @@ _TAGGER_HEADER = b'tagger'
 S_IFGITLINK = 0o160000
 
 
+MAX_TIME = 9223372036854775807  # (2**63) - 1 - signed long int max
+
+
 def S_ISGITLINK(m):
     """Check if a mode indicates a submodule.
 
@@ -190,6 +193,20 @@ def check_identity(identity, error_msg):
         raise ObjectFormatException(error_msg)
 
 
+def check_time(time_seconds):
+    """Check if the specified time is not prone to overflow error.
+
+    This will raise an exception if the time is not valid.
+
+    :param time_info: author/committer/tagger info
+
+    """
+    # Prevent overflow error
+    if time_seconds > MAX_TIME:
+        raise ObjectFormatException(
+            'Date field should not exceed %s' % MAX_TIME)
+
+
 def git_line(*items):
     """Formats items into a space sepreated line."""
     return b' '.join(items) + b'\n'
@@ -694,6 +711,9 @@ class Tag(ShaFile):
         if getattr(self, "_tagger", None):
             check_identity(self._tagger, "invalid tagger")
 
+        self._check_has_member("_tag_time", "missing tag time")
+        check_time(self._tag_time)
+
         last = None
         for field, _ in _parse_message(self._chunked_text):
             if field == _OBJECT_HEADER and last is not None:
@@ -742,23 +762,10 @@ class Tag(ShaFile):
             elif field == _TAG_HEADER:
                 self._name = value
             elif field == _TAGGER_HEADER:
-                try:
-                    sep = value.index(b'> ')
-                except ValueError:
-                    self._tagger = value
-                    self._tag_time = None
-                    self._tag_timezone = None
-                    self._tag_timezone_neg_utc = False
-                else:
-                    self._tagger = value[0:sep+1]
-                    try:
-                        (timetext, timezonetext) = (
-                                value[sep+2:].rsplit(b' ', 1))
-                        self._tag_time = int(timetext)
-                        self._tag_timezone, self._tag_timezone_neg_utc = (
-                                parse_timezone(timezonetext))
-                    except ValueError as e:
-                        raise ObjectFormatException(e)
+                (self._tagger,
+                 self._tag_time,
+                 (self._tag_timezone,
+                  self._tag_timezone_neg_utc)) = parse_time_entry(value)
             elif field is None:
                 self._message = value
             else:
@@ -1084,6 +1091,29 @@ def format_timezone(offset, unnecessary_negative_timezone=False):
             (sign, offset / 3600, (offset / 60) % 60)).encode('ascii')
 
 
+def parse_time_entry(value):
+    """Parse time entry behavior
+
+    :param value: Bytes representing a git commit/tag line
+    :raise: ObjectFormatException in case of parsing error (malformed
+            field date)
+    :return: Tuple of (author, time, (timezone, timezone_neg_utc))
+    """
+    try:
+        sep = value.index(b'> ')
+    except ValueError:
+        return (value, None, (None, False))
+    try:
+        person = value[0:sep+1]
+        rest = value[sep+2:]
+        timetext, timezonetext = rest.rsplit(b' ', 1)
+        time = int(timetext)
+        timezone, timezone_neg_utc = parse_timezone(timezonetext)
+    except ValueError as e:
+        raise ObjectFormatException(e)
+    return person, time, (timezone, timezone_neg_utc)
+
+
 def parse_commit(chunks):
     """Parse a commit object from chunks.
 
@@ -1108,14 +1138,9 @@ def parse_commit(chunks):
         elif field == _PARENT_HEADER:
             parents.append(value)
         elif field == _AUTHOR_HEADER:
-            author, timetext, timezonetext = value.rsplit(b' ', 2)
-            author_time = int(timetext)
-            author_info = (author, author_time, parse_timezone(timezonetext))
+            author_info = parse_time_entry(value)
         elif field == _COMMITTER_HEADER:
-            committer, timetext, timezonetext = value.rsplit(b' ', 2)
-            commit_time = int(timetext)
-            commit_info = (
-                    committer, commit_time, parse_timezone(timezonetext))
+            commit_info = parse_time_entry(value)
         elif field == _ENCODING_HEADER:
             encoding = value
         elif field == _MERGETAG_HEADER:
@@ -1177,7 +1202,8 @@ class Commit(ShaFile):
         self._check_has_member("_tree", "missing tree")
         self._check_has_member("_author", "missing author")
         self._check_has_member("_committer", "missing committer")
-        # times are currently checked when set
+        self._check_has_member("_author_time", "missing author time")
+        self._check_has_member("_commit_time", "missing commit time")
 
         for parent in self._parents:
             check_hexsha(parent, "invalid parent sha")
@@ -1186,6 +1212,9 @@ class Commit(ShaFile):
         check_identity(self._author, "invalid author")
         check_identity(self._committer, "invalid committer")
 
+        check_time(self._author_time)
+        check_time(self._commit_time)
+
         last = None
         for field, _ in _parse_message(self._chunked_text):
             if field == _TREE_HEADER and last is not None:

+ 56 - 0
dulwich/tests/test_objects.py

@@ -56,6 +56,7 @@ from dulwich.objects import (
     _parse_tree_py,
     sorted_tree_items,
     _sorted_tree_items_py,
+    MAX_TIME
     )
 from dulwich.tests import (
     TestCase,
@@ -639,6 +640,38 @@ class CommitParseTests(ShaFileCheckTests):
             else:
                 self.assertCheckFails(Commit, text)
 
+    def test_check_commit_with_unparseable_time(self):
+        identity_with_wrong_time = (
+            b'Igor Sysoev <igor@sysoev.ru> 18446743887488505614+42707004')
+
+        # Those fail at reading time
+        self.assertCheckFails(
+            Commit,
+            self.make_commit_text(author=default_committer,
+                                  committer=identity_with_wrong_time))
+        self.assertCheckFails(
+            Commit,
+            self.make_commit_text(author=identity_with_wrong_time,
+                                  committer=default_committer))
+
+    def test_check_commit_with_overflow_date(self):
+        """Date with overflow should raise an ObjectFormatException when checked
+
+        """
+        identity_with_wrong_time = (
+            b'Igor Sysoev <igor@sysoev.ru> 18446743887488505614 +42707004')
+        commit0 = Commit.from_string(self.make_commit_text(
+                author=identity_with_wrong_time,
+                committer=default_committer))
+        commit1 = Commit.from_string(self.make_commit_text(
+                author=default_committer,
+                committer=identity_with_wrong_time))
+
+        # Those fails when triggering the check() method
+        for commit in [commit0, commit1]:
+            with self.assertRaises(ObjectFormatException):
+                commit.check()
+
     def test_parse_gpgsig(self):
         c = Commit.from_string(b"""tree aaff74984cccd156a469afa7d9ab10e4777beb24
 author Jelmer Vernooij <jelmer@samba.org> 1412179807 +0200
@@ -1003,6 +1036,21 @@ class TagParseTests(ShaFileCheckTests):
                     b'Sun 7 Jul 2007 12:54:34 +0700')))
         self.assertCheckFails(Tag, self.make_tag_text(object_sha=b'xxx'))
 
+    def test_check_tag_with_unparseable_field(self):
+        self.assertCheckFails(Tag, self.make_tag_text(
+            tagger=(b'Linus Torvalds <torvalds@woody.linux-foundation.org> '
+                    b'423423+0000')))
+
+    def test_check_tag_with_overflow_time(self):
+        """Date with overflow should raise an ObjectFormatException when checked
+
+        """
+        author = 'Some Dude <some@dude.org> %s +0000' % (MAX_TIME+1, )
+        tag = Tag.from_string(self.make_tag_text(
+            tagger=(author.encode())))
+        with self.assertRaises(ObjectFormatException):
+            tag.check()
+
     def test_check_duplicates(self):
         # duplicate each of the header fields
         for i in range(4):
@@ -1218,6 +1266,14 @@ class ShaFileSerializeTests(TestCase):
         with self.assert_serialization_on_change(tag):
             tag.message = b'new message'
 
+    def test_tag_serialize_time_error(self):
+        with self.assertRaises(ObjectFormatException):
+            tag = make_object(
+                Tag, name=b'tag', message=b'some message',
+                tagger=b'Tagger <test@example.com> 1174773719+0000',
+                object=(Commit, b'0' * 40))
+            tag._deserialize(tag._serialize())
+
 
 class PrettyFormatTreeEntryTests(TestCase):