1 year ago · f9ad9a97cd
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@
 
				 
			
 
				  * Drop Python 3.7 support. (Jelmer Vernooĳ)
			
 
				 
			
 
				+ * Improve fuzzing coverage (David Lakin)
			
 
				+
			
 
				 0.22.1	2024-04-23
			
 
				 
			
 
				  * Handle alternate case for worktreeconfig setting (Will Shanks, #1285)
			
--- a/fuzzing/dictionaries/fuzz_bundle.dict
+++ b/fuzzing/dictionaries/fuzz_bundle.dict
@@ -0,0 +1,5 @@
 
				+"# v2 git bundle"
			
 
				+"# v3 git bundle"
			
 
				+"\\001\\000\\000\\000"
			
 
				+"\\001\\000"
			
 
				+"\\377\\377\\377\\377\\377\\377\\377\\377"
			
--- a/fuzzing/dictionaries/fuzz_configfile.dict
+++ b/fuzzing/dictionaries/fuzz_configfile.dict
@@ -23,9 +23,30 @@
 
				 "%\\000\\000\\000\\000\\000\\000\\000"
			
 
				 "\\000\\000\\000\\000\\000\\000\\000\\\\"
			
 
				 "\\377\\377\\377\\377\\377\\377\\377$"
			
 
				-"[\\000\\000\\000\\000\\000\\000\\000"
			
 
				 "p\\012"
			
 
				-"\\001\\000\\000\\000\\000\\000\\000\\""
			
 
				+"\\001\\000\\000\\000\\000\\000\\000\\"
			
 
				 "\\337\\000\\000\\000\\000\\000\\000\\000"
			
 
				 "\\001\\000\\000\\000\\000\\000\\000\\000"
			
 
				 "\\\\0="
			
 
				+"\\377\\377\\377\\377\\377\\377\\377\\377"
			
 
				+"0\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"1\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"tp\\012"
			
 
				+"\\354\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"]\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\"\\377\\377"
			
 
				+"\\014\\000\\012"
			
 
				+"k[7"
			
 
				+"~\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\035\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"!\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\277\\000\\000"
			
 
				+"\\351\\351\\351"
			
 
				+"\\243\\330\\215"
			
 
				+"\\370\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"a\\012"
			
 
				+"9\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"j\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\022\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"#\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\377\\377\\377"
			
--- a/fuzzing/dictionaries/fuzz_object_store.dict
+++ b/fuzzing/dictionaries/fuzz_object_store.dict
@@ -0,0 +1,12 @@
 
				+"_tu"
			
 
				+"old"
			
 
				+"\\007\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"rename_detector"
			
 
				+"\\000\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\031\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\032\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\001\\000\\000\\000"
			
 
				+"\\000\\000\\000\\000"
			
 
				+"\\001\\000"
			
 
				+"\\032\\000\\000\\000\\000\\000\\000\\000"
			
 
				+"\\002\\000\\000\\000\\000\\000\\000\\000"
			
--- a/fuzzing/dictionaries/fuzz_repo.dict
+++ b/fuzzing/dictionaries/fuzz_repo.dict
@@ -0,0 +1,2 @@
 
				+"{self.size}"
			
 
				+"win32"
			
--- a/fuzzing/fuzz-targets/fuzz_bundle.py
+++ b/fuzzing/fuzz-targets/fuzz_bundle.py
@@ -0,0 +1,54 @@
 
				+import sys
			
 
				+from io import BytesIO
			
 
				+
			
 
				+import atheris
			
 
				+
			
 
				+with atheris.instrument_imports():
			
 
				+    # We instrument `test_utils` as well, so it doesn't block coverage analysis in Fuzz Introspector:
			
 
				+    from test_utils import EnhancedFuzzedDataProvider, is_expected_exception
			
 
				+
			
 
				+    from dulwich.bundle import Bundle, read_bundle, write_bundle
			
 
				+    from dulwich.pack import PackData, write_pack_objects
			
 
				+
			
 
				+
			
 
				+def TestOneInput(data):
			
 
				+    fdp = EnhancedFuzzedDataProvider(data)
			
 
				+    bundle = Bundle()
			
 
				+    bundle.version = fdp.PickValueInList([2, 3, None])
			
 
				+    bundle.references = {fdp.ConsumeRandomString(): fdp.ConsumeBytes(20)}
			
 
				+    bundle.prerequisites = [(fdp.ConsumeBytes(20), fdp.ConsumeRandomBytes())]
			
 
				+    bundle.capabilities = {
			
 
				+        fdp.ConsumeRandomString(): fdp.ConsumeRandomString(),
			
 
				+    }
			
 
				+
			
 
				+    b = BytesIO()
			
 
				+    write_pack_objects(b.write, [])
			
 
				+    b.seek(0)
			
 
				+    bundle.pack_data = PackData.from_file(b)
			
 
				+
			
 
				+    # Test __repr__ method
			
 
				+    _ = repr(bundle)
			
 
				+
			
 
				+    try:
			
 
				+        bundle_file = BytesIO()
			
 
				+        write_bundle(bundle_file, bundle)
			
 
				+        _ = read_bundle(bundle_file)
			
 
				+    except (AttributeError, UnicodeEncodeError, AssertionError) as e:
			
 
				+        expected_exceptions = [
			
 
				+            "'bytes' object has no attribute 'encode'",
			
 
				+            "surrogates not allowed",
			
 
				+            "unsupported bundle format header",
			
 
				+        ]
			
 
				+        if is_expected_exception(expected_exceptions, e):
			
 
				+            return -1
			
 
				+        else:
			
 
				+            raise e
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    atheris.Setup(sys.argv, TestOneInput)
			
 
				+    atheris.Fuzz()
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/fuzzing/fuzz-targets/fuzz_configfile.py
+++ b/fuzzing/fuzz-targets/fuzz_configfile.py
@@ -2,23 +2,17 @@ import sys
 
				 from io import BytesIO
			
 
				 
			
 
				 import atheris
			
 
				+from test_utils import is_expected_exception
			
 
				 
			
 
				 with atheris.instrument_imports():
			
 
				     from dulwich.config import ConfigFile
			
 
				 
			
 
				 
			
 
				-def is_expected_error(error_list, error_msg):
			
 
				-    for error in error_list:
			
 
				-        if error in error_msg:
			
 
				-            return True
			
 
				-    return False
			
 
				-
			
 
				-
			
 
				 def TestOneInput(data):
			
 
				     try:
			
 
				         ConfigFile.from_file(BytesIO(data))
			
 
				     except ValueError as e:
			
 
				-        expected_errors = [
			
 
				+        expected_exceptions = [
			
 
				             "without section",
			
 
				             "invalid variable name",
			
 
				             "expected trailing ]",
			
@@ -27,7 +21,7 @@ def TestOneInput(data):
 
				             "escape character",
			
 
				             "missing end quote",
			
 
				         ]
			
 
				-        if is_expected_error(expected_errors, str(e)):
			
 
				+        if is_expected_exception(expected_exceptions, e):
			
 
				             return -1
			
 
				         else:
			
 
				             raise e
			
--- a/fuzzing/fuzz-targets/fuzz_object_store.py
+++ b/fuzzing/fuzz-targets/fuzz_object_store.py
@@ -0,0 +1,94 @@
 
				+import stat
			
 
				+import sys
			
 
				+from io import BytesIO
			
 
				+
			
 
				+import atheris
			
 
				+
			
 
				+with atheris.instrument_imports():
			
 
				+    # We instrument `test_utils` as well, so it doesn't block coverage analysis in Fuzz Introspector:
			
 
				+    from test_utils import EnhancedFuzzedDataProvider, is_expected_exception
			
 
				+
			
 
				+    from dulwich.errors import ObjectFormatException
			
 
				+    from dulwich.objects import S_IFGITLINK, Blob, Commit, Tree
			
 
				+    from dulwich.patch import write_tree_diff
			
 
				+    from dulwich.repo import (
			
 
				+        InvalidUserIdentity,
			
 
				+        MemoryRepo,
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def TestOneInput(data):
			
 
				+    fdp = EnhancedFuzzedDataProvider(data)
			
 
				+    repo = MemoryRepo()
			
 
				+    blob = Blob.from_string(fdp.ConsumeRandomBytes())
			
 
				+    tree = Tree()
			
 
				+    tree.add(
			
 
				+        fdp.ConsumeRandomBytes(),
			
 
				+        fdp.PickValueInList([stat.S_IFREG, stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK]),
			
 
				+        blob.id,
			
 
				+    )
			
 
				+    commit = Commit()
			
 
				+    commit.tree = tree.id
			
 
				+    commit.author = fdp.ConsumeRandomBytes()
			
 
				+    commit.committer = fdp.ConsumeRandomBytes()
			
 
				+    commit.commit_time = fdp.ConsumeRandomInt()
			
 
				+    commit.commit_timezone = fdp.ConsumeRandomInt()
			
 
				+    commit.author_time = fdp.ConsumeRandomInt()
			
 
				+    commit.author_timezone = fdp.ConsumeRandomInt()
			
 
				+    commit.message = fdp.ConsumeRandomBytes()
			
 
				+
			
 
				+    object_store = repo.object_store
			
 
				+
			
 
				+    try:
			
 
				+        object_store.add_object(blob)
			
 
				+        object_store.add_object(tree)
			
 
				+        object_store.add_object(commit)
			
 
				+    except (InvalidUserIdentity, ObjectFormatException):
			
 
				+        return -1
			
 
				+    except ValueError as e:
			
 
				+        expected_exceptions = [
			
 
				+            "subsection not found",
			
 
				+            "Unable to handle non-minute offset",
			
 
				+        ]
			
 
				+        if is_expected_exception(expected_exceptions, e):
			
 
				+            return -1
			
 
				+        else:
			
 
				+            raise e
			
 
				+
			
 
				+    commit2 = Commit()
			
 
				+    commit2.tree = tree.id
			
 
				+    commit2.parents = [commit.id]
			
 
				+    commit2.author = commit.author
			
 
				+    commit2.committer = commit.committer
			
 
				+    commit2.commit_time = fdp.ConsumeRandomInt()
			
 
				+    commit2.commit_timezone = fdp.ConsumeRandomInt()
			
 
				+    commit2.author_time = fdp.ConsumeRandomInt()
			
 
				+    commit2.author_timezone = fdp.ConsumeRandomInt()
			
 
				+    commit2.message = fdp.ConsumeRandomBytes()
			
 
				+
			
 
				+    try:
			
 
				+        blob.data = fdp.ConsumeRandomBytes()
			
 
				+        repo.object_store.add_object(blob)
			
 
				+        repo.object_store.add_object(tree)
			
 
				+        repo.object_store.add_object(commit2)
			
 
				+        out = BytesIO()
			
 
				+        write_tree_diff(out, repo.object_store, commit.tree, tree.id)
			
 
				+    except (InvalidUserIdentity, ObjectFormatException):
			
 
				+        return -1
			
 
				+    except ValueError as e:
			
 
				+        expected_exceptions = [
			
 
				+            "Unable to handle non-minute offset",
			
 
				+        ]
			
 
				+        if is_expected_exception(expected_exceptions, e):
			
 
				+            return -1
			
 
				+        else:
			
 
				+            raise e
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    atheris.Setup(sys.argv, TestOneInput)
			
 
				+    atheris.Fuzz()
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/fuzzing/fuzz-targets/fuzz_repo.py
+++ b/fuzzing/fuzz-targets/fuzz_repo.py
@@ -0,0 +1,62 @@
 
				+import os
			
 
				+import sys
			
 
				+import tempfile
			
 
				+
			
 
				+import atheris
			
 
				+
			
 
				+with atheris.instrument_imports():
			
 
				+    # We instrument `test_utils` as well, so it doesn't block coverage analysis in Fuzz Introspector:
			
 
				+    from test_utils import EnhancedFuzzedDataProvider
			
 
				+
			
 
				+    from dulwich.repo import (
			
 
				+        InvalidUserIdentity,
			
 
				+        Repo,
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def TestOneInput(data):
			
 
				+    fdp = EnhancedFuzzedDataProvider(data)
			
 
				+    with tempfile.TemporaryDirectory() as temp_dir:
			
 
				+        repo = Repo.init(temp_dir)
			
 
				+        repo.set_description(fdp.ConsumeRandomBytes())
			
 
				+        repo.get_description()
			
 
				+
			
 
				+        # Generate a minimal set of files based on fuzz data to minimize I/O operations.
			
 
				+        file_paths = [
			
 
				+            os.path.join(temp_dir, f"File{i}")
			
 
				+            for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))
			
 
				+        ]
			
 
				+        for file_path in file_paths:
			
 
				+            with open(file_path, "wb") as f:
			
 
				+                f.write(fdp.ConsumeRandomBytes())
			
 
				+
			
 
				+        try:
			
 
				+            repo.do_commit(
			
 
				+                message=fdp.ConsumeRandomBytes(),
			
 
				+                committer=fdp.ConsumeRandomBytes(),
			
 
				+                author=fdp.ConsumeRandomBytes(),
			
 
				+                commit_timestamp=fdp.ConsumeRandomInt(),
			
 
				+                commit_timezone=fdp.ConsumeRandomInt(),
			
 
				+                author_timestamp=fdp.ConsumeRandomInt(),
			
 
				+                author_timezone=fdp.ConsumeRandomInt(),
			
 
				+            )
			
 
				+        except InvalidUserIdentity:
			
 
				+            return -1
			
 
				+
			
 
				+        for file_path in file_paths:
			
 
				+            with open(file_path, "wb") as f:
			
 
				+                f.write(fdp.ConsumeRandomBytes())
			
 
				+
			
 
				+        repo.stage(file_paths)
			
 
				+        repo.do_commit(
			
 
				+            message=fdp.ConsumeRandomBytes(),
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    atheris.Setup(sys.argv, TestOneInput)
			
 
				+    atheris.Fuzz()
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/fuzzing/fuzz-targets/test_utils.py
+++ b/fuzzing/fuzz-targets/test_utils.py
@@ -0,0 +1,86 @@
 
				+from typing import List  # pragma: no cover
			
 
				+
			
 
				+import atheris  # pragma: no cover
			
 
				+
			
 
				+
			
 
				+@atheris.instrument_func
			
 
				+def is_expected_exception(
			
 
				+    error_message_list: List[str], exception: Exception
			
 
				+):  # pragma: no cover
			
 
				+    """Checks if the message of a given exception matches any of the expected error messages.
			
 
				+
			
 
				+    Args:
			
 
				+         error_message_list (List[str]): A list of error message substrings to check against the exception's message.
			
 
				+         exception (Exception): The exception object raised during execution.
			
 
				+
			
 
				+    Returns:
			
 
				+      bool: True if the exception's message contains any of the substrings from the error_message_list, otherwise False.
			
 
				+    """
			
 
				+    for error in error_message_list:
			
 
				+        if error in str(exception):
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):  # pragma: no cover
			
 
				+    """Extends atheris.FuzzedDataProvider to offer additional methods to make fuzz testing slightly more DRY."""
			
 
				+
			
 
				+    def __init__(self, data):
			
 
				+        """Initializes the EnhancedFuzzedDataProvider with fuzzing data from the argument provided to TestOneInput.
			
 
				+
			
 
				+        Args:
			
 
				+            data (bytes): The binary data used for fuzzing.
			
 
				+        """
			
 
				+        super().__init__(data)
			
 
				+
			
 
				+    def ConsumeRemainingBytes(self) -> bytes:
			
 
				+        """Consume the remaining bytes in the bytes container.
			
 
				+
			
 
				+        Returns:
			
 
				+          bytes: Zero or more bytes.
			
 
				+        """
			
 
				+        return self.ConsumeBytes(self.remaining_bytes())
			
 
				+
			
 
				+    def ConsumeRandomBytes(self, max_length=None) -> bytes:
			
 
				+        """Consume a random count of bytes from the bytes container.
			
 
				+
			
 
				+        Args:
			
 
				+          max_length (int, optional): The maximum length of the string. Defaults to the number of remaining bytes.
			
 
				+
			
 
				+        Returns:
			
 
				+          bytes: Zero or more bytes.
			
 
				+        """
			
 
				+        if max_length is None:
			
 
				+            max_length = self.remaining_bytes()
			
 
				+        else:
			
 
				+            max_length = min(max_length, self.remaining_bytes())
			
 
				+
			
 
				+        return self.ConsumeBytes(self.ConsumeIntInRange(0, max_length))
			
 
				+
			
 
				+    def ConsumeRandomString(self, max_length=None) -> str:
			
 
				+        """Consume bytes to produce a Unicode string.
			
 
				+
			
 
				+        Args:
			
 
				+          max_length (int, optional): The maximum length of the string. Defaults to the number of remaining bytes.
			
 
				+
			
 
				+        Returns:
			
 
				+         str: A Unicode string.
			
 
				+        """
			
 
				+        if max_length is None:
			
 
				+            max_length = self.remaining_bytes()
			
 
				+        else:
			
 
				+            max_length = min(max_length, self.remaining_bytes())
			
 
				+
			
 
				+        return self.ConsumeUnicode(self.ConsumeIntInRange(0, max_length))
			
 
				+
			
 
				+    def ConsumeRandomInt(self, minimum=0, maximum=1234567890) -> int:
			
 
				+        """Consume bytes to produce an integer.
			
 
				+
			
 
				+        Args:
			
 
				+          minimum (int, optional): The minimum value of the integer. Defaults to 0.
			
 
				+          maximum (int, optional): The maximum value of the integer. Defaults to 1234567890.
			
 
				+
			
 
				+        Returns:
			
 
				+          int: An integer.
			
 
				+        """
			
 
				+        return self.ConsumeIntInRange(minimum, maximum)