瀏覽代碼

Build Fuzz Tests with Rust Extensions and Optional Features

Updates the OSS-Fuzz container environment & build script to
build and install Dulwich with all optional features enabled and
refines dictionary handling.

Impact:

- Increases the fuzz-able surface area of the most interesting
  (from a fuzzing & security testing perspective) Dulwich APIs,
  enabling future fuzz harness improvements to specifically
  target these features in tests.
- Enhances flexibility of fuzzer seed data generation steps to
  improve dictionary quality and simplify seed corpus management
  for all fuzz targets.

Key Changes in `build.sh`:
- Unset problematic OSS-Fuzz provided `$RUSTFLAGS` to prevent
  build issues that inhibit PyO3 based Rust extension compilation.
- Install Dulwich with its optional feature extras (`fastimport`,
  `paramiko`, `https`, `pgp`).
- Add "drop-in" support for seed corpus zip files & LibFuzzer
  options files, which will be introduced later.

Key Changes in `container-environment-bootstrap.sh`:
- Install dependencies required to build Dulwich's optional
  features and Rust extensions.
- Update Python dependencies (`setuptools`, `pyinstaller`, etc.)
  to the latest compatible versions.
- Improve dictionary handling for fuzz targets by encapsulating
  dictionary generation logic in shell functions and adding
  default common dictionary entries to all fuzz target specific
  `.dict` files.
David Lakin 6 月之前
父節點
當前提交
ad41bf97f3
共有 2 個文件被更改,包括 48 次插入37 次删除
  1. 6 25
      fuzzing/oss-fuzz-scripts/build.sh
  2. 42 12
      fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh

+ 6 - 25
fuzzing/oss-fuzz-scripts/build.sh

@@ -2,36 +2,17 @@
 
 set -euo pipefail
 
-python3 -m pip install .
+unset RUSTFLAGS # The OSS-Fuzz provided RUSTFLAGS cause issues that break PyO3 based Rust extension builds.
+export PATH="${PATH}:${HOME}/.cargo/bin"
+python3 -m pip install -v ".[fastimport,paramiko,https,pgp]"
 
-# Directory to look in for dictionaries, options files, and seed corpora:
-SEED_DATA_DIR="$SRC/seed_data"
-
-find "$SEED_DATA_DIR" \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \
-  ! \( -name '__base.*' \) -exec printf 'Copying: %s\n' {} \; \
+find "$SRC" -maxdepth 1 \
+  \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \
+  -exec printf '[%s] Copying: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" {} \; \
   -exec chmod a-x {} \; \
   -exec cp {} "$OUT" \;
 
 # Build fuzzers in $OUT.
 find "$SRC/dulwich/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do
   compile_python_fuzzer "$fuzz_harness"
-
-  common_base_dictionary_filename="$SEED_DATA_DIR/__base.dict"
-  if [[ -r "$common_base_dictionary_filename" ]]; then
-    # Strip the `.py` extension from the filename and replace it with `.dict`.
-    fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict"
-    output_file="$OUT/$fuzz_harness_dictionary_filename"
-
-    printf 'Appending %s to %s\n' "$common_base_dictionary_filename" "$output_file"
-    if [[ -s "$output_file" ]]; then
-      # If a dictionary file for this fuzzer already exists and is not empty,
-      # we append a new line to the end of it before appending any new entries.
-      #
-      # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error
-      # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.)
-      # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32
-      echo >>"$output_file"
-    fi
-    cat "$common_base_dictionary_filename" >>"$output_file"
-  fi
 done

+ 42 - 12
fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh

@@ -6,23 +6,20 @@ set -euo pipefail
 # Prerequisites #
 #################
 
-for cmd in python3 git wget rsync; do
+for cmd in python3 git wget zip; do
   command -v "$cmd" >/dev/null 2>&1 || {
     printf '[%s] Required command %s not found, exiting.\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$cmd" >&2
     exit 1
   }
 done
 
-SEED_DATA_DIR="$SRC/seed_data"
-mkdir -p "$SEED_DATA_DIR"
-
 #############
 # Functions #
 #############
 
 download_and_concatenate_common_dictionaries() {
   # Assign the first argument as the target file where all contents will be concatenated
-  target_file="$1"
+  local target_file="$1"
 
   # Shift the arguments so the first argument (target_file path) is removed
   # and only URLs are left for the loop below.
@@ -35,21 +32,54 @@ download_and_concatenate_common_dictionaries() {
   done
 }
 
-fetch_seed_data() {
-    rsync -avc "$SRC/dulwich/fuzzing/dictionaries/" "$SEED_DATA_DIR/"
+prepare_dictionaries_for_fuzz_targets() {
+  local dictionaries_dir="$1"
+  local fuzz_targets_dir="$2"
+  local common_base_dictionary_filename="$WORK/__base.dict"
+
+  printf '[%s] Copying .dict files from %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"  "$dictionaries_dir" "$SRC/"
+  cp -v "$dictionaries_dir"/*.dict "$SRC/"
+
+  download_and_concatenate_common_dictionaries "$common_base_dictionary_filename" \
+    "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \
+    "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/pem.dict"
+
+  find "$fuzz_targets_dir" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do
+    if [[ -r "$common_base_dictionary_filename" ]]; then
+      # Strip the `.py` extension from the filename and replace it with `.dict`.
+      fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict"
+      local output_file="$SRC/$fuzz_harness_dictionary_filename"
+
+      printf '[%s] Appending %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$common_base_dictionary_filename" "$output_file"
+      if [[ -s "$output_file" ]]; then
+        # If a dictionary file for this fuzzer already exists and is not empty,
+        # we append a new line to the end of it before appending any new entries.
+        #
+        # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error
+        # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.)
+        # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32
+        echo >>"$output_file"
+      fi
+      cat "$common_base_dictionary_filename" >>"$output_file"
+    fi
+  done
 }
 
 ########################
 # Main execution logic #
 ########################
 
-fetch_seed_data
+prepare_dictionaries_for_fuzz_targets "$SRC/dulwich/fuzzing/dictionaries" "$SRC/dulwich/fuzzing/"
+
+apt-get update && apt-get install -y libgpgme-dev libgpg-error-dev
 
-download_and_concatenate_common_dictionaries "$SEED_DATA_DIR/__base.dict" \
-  "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \
-  "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/url.dict"
+# The OSS-Fuzz base image includes a modified cargo executable for pure rust projects
+# but it can cause linking errors with PyO3's extension-module feature so we remove it.
+rm -rf /usr/local/bin/cargo
+# Install the Rust toolchain so the Rust extensions can be built in build.sh.
+curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly --profile minimal -y
 
 # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below.
 python3 -m pip install --upgrade pip
 # Upgrade to the latest versions known to work at the time the below changes were introduced:
-python3 -m pip install 'setuptools~=69.0' 'pyinstaller~=6.0'
+python3 -m pip install -U 'atheris>=2.3.0' 'setuptools~=73.0' 'pyinstaller>=6.10' setuptools-rust