commit ad41bf97f3b41bfe6f663ac1f149eb7b248dea01 from: David Lakin date: Thu Aug 29 23:45:38 2024 UTC Build Fuzz Tests with Rust Extensions and Optional Features Updates the OSS-Fuzz container environment & build script to build and install Dulwich with all optional features enabled and refines dictionary handling. Impact: - Increases the fuzz-able surface area of the most interesting (from a fuzzing & security testing perspective) Dulwich APIs, enabling future fuzz harness improvements to specifically target these features in tests. - Enhances flexibility of fuzzer seed data generation steps to improve dictionary quality and simplify seed corpus management for all fuzz targets. Key Changes in `build.sh`: - Unset problematic OSS-Fuzz provided `$RUSTFLAGS` to prevent build issues that inhibit PyO3 based Rust extension compilation. - Install Python dependencies with specific features (`fastimport`, `pgp`, `paramiko`, etc.). - Added "drop-in" support for inclusion of seed corpora zip files & LibFuzzer options files to be introduced later. Key Changes in `container-environment-bootstrap.sh`: - Installs dependencies required to build Dulwich's optional features and Rust extensions. - Updated Python dependencies (`setuptools`, `pyinstaller`, etc.) to the latest compatible versions. - Improved dictionary handling for fuzz targets by encapsulating dictionary generation logic in shell functions and adding default common dictionary entries to all fuzz target specific `.dict` files. commit - 294a2171bae9e95d3015c49783274b7cc852c1e8 commit + ad41bf97f3b41bfe6f663ac1f149eb7b248dea01 blob - 5d0bb4dd1bca68652e2816e9b29132e0a36d1da9 blob + 7e82c2bb99a5691a79a269dfe1b06aae7d5c056d --- fuzzing/oss-fuzz-scripts/build.sh +++ fuzzing/oss-fuzz-scripts/build.sh @@ -2,36 +2,17 @@ set -euo pipefail -python3 -m pip install . +unset RUSTFLAGS # The OSS-Fuzz provided RUSTFLAGS cause issues that break PyO3 based Rust extension builds. 
+export PATH="${PATH}:${HOME}/.cargo/bin" +python3 -m pip install -v ".[fastimport,paramiko,https,pgp]" -# Directory to look in for dictionaries, options files, and seed corpora: -SEED_DATA_DIR="$SRC/seed_data" - -find "$SEED_DATA_DIR" \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ - ! \( -name '__base.*' \) -exec printf 'Copying: %s\n' {} \; \ +find "$SRC" -maxdepth 1 \ + \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ + -exec printf '[%s] Copying: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" {} \; \ -exec chmod a-x {} \; \ -exec cp {} "$OUT" \; # Build fuzzers in $OUT. find "$SRC/dulwich/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do compile_python_fuzzer "$fuzz_harness" - - common_base_dictionary_filename="$SEED_DATA_DIR/__base.dict" - if [[ -r "$common_base_dictionary_filename" ]]; then - # Strip the `.py` extension from the filename and replace it with `.dict`. - fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" - output_file="$OUT/$fuzz_harness_dictionary_filename" - - printf 'Appending %s to %s\n' "$common_base_dictionary_filename" "$output_file" - if [[ -s "$output_file" ]]; then - # If a dictionary file for this fuzzer already exists and is not empty, - # we append a new line to the end of it before appending any new entries. - # - # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error - # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.) 
- # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32 - echo >>"$output_file" - fi - cat "$common_base_dictionary_filename" >>"$output_file" - fi done blob - b3b0e9f847ea68a0ab856b8cf5caaddf360f06dc blob + 177c6ff0b871528225bcc6b13bf4ccff6a00cf17 --- fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -6,23 +6,20 @@ set -euo pipefail # Prerequisites # ################# -for cmd in python3 git wget rsync; do +for cmd in python3 git wget zip; do command -v "$cmd" >/dev/null 2>&1 || { printf '[%s] Required command %s not found, exiting.\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$cmd" >&2 exit 1 } done -SEED_DATA_DIR="$SRC/seed_data" -mkdir -p "$SEED_DATA_DIR" - ############# # Functions # ############# download_and_concatenate_common_dictionaries() { # Assign the first argument as the target file where all contents will be concatenated - target_file="$1" + local target_file="$1" # Shift the arguments so the first argument (target_file path) is removed # and only URLs are left for the loop below. 
@@ -35,21 +32,54 @@ download_and_concatenate_common_dictionaries() { done } -fetch_seed_data() { - rsync -avc "$SRC/dulwich/fuzzing/dictionaries/" "$SEED_DATA_DIR/" +prepare_dictionaries_for_fuzz_targets() { + local dictionaries_dir="$1" + local fuzz_targets_dir="$2" + local common_base_dictionary_filename="$WORK/__base.dict" + + printf '[%s] Copying .dict files from %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dictionaries_dir" "$SRC/" + cp -v "$dictionaries_dir"/*.dict "$SRC/" + + download_and_concatenate_common_dictionaries "$common_base_dictionary_filename" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/pem.dict" + + find "$fuzz_targets_dir" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do + if [[ -r "$common_base_dictionary_filename" ]]; then + # Strip the `.py` extension from the filename and replace it with `.dict`. + fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" + local output_file="$SRC/$fuzz_harness_dictionary_filename" + + printf '[%s] Appending %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$common_base_dictionary_filename" "$output_file" + if [[ -s "$output_file" ]]; then + # If a dictionary file for this fuzzer already exists and is not empty, + # we append a new line to the end of it before appending any new entries. + # + # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error + # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.) 
+ # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32 + echo >>"$output_file" + fi + cat "$common_base_dictionary_filename" >>"$output_file" + fi + done } ######################## # Main execution logic # ######################## -fetch_seed_data +prepare_dictionaries_for_fuzz_targets "$SRC/dulwich/fuzzing/dictionaries" "$SRC/dulwich/fuzzing/" -download_and_concatenate_common_dictionaries "$SEED_DATA_DIR/__base.dict" \ - "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \ - "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/url.dict" +apt-get update && apt-get install -y libgpgme-dev libgpg-error-dev +# The OSS-Fuzz base image includes a modified cargo executable for pure rust projects +# but it can cause linking errors with PyO3's extension-module feature so we remove it. +rm -rf /usr/local/bin/cargo +# Install the Rust toolchain so the Rust extensions can be built in build.sh. +curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly --profile minimal -y + # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. python3 -m pip install --upgrade pip # Upgrade to the latest versions known to work at the time the below changes were introduced: -python3 -m pip install 'setuptools~=69.0' 'pyinstaller~=6.0' +python3 -m pip install -U 'atheris>=2.3.0' 'setuptools~=73.0' 'pyinstaller>=6.10' setuptools-rust