From a4402120ad7f788d6ce7311fa8bdca6d1ba851b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Fri, 9 Feb 2018 08:00:07 +0100 Subject: [PATCH] Add OSS-Fuzz infrastructure * Makefile.am: Add fuzz/ to SUBDIRS * cfg.mk: Fix 'make syntax-check' * configure.ac: Add --enable-fuzzing * fuzz/Makefile.am: New file * fuzz/README.md: New file * fuzz/fuzzer.h: New file * fuzz/get_all_corpora: New file * fuzz/get_ossfuzz_corpora: New file * fuzz/glob_crash.c: New file * fuzz/main.c: New file * fuzz/run-afl.sh: New file * fuzz/run-clang.sh: New file * fuzz/view-coverage.sh: New file * fuzz/wget_options_fuzzer.c: New file * fuzz/wget_options_fuzzer.dict: New file * src/init.c (cleanup): Free more resources * src/main.c (init_switches): Initialize only once, (print_usage): Don't print if TESTING is defined * src/utils.h: Include wget.h --- Makefile.am | 2 +- cfg.mk | 6 +- configure.ac | 22 +++- fuzz/Makefile.am | 60 +++++++++++ fuzz/README.md | 79 ++++++++++++++ fuzz/fuzzer.h | 26 +++++ fuzz/get_all_corpora | 11 ++ fuzz/get_ossfuzz_corpora | 45 ++++++++ fuzz/glob_crash.c | 32 ++++++ fuzz/main.c | 152 ++++++++++++++++++++++++++ fuzz/run-afl.sh | 59 ++++++++++ fuzz/run-clang.sh | 57 ++++++++++ fuzz/view-coverage.sh | 37 +++++++ fuzz/wget_options_fuzzer.c | 146 +++++++++++++++++++++++++ fuzz/wget_options_fuzzer.dict | 195 ++++++++++++++++++++++++++++++++++ src/init.c | 11 +- src/main.c | 22 +++- src/utils.h | 3 +- 18 files changed, 952 insertions(+), 13 deletions(-) create mode 100644 fuzz/Makefile.am create mode 100644 fuzz/README.md create mode 100644 fuzz/fuzzer.h create mode 100755 fuzz/get_all_corpora create mode 100755 fuzz/get_ossfuzz_corpora create mode 100644 fuzz/glob_crash.c create mode 100644 fuzz/main.c create mode 100755 fuzz/run-afl.sh create mode 100755 fuzz/run-clang.sh create mode 100755 fuzz/view-coverage.sh create mode 100644 fuzz/wget_options_fuzzer.c create mode 100644 fuzz/wget_options_fuzzer.dict diff --git a/Makefile.am b/Makefile.am index 
f520a2f2..e915da63 100644 --- a/Makefile.am +++ b/Makefile.am @@ -41,7 +41,7 @@ distuninstallcheck_listfiles = find . -type f | \ ACLOCAL_AMFLAGS = -I m4 # subdirectories in the distribution -SUBDIRS = lib src doc po tests util testenv +SUBDIRS = lib src doc po util fuzz tests testenv EXTRA_DIST = MAILING-LIST \ msdos/config.h msdos/Makefile.DJ \ diff --git a/cfg.mk b/cfg.mk index 1da2327d..ef7a3d71 100644 --- a/cfg.mk +++ b/cfg.mk @@ -1,5 +1,9 @@ # Explicit syntax-check exceptions. exclude_file_name_regexp--sc_trailing_blank = ^doc/annou.*$$ +exclude_file_name_regexp--sc_bindtextdomain = ^fuzz/.*\.c$$ +exclude_file_name_regexp--sc_require_config_h = ^fuzz/main\.c$$ +exclude_file_name_regexp--sc_require_config_h_first = ^fuzz/main\.c$$ + export VC_LIST_EXCEPT_DEFAULT=^(lib/.*|m4/.*|md5/.*|build-aux/.*|src/gettext\.h|.*ChangeLog|tests/certs/.*)$$ @@ -30,4 +34,4 @@ local-checks-to-skip = \ show-vc-list-except: @$(VC_LIST_EXCEPT) -VC_LIST_ALWAYS_EXCLUDE_REGEX = ^ABOUT-NLS|((msdos|vms)/.*)|ChangeLog-2014-12-10|/.*.der|^tests/certs/.*$$ +VC_LIST_ALWAYS_EXCLUDE_REGEX = ^ABOUT-NLS|((msdos|vms)/.*)|ChangeLog-2014-12-10|/.*.der|^tests/certs/.*|^fuzz/.*.in/.*$$ diff --git a/configure.ac b/configure.ac index d9f00982..310eb4a4 100644 --- a/configure.ac +++ b/configure.ac @@ -87,6 +87,19 @@ dnl dnl Process features dnl +AC_ARG_ENABLE([fuzzing], + [AS_HELP_STRING([--enable-fuzzing], [Turn on fuzzing build (for developers)])], + [enable_fuzzing=$enableval; AC_SUBST([LIB_FUZZING_ENGINE])], [enable_fuzzing=no; LIB_FUZZING_ENGINE=""]) dnl action-if-given also runs for --disable-fuzzing, so take the value from enableval instead of hard-coding yes +#FUZZ_LIBS=$LIBS +#if test $enable_fuzzing = "yes"; then +# OLD_LIBS=$LIBS +# AC_SEARCH_LIBS([dlsym], [dl dld]) +# FUZZ_LIBS=$LIBS +# LIBS=$OLD_LIBS +#fi +#AC_SUBST([FUZZ_LIBS]) +AM_CONDITIONAL([FUZZING], [test "$enable_fuzzing" = "yes"]) + dnl Opie: Support for opie s/key FTP logins AC_ARG_ENABLE([opie], [AS_HELP_STRING([--disable-opie], [disable support for opie or s/key FTP login])], @@ -184,7 +197,7 @@ dnl dnl Gettext dnl 
AM_GNU_GETTEXT([external],[need-ngettext]) -AM_GNU_GETTEXT_VERSION([0.18.1]) +AM_GNU_GETTEXT_VERSION([0.17]) AC_PROG_RANLIB @@ -237,7 +250,7 @@ dnl AC_HEADER_STDBOOL AC_CHECK_HEADERS(unistd.h sys/time.h) AC_CHECK_HEADERS(termios.h sys/ioctl.h sys/select.h utime.h sys/utime.h) -AC_CHECK_HEADERS(stdint.h inttypes.h pwd.h wchar.h) +AC_CHECK_HEADERS(stdint.h inttypes.h pwd.h wchar.h dlfcn.h) AC_CHECK_DECLS(h_errno,,,[#include ]) @@ -275,7 +288,7 @@ AC_FUNC_MMAP AC_FUNC_FSEEKO AC_CHECK_FUNCS(strptime timegm vsnprintf vasprintf drand48 pathconf) AC_CHECK_FUNCS(strtoll usleep ftello sigblock sigsetjmp memrchr wcwidth mbtowc) -AC_CHECK_FUNCS(sleep symlink utime strlcpy random) +AC_CHECK_FUNCS(sleep symlink utime strlcpy random fmemopen) if test x"$ENABLE_OPIE" = xyes; then AC_LIBOBJ([ftp-opie]) @@ -783,7 +796,7 @@ dnl dnl Create output dnl AC_CONFIG_FILES([Makefile src/Makefile doc/Makefile util/Makefile - po/Makefile.in tests/Makefile + po/Makefile.in tests/Makefile fuzz/Makefile lib/Makefile testenv/Makefile tests/certs/interca.conf tests/certs/rootca.conf]) AC_CONFIG_HEADERS([src/config.h]) @@ -812,4 +825,5 @@ AC_MSG_NOTICE([Summary of build options: Resolver: $RESOLVER_INFO GPGME: $have_gpg IRI: $iri + Fuzzing build: $enable_fuzzing, $LIB_FUZZING_ENGINE ]) diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am new file mode 100644 index 00000000..b8c6e7bc --- /dev/null +++ b/fuzz/Makefile.am @@ -0,0 +1,60 @@ +AM_CFLAGS = $(WERROR_CFLAGS) $(WARN_CFLAGS) -Wno-unused-parameter -Wno-pedantic +AM_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) -I$(top_builddir)/lib -I$(top_srcdir)/lib \ + -DSRCDIR=\"$(abs_srcdir)\" +LDADD = ../lib/libgnu.a \ + $(GETADDRINFO_LIB) $(HOSTENT_LIB) $(INET_NTOP_LIB) $(INET_PTON_LIB) \ + $(LIBSOCKET) $(LIB_CLOCK_GETTIME) $(LIB_CRYPTO) $(LIB_GETLOGIN) $(LIB_NANOSLEEP) $(LIB_POLL) \ + $(LIB_POSIX_SPAWN) $(LIB_PTHREAD_SIGMASK) $(LIB_SELECT) $(LTLIBICONV) $(LTLIBINTL) \ + $(LTLIBMULTITHREAD) $(LTLIBTHREAD) $(SERVENT_LIB) @INTL_MACOSX_LIBS@ \ + -ldl + +WGET_TESTS 
= \ + wget_options_fuzzer$(EXEEXT) + +if FUZZING + bin_PROGRAMS = $(WGET_TESTS) + LDADD += $(LIB_FUZZING_ENGINE) + MAIN = fuzzer.h +# AM_LDFLAGS = -no-install -all-static +else + AM_CPPFLAGS += -DTEST_RUN + AM_TESTS_ENVIRONMENT = export VALGRIND_TESTS"=@VALGRIND_TESTS@"; + TESTS = $(WGET_TESTS) + check_PROGRAMS = $(WGET_TESTS) + MAIN = main.c fuzzer.h +endif + +# Make libunittest "PHONY" so we're always sure we're up-to-date. +.PHONY: ../src/libunittest.a +../src/libunittest.a: + $(MAKE) $(AM_MAKEFLAGS) -C ../src libunittest.a + +wget_options_fuzzer_SOURCES = wget_options_fuzzer.c $(MAIN) +wget_options_fuzzer_LDADD = ../src/libunittest.a $(LDADD) + + +#EXTRA_DIST = $(wildcard *.options) $(wildcard *.dict) \ +# $(wildcard *.in) $(wildcard *.repro) + +dist-hook: + find $(srcdir) -name '*.options' -exec cp -v '{}' $(distdir) ';' + find $(srcdir) -name '*.dict' -exec cp -v '{}' $(distdir) ';' + find $(srcdir) -name '*.in' -exec cp -vr '{}' $(distdir) ';' + find $(srcdir) -name '*.repro' -exec cp -vr '{}' $(distdir) ';' + +clean-local: + rm -rf *.gc?? *.log lcov + +oss-fuzz: + if test "$$OUT" != ""; then \ + XLIBS="-lpsl -lgnutls -lnettle -lhogweed -lidn2 -lunistring"; \ + for ccfile in wget*_fuzzer.c; do \ + fuzzer=$$(basename $$ccfile .c); \ + $$CXX $$CXXFLAGS -I$(top_srcdir)/src -I$(top_srcdir) \ + "$${fuzzer}.c" -o "$${fuzzer}" \ + ../src/libunittest.a ../lib/libgnu.a $${LIB_FUZZING_ENGINE} \ + -Wl,-Bstatic $${XLIBS} -Wl,-Bdynamic; \ + done; \ + fi + +.PHONY: oss-fuzz diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 00000000..0a6f8945 --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,79 @@ +# Fuzzers + +These are fuzzers designed for use with `libFuzzer` or `afl`. They can +be used to run on Google's OSS-Fuzz (https://github.com/google/oss-fuzz/). + +The convention used here is that the initial values for each parser fuzzer +are taken from the $NAME.in directory. 
+ +Crash reproducers from OSS-Fuzz are put into the $NAME.repro directory for +regression testing with 'make check' or 'make check-valgrind' in the top directory. + + +# Running a fuzzer using clang + +Run the following commands in the top directory: +``` +export CC=clang-6.0 +export CFLAGS="-O1 -g -fno-omit-frame-pointer -gline-tables-only -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=undefined,integer,nullability -fsanitize=address -fsanitize-address-use-after-scope -fsanitize-coverage=trace-pc-guard,trace-cmp" +export LIB_FUZZING_ENGINE="-lFuzzer -lstdc++" +./configure --enable-fuzzing +make clean +make -j$(nproc) +cd fuzz + +# run wget_options_fuzzer +UBSAN_OPTIONS=print_stacktrace=1 ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer \ + ./run-clang.sh wget_options_fuzzer +``` + +If you see a crash, then a crash corpus is written that can be used for further +investigation. E.g. +``` +==2410==ERROR: AddressSanitizer: heap-use-after-free on address 0x602000004e90 at pc 0x00000049cf9c bp 0x7fffb5543f70 sp 0x7fffb5543720 +... +Test unit written to ./crash-adc83b19e793491b1c6ea0fd8b46cd9f32e592fc +``` + +To reproduce the crash: +``` +./wget_options_fuzzer < ./crash-adc83b19e793491b1c6ea0fd8b46cd9f32e592fc +``` + +You can also copy/move that file into wget_options_fuzzer.repro/ +and re-build the project without fuzzing for a valgrind run, if you like that better. +Just a `./configure` and a `make check-valgrind` should reproduce it. + + +# Running a fuzzer using AFL + +Run the following commands in the top directory: + +``` +$ export LIB_FUZZING_ENGINE="" +$ CC=afl-clang-fast ./configure --enable-fuzzing +$ make -j$(nproc) clean all +$ cd fuzz +$ ./run-afl.sh wget_options_fuzzer +``` + +# Fuzz code coverage using the corpus directories *.in/ + +Code coverage reports currently work best with gcc+lcov+genhtml. 
+ +In the top directory: +``` +CC=gcc CFLAGS="-O0 -g" ./configure +make fuzz-coverage +xdg-open lcov/index.html +``` + +To work on corpora for better coverage, `cd fuzz` and use e.g. +`./view-coverage.sh wget_options_fuzzer`. + + +# Creating wget_options_fuzzer.dict + +``` +for i in `../src/wget --help|tr ' ' '\n'|grep ^--|cut -c 3-|sort`;do echo \"$i\"; done >wget_options_fuzzer.dict +``` diff --git a/fuzz/fuzzer.h b/fuzz/fuzzer.h new file mode 100644 index 00000000..94dcdf14 --- /dev/null +++ b/fuzz/fuzzer.h @@ -0,0 +1,26 @@ +/* + * Copyright(c) 2017-2018 Free Software Foundation, Inc. + * + * This file is part of GNU Wget. + * + * GNU Wget is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GNU Wget is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Wget. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> // size_t +#include <stdint.h> // uint8_t + +#ifdef __cplusplus +extern "C" +#endif +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); /* fuzz target entry point: processes one input of 'size' bytes at 'data' */ diff --git a/fuzz/get_all_corpora b/fuzz/get_all_corpora new file mode 100755 index 00000000..87c5b4de --- /dev/null +++ b/fuzz/get_all_corpora @@ -0,0 +1,11 @@ +#!/bin/sh -eu + +if ! grep -q FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION Makefile; then + echo "The fuzzers haven't been built for fuzzing (maybe for regression testing !?)" + echo "Please built regarding README.md and try again." 
+ exit 1 +fi + +for fuzzer in *_fuzzer.c; do + ./get_ossfuzz_corpora "$(basename "$fuzzer" .c)" +done diff --git a/fuzz/get_ossfuzz_corpora b/fuzz/get_ossfuzz_corpora new file mode 100755 index 00000000..39b05eff --- /dev/null +++ b/fuzz/get_ossfuzz_corpora @@ -0,0 +1,45 @@ +#!/bin/sh -eu + +# As a first step see README.md and follow the steps under "Running a fuzzer using clang". + +# You might need 'gsutil' to download new corpora from the Google cloud: +# Read the docs at https://github.com/google/oss-fuzz/blob/master/docs/corpora.md +# then install 'google-cloud-sdk' and execute 'gcloud init'. +# Now 'gsutil' should be ready to use. + +if test -z "${1:-}"; then # ${1:-}: with 'sh -u' a bare $1 aborts the script before the usage text is printed + echo "Usage: $0 <fuzzer target>" + echo "Example: $0 wget_options_fuzzer" + exit 1 +fi + +if ! grep -q FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION Makefile; then + echo "The fuzzers haven't been built for fuzzing (maybe for regression testing !?)" + echo "Please built regarding README.md and try again." + exit 1 +fi + +fuzzer=$1 +project=wget + +# sync/copy the OSS-Fuzz corpora into the .new directory +mkdir -p "${fuzzer}.new" +cp -fp "${fuzzer}.in"/* "${fuzzer}.new" +gsutil -m rsync "gs://${project}-corpus.clusterfuzz-external.appspot.com/libFuzzer/${project}_${fuzzer}" "${fuzzer}.new" + +# create fuzzer target +BUILD_ONLY=1 ./run-clang.sh "${fuzzer}" + +# merge the corpora into the .in directory +"./${fuzzer}" -merge=1 "${fuzzer}.in" "${fuzzer}.new" + +# now clear .new dir and put all corpora there +rm -rf "${fuzzer}.new" +mv "${fuzzer}.in" "${fuzzer}.new" +mkdir "${fuzzer}.in" + +# now merge again (optimizes number of corpora) +"./${fuzzer}" -merge=1 "${fuzzer}.in" "${fuzzer}.new" + +echo +echo "If new files have been added, 'git add' and 'git commit' them." 
diff --git a/fuzz/glob_crash.c b/fuzz/glob_crash.c new file mode 100644 index 00000000..522b3a10 --- /dev/null +++ b/fuzz/glob_crash.c @@ -0,0 +1,32 @@ +/* + * Created 19.10.2017 by Tim Rühsen + * + * Call glob() using data from fuzzer crash file + * + * Build and execute with instrumented gnulib (amend -I paths as needed): + * + * clang build (spills out WRITE heap buffer overflow) + * export CC=clang-6.0 + * export CFLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address -fsanitize-address-use-after-scope" + * $CC $CFLAGS -I.. -I../lib glob_crash.c -o glob_crash ../lib/.libs/libgnu.a + * ./glob_crash + * + * gcc build (spills out READ heap buffer overflow): + * export CC=gcc + * export CFLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address -fsanitize-address-use-after-scope" + * $CC $CFLAGS -I.. -I../lib glob_crash.c -o glob_crash ../lib/.libs/libgnu.a + * ./glob_crash + */ + +#include <glob.h> + +int main(int argc, char **argv) +{ +static const char data[] = "1e"; /* glob() takes a const char *pattern; an unsigned char array is a pointer-sign mismatch */ + + glob_t pglob; + if (glob(data, GLOB_TILDE|GLOB_ONLYDIR|GLOB_NOCHECK, NULL, &pglob) == 0) + globfree(&pglob); + + return 0; +} diff --git a/fuzz/main.c b/fuzz/main.c new file mode 100644 index 00000000..eafcfd36 --- /dev/null +++ b/fuzz/main.c @@ -0,0 +1,152 @@ +/* + * Copyright(c) 2017-2018 Free Software Foundation, Inc. + * + * This file is part of GNU Wget. + * + * GNU Wget is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GNU Wget is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Wget. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include "fuzzer.h" +#include "utils.h" + +#ifdef TEST_RUN + +#include <dirent.h> + +#ifdef _WIN32 +# define SLASH '\\' +#else +# define SLASH '/' +#endif + +static int test_all_from(const char *dirname) /* run the fuzz target on every file in dirname whose name does not start with '.'; returns 1 if dirname cannot be opened, else 0 */ +{ + DIR *dirp; + + if ((dirp = opendir(dirname))) { + struct dirent *dp; + + while ((dp = readdir(dirp))) { + if (*dp->d_name == '.') continue; + + char fname[strlen(dirname) + strlen(dp->d_name) + 2]; + snprintf(fname, sizeof(fname), "%s/%s", dirname, dp->d_name); + + struct file_memory *fmem; + if ((fmem = wget_read_file(fname))) { + printf("testing %ld bytes from '%s'\n", fmem->length, fname); + LLVMFuzzerTestOneInput((uint8_t *)fmem->content, fmem->length); + wget_read_file_free(fmem); + } + } + closedir(dirp); + return 0; + } + + return 1; +} + +int main(int argc, char **argv) +{ + // if VALGRIND testing is enabled, we have to call ourselves with valgrind checking + const char *valgrind = getenv("VALGRIND_TESTS"); + + if (!valgrind || !*valgrind || !strcmp(valgrind, "0")) { + // fallthrough + } + else if (!strcmp(valgrind, "1")) { + char cmd[strlen(argv[0]) + 256]; + + snprintf(cmd, sizeof(cmd), "VALGRIND_TESTS=\"\" valgrind --error-exitcode=301 --leak-check=yes --show-reachable=yes --track-origins=yes %s", argv[0]); + return system(cmd) != 0; + } else { + char cmd[strlen(valgrind) + strlen(argv[0]) + 32]; + + snprintf(cmd, sizeof(cmd), "VALGRIND_TESTS=\"\" %s %s", valgrind, argv[0]); + return system(cmd) != 0; + } + + const char *target = strrchr(argv[0], SLASH); + const char *slash = strrchr(target ? target : argv[0], '/'); // a '/' behind the last SLASH wins; otherwise keep the SLASH match + + if (slash) + target = slash; + target = target ? 
target + 1 : argv[0]; + size_t target_len; + + if (strncmp(target, "lt-", 3) == 0) + target += 3; + + target_len = strlen(target); + +#ifdef _WIN32 + if (target_len > 4 && !strcmp(target + target_len - 4, ".exe")) target_len -= 4; // ignore .exe, but only if it is really there (no size_t underflow) +#endif + + char corporadir[sizeof(SRCDIR) + 1 + target_len + 8]; + snprintf(corporadir, sizeof(corporadir), SRCDIR "/%.*s.in", (int) target_len, target); + + if (test_all_from(corporadir)) { + fprintf(stderr, "Failed to find %s\n", corporadir); + exit(EXIT_FAILURE); + } + + snprintf(corporadir, sizeof(corporadir), SRCDIR "/%.*s.repro", (int) target_len, target); + + test_all_from(corporadir); + + return 0; +} + +#else + +#ifndef __AFL_LOOP +static int __AFL_LOOP(int n) +{ + static int first = 1; + + if (first) { + first = 0; + return n && --n > 0; + } + + return 0; +} +#endif + +int main(int argc, char **argv) +{ + size_t ret; + unsigned char buf[64 * 1024]; + + while (__AFL_LOOP(10000)) { // only works with clang - we have to use 1 because static/global vars in wget + ret = fread(buf, 1, sizeof(buf), stdin); + if (ferror(stdin)) // fread() returns size_t, so a '< 0' test can never fire + return 0; + + LLVMFuzzerTestOneInput(buf, ret); + } + + return 0; +} + +#endif /* #ifdef TEST_RUN */ diff --git a/fuzz/run-afl.sh b/fuzz/run-afl.sh new file mode 100755 index 00000000..59aece2d --- /dev/null +++ b/fuzz/run-afl.sh @@ -0,0 +1,59 @@ +#!/bin/sh -eu + +# Copyright(c) 2017 Free Software Foundation, Inc. +# +# This file is part of GNU Wget. +# +# GNU Wget is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# GNU Wget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with GNU Wget. If not, see <http://www.gnu.org/licenses/>. 
+ +if ! grep -q '^CC=.afl-clang-fast' ../config.log; then + echo "compile first library as:" + echo "CC=afl-clang-fast ./configure" + exit 1 +fi + +if test -z "${1:-}"; then # ${1:-}: with 'sh -u' a bare $1 aborts the script before the usage text is printed + echo "Usage: $0 test-case" + echo "Example: $0 wget_options_fuzzer" + exit 1 +fi + +fuzzer=$1 +#rm -f $fuzzer +#afl-clang-fast -O2 -g -I.. main.c "${fuzzer}.c" -o "${fuzzer}" + +### minimize test corpora +if test -d "${fuzzer}.in"; then + mkdir -p "${fuzzer}.min" + for fin in "${fuzzer}.in"/*; do # glob instead of parsing ls output + test -f "$fin" || continue + fmin="${fuzzer}.min/${fin##*/}" + if ! test -e "$fmin" || test "$fin" -nt "$fmin"; then + afl-tmin -m 500 -i "$fin" -o "$fmin" -- "./${fuzzer}" + fi + done +fi + +TMPOUT=${fuzzer}.out +mkdir -p "${TMPOUT}" + +if test -f "${fuzzer}.dict"; then + afl-fuzz -m 500 -i "${fuzzer}.min" -o "${TMPOUT}" -x "${fuzzer}.dict" -- "./${fuzzer}" +else + afl-fuzz -m 500 -i "${fuzzer}.min" -o "${TMPOUT}" -- "./${fuzzer}" +fi + +echo "output was stored in $TMPOUT" + +exit 0 diff --git a/fuzz/run-clang.sh b/fuzz/run-clang.sh new file mode 100755 index 00000000..e6c4835a --- /dev/null +++ b/fuzz/run-clang.sh @@ -0,0 +1,57 @@ +#!/bin/bash -e +# +# Copyright(c) 2017 Free Software Foundation, Inc. +# +# This file is part of GNU Wget. +# +# GNU Wget is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# GNU Wget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with GNU Wget. If not, see <http://www.gnu.org/licenses/>. 
+ +trap ctrl_c INT + +ctrl_c() { + ./${fuzzer} -merge=1 ${fuzzer}.in ${fuzzer}.new + rm -rf ${fuzzer}.new +} + +if test -z "${1:-}"; then + echo "Usage: $0 <fuzzer target>" + echo "Example: $0 wget_options_fuzzer" + exit 1 +fi + +if ! grep -q FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION Makefile; then + echo "The fuzzers haven't been built for fuzzing (maybe for regression testing !?)" + echo "Please built regarding README.md and try again." + exit 1 +fi + +# you'll need ~2GB free memory per worker ! +fuzzer=$1 +workers=$(($(nproc) - 0)) +jobs=$workers + +if test -n "${BUILD_ONLY:-}"; then + exit 0 +fi + +# create directory for NEW test corpora (covering new areas of code) +mkdir -p ${fuzzer}.new + +if test -f ${fuzzer}.dict; then + ./${fuzzer} -detect_leaks=0 -dict=${fuzzer}.dict ${fuzzer}.new ${fuzzer}.in -jobs=$jobs -workers=$workers +else + ./${fuzzer} ${fuzzer}.new ${fuzzer}.in -jobs=$jobs -workers=$workers -detect_leaks=0 +fi + +exit 0 diff --git a/fuzz/view-coverage.sh b/fuzz/view-coverage.sh new file mode 100755 index 00000000..4ad7642e --- /dev/null +++ b/fuzz/view-coverage.sh @@ -0,0 +1,37 @@ +#!/bin/bash -eu +# +# (C)2017 Tim Ruehsen tim.ruehsen@gmx.de +# +# View the coverage report for one or more fuzzers. + +# 1. execute 'make fuzz-coverage' in the top directory +# 2. execute './view-coverage.sh <fuzzer target(s)>' + +# Example with single fuzzer: +# ./view-coverage.sh wget_options_fuzzer + +# Example with two fuzzers: +# ./view-coverage.sh wget_options_fuzzer wget_html_parse_fuzzer + +if test -z "${1:-}"; then # ${1:-}: with 'bash -u' a bare $1 aborts the script before the usage text is printed + echo "Usage: $0 <fuzzer target(s)>" + echo "Example: $0 wget_options_fuzzer" + exit 1 +fi + +#fuzzer="./"$1 +LCOV_INFO=coverage.info +#./coverage.sh $fuzzer +#lcov --capture --initial --directory ../src/.libs --directory . --output-file $LCOV_INFO +#lcov --capture --directory ../src/.libs --output-file $LCOV_INFO +#lcov --remove $LCOV_INFO '*/test_linking.c' '*/css_tokenizer.lex' '*/' '*/*.h' -o $LCOV_INFO +#genhtml --prefix . 
--ignore-errors source $LCOV_INFO --legend --title "$1" --output-directory=lcov + +lcov --zerocounters --directory ../src/ +lcov --capture --initial --directory ../src/.libs --directory . --output-file $LCOV_INFO +make check TESTS="$*" CFLAGS="${CFLAGS:-} --coverage" LDFLAGS="${LDFLAGS:-} --coverage" # shell variables, not make variables: "$(CFLAGS)" would run a command named CFLAGS +lcov --capture --directory ../src/.libs --output-file $LCOV_INFO +lcov --remove $LCOV_INFO '*/css_tokenizer.lex' '*/*.h' -o $LCOV_INFO +genhtml --prefix . --ignore-errors source $LCOV_INFO --legend --title "$*" --output-directory=lcov + +xdg-open lcov/index.html diff --git a/fuzz/wget_options_fuzzer.c b/fuzz/wget_options_fuzzer.c new file mode 100644 index 00000000..fac1596c --- /dev/null +++ b/fuzz/wget_options_fuzzer.c @@ -0,0 +1,146 @@ +/* + * Copyright(c) 2017-2018 Free Software Foundation, Inc. + * + * This file is part of GNU Wget. + * + * GNU Wget is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GNU Wget is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Wget. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include <sys/types.h> // NOTE(review): header name lost in extraction; sys/types.h assumed - confirm +#include <dirent.h> // opendir, readdir +#include <stdint.h> // uint8_t +#include <stdio.h> // fmemopen +#include <string.h> // strncmp +#include <fcntl.h> // open flags +#include <unistd.h> // close + +#ifdef __cplusplus + extern "C" { +#endif + // declarations for wget internal functions + int main_wget(int argc, const char **argv); + void cleanup(void); +#ifdef __cplusplus + } +#endif + +#include "fuzzer.h" + +static const uint8_t *g_data; +static size_t g_size; +static int dont_write; + +#if defined HAVE_DLFCN_H && defined HAVE_FMEMOPEN +#include <dlfcn.h> +#include <setjmp.h> +static jmp_buf jmpbuf; + +#ifdef RTLD_NEXT /* Not defined e.g. on CygWin */ +DIR *opendir(const char *name) /* replaces opendir(): returns NULL while dont_write is set, else forwards to the next opendir found via dlsym() */ +{ + DIR *(*libc_opendir)(const char *) = + (DIR *(*)(const char *)) dlsym (RTLD_NEXT, "opendir"); + + if (dont_write) + return NULL; + + return libc_opendir(name); +/* +#ifdef TEST_RUN + printf("opendir %s\n", name); + if (!strcmp(name, SRCDIR"/wget_options_fuzzer.in")) + return libc_opendir(name); + if (!strcmp(name, SRCDIR"/wget_options_fuzzer.new")) + return libc_opendir(name); + if (!strcmp(name, SRCDIR"/wget_options_fuzzer.repro")) + return libc_opendir(name); +#else + if (!strcmp(name, "wget_options_fuzzer.in")) + return libc_opendir(name); + if (!strcmp(name, "wget_options_fuzzer.new")) + return libc_opendir(name); + if (!strcmp(name, "wget_options_fuzzer.repro")) + return libc_opendir(name); +#endif + + return libc_opendir(name); +*/ +} + +FILE *fopen(const char *pathname, const char *mode) /* replaces fopen(): while dont_write is set, a read of *.wgetrc gets the fuzz input and every other open goes to /dev/null */ +{ + FILE *(*libc_fopen)(const char *, const char *) = + (FILE *(*)(const char *, const char *)) dlsym (RTLD_NEXT, "fopen"); + + if (dont_write) { + size_t len = strlen(pathname); + + if (len >= 7 && !strcmp(pathname + len - 7, ".wgetrc") && !strcmp(mode, "r")) + return fmemopen((void *) g_data, g_size, mode); + + +// if (*mode == 'w') + return libc_fopen("/dev/null", mode); + +// printf("open %s, %s\n", pathname, mode); + } + + return libc_fopen(pathname, mode); +} + +void exit(int status) /* replaces exit(): while dont_write is set, longjmp()s back to the setjmp() in LLVMFuzzerTestOneInput() instead of terminating */ +{ + if (dont_write) + longjmp(jmpbuf, 1); + + 
void (*libc_exit)(int) = + (void(*)(int)) dlsym (RTLD_NEXT, "exit"); + + libc_exit(status); +} +#endif /* RTLD_NEXT */ +#endif /* HAVE_DLFCN_H && HAVE_FMEMOPEN */ + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) /* runs wget's option parsing once with 'data' served as the .wgetrc content; always returns 0 */ +{ +// static const char *argv[] = { "wget", "-q", "--no-config", "--config", "d41d8cd98f00b204e9800998ecf8427e" }; + static const char *argv[] = { "wget", "-q" }; + + if (size > 2048) // same as max_len = ... in .options file + return 0; + + g_data = data; + g_size = size; + + dont_write = 1; + +// try not to open/write to the file system +#if defined HAVE_DLFCN_H && defined HAVE_FMEMOPEN && defined RTLD_NEXT // without RTLD_NEXT the exit()/fopen()/opendir() replacements above are not compiled in + if (setjmp(jmpbuf)) { + cleanup(); + dont_write = 0; + return 0; + } + +// enable_testing(); // function in wget to prevent unwanted action while testing + main_wget(sizeof(argv)/sizeof(argv[0]), argv); + cleanup(); +#endif + + dont_write = 0; + + return 0; +} diff --git a/fuzz/wget_options_fuzzer.dict b/fuzz/wget_options_fuzzer.dict new file mode 100644 index 00000000..9a2dbd8e --- /dev/null +++ b/fuzz/wget_options_fuzzer.dict @@ -0,0 +1,195 @@ +"--" +"no-" +"on" +"off" +"=" +"PEM" +"DER" +"ASN1" +"INF" +"INFINITY" +"1" +"1k" +"1m" +"1g" +"1t" +"1s" +"1m" +"1h" +"1d" +"hard" +"soft" +"none" +"ipv4" +"ipv6" +"bar" +"posix" +"pcre" +"bytes" +"bits" +"human" +"csv" +"json" +"accept=" +"accept-regex=" +"adjust-extension=" +"append-output=" +"ask-password=" +"auth-no-challenge=" +"backup-converted=" +"backups=" +"base=" +"bind-address=" +"ca-certificate=" +"cache=" +"ca-directory=" +"certificate=" +"certificate-type=" +"check-certificate=" +"check-hostname=" +"chunk-size=" +"clobber=" +"config=" +"connect-timeout=" +"content-disposition=" +"content-on-error=" +"continue=" +"convert-links=" +"cookies=" +"cookie-suffixes=" +"crl-file=" +"cut-dirs=" +"cut-file-get-vars=" +"cut-url-get-vars=" +"debug=" +"default-page=" +"delete-after=" +"directories=" +"directory-prefix=" +"dns-caching=" +"dns-timeout=" +"domains=" +"egd-file=" +"exclude-domains=" +"execute=" +"filter-mime-type=" +"filter-urls=" +"follow-tags=" 
+"force-atom=" +"force-css=" +"force-directories=" +"force-html=" +"force-metalink=" +"force-progress=" +"force-rss=" +"force-sitemap=" +"fsync-policy=" +"gnupg-homedir=" +"gnutls-options=" +"header=" +"help=" +"host-directories=" +"hpkp=" +"hpkp-file=" +"hsts=" +"hsts-file=" +"html-extension=" +"http2=" +"http2-request-window=" +"http-keep-alive=" +"http-password=" +"http-proxy=" +"http-proxy-password=" +"http-proxy-user=" +"https-enforce=" +"https-only=" +"https-proxy=" +"http-user=" +"ignore-case=" +"ignore-tags=" +"inet4-only=" +"inet6-only=" +"input-encoding=" +"input-file=" +"iri=" +"keep-session-cookies=" +"level=" +"list-plugins=" +"load-cookies=" +"local-db=" +"local-encoding=" +"local-plugin=" +"max-redirect=" +"max-threads=" +"metalink=" +"mirror=" +"netrc=" +"netrc-file=" +"no-quiet=" +"ocsp=" +"ocsp-file=" +"ocsp-stapling=" +"output-document=" +"output-file=" +"page-requisites=" +"parent=" +"password=" +"plugin=" +"plugin-dirs=" +"plugin-help=" +"plugin-opt=" +"post-data=" +"post-file=" +"prefer-family=" +"private-key=" +"private-key-type=" +"progress=" +"protocol-directories=" +"proxy=" +"quiet=" +"quota=" +"random-file=" +"random-wait=" +"read-timeout=" +"recursive=" +"referer=" +"regex-type=" +"reject=" +"reject-regex=" +"remote-encoding=" +"report-speed=" +"restrict-file-names=" +"robots=" +"save-cookies=" +"save-headers=" +"secure-protocol=" +"server-response=" +"signature-extension=" +"span-hosts=" +"spider=" +"stats-all=" +"stats-dns=" +"stats-ocsp=" +"stats-server=" +"stats-site=" +"stats-tls=" +"strict-comments=" +"tcp-fastopen=" +"timeout=" +"timestamping=" +"tls-false-start=" +"tls-resume=" +"tls-session-file=" +"tries=" +"trust-server-names=" +"use-askpass=" +"user=" +"user-agent=" +"use-server-timestamps=" +"verbose=" +"verify-save-failed=" +"verify-sig=" +"version=" +"wait=" +"waitretry=" +"xattr=" diff --git a/src/init.c b/src/init.c index 525b4f69..1ad150c2 100644 --- a/src/init.c +++ b/src/init.c @@ -69,6 +69,7 @@ as that of the 
covered work. */ #include "warc.h" /* for warc_close */ #include "spider.h" /* for spider_cleanup */ #include "html-url.h" /* for cleanup_html_url */ +#include "ptimer.h" /* for ptimer_destroy */ #include "c-strcase.h" #ifdef TESTING @@ -1925,6 +1926,8 @@ decode_string (const char *val, const struct decode_item *items, int itemcount, return false; } +extern struct ptimer *timer; + /* Free the memory allocated by global variables. */ void cleanup (void) @@ -1952,7 +1955,7 @@ cleanup (void) because then you can find the real leaks, i.e. the allocated memory which grows with the size of the program. */ -#ifdef DEBUG_MALLOC +#if defined DEBUG_MALLOC || defined TESTING convert_cleanup (); res_cleanup (); http_cleanup (); @@ -2015,6 +2018,10 @@ cleanup (void) xfree (opt.use_askpass); xfree (opt.retry_on_http_error); + xfree (exec_name); + xfree (program_argstring); + ptimer_destroy (timer); timer = NULL; + #ifdef HAVE_LIBCARES #include { @@ -2027,7 +2034,7 @@ cleanup (void) } #endif -#endif /* DEBUG_MALLOC */ +#endif /* DEBUG_MALLOC || TESTING */ } /* Unit testing routines. */ diff --git a/src/main.c b/src/main.c index d1f43792..4b22781b 100644 --- a/src/main.c +++ b/src/main.c @@ -502,8 +502,14 @@ static unsigned char optmap[96]; static void init_switches (void) { + static bool initialized; char *p = short_options; size_t i, o = 0; + + if (initialized) + return; + initialized = 1; + for (i = 0; i < countof (option_data); i++) { struct cmdline_option *cmdopt = &option_data[i]; @@ -565,10 +571,14 @@ init_switches (void) /* Print the usage message. */ static int -print_usage (int error) +print_usage (_GL_UNUSED int error) { +#ifndef TESTING return fprintf (error ? stderr : stdout, _("Usage: %s [OPTION]... [URL]...\n"), exec_name); +#else + return 0; +#endif } /* Print the help message, describing all the available options. 
If @@ -576,6 +586,7 @@ print_usage (int error) _Noreturn static void print_help (void) { +#ifndef TESTING /* We split the help text this way to ease translation of individual entries. */ static const char *help[] = { @@ -1027,7 +1038,7 @@ Recursive accept/reject:\n"), for (i = 0; i < countof (help); i++) if (fputs (_(help[i]), stdout) < 0) exit (WGET_EXIT_IO_FAIL); - +#endif /* TESTING */ exit (WGET_EXIT_SUCCESS); } @@ -1325,6 +1336,7 @@ There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0) const char *program_name; /* Needed by lib/error.c. */ const char *program_argstring; /* Needed by wget_warc.c. */ +struct ptimer *timer; int main (int argc, char **argv) @@ -1338,7 +1350,7 @@ main (int argc, char **argv) bool noconfig = false; bool append_to_log = false; - struct ptimer *timer = ptimer_new (); + timer = ptimer_new (); double start_time = ptimer_measure (timer); total_downloaded_bytes = 0; @@ -1745,12 +1757,14 @@ for details.\n\n")); ) { /* No URL specified. */ +#ifndef TESTING fprintf (stderr, _("%s: missing URL\n"), exec_name); print_usage (1); fprintf (stderr, "\n"); /* #### Something nicer should be printed here -- similar to the pre-1.5 `--help' page. */ fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name); +#endif exit (WGET_EXIT_GENERIC_ERROR); } @@ -2225,7 +2239,7 @@ only if outputting to a regular file.\n")); char *wall_time = xstrdup (secs_to_human_time (end_time - start_time)); char *download_time = xstrdup (secs_to_human_time (total_download_time)); - ptimer_destroy (timer); + ptimer_destroy (timer); timer = NULL; logprintf (LOG_NOTQUIET, _("FINISHED --%s--\nTotal wall clock time: %s\n" diff --git a/src/utils.h b/src/utils.h index 26e2c28c..d55da2a7 100644 --- a/src/utils.h +++ b/src/utils.h @@ -30,7 +30,8 @@ as that of the covered work. */ #ifndef UTILS_H #define UTILS_H -# include +#include +#include /* Constant is using when we don`t know attempted size exactly */ #define UNKNOWN_ATTEMPTED_SIZE -3