Add new HTML parser fuzzer

* fuzz/Makefile.am: Add wget_html_fuzzer
* fuzz/wget_html_fuzzer.c: New fuzzer
* fuzz/wget_html_fuzzer.dict: HTML dictionary for fuzzing
* fuzz/wget_html_fuzzer.in: Initial corpora
* src/html-url.c: Add new function get_urls_html_fm()
* src/html-url.h: Add new function get_urls_html_fm()
* src/wget.h: Fix define for fopen_wgetrc()
This commit is contained in:
Tim Rühsen 2018-04-20 22:18:25 +02:00
parent 77c31d301b
commit 23b0275feb
2894 changed files with 896 additions and 14 deletions

View File

@ -10,6 +10,7 @@ LDADD = ../lib/libgnu.a \
WGET_TESTS = \
wget_css_fuzzer$(EXEEXT) \
wget_html_fuzzer$(EXEEXT) \
wget_options_fuzzer$(EXEEXT)
if FUZZING
@ -33,6 +34,9 @@ endif
wget_css_fuzzer_SOURCES = wget_css_fuzzer.c $(MAIN)
wget_css_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
wget_html_fuzzer_SOURCES = wget_html_fuzzer.c $(MAIN)
wget_html_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
wget_options_fuzzer_SOURCES = wget_options_fuzzer.c $(MAIN)
wget_options_fuzzer_LDADD = ../src/libunittest.a $(LDADD)

102
fuzz/wget_html_fuzzer.c Normal file
View File

@ -0,0 +1,102 @@
/*
* Copyright(c) 2017-2018 Free Software Foundation, Inc.
*
* This file is part of GNU Wget.
*
* GNU Wget is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GNU Wget is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Wget. If not, see <https://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sys/types.h>
#include <dirent.h> // opendir, readdir
#include <stdint.h> // uint8_t
#include <stdio.h> // fmemopen
#include <string.h> // strncmp
#include <stdlib.h> // free
#include <fcntl.h> // open flags
#include <unistd.h> // close
#include <setjmp.h> // longjmp, setjmp
#ifdef __cplusplus
extern "C" {
#endif
#include "utils.h"
#include "html-url.h"
#include "css-url.h"
// declarations for wget internal functions
// fopen_wget() and fopen_wgetrc() are defined further down in this file;
// exit_wget() is defined below only when FUZZING is defined.
// main_wget() and cleanup() are only declared here; they are neither defined
// nor called in the visible part of this file.
int main_wget(int argc, const char **argv);
void cleanup(void);
FILE *fopen_wget(const char *pathname, const char *mode);
FILE *fopen_wgetrc(const char *pathname, const char *mode);
void exit_wget(int status);
#ifdef __cplusplus
}
#endif
#include "fuzzer.h"
// Buffer pointer and length handed to fmemopen() by fopen_wgetrc() below.
// NOTE(review): nothing in the visible part of this file assigns these, so
// they keep their zero-initialized values (NULL / 0) — confirm that
// fopen_wgetrc() is not expected to serve real data in this fuzzer.
static const uint8_t *g_data;
static size_t g_size;
/*
 * Replacement for wget's file-open hook used while fuzzing.
 *
 * pathname: ignored; every request is redirected to /dev/null.
 * mode:     passed through unchanged to fopen().
 *
 * Returns the stream opened on /dev/null, or NULL if fopen() fails.
 */
FILE *fopen_wget(const char *pathname, const char *mode)
{
	static const char sink_path[] = "/dev/null";
	FILE *sink;

	(void) pathname; // intentionally unused

	sink = fopen(sink_path, mode);
	return sink;
}
#undef fopen_wgetrc
/*
 * Replacement for wget's wgetrc-open hook used while fuzzing.
 *
 * pathname: ignored.
 * mode:     passed to fmemopen() when that function is available.
 *
 * With HAVE_FMEMOPEN defined, returns a memory stream over the file-scope
 * buffer g_data / g_size; otherwise returns NULL.
 */
FILE *fopen_wgetrc(const char *pathname, const char *mode)
{
	FILE *stream = NULL;

	(void) pathname; // intentionally unused
#ifdef HAVE_FMEMOPEN
	stream = fmemopen((void *) g_data, g_size, mode);
#else
	(void) mode; // no in-memory streams available on this platform
#endif
	return stream;
}
// Empty exit stubs.
// When FUZZING is defined, exit_wget() is provided with an empty body.
// Otherwise a function named exit() with an empty body is defined, which
// takes the place of the C library's exit() for code linked into this binary.
// NOTE(review): presumably this keeps exit paths in the code under test from
// terminating the fuzzer/test process — confirm against how wget maps
// exit() / exit_wget() in src/wget.h.
#ifdef FUZZING
void exit_wget(int status)
{
}
#else
void exit(int status)
{
}
#endif
/*
 * libFuzzer entry point: runs wget's HTML URL extraction on one fuzz input.
 *
 * data: fuzzer-generated bytes, handed to the parser as the document content.
 * size: number of bytes in data; inputs larger than 4096 bytes are skipped.
 *
 * While the parser runs, stderr is pointed at /dev/null (when it can be
 * opened) and restored afterwards. The URL list returned by the parser is
 * freed immediately. Always returns 0.
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
	struct urlpos *urls;
	struct file_memory fm;
	FILE *saved_stderr;
	FILE *null_stream;

	if (size > 4096) // same as max_len = ... in .options file
		return 0;

	// Silence diagnostics during parsing. If /dev/null cannot be opened, keep
	// the real stderr: a NULL stderr would crash any write to it, and
	// fclose(NULL) is undefined behavior.
	saved_stderr = stderr;
	null_stream = fopen("/dev/null", "w");
	if (null_stream)
		stderr = null_stream;

	// Describe the fuzz input as an in-memory document for the parser.
	fm.content = (char *) data;
	fm.length = size;
	fm.mmap_p = 0;

	urls = get_urls_html_fm("xxx", &fm, "https://x.y", NULL, NULL);
	free_urlpos(urls);

	if (null_stream) {
		fclose(null_stream);
		stderr = saved_stderr;
	}

	return 0;
}

View File

@ -0,0 +1,22 @@
"<base"
"<link"
"<meta"
"action"
"href"
"src"
"srcset"
"style"
"follow"
"nofollow"
"all"
"none"
"robots="
"name=\"robots\""
"content="
"http-equiv=\"Content-Type\""
"charset="
"style="
"rel=\"shortcut icon\""
"rel=\"stylesheet\""
"rel=\"preload\""
"srcset="

View File

@ -0,0 +1 @@
˙˙=<base href = //[::1::]]UU<lUUCU

View File

@ -0,0 +1 @@
a<s>>

View File

@ -0,0 +1 @@
<lin<linkklin<lin

View File

@ -0,0 +1 @@
<báÿ href ŽŽŽŽŽŽŽŽŽŽŽŽŽŽŽ Ž * * ]]]]]]]]]]]]]]]W]]]3]]]all]]]]]]]]]]]]]]]]m<]]srcset]]]]]]]]]] name="robots"]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W] href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W] href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’â]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]]]] href $ href $ ]]3]]]]]]]]]]]]]]]]]]]]m href $ href $ ]]3]]]]]]]]]]]]]]]]]]]]m]] ! href * ]]]]]]]]]]]]΀€

View File

@ -0,0 +1 @@
]]]]]]]]]]]]]]]W]]]3]]]]]tyle hr] style href m<]]srcset]]]]]]]]]]]<link style hr] style href ="2yhletesethttp-equiv="Cont]]]]m<]]srcset]]]]]]]]]]]<link style hr] style href ="syhletesethttp-equiv="Content-Type"txle<ink nk srel hr] style href ="syhleteset"txlink sre : l Ire="Ls=

View File

@ -0,0 +1 @@
<conten rel="preload" ````chrset=```e``t``et""

View File

@ -0,0 +1 @@
ÿname="robo ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]']ª’Œ¢]]¢Ã¢¢]]]]]]]]]]]W]]]3]]]]]]]]]Yrcset]]]g]]]]]]]]]] <meta name="robots" href $ http-equiv="Content-Type" allc at ion content= ÿÿÿs==txle=brobots==tstcharset=ÿÿöÿvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcdict=wget_ht“é<aö<61>š/]]] <meÿ

View File

@ -0,0 +1 @@
<!<e==

View File

@ -0,0 +1 @@
<<basem rel="st``` ` ˆ mal @ mallow ˆ &qllÿÿÿÿ <20>š/ÿ'```````<l mall mall &qllÿDÿÿ l mall ) low @ mallow ˆ &qllÿÿÿÿ <20>š/ÿ'``````` è rel="stylesheet"```e``t``et""

View File

@ -0,0 +1 @@
ifra]]]]]]]]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]]m<]ylesheet"txllsrcseô=k srelesheet"k sre]]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]m<]]srcset]]]]]]]]]]]<link style hr] style href ="-yhletesethttp-epppq(iv="Content-Type"txle<ink nk srel="stylexlk srel="style

View File

@ -0,0 +1 @@
]]]]]]]]]]] <img href srcset= "!<no] )))))))))))))))))))))))©)))))))))))nyT-pet"<<

View File

@ -0,0 +1 @@
<e/

Some files were not shown because too many files have changed in this diff Show More