mirror of
https://github.com/mirror/wget.git
synced 2025-01-25 03:41:00 +08:00
Add new HTML parser fuzzer
* fuzz/Makefile.am: Add wget_html_fuzzer * fuzz/wget_html_fuzzer.c: New fuzzer * fuzz/wget_html_fuzzer.dict: HTML dictionary for fuzzing * fuzz/wget_html_fuzzer.in: Initial corpora * src/html-url.c: Add new function get_urls_html_fm() * src/html-url.h: Add new function get_urls_html_fm() * src/wget.h: Fix define for fopen_wgetrc()
This commit is contained in:
parent
77c31d301b
commit
23b0275feb
@ -10,6 +10,7 @@ LDADD = ../lib/libgnu.a \
|
||||
|
||||
WGET_TESTS = \
|
||||
wget_css_fuzzer$(EXEEXT) \
|
||||
wget_html_fuzzer$(EXEEXT) \
|
||||
wget_options_fuzzer$(EXEEXT)
|
||||
|
||||
if FUZZING
|
||||
@ -33,6 +34,9 @@ endif
|
||||
wget_css_fuzzer_SOURCES = wget_css_fuzzer.c $(MAIN)
|
||||
wget_css_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
|
||||
|
||||
wget_html_fuzzer_SOURCES = wget_html_fuzzer.c $(MAIN)
|
||||
wget_html_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
|
||||
|
||||
wget_options_fuzzer_SOURCES = wget_options_fuzzer.c $(MAIN)
|
||||
wget_options_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
|
||||
|
||||
|
102
fuzz/wget_html_fuzzer.c
Normal file
102
fuzz/wget_html_fuzzer.c
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright(c) 2017-2018 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Wget.
|
||||
*
|
||||
* GNU Wget is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNU Wget is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Wget. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h> // opendir, readdir
|
||||
#include <stdint.h> // uint8_t
|
||||
#include <stdio.h> // fmemopen
|
||||
#include <string.h> // strncmp
|
||||
#include <stdlib.h> // free
|
||||
#include <fcntl.h> // open flags
|
||||
#include <unistd.h> // close
|
||||
#include <setjmp.h> // longjmp, setjmp
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "utils.h"
|
||||
#include "html-url.h"
|
||||
#include "css-url.h"
|
||||
|
||||
// Declarations for wget internal functions.
// In the part of this file shown here, fopen_wget(), fopen_wgetrc() and
// exit_wget() receive fuzzer-side definitions further down; main_wget() and
// cleanup() are only declared and are not called.
|
||||
int main_wget(int argc, const char **argv);
|
||||
void cleanup(void);
|
||||
FILE *fopen_wget(const char *pathname, const char *mode);
|
||||
FILE *fopen_wgetrc(const char *pathname, const char *mode);
|
||||
void exit_wget(int status);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#include "fuzzer.h"
|
||||
|
||||
// Buffer and length that fopen_wgetrc() exposes as an in-memory stream.
// As file-scope statics they start out as NULL / 0.
// NOTE(review): nothing in the visible part of this file assigns them --
// confirm whether that is intended for this fuzzer.
static const uint8_t *g_data;
|
||||
static size_t g_size;
|
||||
|
||||
/*
 * Fuzzer-side definition of the fopen_wget() hook.
 *
 * The requested path is ignored: every call opens "/dev/null" with the
 * caller's mode instead, and the resulting stream (or NULL if fopen()
 * fails) is handed back unchanged.
 */
FILE *fopen_wget(const char *pathname, const char *mode)
{
	FILE *sink;

	(void) pathname; /* intentionally unused */

	sink = fopen("/dev/null", mode);
	return sink;
}
|
||||
|
||||
#undef fopen_wgetrc
/*
 * Fuzzer-side definition of the fopen_wgetrc() hook.
 *
 * The requested path is ignored. When fmemopen() is available
 * (HAVE_FMEMOPEN), the g_data/g_size buffer is opened as an in-memory
 * stream with the caller's mode; otherwise NULL is returned.
 */
FILE *fopen_wgetrc(const char *pathname, const char *mode)
{
	FILE *stream = NULL;

	(void) pathname; /* intentionally unused */

#ifdef HAVE_FMEMOPEN
	/* fmemopen() takes a non-const buffer, hence the cast. */
	stream = fmemopen((void *) g_data, g_size, mode);
#else
	(void) mode;
#endif

	return stream;
}
|
||||
|
||||
/*
 * No-op termination stubs.
 *
 * With FUZZING defined, exit_wget() is defined as an empty function;
 * otherwise an empty function named exit() is defined instead. In both
 * cases `status` is ignored and the function simply returns.
 * NOTE(review): presumably this keeps termination requests from the code
 * under test from ending the fuzzing process -- confirm against the build
 * setup and src/wget.h.
 */
#ifdef FUZZING
|
||||
void exit_wget(int status)
|
||||
{
|
||||
}
|
||||
#else
|
||||
/* Same name as the C library's exit(); this definition returns to its caller. */
void exit(int status)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
|
||||
{
|
||||
struct urlpos *urls;
|
||||
struct file_memory fm;
|
||||
FILE *bak;
|
||||
|
||||
if (size > 4096) // same as max_len = ... in .options file
|
||||
return 0;
|
||||
|
||||
bak = stderr;
|
||||
stderr = fopen("/dev/null", "w");
|
||||
|
||||
fm.content = (char *) data;
|
||||
fm.length = size;
|
||||
fm.mmap_p = 0;
|
||||
|
||||
urls = get_urls_html_fm("xxx", &fm, "https://x.y", NULL, NULL);
|
||||
free_urlpos(urls);
|
||||
|
||||
fclose(stderr);
|
||||
stderr = bak;
|
||||
|
||||
return 0;
|
||||
}
|
22
fuzz/wget_html_fuzzer.dict
Normal file
22
fuzz/wget_html_fuzzer.dict
Normal file
@ -0,0 +1,22 @@
|
||||
"<base"
|
||||
"<link"
|
||||
"<meta"
|
||||
"action"
|
||||
"href"
|
||||
"src"
|
||||
"srcset"
|
||||
"style"
|
||||
"follow"
|
||||
"nofollow"
|
||||
"all"
|
||||
"none"
|
||||
"robots="
|
||||
"name=\"robots\""
|
||||
"content="
|
||||
"http-equiv=\"Content-Type\""
|
||||
"charset="
|
||||
"style="
|
||||
"rel=\"shortcut icon\""
|
||||
"rel=\"stylesheet\""
|
||||
"rel=\"preload\""
|
||||
"srcset="
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
˙˙=<base href = //[::1::]]UU<lUUCU
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
a<s>>
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
<lin<linkklin<lin
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
<báÿ href ŽŽŽŽŽŽŽŽŽŽŽŽŽŽŽ Ž * * ]]]]]]]]]]]]]]]W]]]3]]]all]]]]]]]]]]]]]]]]m<]]srcset]]]]]]]]]] name="robots"]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W] href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W] href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’â]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]]]] href $ href $ ]]3]]]]]]]]]]]]]]]]]]]]m href $ href $ ]]3]]]]]]]]]]]]]]]]]]]]m]] ! href * ]]]]]]]]]]]]΀€
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
]]]]]]]]]]]]]]]W]]]3]]]]]tyle hr] style href m<]]srcset]]]]]]]]]]]<link style hr] style href ="2yhletesethttp-equiv="Cont]]]]m<]]srcset]]]]]]]]]]]<link style hr] style href ="syhletesethttp-equiv="Content-Type"txle<inknksrel hr] style href ="syhleteset"txlinksre : l Ire="Ls=
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
<conten rel="preload" ````chrset=```e``t``et""
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
ÿname="robo ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]']ª’Œ¢]]¢Ã¢¢]]]]]]]]]]]W]]]3]]]]]]]]]Yrcset]]]g]]]]]]]]]] <meta name="robots" href $ http-equiv="Content-Type" allc at ion content= ÿÿÿs==txle=brobots==tstcharset=ÿÿöÿvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcdict=wget_ht“é<aö<61>š/]]] <meÿ
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
<!<e==
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
<<basem rel="st``` ` ˆ mal @ mallow ˆ &qllÿÿÿÿ <20>š/ÿ'```````<l mall mall &qllÿDÿÿ l mall ) low @ mallow ˆ &qllÿÿÿÿ <20>š/ÿ'``````` è rel="stylesheet"```e``t``et""
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
ifra]]]]]]]]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]]m<]ylesheet"txllsrcseô=ksrelesheet"ksre]]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]m<]]srcset]]]]]]]]]]]<link style hr] style href ="-yhletesethttp-epppq(iv="Content-Type"txle<inknksrel="stylexlksrel="style
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
]]]]]]]]]]] <img href srcset= "!<no] )))))))))))))))))))))))©)))))))))))nyT-pet"<<
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
<e/
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user