1
0
mirror of https://github.com/mirror/wget.git synced 2025-04-24 20:20:24 +08:00

Add new HTML parser fuzzer

* fuzz/Makefile.am: Add wget_html_fuzzer
* fuzz/wget_html_fuzzer.c: New fuzzer
* fuzz/wget_html_fuzzer.dict: HTML dictionary for fuzzing
* fuzz/wget_html_fuzzer.in: Initial corpora
* src/html-url.c: Add new function get_urls_html_fm()
* src/html-url.h: Add new function get_urls_html_fm()
* src/wget.h: Fix define for fopen_wgetrc()
This commit is contained in:
Tim Rühsen 2018-04-20 22:18:25 +02:00
parent 77c31d301b
commit 23b0275feb
2894 changed files with 896 additions and 14 deletions
fuzz
Makefile.am
wget_html_fuzzer.c
wget_html_fuzzer.dict
wget_html_fuzzer.in
00069039dfa75577e348a72e93fa71f6f158318d002114b2f3efa9ceb4ff9628fe67f45a5e76625d002c125bfa4515425cc954b7b736b19ed8c955b4005eca38412fcfc1f617d5c93831efeaf6b18df700750f9a52801f3d7005f5ebbd8f9bed188928b50085381cdfce5572dd179536f64a4ab53e24004700d9aa6cb3e4b434a8a336054c70b0ba2070ec1e00deaa5ab8f2b7bb62ab0406dac3c1f0ea1a8fe700e8932d96e1776782da60989b1f18c3a8e2ed1300e967ec9a95b3aaa5255bc4892b8b63e6e00b6f00f8860783faeeb544309119a2861cb999143d100102c73bd76a9a0e0f5a4ad72bff7c65dabc5a370116f1589d4fd3d9ef0eb2bf4fe5dcec3bac4982012a806df1d96108b581beaaf39b704344545ebf014061e0d1f6dce706d845da46749f440d2e3c420171377527221984428c9b759fe2f3ae0c6306d801f3749f4075613b86b2d942fac2ad89f7842f6b01fe992c1a9d98822eebd8773c4f23dad1fcf3990207cea29bd219b80694a2126fb7708c7d16079a021346ca813d28f75002292382a3eaae822f698f022082778cce79b4eee47e504da4d6dd12248d470224b47db70aaf7b5708612e05827fdb9d559465022c91ac24651cb9bab40165bba366d9fde5fc97022d1560027a146fe73c7608cdf73f455f034371022fb3b868fdb8acb00cb8fa7227411f7a3b433e0233b25b9342069d6ad17760bbbea4202cffcee80238b42c7d7862e4931c5477bd84abb7f8465b010239f914fefacb059808820034a6b95f42beabd9024722a40d4b789b4ede0ea9a4c5c69ed6d5e8c5024dd0965070b8a1974078a7dd4e4cd7e52c13a40258d3ae9a4f95b5d82ef4f76bd1bd1723b5be83025d416bdf80e12009c9e21a18a402313e6782df025fb650c081adf413fcc5a6a74e8b6a9f946854028f7808a15615e9994966c6c8afd0a6e818dca102955459be35bbd1135dfa79e611dd959f8bbe1202a3df35501aa4f7b5541f2ef4a7800fa2e948c202df519f7a5b0e6d0950f6b2d4c84b65800b9ac40300636a41f8c1472df8a3479db2a1109230e109033d40cab303687f52f1556624ec51f7df50f3a8034bdad5fe099f612cf69cccf6db5e7e7e68ddc9036421fd5fd6cc1276a26b3a2e1372dec7b6cf8603ab25234831bd329e61f9bf51941fb5c7b4d33603c78a6bb70e4e1597db8b0803e4d0bfcbac155404054ff6b4d65044f5f5f01953069ff6d5a80bf804097b2472233a8f5316d272cbdcc8e2c124870c04340bc6e62d4be0817ac59afb3e9075cb01e053044686b72c88441f8856733df2ebd70653880a2b0452ae53a21d8a5d24c5f3259d4b33f9b6c012a4045b0830616b9ee4eeb1b9d3a5df26b59997e5550498fcd3c09f2bf2ac274be20f273f481412169e
04d5daf208f4bec5a631cb1033935d747025af8d04d80726bf6ab25d85a2d9c3b268ebf62062d31e0510653bbe0824e8cf0c8aa1acd755bdb5058d4505192a6792567169d5737ed75fc869aba349832a051ac73f0991d18f52b9770c008fe4674245bec6051ece3cce96f415c8bc50e6be15883d97d195b605213493ab455cdc04789e1c031c12665d6343c80535815fd8e5b7691ee2da9f164e473570ff182005739dfcd73d2041306d230c62fc2610624a2e3c05adda3fef383c7eaeed3d20da37e9e015f9ad5605b7a67ad91d3fab27bb551232ca3176056ebc6505d7b7f7b21cfc3055261415e04cd020cbee7e8305ebdac10554105e69044708533490773609d07c05fbe99a1de1f72bb93c9eae8a9a4a08bc088b3f062101eb40fcfdafc5ef527b2d5bba48117ac1850621da04230f78e158c45d5df54586f6cb67110d062324df34910564603031fd13aca27bec64d90a06884919e3470643a8a40c671c7b709a70b3519e069b7faef9e3c4fe96cf4695e62caaef623753b406c66d016bdf6f908020fbd2e93e8d9f69dcf0b706cecb31555ff64ab925c92cc54ea93216a38e7506d0b4b2862d8284060a6252306629af2b9e7f1406e3b8a41d4781f3f4628d9d2e1db6ae84a3558c06e55088767948ab2c6d0e8f1073174906679f5d06ed0cfccf5cda94df7a1722da6a81699e10375506ef703eacf4903ac694b7ebec5d7ef91e7abb7707246b1acdc0189387c0ea131ede80919e7c4d9007300bf3e2290b63bfc2633d85b1e8cca4e2427107725bccc4796f8e9e65babb5937a3d918dda1070781716317c87e38d1ebbd945d744984983ca31b07a879ebaf26850bdd2d2664b625984601109beb07a98905d94eb89d5acbbfd7964da780404b5ae907ac20c60028ce8d258f23e7d9757943b95b549607ad21dd2ad1f36a000b022cb6ebb3398a4da41307afeb41ff5209abba687d38bebd1511018778b407b4f95b0fa97526ce3dd47aab172c8c76bc59cb07bba267fc97037baabdb92b178c2973befb86bf07ea7c15d4c0280b8128ff160a5decdf06bcaaa40803f72ac747a3239552a6e8d9df3ff3ba48661808171b6ea2944d83b3b0509235ed18d8b1086f450826ef2e08dadadcaa851f90f39783b70e1c6ff9082eaa36026d6b578d9f90878e05fd610d49c5690836ef22f774751d8353e0d2e22d95bd11507ab0086f2b2ca366bef1cc8595db884cf24f2ef240500878b6a329bb06d7f8945d3be656f21ff86d97b90899c4c26003d7a16c6311e9b041d079fe588096089f31e7667403a4a7d589b513bb7adb29ae2aa0

View File

@ -10,6 +10,7 @@ LDADD = ../lib/libgnu.a \
WGET_TESTS = \
wget_css_fuzzer$(EXEEXT) \
wget_html_fuzzer$(EXEEXT) \
wget_options_fuzzer$(EXEEXT)
if FUZZING
@ -33,6 +34,9 @@ endif
wget_css_fuzzer_SOURCES = wget_css_fuzzer.c $(MAIN)
wget_css_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
wget_html_fuzzer_SOURCES = wget_html_fuzzer.c $(MAIN)
wget_html_fuzzer_LDADD = ../src/libunittest.a $(LDADD)
wget_options_fuzzer_SOURCES = wget_options_fuzzer.c $(MAIN)
wget_options_fuzzer_LDADD = ../src/libunittest.a $(LDADD)

102
fuzz/wget_html_fuzzer.c Normal file
View File

@ -0,0 +1,102 @@
/*
* Copyright(c) 2017-2018 Free Software Foundation, Inc.
*
* This file is part of GNU Wget.
*
* GNU Wget is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GNU Wget is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Wget. If not, see <https://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sys/types.h>
#include <dirent.h> // opendir, readdir
#include <stdint.h> // uint8_t
#include <stdio.h> // fmemopen
#include <string.h> // strncmp
#include <stdlib.h> // free
#include <fcntl.h> // open flags
#include <unistd.h> // close
#include <setjmp.h> // longjmp, setjmp
#ifdef __cplusplus
extern "C" {
#endif
#include "utils.h"
#include "html-url.h"
#include "css-url.h"
// declarations for wget internal functions
int main_wget(int argc, const char **argv);
void cleanup(void);
FILE *fopen_wget(const char *pathname, const char *mode);
FILE *fopen_wgetrc(const char *pathname, const char *mode);
void exit_wget(int status);
#ifdef __cplusplus
}
#endif
#include "fuzzer.h"
// Input buffer and its length; fopen_wgetrc() below passes them to fmemopen().
// NOTE(review): nothing in the visible part of this file assigns them, so they
// keep their zero-initialized values here - confirm that this is intended.
static const uint8_t *g_data;
static size_t g_size;
/*
 * Stand-in for wget's fopen_wget(): the requested path is never consulted,
 * every call opens /dev/null with the caller-supplied mode instead.
 * Returns whatever fopen() returns (NULL on failure).
 */
FILE *fopen_wget(const char *pathname, const char *mode)
{
	FILE *sink;

	(void) pathname; // only the mode is forwarded
	sink = fopen("/dev/null", mode);

	return sink;
}
// Drop any macro of this name (in case an included header defines one) so that
// the definition below is a real function.
#undef fopen_wgetrc
/*
 * Stand-in for wget's fopen_wgetrc(): the path is ignored.
 * With HAVE_FMEMOPEN the returned stream is an fmemopen() view of
 * g_data/g_size opened with the requested mode; without it the function
 * always returns NULL.
 */
FILE *fopen_wgetrc(const char *pathname, const char *mode)
{
	FILE *stream = NULL;

#ifdef HAVE_FMEMOPEN
	// const is cast away because fmemopen() takes a void * buffer
	stream = fmemopen((void *) g_data, g_size, mode);
#endif

	return stream;
}
/*
 * No-op termination hook.
 * Without FUZZING this defines exit() itself as an empty function; with
 * FUZZING it defines exit_wget() (declared above) as an empty function.
 * Either way the call returns to its caller instead of ending the process.
 * NOTE(review): presumably this keeps exit paths inside wget from killing the
 * fuzzer process - confirm against how wget maps exit() when FUZZING is set.
 */
#ifndef FUZZING
void exit(int status)
{
	(void) status; // status is deliberately discarded
}
#else
void exit_wget(int status)
{
	(void) status; // status is deliberately discarded
}
#endif
/*
 * libFuzzer entry point: run wget's HTML URL extraction on one input.
 *
 * data, size: the raw input bytes; they are handed to get_urls_html_fm()
 *             unchanged through a struct file_memory.
 * Returns 0 in every case. Inputs larger than 4096 bytes are skipped.
 *
 * stderr is pointed at /dev/null for the duration of the parse and restored
 * afterwards. If /dev/null cannot be opened the original stderr is kept, so
 * stderr is never NULL while the parser runs and fclose() never sees NULL.
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
	struct urlpos *urls;
	struct file_memory fm;
	FILE *bak, *devnull;

	if (size > 4096) // same as max_len = ... in .options file
		return 0;

	// Redirect stderr only when the sink was actually opened.
	bak = stderr;
	devnull = fopen("/dev/null", "w");
	if (devnull)
		stderr = devnull;

	// Wrap the fuzzer-owned buffer; the cast drops const because
	// fm.content is assigned a char *.
	fm.content = (char *) data;
	fm.length = size;
	fm.mmap_p = 0; // NOTE(review): presumably "buffer is not mmap()ed" - confirm in utils.h

	urls = get_urls_html_fm("xxx", &fm, "https://x.y", NULL, NULL);
	free_urlpos(urls);

	// Undo the redirection before closing the sink.
	if (devnull) {
		stderr = bak;
		fclose(devnull);
	}

	return 0;
}

View File

@ -0,0 +1,22 @@
"<base"
"<link"
"<meta"
"action"
"href"
"src"
"srcset"
"style"
"follow"
"nofollow"
"all"
"none"
"robots="
"name=\"robots\""
"content="
"http-equiv=\"Content-Type\""
"charset="
"style="
"rel=\"shortcut icon\""
"rel=\"stylesheet\""
"rel=\"preload\""
"srcset="

View File

@ -0,0 +1 @@
˙˙=<base href = //[::1::]]UU<lUUCU

View File

@ -0,0 +1 @@
a<s>>

View File

@ -0,0 +1 @@
<lin<linkklin<lin

View File

@ -0,0 +1 @@
<báÿ href ŽŽŽŽŽŽŽŽŽŽŽŽŽŽŽ Ž * * ]]]]]]]]]]]]]]]W]]]3]]]all]]]]]]]]]]]]]]]]m<]]srcset]]]]]]]]]] name="robots"]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W] href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W] href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]style t]]]]]]]]]]]m]] ! href style= ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]ª’â]]]]]]]]]]]]ª’⢌¢¢]]]]]]]]]]]]]W]]]3]]]]]]]]]]rcset]]]]]]]]]] href $ href $ ]]3]]]]]]]]]]]]]]]]]]]]m href $ href $ ]]3]]]]]]]]]]]]]]]]]]]]m]] ! href * ]]]]]]]]]]]]΀€

View File

@ -0,0 +1 @@
]]]]]]]]]]]]]]]W]]]3]]]]]tyle hr] style href m<]]srcset]]]]]]]]]]]<link style hr] style href ="2yhletesethttp-equiv="Cont]]]]m<]]srcset]]]]]]]]]]]<link style hr] style href ="syhletesethttp-equiv="Content-Type"txle<ink nk srel hr] style href ="syhleteset"txlink sre : l Ire="Ls=

View File

@ -0,0 +1 @@
<conten rel="preload" ````chrset=```e``t``et""

View File

@ -0,0 +1 @@
ÿname="robo ]]]]]]S]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]']ª’Œ¢]]¢Ã¢¢]]]]]]]]]]]W]]]3]]]]]]]]]Yrcset]]]g]]]]]]]]]] <meta name="robots" href $ http-equiv="Content-Type" allc at ion content= ÿÿÿs==txle=brobots==tstcharset=ÿÿöÿvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcdict=wget_ht“é<aö<61>š/]]] <meÿ

View File

@ -0,0 +1 @@
<!<e==

View File

@ -0,0 +1 @@
<<basem rel="st``` ` ˆ mal @ mallow ˆ &qllÿÿÿÿ <20>š/ÿ'```````<l mall mall &qllÿDÿÿ l mall ) low @ mallow ˆ &qllÿÿÿÿ <20>š/ÿ'``````` è rel="stylesheet"```e``t``et""

View File

@ -0,0 +1 @@
ifra]]]]]]]]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]]]m<]ylesheet"txllsrcseô=k srelesheet"k sre]]]]]]]]]W]]]3]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]m<]]srcset]]]]]]]]]]]<link style hr] style href ="-yhletesethttp-epppq(iv="Content-Type"txle<ink nk srel="stylexlk srel="style

View File

@ -0,0 +1 @@
]]]]]]]]]]] <img href srcset= "!<no] )))))))))))))))))))))))©)))))))))))nyT-pet"<<

View File

@ -0,0 +1 @@
<e/

Some files were not shown because too many files have changed in this diff Show More