mirror of
https://github.com/mirror/wget.git
synced 2025-03-25 17:20:13 +08:00
Update CSS grammar from 1.x to 2.2
* src/css-tokens.h: Add enum constants and pin their values explicitly. * src/css.l: Include config.h, suppress several compiler warnings, and update the grammar to CSS 2.2. Fixes OSS-Fuzz issue #8010 (slowness issue). This is a long-standing bug affecting all versions <= 1.19.4. Some crafted CSS input was extremely slow to tokenize and wasted CPU time, so it could be used for a DoS attack against website scanning. The code/grammar changes were backported from Wget2.x.
This commit is contained in:
parent
76fb1fe6f6
commit
caa08d7470
@ -32,36 +32,34 @@ as that of the covered work. */
|
|||||||
#define CSS_TOKENS_H
|
#define CSS_TOKENS_H
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CSSEOF,
|
CSSEOF = 0,
|
||||||
S,
|
S = 1,
|
||||||
CDO,
|
CDO = 2,
|
||||||
CDC,
|
CDC = 3,
|
||||||
INCLUDES,
|
INCLUDES = 4,
|
||||||
DASHMATCH,
|
DASHMATCH = 5,
|
||||||
LBRACE,
|
STRING = 6,
|
||||||
PLUS,
|
BAD_STRING = 7,
|
||||||
GREATER,
|
IDENT = 8,
|
||||||
COMMA,
|
HASH = 9,
|
||||||
STRING,
|
IMPORT_SYM = 10,
|
||||||
INVALID,
|
PAGE_SYM = 11,
|
||||||
IDENT,
|
MEDIA_SYM = 12,
|
||||||
HASH,
|
CHARSET_SYM = 13,
|
||||||
IMPORT_SYM,
|
IMPORTANT_SYM = 14,
|
||||||
PAGE_SYM,
|
EMS = 15,
|
||||||
MEDIA_SYM,
|
EXS = 16,
|
||||||
CHARSET_SYM,
|
LENGTH = 17,
|
||||||
IMPORTANT_SYM,
|
ANGLE = 18,
|
||||||
EMS,
|
TIME = 19,
|
||||||
EXS,
|
FREQ = 20,
|
||||||
LENGTH,
|
DIMENSION = 21,
|
||||||
ANGLE,
|
PERCENTAGE = 22,
|
||||||
TIME,
|
NUMBER = 23,
|
||||||
FREQ,
|
URI = 24,
|
||||||
DIMENSION,
|
BAD_URI = 25,
|
||||||
PERCENTAGE,
|
FUNCTION = 26,
|
||||||
NUMBER,
|
COMMENT = 27
|
||||||
URI,
|
|
||||||
FUNCTION
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* CSS_TOKENS_H */
|
#endif /* CSS_TOKENS_H */
|
||||||
|
182
src/css.l
182
src/css.l
@ -3,6 +3,12 @@
|
|||||||
%option never-interactive
|
%option never-interactive
|
||||||
%option nounput
|
%option nounput
|
||||||
|
|
||||||
|
%top{
|
||||||
|
/* config.h must precede flex's inclusion of <stdio.h>
|
||||||
|
in order for its _GNU_SOURCE definition to take effect. */
|
||||||
|
#include <config.h>
|
||||||
|
}
|
||||||
|
|
||||||
%{
|
%{
|
||||||
/* Lex source for CSS tokenizing.
|
/* Lex source for CSS tokenizing.
|
||||||
Taken from http://www.w3.org/TR/CSS21/grammar.html#q2
|
Taken from http://www.w3.org/TR/CSS21/grammar.html#q2
|
||||||
@ -39,101 +45,123 @@ as that of the covered work. */
|
|||||||
|
|
||||||
#include "css-tokens.h"
|
#include "css-tokens.h"
|
||||||
|
|
||||||
|
#if defined __clang__ || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
|
||||||
|
#pragma GCC diagnostic ignored "-Wunknown-pragmas" // clang mourns about the next one
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-macros"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||||
|
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||||
|
#pragma GCC diagnostic ignored "-Wswitch-default"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunreachable-code" // clang
|
||||||
|
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
|
||||||
|
#ifndef __clang__
|
||||||
|
#pragma GCC diagnostic ignored "-Wsuggest-attribute=pure"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
h [0-9a-f]
|
h [0-9a-f]
|
||||||
nonascii [\200-\377]
|
nonascii [\240-\377]
|
||||||
unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
|
unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
|
||||||
escape {unicode}|\\[^\r\n\f0-9a-f]
|
escape {unicode}|\\[^\r\n\f0-9a-f]
|
||||||
nmstart [_a-z]|{nonascii}|{escape}
|
nmstart [_a-z]|{nonascii}|{escape}
|
||||||
nmchar [_a-z0-9-]|{nonascii}|{escape}
|
nmchar [_a-z0-9-]|{nonascii}|{escape}
|
||||||
string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
||||||
string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
||||||
invalid1 \"([^\n\r\f\\"]|\\{nl}|{escape})*
|
badstring1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
|
||||||
invalid2 \'([^\n\r\f\\']|\\{nl}|{escape})*
|
badstring2 \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
|
||||||
|
badcomment1 \/\*[^*]*\*+([^/*][^*]*\*+)*
|
||||||
|
badcomment2 \/\*[^*]*(\*+[^/*][^*]*)*
|
||||||
|
baduri1 url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}
|
||||||
|
baduri2 url\({w}{string}{w}
|
||||||
|
baduri3 url\({w}{badstring}
|
||||||
|
comment \/\*[^*]*\*+([^/*][^*]*\*+)*\/
|
||||||
|
ident -?{nmstart}{nmchar}*
|
||||||
|
name {nmchar}+
|
||||||
|
num [0-9]+|[0-9]*"."[0-9]+
|
||||||
|
string {string1}|{string2}
|
||||||
|
badstring {badstring1}|{badstring2}
|
||||||
|
badcomment {badcomment1}|{badcomment2}
|
||||||
|
baduri {baduri1}|{baduri2}|{baduri3}
|
||||||
|
url ([!#$%&*-~]|{nonascii}|{escape})*
|
||||||
|
s [ \t\r\n\f]+
|
||||||
|
w {s}?
|
||||||
|
nl \n|\r\n|\r|\f
|
||||||
|
|
||||||
comment \/\*[^*]*\*+([^/*][^*]*\*+)*\/
|
A a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?
|
||||||
ident -?{nmstart}{nmchar}*
|
C c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?
|
||||||
name {nmchar}+
|
D d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
|
||||||
num [0-9]+|[0-9]*"."[0-9]+
|
E e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
|
||||||
string {string1}|{string2}
|
G g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g
|
||||||
invalid {invalid1}|{invalid2}
|
H h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h
|
||||||
url ([!#$%&*-~]|{nonascii}|{escape})*
|
I i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i
|
||||||
s [ \t\r\n\f]
|
K k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k
|
||||||
w ({s}|{comment})*
|
L l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l
|
||||||
nl \n|\r\n|\r|\f
|
M m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m
|
||||||
|
N n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
|
||||||
A a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?
|
O o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o
|
||||||
C c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?
|
P p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p
|
||||||
D d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
|
R r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r
|
||||||
E e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
|
S s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s
|
||||||
G g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g
|
T t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
|
||||||
H h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h
|
U u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u
|
||||||
I i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i
|
X x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x
|
||||||
K k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k
|
Z z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z
|
||||||
M m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m
|
|
||||||
N n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
|
|
||||||
P p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p
|
|
||||||
R r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r
|
|
||||||
S s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s
|
|
||||||
T t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
|
|
||||||
X x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x
|
|
||||||
Z z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
{s} {return S;}
|
{s} {return S;}
|
||||||
|
|
||||||
\/\*[^*]*\*+([^/*][^*]*\*+)*\/ {return S;} /* ignore comments */
|
{comment} {return COMMENT;}
|
||||||
|
#\/\*[^*]*\*+([^/*][^*]*\*+)*\/ /* ignore comments */
|
||||||
|
{badcomment} /* unclosed comment at EOF */
|
||||||
|
|
||||||
"<!--" {return CDO;}
|
"<!--" {return CDO;}
|
||||||
"-->" {return CDC;}
|
"-->" {return CDC;}
|
||||||
"~=" {return INCLUDES;}
|
"~=" {return INCLUDES;}
|
||||||
"|=" {return DASHMATCH;}
|
"|=" {return DASHMATCH;}
|
||||||
|
|
||||||
{w}"{" {return LBRACE;}
|
{string} {return STRING;}
|
||||||
{w}"+" {return PLUS;}
|
{badstring} {return BAD_STRING;}
|
||||||
{w}">" {return GREATER;}
|
|
||||||
{w}"," {return COMMA;}
|
|
||||||
|
|
||||||
{string} {return STRING;}
|
{ident} {return IDENT;}
|
||||||
{invalid} {return INVALID; /* unclosed string */}
|
|
||||||
|
|
||||||
{ident} {return IDENT;}
|
"#"{name} {return HASH;}
|
||||||
|
|
||||||
"#"{name} {return HASH;}
|
@{I}{M}{P}{O}{R}{T} {return IMPORT_SYM;}
|
||||||
|
@{P}{A}{G}{E} {return PAGE_SYM;}
|
||||||
|
@{M}{E}{D}{I}{A} {return MEDIA_SYM;}
|
||||||
|
"@charset " {return CHARSET_SYM;}
|
||||||
|
|
||||||
"@import" {return IMPORT_SYM;}
|
"!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T} {return IMPORTANT_SYM;}
|
||||||
"@page" {return PAGE_SYM;}
|
|
||||||
"@media" {return MEDIA_SYM;}
|
|
||||||
"@charset " {return CHARSET_SYM;}
|
|
||||||
|
|
||||||
"!"{w}"important" {return IMPORTANT_SYM;}
|
{num}{E}{M} {return EMS;}
|
||||||
|
{num}{E}{X} {return EXS;}
|
||||||
|
{num}{P}{X} {return LENGTH;}
|
||||||
|
{num}{C}{M} {return LENGTH;}
|
||||||
|
{num}{M}{M} {return LENGTH;}
|
||||||
|
{num}{I}{N} {return LENGTH;}
|
||||||
|
{num}{P}{T} {return LENGTH;}
|
||||||
|
{num}{P}{C} {return LENGTH;}
|
||||||
|
{num}{D}{E}{G} {return ANGLE;}
|
||||||
|
{num}{R}{A}{D} {return ANGLE;}
|
||||||
|
{num}{G}{R}{A}{D} {return ANGLE;}
|
||||||
|
{num}{M}{S} {return TIME;}
|
||||||
|
{num}{S} {return TIME;}
|
||||||
|
{num}{H}{Z} {return FREQ;}
|
||||||
|
{num}{K}{H}{Z} {return FREQ;}
|
||||||
|
{num}{ident} {return DIMENSION;}
|
||||||
|
|
||||||
{num}{E}{M} {return EMS;}
|
{num}% {return PERCENTAGE;}
|
||||||
{num}{E}{X} {return EXS;}
|
{num} {return NUMBER;}
|
||||||
{num}{P}{X} {return LENGTH;}
|
|
||||||
{num}{C}{M} {return LENGTH;}
|
|
||||||
{num}{M}{M} {return LENGTH;}
|
|
||||||
{num}{I}{N} {return LENGTH;}
|
|
||||||
{num}{P}{T} {return LENGTH;}
|
|
||||||
{num}{P}{C} {return LENGTH;}
|
|
||||||
{num}{D}{E}{G} {return ANGLE;}
|
|
||||||
{num}{R}{A}{D} {return ANGLE;}
|
|
||||||
{num}{G}{R}{A}{D} {return ANGLE;}
|
|
||||||
{num}{M}{S} {return TIME;}
|
|
||||||
{num}{S} {return TIME;}
|
|
||||||
{num}{H}{Z} {return FREQ;}
|
|
||||||
{num}{K}{H}{Z} {return FREQ;}
|
|
||||||
{num}{ident} {return DIMENSION;}
|
|
||||||
|
|
||||||
{num}% {return PERCENTAGE;}
|
|
||||||
{num} {return NUMBER;}
|
|
||||||
|
|
||||||
"url("{w}{string}{w}")" {return URI;}
|
"url("{w}{string}{w}")" {return URI;}
|
||||||
"url("{w}{url}{w}")" {return URI;}
|
"url("{w}{url}{w}")" {return URI;}
|
||||||
{ident}"(" {return FUNCTION;}
|
{baduri} {return BAD_URI;}
|
||||||
|
|
||||||
. {return *yytext;}
|
{ident}"(" {return FUNCTION;}
|
||||||
|
|
||||||
|
. {return *yytext;}
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
Loading…
Reference in New Issue
Block a user