Update CSS grammar from 1.x to 2.2

* src/css-tokens.h: Add enum constants and assign fixed values
* src/css.l: Include config.h,
  ignore several compiler warnings,
  update the grammar to CSS 2.2

Fixes OSS-Fuzz issue #8010 (a slowness issue).
This is a long-standing bug affecting all versions <= 1.19.4.

Some crafted CSS input was extremely slow to process and wasted CPU time,
so it could be used for a denial-of-service (DoS) attack against website scanning.

The code/grammar changes were backported from Wget2.x.
This commit is contained in:
Tim Rühsen 2018-04-26 11:58:01 +02:00
parent 76fb1fe6f6
commit caa08d7470
2 changed files with 133 additions and 107 deletions

View File

@ -32,36 +32,34 @@ as that of the covered work. */
#define CSS_TOKENS_H #define CSS_TOKENS_H
enum { enum {
CSSEOF, CSSEOF = 0,
S, S = 1,
CDO, CDO = 2,
CDC, CDC = 3,
INCLUDES, INCLUDES = 4,
DASHMATCH, DASHMATCH = 5,
LBRACE, STRING = 6,
PLUS, BAD_STRING = 7,
GREATER, IDENT = 8,
COMMA, HASH = 9,
STRING, IMPORT_SYM = 10,
INVALID, PAGE_SYM = 11,
IDENT, MEDIA_SYM = 12,
HASH, CHARSET_SYM = 13,
IMPORT_SYM, IMPORTANT_SYM = 14,
PAGE_SYM, EMS = 15,
MEDIA_SYM, EXS = 16,
CHARSET_SYM, LENGTH = 17,
IMPORTANT_SYM, ANGLE = 18,
EMS, TIME = 19,
EXS, FREQ = 20,
LENGTH, DIMENSION = 21,
ANGLE, PERCENTAGE = 22,
TIME, NUMBER = 23,
FREQ, URI = 24,
DIMENSION, BAD_URI = 25,
PERCENTAGE, FUNCTION = 26,
NUMBER, COMMENT = 27
URI,
FUNCTION
}; };
#endif /* CSS_TOKENS_H */ #endif /* CSS_TOKENS_H */

182
src/css.l
View File

@ -3,6 +3,12 @@
%option never-interactive %option never-interactive
%option nounput %option nounput
%top{
/* config.h must precede flex's inclusion of <stdio.h>
in order for its _GNU_SOURCE definition to take effect. */
#include <config.h>
}
%{ %{
/* Lex source for CSS tokenizing. /* Lex source for CSS tokenizing.
Taken from http://www.w3.org/TR/CSS21/grammar.html#q2 Taken from http://www.w3.org/TR/CSS21/grammar.html#q2
@ -39,101 +45,123 @@ as that of the covered work. */
#include "css-tokens.h" #include "css-tokens.h"
#if defined __clang__ || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
#pragma GCC diagnostic ignored "-Wunknown-pragmas" // clang mourns about the next one
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-macros"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wswitch-default"
#pragma GCC diagnostic ignored "-Wunreachable-code" // clang
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#ifndef __clang__
#pragma GCC diagnostic ignored "-Wsuggest-attribute=pure"
#endif
#endif
%} %}
h [0-9a-f] h [0-9a-f]
nonascii [\200-\377] nonascii [\240-\377]
unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])? unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
escape {unicode}|\\[^\r\n\f0-9a-f] escape {unicode}|\\[^\r\n\f0-9a-f]
nmstart [_a-z]|{nonascii}|{escape} nmstart [_a-z]|{nonascii}|{escape}
nmchar [_a-z0-9-]|{nonascii}|{escape} nmchar [_a-z0-9-]|{nonascii}|{escape}
string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\' string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
invalid1 \"([^\n\r\f\\"]|\\{nl}|{escape})* badstring1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
invalid2 \'([^\n\r\f\\']|\\{nl}|{escape})* badstring2 \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
badcomment1 \/\*[^*]*\*+([^/*][^*]*\*+)*
badcomment2 \/\*[^*]*(\*+[^/*][^*]*)*
baduri1 url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}
baduri2 url\({w}{string}{w}
baduri3 url\({w}{badstring}
comment \/\*[^*]*\*+([^/*][^*]*\*+)*\/
ident -?{nmstart}{nmchar}*
name {nmchar}+
num [0-9]+|[0-9]*"."[0-9]+
string {string1}|{string2}
badstring {badstring1}|{badstring2}
badcomment {badcomment1}|{badcomment2}
baduri {baduri1}|{baduri2}|{baduri3}
url ([!#$%&*-~]|{nonascii}|{escape})*
s [ \t\r\n\f]+
w {s}?
nl \n|\r\n|\r|\f
comment \/\*[^*]*\*+([^/*][^*]*\*+)*\/ A a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?
ident -?{nmstart}{nmchar}* C c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?
name {nmchar}+ D d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
num [0-9]+|[0-9]*"."[0-9]+ E e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
string {string1}|{string2} G g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g
invalid {invalid1}|{invalid2} H h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h
url ([!#$%&*-~]|{nonascii}|{escape})* I i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i
s [ \t\r\n\f] K k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k
w ({s}|{comment})* L l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l
nl \n|\r\n|\r|\f M m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m
N n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
A a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])? O o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o
C c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])? P p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p
D d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])? R r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r
E e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])? S s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s
G g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g T t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
H h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h U u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u
I i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i X x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x
K k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k Z z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z
M m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m
N n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
P p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p
R r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r
S s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s
T t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
X x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x
Z z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z
%% %%
{s} {return S;} {s} {return S;}
\/\*[^*]*\*+([^/*][^*]*\*+)*\/ {return S;} /* ignore comments */ {comment} {return COMMENT;}
#\/\*[^*]*\*+([^/*][^*]*\*+)*\/ /* ignore comments */
{badcomment} /* unclosed comment at EOF */
"<!--" {return CDO;} "<!--" {return CDO;}
"-->" {return CDC;} "-->" {return CDC;}
"~=" {return INCLUDES;} "~=" {return INCLUDES;}
"|=" {return DASHMATCH;} "|=" {return DASHMATCH;}
{w}"{" {return LBRACE;} {string} {return STRING;}
{w}"+" {return PLUS;} {badstring} {return BAD_STRING;}
{w}">" {return GREATER;}
{w}"," {return COMMA;}
{string} {return STRING;} {ident} {return IDENT;}
{invalid} {return INVALID; /* unclosed string */}
{ident} {return IDENT;} "#"{name} {return HASH;}
"#"{name} {return HASH;} @{I}{M}{P}{O}{R}{T} {return IMPORT_SYM;}
@{P}{A}{G}{E} {return PAGE_SYM;}
@{M}{E}{D}{I}{A} {return MEDIA_SYM;}
"@charset " {return CHARSET_SYM;}
"@import" {return IMPORT_SYM;} "!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T} {return IMPORTANT_SYM;}
"@page" {return PAGE_SYM;}
"@media" {return MEDIA_SYM;}
"@charset " {return CHARSET_SYM;}
"!"{w}"important" {return IMPORTANT_SYM;} {num}{E}{M} {return EMS;}
{num}{E}{X} {return EXS;}
{num}{P}{X} {return LENGTH;}
{num}{C}{M} {return LENGTH;}
{num}{M}{M} {return LENGTH;}
{num}{I}{N} {return LENGTH;}
{num}{P}{T} {return LENGTH;}
{num}{P}{C} {return LENGTH;}
{num}{D}{E}{G} {return ANGLE;}
{num}{R}{A}{D} {return ANGLE;}
{num}{G}{R}{A}{D} {return ANGLE;}
{num}{M}{S} {return TIME;}
{num}{S} {return TIME;}
{num}{H}{Z} {return FREQ;}
{num}{K}{H}{Z} {return FREQ;}
{num}{ident} {return DIMENSION;}
{num}{E}{M} {return EMS;} {num}% {return PERCENTAGE;}
{num}{E}{X} {return EXS;} {num} {return NUMBER;}
{num}{P}{X} {return LENGTH;}
{num}{C}{M} {return LENGTH;}
{num}{M}{M} {return LENGTH;}
{num}{I}{N} {return LENGTH;}
{num}{P}{T} {return LENGTH;}
{num}{P}{C} {return LENGTH;}
{num}{D}{E}{G} {return ANGLE;}
{num}{R}{A}{D} {return ANGLE;}
{num}{G}{R}{A}{D} {return ANGLE;}
{num}{M}{S} {return TIME;}
{num}{S} {return TIME;}
{num}{H}{Z} {return FREQ;}
{num}{K}{H}{Z} {return FREQ;}
{num}{ident} {return DIMENSION;}
{num}% {return PERCENTAGE;}
{num} {return NUMBER;}
"url("{w}{string}{w}")" {return URI;} "url("{w}{string}{w}")" {return URI;}
"url("{w}{url}{w}")" {return URI;} "url("{w}{url}{w}")" {return URI;}
{ident}"(" {return FUNCTION;} {baduri} {return BAD_URI;}
. {return *yytext;} {ident}"(" {return FUNCTION;}
. {return *yytext;}
%% %%