sanitized string and character constant parsing

This commit is contained in:
bellard 2003-04-13 17:37:14 +00:00
parent c2e6daafc1
commit 0c618f4b7f

301
tcc.c
View File

@ -223,7 +223,7 @@ typedef struct BufferedFile {
uint8_t *buf_ptr;
uint8_t *buf_end;
int fd;
int line_num; /* current line number - here to simply code */
int line_num; /* current line number - here to simplify code */
int ifndef_macro; /* #ifndef macro / #endif search */
int ifndef_macro_saved; /* saved ifndef_macro */
int *ifdef_stack_ptr; /* ifdef_stack value at the start of the file */
@ -1738,6 +1738,18 @@ static int handle_stray1(uint8_t *p)
return c;
}
/* PEEKC_EOB(c, p): advance 'p' by one byte and load the byte found there
   into 'c'. Handles just the EOB case, but not stray backslashes.

   When the loaded byte is a backslash, 'p' is stored into file->buf_ptr,
   handle_eob() is called and its result becomes 'c', then 'p' is reloaded
   from file->buf_ptr (so 'p' may change across the call).
   NOTE(review): this presumably relies on a backslash doubling as the
   end-of-buffer sentinel, with handle_eob() refilling the buffer - confirm
   against handle_eob().

   Both arguments are evaluated several times and are assigned to, so they
   must be plain lvalues without side effects. The expansion is a bare
   { } block, not a do { } while (0) statement. */
#define PEEKC_EOB(c, p)\
{\
p++;\
c = *p;\
if (c == '\\') {\
file->buf_ptr = p;\
c = handle_eob();\
p = file->buf_ptr;\
}\
}
/* handle the complicated stray case */
#define PEEKC(c, p)\
{\
@ -1862,11 +1874,73 @@ static inline void skip_spaces(void)
cinp();
}
/* Parse a string or character constant without interpreting escapes.

   p   : current input position; the byte at 'p' is skipped unconditionally
         before scanning starts (callers pass the opening quote position).
   sep : terminating character. Scanning stops at the first unescaped byte
         equal to 'sep'.
   str : receives the raw contents, or NULL to only skip over the text, in
         which case nothing is stored.

   Returns the position just after the closing 'sep'.

   What ends up in 'str':
     - "\" followed by a newline or by CR LF is dropped entirely;
     - any other "\x" pair is copied verbatim (backslash included), so an
       escaped separator does not end the scan;
     - CR LF is stored as a single '\n', a lone CR is stored as is;
     - every other byte is copied unchanged.
   file->line_num is incremented for each newline consumed.

   Reaching end of input before 'sep' is reported through error().
   NOTE(review): error() and expect() are presumably noreturn - confirm. */
static uint8_t *parse_pp_string(uint8_t *p,
                                int sep, CString *str)
{
    int c;

    p++;
    for(;;) {
        c = *p;
        if (c == sep) {
            break;
        } else if (c == '\\') {
            /* the result of handle_eob() tells whether this is the end of
               input (CH_EOF) or a genuine backslash ('\\') */
            file->buf_ptr = p;
            c = handle_eob();
            p = file->buf_ptr;
            if (c == CH_EOF) {
            unterminated_string:
                /* XXX: indicate line number of start of string */
                error("missing terminating %c character", sep);
            } else if (c == '\\') {
                /* escape : just skip \[\r]\n */
                PEEKC_EOB(c, p);
                if (c == '\n') {
                    file->line_num++;
                    p++;
                } else if (c == '\r') {
                    PEEKC_EOB(c, p);
                    if (c != '\n') {
                        /* the backslashes are escaped so that the message
                           names the characters instead of emitting a raw
                           LF and CR in the diagnostic */
                        expect("'\\n' after '\\r'");
                    }
                    file->line_num++;
                    p++;
                } else if (c == CH_EOF) {
                    goto unterminated_string;
                } else {
                    /* keep the escape untouched: it is evaluated later */
                    if (str) {
                        cstr_ccat(str, '\\');
                        cstr_ccat(str, c);
                    }
                    p++;
                }
            }
        } else if (c == '\n') {
            file->line_num++;
            goto add_char;
        } else if (c == '\r') {
            PEEKC_EOB(c, p);
            if (c != '\n') {
                /* lone CR: 'p' already points at the following byte, which
                   the next iteration handles. 'str' is NULL when the caller
                   only skips the string, so it must be checked here just
                   like at every other store. */
                if (str)
                    cstr_ccat(str, '\r');
            } else {
                /* CR LF counts as one newline; only the '\n' is stored */
                file->line_num++;
                goto add_char;
            }
        } else {
        add_char:
            if (str)
                cstr_ccat(str, c);
            p++;
        }
    }
    /* skip the closing separator */
    p++;
    return p;
}
/* Skip a block of text until #else, #elif or #endif. Pairs of
#if/#endif found inside the block are skipped as well. */
void preprocess_skip(void)
{
int a, start_of_line, sep, c;
int a, start_of_line, c;
uint8_t *p;
p = file->buf_ptr;
@ -1903,41 +1977,7 @@ void preprocess_skip(void)
/* skip strings */
case '\"':
case '\'':
sep = c;
p++;
for(;;) {
c = *p;
if (c == sep) {
break;
} else if (c == '\\') {
file->buf_ptr = p;
c = handle_eob();
p = file->buf_ptr;
if (c == CH_EOF) {
/* XXX: better error message */
error("unterminated string");
} else if (c == '\\') {
/* ignore next char */
p++;
c = *p;
if (c == '\\') {
file->buf_ptr = p;
c = handle_eob();
p = file->buf_ptr;
}
if (c == '\n')
file->line_num++;
else if (c != CH_EOF)
p++;
}
} else if (c == '\n') {
file->line_num++;
p++;
} else {
p++;
}
}
p++;
p = parse_pp_string(p, c, NULL);
break;
/* skip comments */
case '/':
@ -2724,55 +2764,57 @@ static void preprocess(int is_bof)
parse_flags = saved_parse_flags;
}
/* read a number in base b */
static int getn(int b)
/* evaluate escape codes in a string. */
static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
{
int n, t;
n = 0;
while (1) {
if (ch >= 'a' && ch <= 'f')
t = ch - 'a' + 10;
else if (ch >= 'A' && ch <= 'F')
t = ch - 'A' + 10;
else if (isnum(ch))
t = ch - '0';
else
break;
if (t < 0 || t >= b)
break;
n = n * b + t;
inp();
}
return n;
}
int c, n;
const char *p;
/* read a character for string or char constant and eval escape codes */
static int getq(void)
{
int c;
redo:
c = ch;
inp();
p = buf;
for(;;) {
c = *p;
if (c == '\0')
break;
if (c == '\\') {
switch(ch) {
p++;
/* escape */
c = *p;
switch(c) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
/* at most three octal digits */
c = ch - '0';
inp();
if (isoct(ch)) {
c = c * 8 + ch - '0';
inp();
if (isoct(ch)) {
c = c * 8 + ch - '0';
inp();
n = c - '0';
p++;
c = *p;
if (isoct(c)) {
n = n * 8 + c - '0';
p++;
c = *p;
if (isoct(c)) {
n = n * 8 + c - '0';
p++;
}
}
return c;
c = n;
goto add_char_nonext;
case 'x':
inp();
return getn(16);
p++;
n = 0;
for(;;) {
c = *p;
if (c >= 'a' && c <= 'f')
c = c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
c = c - 'A' + 10;
else if (isnum(c))
c = c - '0';
else
break;
n = n * 16 + c;
p++;
}
c = n;
goto add_char_nonext;
case 'a':
c = '\a';
break;
@ -2803,27 +2845,24 @@ static int getq(void)
case '\"':
case '\\':
case '?':
c = ch;
break;
case '\n':
inp();
goto redo;
case '\r':
inp();
if (ch != '\n')
goto invalid_escape;
inp();
goto redo;
default:
invalid_escape:
error("invalid escaped char");
}
inp();
} else if (c == '\r' && ch == '\n') {
inp();
c = '\n';
}
return c;
p++;
add_char_nonext:
if (!is_long)
cstr_ccat(outstr, c);
else
cstr_wccat(outstr, c);
}
/* add a trailing '\0' */
if (!is_long)
cstr_ccat(outstr, '\0');
else
cstr_wccat(outstr, '\0');
}
/* we use 64 bit numbers */
@ -3132,7 +3171,7 @@ void parse_number(const char *p)
/* return next token without macro substitution */
static inline void next_nomacro1(void)
{
int b, t, c;
int t, c, is_long;
TokenSym *ts;
uint8_t *p, *p1;
unsigned int h;
@ -3304,11 +3343,8 @@ static inline void next_nomacro1(void)
goto parse_ident_fast;
} else {
PEEKC(c, p);
if (c == '\'') {
tok = TOK_LCHAR;
goto char_const;
} else if (c == '\"') {
tok = TOK_LSTR;
if (c == '\'' || c == '\"') {
is_long = 1;
goto str_const;
} else {
cstr_reset(&tokcstr);
@ -3357,42 +3393,51 @@ static inline void next_nomacro1(void)
}
break;
case '\'':
tok = TOK_CCHAR;
char_const:
file->buf_ptr = p;
inp();
b = getq();
/* this cast is needed if >= 128 */
if (tok == TOK_CCHAR)
b = (char)b;
tokc.i = b;
if (ch != '\'')
error("unterminated character constant");
p = file->buf_ptr;
p++;
break;
case '\"':
tok = TOK_STR;
is_long = 0;
str_const:
file->buf_ptr = p;
inp();
{
CString str;
int sep;
sep = c;
/* parse the string */
cstr_new(&str);
p = parse_pp_string(p, sep, &str);
cstr_ccat(&str, '\0');
/* eval the escape (should be done as TOK_PPNUM) */
cstr_reset(&tokcstr);
while (ch != '\"') {
b = getq();
if (ch == CH_EOF)
error("unterminated string");
if (tok == TOK_STR)
cstr_ccat(&tokcstr, b);
parse_escape_string(&tokcstr, str.data, is_long);
cstr_free(&str);
if (sep == '\'') {
int char_size;
/* XXX: make it portable */
if (!is_long)
char_size = 1;
else
cstr_wccat(&tokcstr, b);
char_size = sizeof(int);
if (tokcstr.size <= char_size)
error("empty character constant");
if (tokcstr.size > 2 * char_size)
warning("multi-character character constant");
if (!is_long) {
tokc.i = *(int8_t *)tokcstr.data;
tok = TOK_CCHAR;
} else {
tokc.i = *(int *)tokcstr.data;
tok = TOK_LCHAR;
}
if (tok == TOK_STR)
cstr_ccat(&tokcstr, '\0');
else
cstr_wccat(&tokcstr, '\0');
} else {
tokc.cstr = &tokcstr;
p = file->buf_ptr;
p++;
if (!is_long)
tok = TOK_STR;
else
tok = TOK_LSTR;
}
}
break;
case '<':