Change the way struct CStrings are handled.

A CString used to be copied into a token string, which is an int array.
On a 64-bit architecture the pointers were misaligned, so ASan gave
lots of warnings. On a 64-bit architecture that requires memory
accesses to be correctly aligned, it would not work at all.

The string's size and data pointers are now stored directly in CValue (as the `str` member) instead of being reached through a CString pointer.
This commit is contained in:
Edmund Grimley Evans 2015-11-21 11:23:53 +00:00
parent 4886d2c640
commit 1c2dfa1f4b
6 changed files with 67 additions and 66 deletions

1
TODO
View File

@ -36,7 +36,6 @@ Portability:
- it is assumed that int is 32-bit and sizeof(int) == 4 - it is assumed that int is 32-bit and sizeof(int) == 4
- int is used when host or target size_t would make more sense - int is used when host or target size_t would make more sense
- struct CString is written into an int array and ends up misaligned
- TCC handles target floating-point (fp) values using the host's fp - TCC handles target floating-point (fp) values using the host's fp
arithmetic, which is simple and fast but may lead to exceptions arithmetic, which is simple and fast but may lead to exceptions
and inaccuracy and wrong representations when cross-compiling and inaccuracy and wrong representations when cross-compiling

View File

@ -330,7 +330,7 @@ static void parse_operand(TCCState *s1, Operand *op)
next(); next();
if (tok != TOK_PPNUM) if (tok != TOK_PPNUM)
goto reg_error; goto reg_error;
p = tokc.cstr->data; p = tokc.str.data;
reg = p[0] - '0'; reg = p[0] - '0';
if ((unsigned)reg >= 8 || p[1] != '\0') if ((unsigned)reg >= 8 || p[1] != '\0')
goto reg_error; goto reg_error;

6
tcc.h
View File

@ -400,7 +400,11 @@ typedef union CValue {
double d; double d;
float f; float f;
uint64_t i; uint64_t i;
struct CString *cstr; struct {
int size;
const void *data;
void *data_allocated;
} str;
int tab[LDOUBLE_SIZE/4]; int tab[LDOUBLE_SIZE/4];
} CValue; } CValue;

View File

@ -44,7 +44,7 @@ static void asm_expr_unary(TCCState *s1, ExprValue *pe)
switch(tok) { switch(tok) {
case TOK_PPNUM: case TOK_PPNUM:
p = tokc.cstr->data; p = tokc.str.data;
n = strtoul(p, (char **)&p, 0); n = strtoul(p, (char **)&p, 0);
if (*p == 'b' || *p == 'f') { if (*p == 'b' || *p == 'f') {
/* backward or forward label */ /* backward or forward label */
@ -378,7 +378,7 @@ static void asm_parse_directive(TCCState *s1)
uint64_t vl; uint64_t vl;
const char *p; const char *p;
p = tokc.cstr->data; p = tokc.str.data;
if (tok != TOK_PPNUM) { if (tok != TOK_PPNUM) {
error_constant: error_constant:
tcc_error("64 bit constant"); tcc_error("64 bit constant");
@ -523,8 +523,8 @@ static void asm_parse_directive(TCCState *s1)
for(;;) { for(;;) {
if (tok != TOK_STR) if (tok != TOK_STR)
expect("string constant"); expect("string constant");
p = tokc.cstr->data; p = tokc.str.data;
size = tokc.cstr->size; size = tokc.str.size;
if (t == TOK_ASM_ascii && size > 0) if (t == TOK_ASM_ascii && size > 0)
size--; size--;
for(i = 0; i < size; i++) for(i = 0; i < size; i++)
@ -565,7 +565,7 @@ static void asm_parse_directive(TCCState *s1)
next(); next();
if (tok == TOK_STR) if (tok == TOK_STR)
pstrcat(filename, sizeof(filename), tokc.cstr->data); pstrcat(filename, sizeof(filename), tokc.str.data);
else else
pstrcat(filename, sizeof(filename), get_tok_str(tok, NULL)); pstrcat(filename, sizeof(filename), get_tok_str(tok, NULL));
@ -583,7 +583,7 @@ static void asm_parse_directive(TCCState *s1)
next(); next();
if (tok == TOK_STR) if (tok == TOK_STR)
pstrcat(ident, sizeof(ident), tokc.cstr->data); pstrcat(ident, sizeof(ident), tokc.str.data);
else else
pstrcat(ident, sizeof(ident), get_tok_str(tok, NULL)); pstrcat(ident, sizeof(ident), get_tok_str(tok, NULL));
@ -629,7 +629,7 @@ static void asm_parse_directive(TCCState *s1)
next(); next();
skip(','); skip(',');
if (tok == TOK_STR) { if (tok == TOK_STR) {
newtype = tokc.cstr->data; newtype = tokc.str.data;
} else { } else {
if (tok == '@' || tok == '%') if (tok == '@' || tok == '%')
skip(tok); skip(tok);
@ -655,7 +655,7 @@ static void asm_parse_directive(TCCState *s1)
sname[0] = '\0'; sname[0] = '\0';
while (tok != ';' && tok != TOK_LINEFEED && tok != ',') { while (tok != ';' && tok != TOK_LINEFEED && tok != ',') {
if (tok == TOK_STR) if (tok == TOK_STR)
pstrcat(sname, sizeof(sname), tokc.cstr->data); pstrcat(sname, sizeof(sname), tokc.str.data);
else else
pstrcat(sname, sizeof(sname), get_tok_str(tok, NULL)); pstrcat(sname, sizeof(sname), get_tok_str(tok, NULL));
next(); next();
@ -768,7 +768,7 @@ static int tcc_assemble_internal(TCCState *s1, int do_preprocess)
} else if (tok == TOK_PPNUM) { } else if (tok == TOK_PPNUM) {
const char *p; const char *p;
int n; int n;
p = tokc.cstr->data; p = tokc.str.data;
n = strtoul(p, (char **)&p, 10); n = strtoul(p, (char **)&p, 10);
if (*p != '\0') if (*p != '\0')
expect("':'"); expect("':'");
@ -970,8 +970,8 @@ static void parse_asm_operands(ASMOperand *operands, int *nb_operands_ptr,
} }
if (tok != TOK_STR) if (tok != TOK_STR)
expect("string constant"); expect("string constant");
op->constraint = tcc_malloc(tokc.cstr->size); op->constraint = tcc_malloc(tokc.str.size);
strcpy(op->constraint, tokc.cstr->data); strcpy(op->constraint, tokc.str.data);
next(); next();
skip('('); skip('(');
gexpr(); gexpr();
@ -1038,7 +1038,7 @@ ST_FUNC void asm_instr(void)
for(;;) { for(;;) {
if (tok != TOK_STR) if (tok != TOK_STR)
expect("string constant"); expect("string constant");
asm_clobber(clobber_regs, tokc.cstr->data); asm_clobber(clobber_regs, tokc.str.data);
next(); next();
if (tok == ',') { if (tok == ',') {
next(); next();

View File

@ -2759,7 +2759,7 @@ static void parse_attribute(AttributeDef *ad)
skip('('); skip('(');
if (tok != TOK_STR) if (tok != TOK_STR)
expect("section name"); expect("section name");
ad->section = find_section(tcc_state, (char *)tokc.cstr->data); ad->section = find_section(tcc_state, (char *)tokc.str.data);
next(); next();
skip(')'); skip(')');
break; break;
@ -2769,7 +2769,7 @@ static void parse_attribute(AttributeDef *ad)
if (tok != TOK_STR) if (tok != TOK_STR)
expect("alias(\"target\")"); expect("alias(\"target\")");
ad->alias_target = /* save string as token, for later */ ad->alias_target = /* save string as token, for later */
tok_alloc((char*)tokc.cstr->data, tokc.cstr->size-1)->tok; tok_alloc((char*)tokc.str.data, tokc.str.size-1)->tok;
next(); next();
skip(')'); skip(')');
break; break;
@ -2778,13 +2778,13 @@ static void parse_attribute(AttributeDef *ad)
skip('('); skip('(');
if (tok != TOK_STR) if (tok != TOK_STR)
expect("visibility(\"default|hidden|internal|protected\")"); expect("visibility(\"default|hidden|internal|protected\")");
if (!strcmp (tokc.cstr->data, "default")) if (!strcmp (tokc.str.data, "default"))
ad->a.visibility = STV_DEFAULT; ad->a.visibility = STV_DEFAULT;
else if (!strcmp (tokc.cstr->data, "hidden")) else if (!strcmp (tokc.str.data, "hidden"))
ad->a.visibility = STV_HIDDEN; ad->a.visibility = STV_HIDDEN;
else if (!strcmp (tokc.cstr->data, "internal")) else if (!strcmp (tokc.str.data, "internal"))
ad->a.visibility = STV_INTERNAL; ad->a.visibility = STV_INTERNAL;
else if (!strcmp (tokc.cstr->data, "protected")) else if (!strcmp (tokc.str.data, "protected"))
ad->a.visibility = STV_PROTECTED; ad->a.visibility = STV_PROTECTED;
else else
expect("visibility(\"default|hidden|internal|protected\")"); expect("visibility(\"default|hidden|internal|protected\")");
@ -3376,7 +3376,7 @@ ST_FUNC void parse_asm_str(CString *astr)
cstr_new(astr); cstr_new(astr);
while (tok == TOK_STR) { while (tok == TOK_STR) {
/* XXX: add \0 handling too ? */ /* XXX: add \0 handling too ? */
cstr_cat(astr, tokc.cstr->data); cstr_cat(astr, tokc.str.data);
next(); next();
} }
cstr_ccat(astr, '\0'); cstr_ccat(astr, '\0');
@ -5521,14 +5521,12 @@ static void decl_initializer(CType *type, Section *sec, unsigned long c,
) || (tok == TOK_STR && (t1->t & VT_BTYPE) == VT_BYTE)) { ) || (tok == TOK_STR && (t1->t & VT_BTYPE) == VT_BYTE)) {
while (tok == TOK_STR || tok == TOK_LSTR) { while (tok == TOK_STR || tok == TOK_LSTR) {
int cstr_len, ch; int cstr_len, ch;
CString *cstr;
cstr = tokc.cstr;
/* compute maximum number of chars wanted */ /* compute maximum number of chars wanted */
if (tok == TOK_STR) if (tok == TOK_STR)
cstr_len = cstr->size; cstr_len = tokc.str.size;
else else
cstr_len = cstr->size / sizeof(nwchar_t); cstr_len = tokc.str.size / sizeof(nwchar_t);
cstr_len--; cstr_len--;
nb = cstr_len; nb = cstr_len;
if (n >= 0 && nb > (n - array_length)) if (n >= 0 && nb > (n - array_length))
@ -5540,13 +5538,13 @@ static void decl_initializer(CType *type, Section *sec, unsigned long c,
string in global variable, we handle it string in global variable, we handle it
specifically */ specifically */
if (sec && tok == TOK_STR && size1 == 1) { if (sec && tok == TOK_STR && size1 == 1) {
memcpy(sec->data + c + array_length, cstr->data, nb); memcpy(sec->data + c + array_length, tokc.str.data, nb);
} else { } else {
for(i=0;i<nb;i++) { for(i=0;i<nb;i++) {
if (tok == TOK_STR) if (tok == TOK_STR)
ch = ((unsigned char *)cstr->data)[i]; ch = ((unsigned char *)tokc.str.data)[i];
else else
ch = ((nwchar_t *)cstr->data)[i]; ch = ((nwchar_t *)tokc.str.data)[i];
init_putv(t1, sec, c + (array_length + i) * size1, init_putv(t1, sec, c + (array_length + i) * size1,
ch, EXPR_VAL); ch, EXPR_VAL);
} }

74
tccpp.c
View File

@ -279,7 +279,6 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
{ {
static char buf[STRING_MAX_SIZE + 1]; static char buf[STRING_MAX_SIZE + 1];
static CString cstr_buf; static CString cstr_buf;
CString *cstr;
char *p; char *p;
int i, len; int i, len;
@ -314,20 +313,19 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
break; break;
case TOK_PPNUM: case TOK_PPNUM:
case TOK_PPSTR: case TOK_PPSTR:
return (char*)cv->cstr->data; return (char*)cv->str.data;
case TOK_LSTR: case TOK_LSTR:
cstr_ccat(&cstr_buf, 'L'); cstr_ccat(&cstr_buf, 'L');
case TOK_STR: case TOK_STR:
cstr = cv->cstr;
cstr_ccat(&cstr_buf, '\"'); cstr_ccat(&cstr_buf, '\"');
if (v == TOK_STR) { if (v == TOK_STR) {
len = cstr->size - 1; len = cv->str.size - 1;
for(i=0;i<len;i++) for(i=0;i<len;i++)
add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]); add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]);
} else { } else {
len = (cstr->size / sizeof(nwchar_t)) - 1; len = (cv->str.size / sizeof(nwchar_t)) - 1;
for(i=0;i<len;i++) for(i=0;i<len;i++)
add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]); add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]);
} }
cstr_ccat(&cstr_buf, '\"'); cstr_ccat(&cstr_buf, '\"');
cstr_ccat(&cstr_buf, '\0'); cstr_ccat(&cstr_buf, '\0');
@ -929,21 +927,13 @@ static void tok_str_add2(TokenString *s, int t, CValue *cv)
case TOK_STR: case TOK_STR:
case TOK_LSTR: case TOK_LSTR:
{ {
int nb_words; /* Insert the string into the int array. */
CString cstr; size_t nb_words =
1 + (cv->str.size + sizeof(int) - 1) / sizeof(int);
nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
while ((len + nb_words) > s->allocated_len) while ((len + nb_words) > s->allocated_len)
str = tok_str_realloc(s); str = tok_str_realloc(s);
/* XXX: Insert the CString into the int array. str[len] = cv->str.size;
It may end up incorrectly aligned. */ memcpy(&str[len + 1], cv->str.data, cv->str.size);
cstr.data = 0;
cstr.size = cv->cstr->size;
cstr.data_allocated = 0;
cstr.size_allocated = cstr.size;
memcpy(str + len, &cstr, sizeof(CString));
memcpy((char *)(str + len) + sizeof(CString),
cv->cstr->data, cstr.size);
len += nb_words; len += nb_words;
} }
break; break;
@ -1012,10 +1002,10 @@ static inline void TOK_GET(int *t, const int **pp, CValue *cv)
case TOK_LSTR: case TOK_LSTR:
case TOK_PPNUM: case TOK_PPNUM:
case TOK_PPSTR: case TOK_PPSTR:
/* XXX: Illegal cast: the pointer p may not be correctly aligned! */ cv->str.size = *p++;
cv->cstr = (CString *)p; cv->str.data = p;
cv->cstr->data = (char *)p + sizeof(CString); cv->str.data_allocated = 0;
p += (sizeof(CString) + cv->cstr->size + 3) >> 2; p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
break; break;
case TOK_CDOUBLE: case TOK_CDOUBLE:
case TOK_CLLONG: case TOK_CLLONG:
@ -1452,7 +1442,7 @@ static void pragma_parse(TCCState *s1)
goto pragma_err; goto pragma_err;
if (next(), tok != TOK_STR) if (next(), tok != TOK_STR)
goto pragma_err; goto pragma_err;
v = tok_alloc(tokc.cstr->data, tokc.cstr->size - 1)->tok; v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok;
if (next(), tok != ')') if (next(), tok != ')')
goto pragma_err; goto pragma_err;
if (t == TOK_push_macro) { if (t == TOK_push_macro) {
@ -1528,7 +1518,7 @@ static void pragma_parse(TCCState *s1)
skip(','); skip(',');
if (tok != TOK_STR) if (tok != TOK_STR)
goto pragma_err; goto pragma_err;
file = tcc_strdup((char *)tokc.cstr->data); file = tcc_strdup((char *)tokc.str.data);
dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file); dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file);
next(); next();
if (tok != ')') if (tok != ')')
@ -1616,7 +1606,7 @@ ST_FUNC void preprocess(int is_bof)
include_syntax: include_syntax:
tcc_error("'#include' expects \"FILENAME\" or <FILENAME>"); tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
} }
pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data); pstrcat(buf, sizeof(buf), (char *)tokc.str.data);
next(); next();
} }
c = '\"'; c = '\"';
@ -1775,7 +1765,7 @@ include_done:
} }
break; break;
case TOK_PPNUM: case TOK_PPNUM:
n = strtoul((char*)tokc.cstr->data, &q, 10); n = strtoul((char*)tokc.str.data, &q, 10);
goto _line_num; goto _line_num;
case TOK_LINE: case TOK_LINE:
next(); next();
@ -1787,7 +1777,7 @@ _line_num:
next(); next();
if (tok != TOK_LINEFEED) { if (tok != TOK_LINEFEED) {
if (tok == TOK_STR) if (tok == TOK_STR)
pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.cstr->data); pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.str.data);
else if (parse_flags & PARSE_FLAG_ASM_FILE) else if (parse_flags & PARSE_FLAG_ASM_FILE)
break; break;
else else
@ -1990,7 +1980,9 @@ void parse_string(const char *s, int len)
tok = TOK_LCHAR; tok = TOK_LCHAR;
} }
} else { } else {
tokc.cstr = &tokcstr; tokc.str.size = tokcstr.size;
tokc.str.data = tokcstr.data;
tokc.str.data_allocated = tokcstr.data_allocated;
if (!is_long) if (!is_long)
tok = TOK_STR; tok = TOK_STR;
else else
@ -2536,7 +2528,9 @@ maybe_newline:
} }
/* We add a trailing '\0' to ease parsing */ /* We add a trailing '\0' to ease parsing */
cstr_ccat(&tokcstr, '\0'); cstr_ccat(&tokcstr, '\0');
tokc.cstr = &tokcstr; tokc.str.size = tokcstr.size;
tokc.str.data = tokcstr.data;
tokc.str.data_allocated = tokcstr.data_allocated;
tok = TOK_PPNUM; tok = TOK_PPNUM;
break; break;
@ -2571,7 +2565,9 @@ maybe_newline:
p = parse_pp_string(p, c, &tokcstr); p = parse_pp_string(p, c, &tokcstr);
cstr_ccat(&tokcstr, c); cstr_ccat(&tokcstr, c);
cstr_ccat(&tokcstr, '\0'); cstr_ccat(&tokcstr, '\0');
tokc.cstr = &tokcstr; tokc.str.size = tokcstr.size;
tokc.str.data = tokcstr.data;
tokc.str.data_allocated = tokcstr.data_allocated;
tok = TOK_PPSTR; tok = TOK_PPSTR;
break; break;
@ -2804,9 +2800,11 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
printf("\nstringize: <%s>\n", (char *)cstr.data); printf("\nstringize: <%s>\n", (char *)cstr.data);
#endif #endif
/* add string */ /* add string */
cval.cstr = &cstr; cval.str.size = cstr.size;
cval.str.data = cstr.data;
cval.str.data_allocated = cstr.data_allocated;
tok_str_add2(&str, TOK_PPSTR, &cval); tok_str_add2(&str, TOK_PPSTR, &cval);
cstr_free(cval.cstr); tcc_free(cval.str.data_allocated);
} else { } else {
bad_stringy: bad_stringy:
expect("macro parameter after '#'"); expect("macro parameter after '#'");
@ -2970,7 +2968,9 @@ static int macro_subst_tok(
cstr_new(&cstr); cstr_new(&cstr);
cstr_cat(&cstr, cstrval); cstr_cat(&cstr, cstrval);
cstr_ccat(&cstr, '\0'); cstr_ccat(&cstr, '\0');
cval.cstr = &cstr; cval.str.size = cstr.size;
cval.str.data = cstr.data;
cval.str.data_allocated = cstr.data_allocated;
tok_str_add2(tok_str, t1, &cval); tok_str_add2(tok_str, t1, &cval);
cstr_free(&cstr); cstr_free(&cstr);
} else { } else {
@ -3300,10 +3300,10 @@ ST_FUNC void next(void)
/* convert preprocessor tokens into C tokens */ /* convert preprocessor tokens into C tokens */
if (tok == TOK_PPNUM) { if (tok == TOK_PPNUM) {
if (parse_flags & PARSE_FLAG_TOK_NUM) if (parse_flags & PARSE_FLAG_TOK_NUM)
parse_number((char *)tokc.cstr->data); parse_number((char *)tokc.str.data);
} else if (tok == TOK_PPSTR) { } else if (tok == TOK_PPSTR) {
if (parse_flags & PARSE_FLAG_TOK_STR) if (parse_flags & PARSE_FLAG_TOK_STR)
parse_string((char *)tokc.cstr->data, tokc.cstr->size - 1); parse_string((char *)tokc.str.data, tokc.str.size - 1);
} }
} }