From 1c2dfa1f4bb162678c293788678f6790514cb7ba Mon Sep 17 00:00:00 2001 From: Edmund Grimley Evans Date: Sat, 21 Nov 2015 11:23:53 +0000 Subject: [PATCH] Change the way struct CStrings are handled. A CString used to be copied into a token string, which is an int array. On a 64-bit architecture the pointers were misaligned, so ASan gave lots of warnings. On a 64-bit architecture that required memory accesses to be correctly aligned it would not work at all. The CString is now included in CValue instead. --- TODO | 1 - i386-asm.c | 2 +- tcc.h | 6 ++++- tccasm.c | 24 +++++++++--------- tccgen.c | 26 +++++++++---------- tccpp.c | 74 +++++++++++++++++++++++++++--------------------------- 6 files changed, 67 insertions(+), 66 deletions(-) diff --git a/TODO b/TODO index 4ed23656..e6e5b070 100644 --- a/TODO +++ b/TODO @@ -36,7 +36,6 @@ Portability: - it is assumed that int is 32-bit and sizeof(int) == 4 - int is used when host or target size_t would make more sense -- struct CString is written into an int array and ends up misaligned - TCC handles target floating-point (fp) values using the host's fp arithmetic, which is simple and fast but may lead to exceptions and inaccuracy and wrong representations when cross-compiling diff --git a/i386-asm.c b/i386-asm.c index bd0825be..68a78613 100644 --- a/i386-asm.c +++ b/i386-asm.c @@ -330,7 +330,7 @@ static void parse_operand(TCCState *s1, Operand *op) next(); if (tok != TOK_PPNUM) goto reg_error; - p = tokc.cstr->data; + p = tokc.str.data; reg = p[0] - '0'; if ((unsigned)reg >= 8 || p[1] != '\0') goto reg_error; diff --git a/tcc.h b/tcc.h index c2848801..3b98a8c8 100644 --- a/tcc.h +++ b/tcc.h @@ -400,7 +400,11 @@ typedef union CValue { double d; float f; uint64_t i; - struct CString *cstr; + struct { + int size; + const void *data; + void *data_allocated; + } str; int tab[LDOUBLE_SIZE/4]; } CValue; diff --git a/tccasm.c b/tccasm.c index 044db834..28e07fda 100644 --- a/tccasm.c +++ b/tccasm.c @@ -44,7 +44,7 @@ static void asm_expr_unary(TCCState *s1, ExprValue *pe) switch(tok) { case TOK_PPNUM: - p = tokc.cstr->data; + p = tokc.str.data; n = strtoul(p, (char **)&p, 0); if (*p == 'b' || *p == 'f') { /* backward or forward label */ @@ -378,7 +378,7 @@ static void asm_parse_directive(TCCState *s1) uint64_t vl; const char *p; - p = tokc.cstr->data; + p = tokc.str.data; if (tok != TOK_PPNUM) { error_constant: tcc_error("64 bit constant"); @@ -523,8 +523,8 @@ static void asm_parse_directive(TCCState *s1) for(;;) { if (tok != TOK_STR) expect("string constant"); - p = tokc.cstr->data; - size = tokc.cstr->size; + p = tokc.str.data; + size = tokc.str.size; if (t == TOK_ASM_ascii && size > 0) size--; for(i = 0; i < size; i++) @@ -565,7 +565,7 @@ static void asm_parse_directive(TCCState *s1) next(); if (tok == TOK_STR) - pstrcat(filename, sizeof(filename), tokc.cstr->data); + pstrcat(filename, sizeof(filename), tokc.str.data); else pstrcat(filename, sizeof(filename), get_tok_str(tok, NULL)); @@ -583,7 +583,7 @@ static void asm_parse_directive(TCCState *s1) next(); if (tok == TOK_STR) - pstrcat(ident, sizeof(ident), tokc.cstr->data); + pstrcat(ident, sizeof(ident), tokc.str.data); else pstrcat(ident, sizeof(ident), get_tok_str(tok, NULL)); @@ -629,7 +629,7 @@ static void asm_parse_directive(TCCState *s1) next(); skip(','); if (tok == TOK_STR) { - newtype = tokc.cstr->data; + newtype = tokc.str.data; } else { if (tok == '@' || tok == '%') skip(tok); @@ -655,7 +655,7 @@ static void asm_parse_directive(TCCState *s1) sname[0] = '\0'; while (tok != ';' && tok != TOK_LINEFEED && tok != ',') { if (tok == TOK_STR) - pstrcat(sname, sizeof(sname), tokc.cstr->data); + pstrcat(sname, sizeof(sname), tokc.str.data); else pstrcat(sname, sizeof(sname), get_tok_str(tok, NULL)); next(); @@ -768,7 +768,7 @@ static int tcc_assemble_internal(TCCState *s1, int do_preprocess) } else if (tok == TOK_PPNUM) { const char *p; int n; - p = tokc.cstr->data; + p = tokc.str.data; n = strtoul(p, (char **)&p, 10); if (*p != '\0') expect("':'"); @@ -970,8 +970,8 @@ static void parse_asm_operands(ASMOperand *operands, int *nb_operands_ptr, } if (tok != TOK_STR) expect("string constant"); - op->constraint = tcc_malloc(tokc.cstr->size); - strcpy(op->constraint, tokc.cstr->data); + op->constraint = tcc_malloc(tokc.str.size); + strcpy(op->constraint, tokc.str.data); next(); skip('('); gexpr(); @@ -1038,7 +1038,7 @@ ST_FUNC void asm_instr(void) for(;;) { if (tok != TOK_STR) expect("string constant"); - asm_clobber(clobber_regs, tokc.cstr->data); + asm_clobber(clobber_regs, tokc.str.data); next(); if (tok == ',') { next(); diff --git a/tccgen.c b/tccgen.c index b085c650..08662813 100644 --- a/tccgen.c +++ b/tccgen.c @@ -2759,7 +2759,7 @@ static void parse_attribute(AttributeDef *ad) skip('('); if (tok != TOK_STR) expect("section name"); - ad->section = find_section(tcc_state, (char *)tokc.cstr->data); + ad->section = find_section(tcc_state, (char *)tokc.str.data); next(); skip(')'); break; @@ -2769,7 +2769,7 @@ static void parse_attribute(AttributeDef *ad) if (tok != TOK_STR) expect("alias(\"target\")"); ad->alias_target = /* save string as token, for later */ - tok_alloc((char*)tokc.cstr->data, tokc.cstr->size-1)->tok; + tok_alloc((char*)tokc.str.data, tokc.str.size-1)->tok; next(); skip(')'); break; @@ -2778,13 +2778,13 @@ static void parse_attribute(AttributeDef *ad) skip('('); if (tok != TOK_STR) expect("visibility(\"default|hidden|internal|protected\")"); - if (!strcmp (tokc.cstr->data, "default")) + if (!strcmp (tokc.str.data, "default")) ad->a.visibility = STV_DEFAULT; - else if (!strcmp (tokc.cstr->data, "hidden")) + else if (!strcmp (tokc.str.data, "hidden")) ad->a.visibility = STV_HIDDEN; - else if (!strcmp (tokc.cstr->data, "internal")) + else if (!strcmp (tokc.str.data, "internal")) ad->a.visibility = STV_INTERNAL; - else if (!strcmp (tokc.cstr->data, "protected")) + else if (!strcmp (tokc.str.data, "protected")) ad->a.visibility = STV_PROTECTED; else expect("visibility(\"default|hidden|internal|protected\")"); @@ -3376,7 +3376,7 @@ ST_FUNC void parse_asm_str(CString *astr) cstr_new(astr); while (tok == TOK_STR) { /* XXX: add \0 handling too ? */ - cstr_cat(astr, tokc.cstr->data); + cstr_cat(astr, tokc.str.data); next(); } cstr_ccat(astr, '\0'); @@ -5521,14 +5521,12 @@ static void decl_initializer(CType *type, Section *sec, unsigned long c, ) || (tok == TOK_STR && (t1->t & VT_BTYPE) == VT_BYTE)) { while (tok == TOK_STR || tok == TOK_LSTR) { int cstr_len, ch; - CString *cstr; - cstr = tokc.cstr; /* compute maximum number of chars wanted */ if (tok == TOK_STR) - cstr_len = cstr->size; + cstr_len = tokc.str.size; else - cstr_len = cstr->size / sizeof(nwchar_t); + cstr_len = tokc.str.size / sizeof(nwchar_t); cstr_len--; nb = cstr_len; if (n >= 0 && nb > (n - array_length)) @@ -5540,13 +5538,13 @@ static void decl_initializer(CType *type, Section *sec, unsigned long c, string in global variable, we handle it specifically */ if (sec && tok == TOK_STR && size1 == 1) { - memcpy(sec->data + c + array_length, cstr->data, nb); + memcpy(sec->data + c + array_length, tokc.str.data, nb); } else { for(i=0;idata)[i]; + ch = ((unsigned char *)tokc.str.data)[i]; else - ch = ((nwchar_t *)cstr->data)[i]; + ch = ((nwchar_t *)tokc.str.data)[i]; init_putv(t1, sec, c + (array_length + i) * size1, ch, EXPR_VAL); } diff --git a/tccpp.c b/tccpp.c index 4dba9544..0fd90a50 100644 --- a/tccpp.c +++ b/tccpp.c @@ -279,7 +279,6 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv) { static char buf[STRING_MAX_SIZE + 1]; static CString cstr_buf; - CString *cstr; char *p; int i, len; @@ -314,20 +313,19 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv) break; case TOK_PPNUM: case TOK_PPSTR: - return (char*)cv->cstr->data; + return (char*)cv->str.data; case TOK_LSTR: cstr_ccat(&cstr_buf, 'L'); case TOK_STR: - cstr = cv->cstr; cstr_ccat(&cstr_buf, '\"'); if (v == TOK_STR) { - len = cstr->size - 1; + len = cv->str.size - 1; for(i=0;idata)[i]); + add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]); } else { - len = (cstr->size / sizeof(nwchar_t)) - 1; + len = (cv->str.size / sizeof(nwchar_t)) - 1; for(i=0;idata)[i]); + add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]); } cstr_ccat(&cstr_buf, '\"'); cstr_ccat(&cstr_buf, '\0'); @@ -929,21 +927,13 @@ static void tok_str_add2(TokenString *s, int t, CValue *cv) case TOK_STR: case TOK_LSTR: { - int nb_words; - CString cstr; - - nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2; + /* Insert the string into the int array. */ + size_t nb_words = + 1 + (cv->str.size + sizeof(int) - 1) / sizeof(int); while ((len + nb_words) > s->allocated_len) str = tok_str_realloc(s); - /* XXX: Insert the CString into the int array. - It may end up incorrectly aligned. */ - cstr.data = 0; - cstr.size = cv->cstr->size; - cstr.data_allocated = 0; - cstr.size_allocated = cstr.size; - memcpy(str + len, &cstr, sizeof(CString)); - memcpy((char *)(str + len) + sizeof(CString), - cv->cstr->data, cstr.size); + str[len] = cv->str.size; + memcpy(&str[len + 1], cv->str.data, cv->str.size); len += nb_words; } break; @@ -1012,10 +1002,10 @@ static inline void TOK_GET(int *t, const int **pp, CValue *cv) case TOK_LSTR: case TOK_PPNUM: case TOK_PPSTR: - /* XXX: Illegal cast: the pointer p may not be correctly aligned! */ - cv->cstr = (CString *)p; - cv->cstr->data = (char *)p + sizeof(CString); - p += (sizeof(CString) + cv->cstr->size + 3) >> 2; + cv->str.size = *p++; + cv->str.data = p; + cv->str.data_allocated = 0; + p += (cv->str.size + sizeof(int) - 1) / sizeof(int); break; case TOK_CDOUBLE: case TOK_CLLONG: @@ -1452,7 +1442,7 @@ static void pragma_parse(TCCState *s1) goto pragma_err; if (next(), tok != TOK_STR) goto pragma_err; - v = tok_alloc(tokc.cstr->data, tokc.cstr->size - 1)->tok; + v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok; if (next(), tok != ')') goto pragma_err; if (t == TOK_push_macro) { @@ -1528,7 +1518,7 @@ static void pragma_parse(TCCState *s1) skip(','); if (tok != TOK_STR) goto pragma_err; - file = tcc_strdup((char *)tokc.cstr->data); + file = tcc_strdup((char *)tokc.str.data); dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file); next(); if (tok != ')') @@ -1616,7 +1606,7 @@ ST_FUNC void preprocess(int is_bof) include_syntax: tcc_error("'#include' expects \"FILENAME\" or "); } - pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data); + pstrcat(buf, sizeof(buf), (char *)tokc.str.data); next(); } c = '\"'; @@ -1775,7 +1765,7 @@ include_done: } break; case TOK_PPNUM: - n = strtoul((char*)tokc.cstr->data, &q, 10); + n = strtoul((char*)tokc.str.data, &q, 10); goto _line_num; case TOK_LINE: next(); @@ -1787,7 +1777,7 @@ _line_num: next(); if (tok != TOK_LINEFEED) { if (tok == TOK_STR) - pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.cstr->data); + pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.str.data); else if (parse_flags & PARSE_FLAG_ASM_FILE) break; else @@ -1990,7 +1980,9 @@ void parse_string(const char *s, int len) tok = TOK_LCHAR; } } else { - tokc.cstr = &tokcstr; + tokc.str.size = tokcstr.size; + tokc.str.data = tokcstr.data; + tokc.str.data_allocated = tokcstr.data_allocated; if (!is_long) tok = TOK_STR; else @@ -2536,7 +2528,9 @@ maybe_newline: } /* We add a trailing '\0' to ease parsing */ cstr_ccat(&tokcstr, '\0'); - tokc.cstr = &tokcstr; + tokc.str.size = tokcstr.size; + tokc.str.data = tokcstr.data; + tokc.str.data_allocated = tokcstr.data_allocated; tok = TOK_PPNUM; break; @@ -2571,7 +2565,9 @@ maybe_newline: p = parse_pp_string(p, c, &tokcstr); cstr_ccat(&tokcstr, c); cstr_ccat(&tokcstr, '\0'); - tokc.cstr = &tokcstr; + tokc.str.size = tokcstr.size; + tokc.str.data = tokcstr.data; + tokc.str.data_allocated = tokcstr.data_allocated; tok = TOK_PPSTR; break; @@ -2804,9 +2800,11 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) printf("\nstringize: <%s>\n", (char *)cstr.data); #endif /* add string */ - cval.cstr = &cstr; + cval.str.size = cstr.size; + cval.str.data = cstr.data; + cval.str.data_allocated = cstr.data_allocated; tok_str_add2(&str, TOK_PPSTR, &cval); - cstr_free(cval.cstr); + tcc_free(cval.str.data_allocated); } else { bad_stringy: expect("macro parameter after '#'"); @@ -2970,7 +2968,9 @@ static int macro_subst_tok( cstr_new(&cstr); cstr_cat(&cstr, cstrval); cstr_ccat(&cstr, '\0'); - cval.cstr = &cstr; + cval.str.size = cstr.size; + cval.str.data = cstr.data; + cval.str.data_allocated = cstr.data_allocated; tok_str_add2(tok_str, t1, &cval); cstr_free(&cstr); } else { @@ -3300,10 +3300,10 @@ ST_FUNC void next(void) /* convert preprocessor tokens into C tokens */ if (tok == TOK_PPNUM) { if (parse_flags & PARSE_FLAG_TOK_NUM) - parse_number((char *)tokc.cstr->data); + parse_number((char *)tokc.str.data); } else if (tok == TOK_PPSTR) { if (parse_flags & PARSE_FLAG_TOK_STR) - parse_string((char *)tokc.cstr->data, tokc.cstr->size - 1); + parse_string((char *)tokc.str.data, tokc.str.size - 1); } }