From 766ba3694dae5b3d2dfbb5c75c4ebf7069a7ce1f Mon Sep 17 00:00:00 2001
From: grischka <grischka>
Date: Sat, 1 Oct 2016 20:26:50 +0200
Subject: [PATCH] tccpp: cleanup

- "utf8 in identifiers"
  from 936819a1b90f2618bb3f86730189cf2895948ba0

- CString, CValue.str: remove member data_allocated
- make tiny allocator private to tccpp

- allocate macro_stack objects on the heap,
  because otherwise preprocess_delete():end_macro() could crash
  after an error (setjmp/longjmp) left stack-allocated
  TokenString objects linked on macro_stack

- move "TinyAlloc" defs to tccpp.c

- define_push: take int* str again
---
 libtcc.c |   2 -
 tcc.h    | 141 ++++++++++++++++-----------------------------
 tccasm.c |  14 ++---
 tccgen.c |  26 ++++-----
 tccpp.c  | 172 +++++++++++++++++++++++++++++++++----------------------
 5 files changed, 170 insertions(+), 185 deletions(-)

diff --git a/libtcc.c b/libtcc.c
index cb0d6807..3a349549 100644
--- a/libtcc.c
+++ b/libtcc.c
@@ -2199,8 +2199,6 @@ PUB_FUNC int tcc_parse_args(TCCState *s, int argc, char **argv)
                 s->dflag = 3;
             else if (*optarg == 'M')
                 s->dflag = 7;
-            else if (*optarg == 'b')
-                s->dflag = 8;
             else
                 goto unsupported_option;
             break;
diff --git a/tcc.h b/tcc.h
index 95678480..5f504bde 100644
--- a/tcc.h
+++ b/tcc.h
@@ -36,13 +36,6 @@
 #include <time.h>
 #include <sys/stat.h>          /* stat() */
 
-#ifdef CONFIG_TCCASSERT
-#include <assert.h>
-#define TCC_ASSERT(ex) assert(ex)
-#else
-#define TCC_ASSERT(ex)
-#endif
-
 #ifndef _WIN32
 # include <unistd.h>
 # include <sys/time.h>
@@ -138,51 +131,6 @@
 #include "stab.h"
 #include "libtcc.h"
 
-static inline uint16_t read16le(unsigned char *p)
-{
-    return p[0] | (uint16_t)p[1] << 8;
-}
-
-static inline void write16le(unsigned char *p, uint16_t x)
-{
-    p[0] = x & 255;
-    p[1] = x >> 8 & 255;
-}
-
-static inline uint32_t read32le(unsigned char *p)
-{
-  return (p[0] | (uint32_t)p[1] << 8 |
-          (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
-}
-
-static inline void write32le(unsigned char *p, uint32_t x)
-{
-    p[0] = x & 255;
-    p[1] = x >> 8 & 255;
-    p[2] = x >> 16 & 255;
-    p[3] = x >> 24 & 255;
-}
-
-static inline uint64_t read64le(unsigned char *p)
-{
-  return (p[0] | (uint64_t)p[1] << 8 |
-          (uint64_t)p[2] << 16 | (uint64_t)p[3] << 24 |
-          (uint64_t)p[4] << 32 | (uint64_t)p[5] << 40 |
-          (uint64_t)p[6] << 48 | (uint64_t)p[7] << 56);
-}
-
-static inline void write64le(unsigned char *p, uint64_t x)
-{
-    p[0] = x & 255;
-    p[1] = x >> 8 & 255;
-    p[2] = x >> 16 & 255;
-    p[3] = x >> 24 & 255;
-    p[4] = x >> 32 & 255;
-    p[5] = x >> 40 & 255;
-    p[6] = x >> 48 & 255;
-    p[7] = x >> 56 & 255;
-}
-
 /* parser debug */
 /* #define PARSE_DEBUG */
 /* preprocessor debug */
@@ -399,7 +347,6 @@ typedef struct CString {
     int size; /* size in bytes */
     void *data; /* either 'char *' or 'nwchar_t *' */
     int size_allocated;
-    void *data_allocated; /* if non NULL, data has been malloced */
 } CString;
 
 /* type definition */
@@ -417,7 +364,6 @@ typedef union CValue {
     struct {
         int size;
         const void *data;
-        void *data_allocated;
     } str;
     int tab[LDOUBLE_SIZE/4];
 } CValue;
@@ -638,43 +584,6 @@ struct sym_attr {
 #endif
 };
 
-#if !defined(MEM_DEBUG)
-#define tal_free(al, p) tal_free_impl(al, p)
-#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size)
-#define TAL_DEBUG_PARAMS
-#else
-#define TAL_DEBUG 1
-#define tal_free(al, p) tal_free_impl(al, p, __FILE__, __LINE__)
-#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size, __FILE__, __LINE__)
-#define TAL_DEBUG_PARAMS , const char *file, int line
-#define TAL_DEBUG_FILE_LEN 15
-#endif
-//#define TAL_INFO 1 /* collect and dump allocators stats */
-
-typedef struct TinyAlloc {
-    size_t  limit;
-    size_t  size;
-    uint8_t *buffer;
-    uint8_t *p;
-    size_t  nb_allocs;
-    struct TinyAlloc *next, *top;
-#ifdef TAL_INFO
-    size_t  nb_peak;
-    size_t  nb_total;
-    size_t  nb_missed;
-    uint8_t *peak_p;
-#endif
-} TinyAlloc;
-
-typedef struct tal_header_t {
-    size_t  size;
-#ifdef TAL_DEBUG
-    int     line_num; /* negative line_num used for double free check */
-    char    file_name[TAL_DEBUG_FILE_LEN + 1];
-#endif
-} tal_header_t;
-
-
 struct TCCState {
 
     int verbose; /* if true, display some information during compilation */
@@ -1204,7 +1113,7 @@ PUB_FUNC void tcc_warning(const char *fmt, ...);
 /* other utilities */
 ST_FUNC void dynarray_add(void ***ptab, int *nb_ptr, void *data);
 ST_FUNC void dynarray_reset(void *pp, int *n);
-ST_FUNC void cstr_ccat(CString *cstr, int ch);
+ST_INLN void cstr_ccat(CString *cstr, int ch);
 ST_FUNC void cstr_cat(CString *cstr, const char *str, int len);
 ST_FUNC void cstr_wccat(CString *cstr, int ch);
 ST_FUNC void cstr_new(CString *cstr);
@@ -1287,10 +1196,11 @@ ST_FUNC void end_macro(void);
 ST_FUNC void save_parse_state(ParseState *s);
 ST_FUNC void restore_parse_state(ParseState *s);
 ST_INLN void tok_str_new(TokenString *s);
+ST_FUNC TokenString *tok_str_alloc(void);
 ST_FUNC void tok_str_free(int *str);
 ST_FUNC void tok_str_add(TokenString *s, int t);
 ST_FUNC void tok_str_add_tok(TokenString *s);
-ST_INLN void define_push(int v, int macro_type, TokenString *str, Sym *first_arg);
+ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg);
 ST_FUNC void define_undef(Sym *s);
 ST_INLN Sym *define_find(int v);
 ST_FUNC void free_defines(Sym *b);
@@ -1489,6 +1399,51 @@ ST_FUNC void gen_vla_sp_save(int addr);
 ST_FUNC void gen_vla_sp_restore(int addr);
 ST_FUNC void gen_vla_alloc(CType *type, int align);
 
+static inline uint16_t read16le(unsigned char *p)
+{
+    return p[0] | (uint16_t)p[1] << 8;
+}
+
+static inline void write16le(unsigned char *p, uint16_t x)
+{
+    p[0] = x & 255;
+    p[1] = x >> 8 & 255;
+}
+
+static inline uint32_t read32le(unsigned char *p)
+{
+  return (p[0] | (uint32_t)p[1] << 8 |
+	  (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
+}
+
+static inline void write32le(unsigned char *p, uint32_t x)
+{
+    p[0] = x & 255;
+    p[1] = x >> 8 & 255;
+    p[2] = x >> 16 & 255;
+    p[3] = x >> 24 & 255;
+}
+
+static inline uint64_t read64le(unsigned char *p)
+{
+  return (p[0] | (uint64_t)p[1] << 8 |
+	  (uint64_t)p[2] << 16 | (uint64_t)p[3] << 24 |
+	  (uint64_t)p[4] << 32 | (uint64_t)p[5] << 40 |
+	  (uint64_t)p[6] << 48 | (uint64_t)p[7] << 56);
+}
+
+static inline void write64le(unsigned char *p, uint64_t x)
+{
+    p[0] = x & 255;
+    p[1] = x >> 8 & 255;
+    p[2] = x >> 16 & 255;
+    p[3] = x >> 24 & 255;
+    p[4] = x >> 32 & 255;
+    p[5] = x >> 40 & 255;
+    p[6] = x >> 48 & 255;
+    p[7] = x >> 56 & 255;
+}
+
 /* ------------ i386-gen.c ------------ */
 #if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64
 ST_FUNC void g(int c);
diff --git a/tccasm.c b/tccasm.c
index 9dcab1ce..890956b5 100644
--- a/tccasm.c
+++ b/tccasm.c
@@ -488,25 +488,25 @@ static void asm_parse_directive(TCCState *s1)
     case TOK_ASMDIR_rept:
         {
             int repeat;
-            TokenString init_str;
+            TokenString *init_str;
             ParseState saved_parse_state = {0};
             next();
             repeat = asm_int_expr(s1);
-            tok_str_new(&init_str);
+            init_str = tok_str_alloc();
             next();
             while ((tok != TOK_ASMDIR_endr) && (tok != CH_EOF)) {
-                tok_str_add_tok(&init_str);
+                tok_str_add_tok(init_str);
                 next();
             }
             if (tok == CH_EOF) tcc_error("we at end of file, .endr not found");
             next();
-            tok_str_add(&init_str, -1);
-            tok_str_add(&init_str, 0);
+            tok_str_add(init_str, -1);
+            tok_str_add(init_str, 0);
             save_parse_state(&saved_parse_state);
-            begin_macro(&init_str, 0);
+            begin_macro(init_str, 1);
             while (repeat-- > 0) {
                 tcc_assemble_internal(s1, (parse_flags & PARSE_FLAG_PREPROCESS));
-                macro_ptr = init_str.str;
+                macro_ptr = init_str->str;
             }
             end_macro();
             restore_parse_state(&saved_parse_state);
diff --git a/tccgen.c b/tccgen.c
index 09ddb77d..e3f2a691 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -5857,7 +5857,7 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
     int size, align, addr, data_offset;
     int level;
     ParseState saved_parse_state = {0};
-    TokenString init_str;
+    TokenString *init_str = NULL;
     Section *sec;
     Sym *flexible_array;
 
@@ -5879,15 +5879,15 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
        (e.g. string pointers or ISOC99 compound
        literals). It also simplifies local
        initializers handling */
-    tok_str_new(&init_str);
     if (size < 0 || (flexible_array && has_init)) {
         if (!has_init) 
             tcc_error("unknown type size");
         /* get all init string */
+        init_str = tok_str_alloc();
         if (has_init == 2) {
             /* only get strings */
             while (tok == TOK_STR || tok == TOK_LSTR) {
-                tok_str_add_tok(&init_str);
+                tok_str_add_tok(init_str);
                 next();
             }
         } else {
@@ -5895,7 +5895,7 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
             while (level > 0 || (tok != ',' && tok != ';')) {
                 if (tok < 0)
                     tcc_error("unexpected end of file in initializer");
-                tok_str_add_tok(&init_str);
+                tok_str_add_tok(init_str);
                 if (tok == '{')
                     level++;
                 else if (tok == '}') {
@@ -5908,17 +5908,17 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
                 next();
             }
         }
-        tok_str_add(&init_str, -1);
-        tok_str_add(&init_str, 0);
+        tok_str_add(init_str, -1);
+        tok_str_add(init_str, 0);
         
         /* compute size */
         save_parse_state(&saved_parse_state);
 
-        begin_macro(&init_str, 0);
+        begin_macro(init_str, 1);
         next();
         decl_initializer(type, NULL, 0, 1, 1);
         /* prepare second initializer parsing */
-        macro_ptr = init_str.str;
+        macro_ptr = init_str->str;
         next();
         
         /* if still unknown size, error */
@@ -6076,17 +6076,17 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
     }
     if (has_init || (type->t & VT_VLA)) {
         decl_initializer(type, sec, addr, 1, 0);
-        /* restore parse state if needed */
-        if (init_str.str) {
-            end_macro();
-            restore_parse_state(&saved_parse_state);
-        }
         /* patch flexible array member size back to -1, */
         /* for possible subsequent similar declarations */
         if (flexible_array)
             flexible_array->type.ref->c = -1;
     }
  no_alloc: ;
+    /* restore parse state if needed */
+    if (init_str) {
+        end_macro();
+        restore_parse_state(&saved_parse_state);
+    }
 }
 
 static void put_func_debug(Sym *sym)
diff --git a/tccpp.c b/tccpp.c
index 11d8dab0..c2379034 100644
--- a/tccpp.c
+++ b/tccpp.c
@@ -38,10 +38,6 @@ ST_DATA int total_bytes;
 ST_DATA int tok_ident;
 ST_DATA TokenSym **table_ident;
 
-ST_DATA TinyAlloc *toksym_alloc;
-ST_DATA TinyAlloc *tokstr_alloc;
-ST_DATA TinyAlloc *cstr_alloc;
-
 /* ------------------------------------------------------------------------- */
 
 static TokenSym *hash_ident[TOK_HASH_SIZE];
@@ -53,6 +49,10 @@ static int pp_debug_tok, pp_debug_symv;
 static int pp_once;
 static void tok_print(const char *msg, const int *str);
 
+static struct TinyAlloc *toksym_alloc;
+static struct TinyAlloc *tokstr_alloc;
+static struct TinyAlloc *cstr_alloc;
+
 /* isidnum_table flags: */
 #define IS_SPC 1
 #define IS_ID  2
@@ -110,32 +110,55 @@ ST_FUNC void expect(const char *msg)
     tcc_error("%s expected", msg);
 }
 
-ST_FUNC void begin_macro(TokenString *str, int alloc)
-{
-    str->alloc = alloc;
-    str->prev = macro_stack;
-    str->prev_ptr = macro_ptr;
-    macro_ptr = str->str;
-    macro_stack = str;
-}
-
-ST_FUNC void end_macro(void)
-{
-    TokenString *str = macro_stack;
-    macro_stack = str->prev;
-    macro_ptr = str->prev_ptr;
-    if (str->alloc == 2) {
-        str->alloc = 3; /* just mark as finished */
-    } else {
-        tok_str_free(str->str);
-        if (str->alloc == 1)
-            tcc_free(str);
-    }
-}
-
 /* ------------------------------------------------------------------------- */
 /* Custom allocator for tiny objects */
 
+#define USE_TAL
+
+#ifndef USE_TAL
+#define tal_free(al, p) tcc_free(p)
+#define tal_realloc(al, p, size) tcc_realloc(p, size)
+#define tal_new(a,b,c)
+#define tal_delete(a)
+#else
+#if !defined(MEM_DEBUG)
+#define tal_free(al, p) tal_free_impl(al, p)
+#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size)
+#define TAL_DEBUG_PARAMS
+#else
+#define TAL_DEBUG 1
+#define tal_free(al, p) tal_free_impl(al, p, __FILE__, __LINE__)
+#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size, __FILE__, __LINE__)
+#define TAL_DEBUG_PARAMS , const char *file, int line
+#define TAL_DEBUG_FILE_LEN 15
+#endif
+//#define TAL_INFO 1 /* collect and dump allocators stats */
+
+typedef struct TinyAlloc {
+    size_t  limit;
+    size_t  size;
+    uint8_t *buffer;
+    uint8_t *p;
+    size_t  nb_allocs;
+    struct TinyAlloc *next, *top;
+#ifdef TAL_INFO
+    size_t  nb_peak;
+    size_t  nb_total;
+    size_t  nb_missed;
+    uint8_t *peak_p;
+#endif
+} TinyAlloc;
+
+typedef struct tal_header_t {
+    size_t  size;
+#ifdef TAL_DEBUG
+    int     line_num; /* negative line_num used for double free check */
+    char    file_name[TAL_DEBUG_FILE_LEN + 1];
+#endif
+} tal_header_t;
+
+/* ------------------------------------------------------------------------- */
+
 ST_FUNC TinyAlloc *tal_new(TinyAlloc **pal, size_t limit, size_t size)
 {
     TinyAlloc *al = tcc_mallocz(sizeof(TinyAlloc));
@@ -160,9 +183,10 @@ tail_call:
 #endif
 #ifdef TAL_DEBUG
     if (al->nb_allocs > 0) {
+        uint8_t *p;
         fprintf(stderr, "TAL_DEBUG: mem leak %d chunks (limit= %d)\n",
                 al->nb_allocs, al->limit);
-        uint8_t *p = al->buffer;
+        p = al->buffer;
         while (p < al->p) {
             tal_header_t *header = (tal_header_t *)p;
             if (header->line_num > 0) {
@@ -222,10 +246,10 @@ tail_call:
             header = (tal_header_t *)al->p;
             header->size = adj_size;
 #ifdef TAL_DEBUG
-            int ofs = strlen(file) - TAL_DEBUG_FILE_LEN;
+            { int ofs = strlen(file) - TAL_DEBUG_FILE_LEN;
             strncpy(header->file_name, file + (ofs > 0 ? ofs : 0), TAL_DEBUG_FILE_LEN);
             header->file_name[TAL_DEBUG_FILE_LEN] = 0;
-            header->line_num = line;
+            header->line_num = line; }
 #endif
             ret = al->p + sizeof(tal_header_t);
             al->p += adj_size + sizeof(tal_header_t);
@@ -286,26 +310,25 @@ tail_call:
     return ret;
 }
 
+#endif /* USE_TAL */
+
 /* ------------------------------------------------------------------------- */
 /* CString handling */
 static void cstr_realloc(CString *cstr, int new_size)
 {
     int size;
-    void *data;
 
     size = cstr->size_allocated;
     if (size < 8)
         size = 8; /* no need to allocate a too small first string */
     while (size < new_size)
         size = size * 2;
-    data = tal_realloc(cstr_alloc, cstr->data_allocated, size);
-    cstr->data_allocated = data;
+    cstr->data = tal_realloc(cstr_alloc, cstr->data, size);
     cstr->size_allocated = size;
-    cstr->data = data;
 }
 
 /* add a byte */
-ST_FUNC void cstr_ccat(CString *cstr, int ch)
+ST_INLN void cstr_ccat(CString *cstr, int ch)
 {
     int size;
     size = cstr->size + 1;
@@ -346,7 +369,7 @@ ST_FUNC void cstr_new(CString *cstr)
 /* free string and reset it to NULL */
 ST_FUNC void cstr_free(CString *cstr)
 {
-    tal_free(cstr_alloc, cstr->data_allocated);
+    tal_free(cstr_alloc, cstr->data);
     cstr_new(cstr);
 }
 
@@ -1026,6 +1049,13 @@ ST_INLN void tok_str_new(TokenString *s)
     s->last_line_num = -1;
 }
 
+ST_FUNC TokenString *tok_str_alloc(void)
+{
+    TokenString *str = tal_realloc(tokstr_alloc, 0, sizeof *str);
+    tok_str_new(str);
+    return str;
+}
+
 ST_FUNC int *tok_str_dup(TokenString *s)
 {
     int *str;
@@ -1049,7 +1079,6 @@ ST_FUNC int *tok_str_realloc(TokenString *s, int new_size)
         size = 16;
     while (size < new_size)
         size = size * 2;
-    TCC_ASSERT((size & (size -1)) == 0);
     if (size > s->allocated_len) {
         str = tal_realloc(tokstr_alloc, s->str, size * sizeof(int));
         s->allocated_len = size;
@@ -1070,6 +1099,29 @@ ST_FUNC void tok_str_add(TokenString *s, int t)
     s->len = len;
 }
 
+ST_FUNC void begin_macro(TokenString *str, int alloc)
+{
+    str->alloc = alloc;
+    str->prev = macro_stack;
+    str->prev_ptr = macro_ptr;
+    macro_ptr = str->str;
+    macro_stack = str;
+}
+
+ST_FUNC void end_macro(void)
+{
+    TokenString *str = macro_stack;
+    macro_stack = str->prev;
+    macro_ptr = str->prev_ptr;
+    if (str->alloc == 2) {
+        str->alloc = 3; /* just mark as finished */
+    } else {
+        tok_str_free(str->str);
+        if (str->alloc == 1)
+            tal_free(tokstr_alloc, str);
+    }
+}
+
 static void tok_str_add2(TokenString *s, int t, CValue *cv)
 {
     int len, *str;
@@ -1172,7 +1224,6 @@ static inline void TOK_GET(int *t, const int **pp, CValue *cv)
     case TOK_PPSTR:
         cv->str.size = *p++;
         cv->str.data = p;
-        cv->str.data_allocated = 0;
         p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
         break;
     case TOK_CDOUBLE:
@@ -1235,13 +1286,13 @@ static int macro_is_equal(const int *a, const int *b)
 }
 
 /* defines handling */
-ST_INLN void define_push(int v, int macro_type, TokenString *str, Sym *first_arg)
+ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
 {
     Sym *s, *o;
 
     o = define_find(v);
     s = sym_push2(&define_stack, v, macro_type, 0);
-    s->d = str ? tok_str_dup(str) : NULL;
+    s->d = str;
     s->next = first_arg;
     table_ident[v - TOK_IDENT]->sym_define = s;
 
@@ -1344,9 +1395,9 @@ ST_FUNC void label_pop(Sym **ptop, Sym *slast)
 static int expr_preprocess(void)
 {
     int c, t;
-    TokenString str;
+    TokenString *str;
     
-    tok_str_new(&str);
+    str = tok_str_alloc();
     while (tok != TOK_LINEFEED && tok != TOK_EOF) {
         next(); /* do macro subst */
         if (tok == TOK_DEFINED) {
@@ -1364,12 +1415,12 @@ static int expr_preprocess(void)
             tok = TOK_CINT;
             tokc.i = 0;
         }
-        tok_str_add_tok(&str);
+        tok_str_add_tok(str);
     }
-    tok_str_add(&str, -1); /* simulate end of file */
-    tok_str_add(&str, 0);
+    tok_str_add(str, -1); /* simulate end of file */
+    tok_str_add(str, 0);
     /* now evaluate C constant expression */
-    begin_macro(&str, 0);
+    begin_macro(str, 1);
     next();
     c = expr_const();
     end_macro();
@@ -1457,7 +1508,7 @@ ST_FUNC void parse_define(void)
     if (3 == spc)
 bad_twosharp:
         tcc_error("'##' cannot appear at either end of macro");
-    define_push(v, t, &tokstr_buf, first);
+    define_push(v, t, tok_str_dup(&tokstr_buf), first);
 }
 
 static CachedInclude *search_cached_include(TCCState *s1, const char *filename, int add)
@@ -2061,7 +2112,6 @@ static void parse_string(const char *s, int len)
     } else {
         tokc.str.size = tokcstr.size;
         tokc.str.data = tokcstr.data;
-        tokc.str.data_allocated = tokcstr.data_allocated;
         if (!is_long)
             tok = TOK_STR;
         else
@@ -2403,11 +2453,6 @@ static inline void next_nomacro1(void)
     p = file->buf_ptr;
  redo_no_start:
     c = *p;
-#if (__TINYC__ || __GNUC__)
-#else
-    if (c & 0x80)
-        goto parse_ident_fast;
-#endif
     switch(c) {
     case ' ':
     case '\t':
@@ -2513,13 +2558,6 @@ maybe_newline:
          || (parse_flags & PARSE_FLAG_ASM_FILE))
             goto parse_simple;
 
-#if (__TINYC__ || __GNUC__)
-    case 'a' ... 'z':
-    case 'A' ... 'K':
-    case 'M' ... 'Z':
-    case '_':
-    case 0x80 ... 0xFF:
-#else
     case 'a': case 'b': case 'c': case 'd':
     case 'e': case 'f': case 'g': case 'h':
     case 'i': case 'j': case 'k': case 'l':
@@ -2535,7 +2573,6 @@ maybe_newline:
     case 'U': case 'V': case 'W': case 'X':
     case 'Y': case 'Z': 
     case '_':
-#endif
     parse_ident_fast:
         p1 = p;
         h = TOK_HASH_INIT;
@@ -2617,7 +2654,6 @@ maybe_newline:
         cstr_ccat(&tokcstr, '\0');
         tokc.str.size = tokcstr.size;
         tokc.str.data = tokcstr.data;
-        tokc.str.data_allocated = tokcstr.data_allocated;
         tok = TOK_PPNUM;
         break;
 
@@ -2658,7 +2694,6 @@ maybe_newline:
         cstr_ccat(&tokcstr, '\0');
         tokc.str.size = tokcstr.size;
         tokc.str.data = tokcstr.data;
-        tokc.str.data_allocated = tokcstr.data_allocated;
         tok = TOK_PPSTR;
         break;
 
@@ -2796,6 +2831,8 @@ maybe_newline:
         p++;
         break;
     default:
+        if (c >= 0x80 && c <= 0xFF) /* utf8 identifiers */
+	    goto parse_ident_fast;
         if (parse_flags & PARSE_FLAG_ASM_FILE)
             goto parse_simple;
         tcc_error("unrecognized character \\x%02x", c);
@@ -2895,7 +2932,6 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
                 /* add string */
                 cval.str.size = cstr.size;
                 cval.str.data = cstr.data;
-                cval.str.data_allocated = cstr.data_allocated;
                 tok_str_add2(&str, TOK_PPSTR, &cval);
                 cstr_free(&cstr);
             } else {
@@ -3062,7 +3098,6 @@ static int macro_subst_tok(
         cstr_cat(&cstr, cstrval, 0);
         cval.str.size = cstr.size;
         cval.str.data = cstr.data;
-        cval.str.data_allocated = cstr.data_allocated;
         tok_str_add2(tok_str, t1, &cval);
         cstr_free(&cstr);
     } else {
@@ -3381,7 +3416,6 @@ ST_FUNC void next(void)
         if (s) {
             Sym *nested_list = NULL;
             tokstr_buf.len = 0;
-            nested_list = NULL;
             macro_subst_tok(&tokstr_buf, &nested_list, s, 1);
             tok_str_add(&tokstr_buf, 0);
             begin_macro(&tokstr_buf, 2);
@@ -3402,8 +3436,8 @@ ST_FUNC void next(void)
    identifier case handled for labels. */
 ST_INLN void unget_tok(int last_tok)
 {
-    TokenString *str = tcc_malloc(sizeof *str);
-    tok_str_new(str);
+
+    TokenString *str = tok_str_alloc();
     tok_str_add2(str, tok, &tokc);
     tok_str_add(str, 0);
     begin_macro(str, 1);
@@ -3529,10 +3563,8 @@ static void tok_print(const char *msg, const int *str)
 static void pp_line(TCCState *s1, BufferedFile *f, int level)
 {
     int d = f->line_num - f->line_ref;
-
     if (s1->dflag & 4)
 	return;
-
     if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_NONE) {
 	if (level == 0 && f->line_ref && d) {
 	    d = 1;