diff --git a/libtcc.c b/libtcc.c
index e1bfe028..355f6ecd 100644
--- a/libtcc.c
+++ b/libtcc.c
@@ -64,7 +64,9 @@
 /* XXX: get rid of this ASAP (or maybe not) */
 ST_DATA struct TCCState *tcc_state;
 
+#ifdef MEM_DEBUG
 static int nb_states;
+#endif
 
 /********************************************************/
 #ifdef _WIN32
@@ -245,10 +247,6 @@ PUB_FUNC char *tcc_strdup(const char *str)
     return ptr;
 }
 
-PUB_FUNC void tcc_memcheck(void)
-{
-}
-
 #else
 
 #define MEM_DEBUG_MAGIC1 0xFEEDDEB1
@@ -631,6 +629,7 @@ ST_FUNC void tcc_open_bf(TCCState *s1, const char *filename, int initlen)
 
 ST_FUNC void tcc_close(void)
 {
+    TCCState *s1 = tcc_state;
     BufferedFile *bf = file;
     if (bf->fd > 0) {
         close(bf->fd);
@@ -668,15 +667,10 @@ ST_FUNC int tcc_open(TCCState *s1, const char *filename)
 /* compile the file opened in 'file'. Return non zero if errors. */
 static int tcc_compile(TCCState *s1, int filetype, const char *str, int fd)
 {
-    tccelf_begin_file(s1);
-
     /* Here we enter the code section where we use the global variables for
        parsing and code generation (tccpp.c, tccgen.c, <target>-gen.c).
        Other threads need to wait until we're done.
 
-       Alternatively we could of course pass TCCState *s1 everwhere
-       except that it would look extremly ugly.
-
        Alternatively we could use thread local storage for those global
        variables, which may or may not have advantages */
 
@@ -698,7 +692,9 @@ static int tcc_compile(TCCState *s1, int filetype, const char *str, int fd)
         }
 
         is_asm = !!(filetype & (AFF_TYPE_ASM|AFF_TYPE_ASMPP));
+        tccelf_begin_file(s1);
         preprocess_start(s1, is_asm);
+        tccgen_init(s1);
         if (s1->output_type == TCC_OUTPUT_PREPROCESS) {
             tcc_preprocess(s1);
         } else if (is_asm) {
@@ -712,13 +708,12 @@ static int tcc_compile(TCCState *s1, int filetype, const char *str, int fd)
         }
     }
     s1->error_set_jmp_enabled = 0;
-    tccgen_finish(s1, 1);
+    tccgen_finish(s1);
     preprocess_end(s1);
-    tccgen_finish(s1, 2);
+    tccelf_end_file(s1);
 
     tcc_state = NULL;
     POST_SEM();
-    tccelf_end_file(s1);
     return s1->nb_errors != 0 ? -1 : 0;
 }
 
@@ -749,9 +744,9 @@ LIBTCCAPI TCCState *tcc_new(void)
     s = tcc_mallocz(sizeof(TCCState));
     if (!s)
         return NULL;
-    WAIT_SEM();
+#ifdef MEM_DEBUG
     ++nb_states;
-    POST_SEM();
+#endif
 
 #undef gnu_ext
 
@@ -960,10 +955,10 @@ LIBTCCAPI void tcc_delete(TCCState *s1)
 #endif
 
     tcc_free(s1);
-    WAIT_SEM();
+#ifdef MEM_DEBUG
     if (0 == --nb_states)
         tcc_memcheck();
-    POST_SEM();
+#endif
 }
 
 LIBTCCAPI int tcc_set_output_type(TCCState *s, int output_type)
@@ -1824,7 +1819,7 @@ reparse:
             else if (*optarg == 't')
                 s->dflag = 16;
             else if (isnum(*optarg))
-                g_debug = atoi(optarg);
+                s->g_debug |= atoi(optarg);
             else
                 goto unsupported_option;
             break;
@@ -1925,7 +1920,7 @@ reparse:
             tcc_add_sysinclude_path(s, optarg);
             break;
         case TCC_OPTION_include:
-            cstr_printf(&s->cmdline_defs, "#include \"%s\"\n", optarg);
+            cstr_printf(&s->cmdline_incl, "#include \"%s\"\n", optarg);
             break;
         case TCC_OPTION_nostdinc:
             s->nostdinc = 1;
@@ -2077,7 +2072,7 @@ LIBTCCAPI void tcc_set_options(TCCState *s, const char *r)
     dynarray_reset(&argv, &argc);
 }
 
-PUB_FUNC void tcc_print_stats(TCCState *s, unsigned total_time)
+PUB_FUNC void tcc_print_stats(TCCState *s1, unsigned total_time)
 {
     if (total_time < 1)
         total_time = 1;
@@ -2085,7 +2080,7 @@ PUB_FUNC void tcc_print_stats(TCCState *s, unsigned total_time)
         total_bytes = 1;
     fprintf(stderr, "* %d idents, %d lines, %d bytes\n"
                     "* %0.3f s, %u lines/s, %0.1f MB/s\n",
-           tok_ident - TOK_IDENT, total_lines, total_bytes,
+           total_idents, total_lines, total_bytes,
            (double)total_time/1000,
            (unsigned)total_lines*1000/total_time,
            (double)total_bytes/1000/total_time);
diff --git a/tcc.h b/tcc.h
index 30f7eb94..0271f236 100644
--- a/tcc.h
+++ b/tcc.h
@@ -128,7 +128,7 @@ extern long double strtold (const char *__nptr, char **__endptr);
 /* #define PP_DEBUG */
 /* include file debug */
 /* #define INC_DEBUG */
-/* memory leak debug */
+/* memory leak debug (only for single-threaded usage) */
 /* #define MEM_DEBUG */
 /* assembler debug */
 /* #define ASM_DEBUG */
@@ -889,6 +889,14 @@ struct TCCState {
 
     int fd, cc; /* used by tcc_load_ldscript */
 
+    /* benchmark info */
+    int total_idents;
+    int total_lines;
+    int total_bytes;
+
+    /* option -dnum (for general development purposes) */
+    int g_debug;
+
     /* used by main and tcc_parse_args only */
     struct filespec **files; /* files seen on command line */
     int nb_files; /* number thereof */
@@ -1192,7 +1200,6 @@ PUB_FUNC char *tcc_strdup_debug(const char *str, const char *file, int line);
 #define realloc(p, s) use_tcc_realloc(p, s)
 #undef strdup
 #define strdup(s) use_tcc_strdup(s)
-PUB_FUNC void tcc_memcheck(void);
 PUB_FUNC void _tcc_error_noabort(const char *fmt, ...);
 PUB_FUNC NORETURN void _tcc_error(const char *fmt, ...);
 PUB_FUNC void _tcc_warning(const char *fmt, ...);
@@ -1277,8 +1284,6 @@ ST_DATA int tok_flags;
 ST_DATA CString tokcstr; /* current parsed string, if any */
 
 /* display benchmark infos */
-ST_DATA int total_lines;
-ST_DATA int total_bytes;
 ST_DATA int tok_ident;
 ST_DATA TokenSym **table_ident;
 
@@ -1360,8 +1365,7 @@ ST_DATA Sym *local_label_stack;
 ST_DATA Sym *global_label_stack;
 ST_DATA Sym *define_stack;
 ST_DATA CType char_pointer_type, func_old_type, int_type, size_type;
-ST_DATA SValue __vstack[1+/*to make bcheck happy*/ VSTACK_SIZE], *vtop, *pvtop;
-#define vstack  (__vstack + 1)
+ST_DATA SValue *vtop;
 ST_DATA int rsym, anon_sym, ind, loc;
 
 ST_DATA int const_wanted; /* true if constant wanted */
@@ -1371,7 +1375,6 @@ ST_DATA CType func_vt; /* current function return type (used by return instructi
 ST_DATA int func_var; /* true if current function is variadic */
 ST_DATA int func_vc;
 ST_DATA const char *funcname;
-ST_DATA int g_debug;
 
 ST_FUNC void tcc_debug_start(TCCState *s1);
 ST_FUNC void tcc_debug_end(TCCState *s1);
@@ -1382,8 +1385,9 @@ ST_FUNC void tcc_debug_funcstart(TCCState *s1, Sym *sym);
 ST_FUNC void tcc_debug_funcend(TCCState *s1, int size);
 ST_FUNC void tcc_debug_line(TCCState *s1);
 
+ST_FUNC void tccgen_init(TCCState *s1);
 ST_FUNC int tccgen_compile(TCCState *s1);
-ST_FUNC void tccgen_finish(TCCState *s1, int f);
+ST_FUNC void tccgen_finish(TCCState *s1);
 ST_FUNC void check_vstack(void);
 
 ST_INLN int is_float(int t);
@@ -1751,6 +1755,10 @@ ST_FUNC void gen_makedeps(TCCState *s, const char *target, const char *filename)
 #define tcc_error           TCC_SET_STATE(_tcc_error)
 #define tcc_warning         TCC_SET_STATE(_tcc_warning)
 
+#define total_idents        TCC_STATE_VAR(total_idents)
+#define total_lines         TCC_STATE_VAR(total_lines)
+#define total_bytes         TCC_STATE_VAR(total_bytes)
+
 PUB_FUNC void tcc_enter_state(TCCState *s1);
 
 /********************************************************/
diff --git a/tccelf.c b/tccelf.c
index 9d7391c0..3d95a003 100644
--- a/tccelf.c
+++ b/tccelf.c
@@ -3318,8 +3318,11 @@ static int ld_add_file_list(TCCState *s1, const char *cmd, int as_needed)
     if (!as_needed)
         s1->new_undef_sym = 0;
     t = ld_next(s1, filename, sizeof(filename));
-    if (t != '(')
-        expect("(");
+    if (t != '(') {
+        tcc_error_noabort("( expected");
+        ret = -1;
+        goto lib_parse_error;
+    }
     t = ld_next(s1, filename, sizeof(filename));
     for(;;) {
         libname[0] = '\0';
@@ -3408,8 +3411,10 @@ ST_FUNC int tcc_load_ldscript(TCCState *s1, int fd)
                    !strcmp(cmd, "TARGET")) {
             /* ignore some commands */
             t = ld_next(s1, cmd, sizeof(cmd));
-            if (t != '(')
-                expect("(");
+            if (t != '(') {
+                tcc_error_noabort("( expected");
+                return -1;
+            }
             for(;;) {
                 t = ld_next(s1, filename, sizeof(filename));
                 if (t == LD_TOK_EOF) {
diff --git a/tccgen.c b/tccgen.c
index 5e3373fc..82e67e19 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -47,7 +47,9 @@ static int in_sizeof;
 static int in_generic;
 static int section_sym;
 
-ST_DATA SValue __vstack[1+VSTACK_SIZE], *vtop, *pvtop;
+ST_DATA SValue *vtop;
+static SValue _vstack[1 + VSTACK_SIZE];
+#define vstack (_vstack + 1)
 
 ST_DATA int const_wanted; /* true if constant wanted */
 ST_DATA int nocode_wanted; /* no code generation wanted */
@@ -77,7 +79,6 @@ ST_DATA int func_var; /* true if current function is variadic (used by return in
 ST_DATA int func_vc;
 static int last_line_num, new_file, func_ind; /* debug info control */
 ST_DATA const char *funcname;
-ST_DATA int g_debug;
 
 ST_DATA CType char_pointer_type, func_old_type, int_type, size_type, ptrdiff_type;
 
@@ -180,8 +181,8 @@ ST_FUNC void test_lvalue(void)
 
 ST_FUNC void check_vstack(void)
 {
-    if (pvtop != vtop)
-        tcc_error("internal compiler error: vstack leak (%d)", vtop - pvtop);
+    if (vtop != vstack - 1) /* vstack - 1 is the empty position set by tccgen_init() */
+        tcc_error("internal compiler error: vstack leak (%d)", (int)(vtop - vstack + 1)); /* %d expects int, not ptrdiff_t */
 }
 
 /* ------------------------------------------------------------------------- */
@@ -325,15 +326,11 @@ ST_FUNC void tcc_debug_eincl(TCCState *s1)
 }
 
 /* ------------------------------------------------------------------------- */
-ST_FUNC int tccgen_compile(TCCState *s1)
+/* initialize vstack and types.  This must also be done for tcc -E */
+ST_FUNC void tccgen_init(TCCState *s1)
 {
-    cur_text_section = NULL;
-    funcname = "";
-    anon_sym = SYM_FIRST_ANOM;
-    section_sym = 0;
-    const_wanted = 0;
-    nocode_wanted = 0x80000000;
-    local_scope = 0;
+    vtop = vstack - 1;
+    memset(vtop, 0, sizeof *vtop);
 
     /* define some often used types */
     int_type.t = VT_INT;
@@ -353,17 +350,25 @@ ST_FUNC int tccgen_compile(TCCState *s1)
     func_old_type.ref = sym_push(SYM_FIELD, &int_type, 0, 0);
     func_old_type.ref->f.func_call = FUNC_CDECL;
     func_old_type.ref->f.func_type = FUNC_OLD;
+}
+
+ST_FUNC int tccgen_compile(TCCState *s1)
+{
+    cur_text_section = NULL;
+    funcname = "";
+    anon_sym = SYM_FIRST_ANOM;
+    section_sym = 0;
+    const_wanted = 0;
+    nocode_wanted = 0x80000000;
+    local_scope = 0;
 
     tcc_debug_start(s1);
-
 #ifdef TCC_TARGET_ARM
     arm_init(s1);
 #endif
-
 #ifdef INC_DEBUG
     printf("%s: **** new file\n", file->filename);
 #endif
-
     parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM | PARSE_FLAG_TOK_STR;
     next();
     decl(VT_CONST);
@@ -374,19 +379,16 @@ ST_FUNC int tccgen_compile(TCCState *s1)
     return 0;
 }
 
-ST_FUNC void tccgen_finish(TCCState *s1, int f)
+ST_FUNC void tccgen_finish(TCCState *s1)
 {
-    if (f == 1) {
-        free_inline_functions(s1);
-        sym_pop(&global_stack, NULL, 0);
-        sym_pop(&local_stack, NULL, 0);
-    }
-
-    if (f == 2) {
-        /* free sym_pools */
-        dynarray_reset(&sym_pools, &nb_sym_pools);
-        sym_free_first = NULL;
-    }
+    free_inline_functions(s1);
+    sym_pop(&global_stack, NULL, 0);
+    sym_pop(&local_stack, NULL, 0);
+    /* free preprocessor macros (previously done in tccpp_delete) */
+    free_defines(NULL);
+    /* free sym_pools; sym_free_first is cleared along with them */
+    dynarray_reset(&sym_pools, &nb_sym_pools);
+    sym_free_first = NULL;
 }
 
 /* ------------------------------------------------------------------------- */
diff --git a/tccpe.c b/tccpe.c
index 01318598..9eb0b42a 100644
--- a/tccpe.c
+++ b/tccpe.c
@@ -636,12 +636,14 @@ static int pe_write(struct pe_info *pe)
 
         switch (si->cls) {
             case sec_text:
-                pe_header.opthdr.BaseOfCode = addr;
+                if (!pe_header.opthdr.BaseOfCode)
+                    pe_header.opthdr.BaseOfCode = addr;
                 break;
 
             case sec_data:
 #ifndef TCC_TARGET_X86_64
-                pe_header.opthdr.BaseOfData = addr;
+                if (!pe_header.opthdr.BaseOfData)
+                    pe_header.opthdr.BaseOfData = addr;
 #endif
                 break;
 
diff --git a/tccpp.c b/tccpp.c
index accdcdc0..f69d4e9a 100644
--- a/tccpp.c
+++ b/tccpp.c
@@ -34,8 +34,6 @@ ST_DATA const int *macro_ptr;
 ST_DATA CString tokcstr; /* current parsed string, if any */
 
 /* display benchmark infos */
-ST_DATA int total_lines;
-ST_DATA int total_bytes;
 ST_DATA int tok_ident;
 ST_DATA TokenSym **table_ident;
 
@@ -604,7 +602,7 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
 
 /* return the current character, handling end of block if necessary
    (but not stray) */
-ST_FUNC int handle_eob(void)
+static int handle_eob(void)
 {
     BufferedFile *bf = file;
     int len;
@@ -637,7 +635,7 @@ ST_FUNC int handle_eob(void)
 }
 
 /* read next char from current input file and handle end of input buffer */
-ST_INLN void inp(void)
+static inline void inp(void)
 {
     ch = *(++(file->buf_ptr));
     /* end of buffer/file handling */
@@ -723,7 +721,7 @@ static int handle_stray1(uint8_t *p)
 /* input with '\[\r]\n' handling. Note that this function cannot
    handle other characters after '\', so you cannot call it inside
    strings or comments */
-ST_FUNC void minp(void)
+static void minp(void)
 {
     inp();
     if (ch == '\\') 
@@ -768,7 +766,7 @@ static uint8_t *parse_line_comment(uint8_t *p)
 }
 
 /* C comments */
-ST_FUNC uint8_t *parse_comment(uint8_t *p)
+static uint8_t *parse_comment(uint8_t *p)
 {
     int c;
 
@@ -3597,8 +3595,6 @@ ST_FUNC void preprocess_start(TCCState *s1, int is_asm)
     pp_counter = 0;
     pp_debug_tok = pp_debug_symv = 0;
     pp_once++;
-    pvtop = vtop = vstack - 1;
-    memset(vtop, 0, sizeof *vtop);
     s1->pack_stack[0] = 0;
     s1->pack_stack_ptr = s1->pack_stack;
 
@@ -3629,10 +3625,8 @@ ST_FUNC void preprocess_end(TCCState *s1)
     while (macro_stack)
         end_macro();
     macro_ptr = NULL;
-
     while (file)
         tcc_close();
-
     tccpp_delete(s1);
 }
 
@@ -3690,11 +3684,12 @@ ST_FUNC void tccpp_delete(TCCState *s)
 {
     int i, n;
 
-    free_defines(NULL);
     dynarray_reset(&s->cached_includes, &s->nb_cached_includes);
 
     /* free tokens */
     n = tok_ident - TOK_IDENT;
+    if (n > total_idents)
+        total_idents = n;
     for(i = 0; i < n; i++)
         tal_free(toksym_alloc, table_ident[i]);
     tcc_free(table_ident);
diff --git a/tccrun.c b/tccrun.c
index e8e56f8b..cca6a251 100644
--- a/tccrun.c
+++ b/tccrun.c
@@ -373,7 +373,7 @@ static addr_t rt_printline(TCCState *s1, addr_t wanted_pc, const char *msg)
     if (symtab_section) {
         esym_start = (ElfW(Sym) *)(symtab_section->data);
         esym_end = (ElfW(Sym) *)(symtab_section->data + symtab_section->data_offset);
-        elf_str = symtab_section->link->data;
+        elf_str = (char *) symtab_section->link->data;
     }
 
     for (sym = stab_sym + 1; sym < stab_sym_end; ++sym) {
diff --git a/tests/libtcc_test_mt.c b/tests/libtcc_test_mt.c
index 03b9f799..6fe4100a 100644
--- a/tests/libtcc_test_mt.c
+++ b/tests/libtcc_test_mt.c
@@ -218,12 +218,13 @@ TF_TYPE(thread_test_complex, vn)
     argv[argc] = NULL;
 
     s = new_state(1);
-    sleep_ms(1);
+    sleep_ms(2);
     ret = tcc_add_file(s, argv[0]);
-    sleep_ms(1);
+    sleep_ms(3);
     if (ret >= 0)
         tcc_run(s, argc, argv);
     tcc_delete(s);
+    fflush(stdout);
     return 0;
 }