diff --git a/libtcc.c b/libtcc.c
index 1b4b3545..f184502b 100644
--- a/libtcc.c
+++ b/libtcc.c
@@ -931,12 +931,8 @@ LIBTCCAPI void tcc_delete(TCCState *s1)
     dynarray_reset(&s1->pragma_libs, &s1->nb_pragma_libs);
 
 #ifdef TCC_IS_NATIVE
-# ifdef HAVE_SELINUX
-    munmap (s1->write_mem, s1->mem_size);
-    munmap (s1->runtime_mem, s1->mem_size);
-# else
-    tcc_free(s1->runtime_mem);
-# endif
+    /* free runtime memory */
+    tcc_run_free(s1);
 #endif
 
     tcc_free(s1->sym_attrs);
diff --git a/tcc.h b/tcc.h
index 96819033..bd4fee41 100644
--- a/tcc.h
+++ b/tcc.h
@@ -735,12 +735,8 @@ struct TCCState {
 
 #ifdef TCC_IS_NATIVE
     const char *runtime_main;
-    /* for tcc_relocate */
-    void *runtime_mem;
-# ifdef HAVE_SELINUX
-    void *write_mem;
-    unsigned long mem_size;
-# endif
+    void **runtime_mem;
+    int nb_runtime_mem;
 #endif
 
     /* used by main and tcc_parse_args only */
@@ -1532,13 +1528,13 @@ ST_FUNC void dlclose(void *p);
 ST_FUNC const char *dlerror(void);
 ST_FUNC void *dlsym(int flag, const char *symbol);
 #endif
-
 #ifdef CONFIG_TCC_BACKTRACE
 ST_DATA int rt_num_callers;
 ST_DATA const char **rt_bound_error_msg;
 ST_DATA void *rt_prog_main;
 ST_FUNC void tcc_set_num_callers(int n);
 #endif
+ST_FUNC void tcc_run_free(TCCState *s1);
 #endif
 
 /********************************************************/
diff --git a/tccelf.c b/tccelf.c
index a8ae48a3..9ed2484e 100644
--- a/tccelf.c
+++ b/tccelf.c
@@ -1735,6 +1735,7 @@ static void put_dt(Section *dynamic, int dt, addr_t val)
     dyn->d_un.d_val = val;
 }
 
+#ifndef TCC_TARGET_PE
 static void add_init_array_defines(TCCState *s1, const char *section_name)
 {
     Section *s;
@@ -1762,6 +1763,7 @@ static void add_init_array_defines(TCCState *s1, const char *section_name)
                 ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), 0,
                 s->sh_num, sym_end);
 }
+#endif
 
 static int tcc_add_support(TCCState *s1, const char *filename)
 {
@@ -1842,10 +1844,12 @@ ST_FUNC void tcc_add_linker_symbols(TCCState *s1)
                 bss_section->data_offset, 0,
                 ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), 0,
                 bss_section->sh_num, "_end");
+#ifndef TCC_TARGET_PE
     /* horrible new standard ldscript defines */
     add_init_array_defines(s1, ".preinit_array");
     add_init_array_defines(s1, ".init_array");
     add_init_array_defines(s1, ".fini_array");
+#endif
 
     /* add start and stop symbols for sections whose name can be
        expressed in C */
diff --git a/tccrun.c b/tccrun.c
index 6d31e2f4..15a2d76c 100644
--- a/tccrun.c
+++ b/tccrun.c
@@ -48,7 +48,8 @@ static void set_pages_executable(void *ptr, unsigned long length);
 static int tcc_relocate_ex(TCCState *s1, void *ptr);
 
 #ifdef _WIN64
-static void win64_add_function_table(TCCState *s1);
+static void *win64_add_function_table(TCCState *s1);
+static void win64_del_function_table(void *);
 #endif
 
 /* ------------------------------------------------------------- */
@@ -57,14 +58,14 @@ static void win64_add_function_table(TCCState *s1);
 
 LIBTCCAPI int tcc_relocate(TCCState *s1, void *ptr)
 {
-    int ret;
+    int size;  void *mem;
 
     if (TCC_RELOCATE_AUTO != ptr)
         return tcc_relocate_ex(s1, ptr);
 
-    ret = tcc_relocate_ex(s1, NULL);
-    if (ret < 0)
-        return ret;
+    size = tcc_relocate_ex(s1, NULL);
+    if (size < 0)
+        return -1;
 
 #ifdef HAVE_SELINUX
     {   /* Use mmap instead of malloc for Selinux.  Ref:
@@ -72,28 +73,50 @@ LIBTCCAPI int tcc_relocate(TCCState *s1, void *ptr)
 
         char tmpfname[] = "/tmp/.tccrunXXXXXX";
         int fd = mkstemp (tmpfname);
+        void *wr_mem;
 
-        s1->mem_size = ret;
         unlink (tmpfname);
-        ftruncate (fd, s1->mem_size);
+        ftruncate (fd, size);
 
-        s1->write_mem = mmap (NULL, ret, PROT_READ|PROT_WRITE,
+        wr_mem = mmap (NULL, size, PROT_READ|PROT_WRITE,
             MAP_SHARED, fd, 0);
-        if (s1->write_mem == MAP_FAILED)
+        if (wr_mem == MAP_FAILED)
             tcc_error("/tmp not writeable");
-
-        s1->runtime_mem = mmap (NULL, ret, PROT_READ|PROT_EXEC,
+        mem = mmap (NULL, size, PROT_READ|PROT_EXEC,
             MAP_SHARED, fd, 0);
-        if (s1->runtime_mem == MAP_FAILED)
+        if (mem == MAP_FAILED)
             tcc_error("/tmp not executable");
 
-        ret = tcc_relocate_ex(s1, s1->write_mem);
+        tcc_relocate_ex(s1, wr_mem);
+        dynarray_add(&s1->runtime_mem, &s1->nb_runtime_mem, (void*)(addr_t)size);
+        dynarray_add(&s1->runtime_mem, &s1->nb_runtime_mem, wr_mem);
+        dynarray_add(&s1->runtime_mem, &s1->nb_runtime_mem, mem);
     }
 #else
-    s1->runtime_mem = tcc_malloc(ret);
-    ret = tcc_relocate_ex(s1, s1->runtime_mem);
+    mem = tcc_malloc(size);
+    tcc_relocate_ex(s1, mem); /* no more errors expected */
+    dynarray_add(&s1->runtime_mem, &s1->nb_runtime_mem, mem);
 #endif
-    return ret;
+    return 0;
+}
+
+ST_FUNC void tcc_run_free(TCCState *s1)
+{
+    int i;
+    /* release every block that tcc_relocate() recorded in s1->runtime_mem */
+    for (i = 0; i < s1->nb_runtime_mem; ++i) {
+#ifdef HAVE_SELINUX
+        int size = (int)(addr_t)s1->runtime_mem[i]; /* entries come in triples: size, ... */
+        munmap(s1->runtime_mem[++i], size); /* ... the writable mapping, ... */
+        munmap(s1->runtime_mem[++i], size); /* ... and the executable mapping */
+#else
+# ifdef _WIN64 /* tcc_relocate_ex() stores the function table pointer in the block's first word */
+        win64_del_function_table(*(void**)s1->runtime_mem[i]);
+# endif
+        tcc_free(s1->runtime_mem[i]);
+#endif
+    }
+    tcc_free(s1->runtime_mem); /* finally the pointer array itself */
+}
 
 /* launch the compiled program with the given arguments */
@@ -173,15 +196,17 @@ static int tcc_relocate_ex(TCCState *s1, void *ptr)
     }
 
     offset = 0, mem = (addr_t)ptr;
+#ifdef _WIN64
+    offset += sizeof (void*);
+#endif
     for(i = 1; i < s1->nb_sections; i++) {
         s = s1->sections[i];
         if (0 == (s->sh_flags & SHF_ALLOC))
             continue;
-        length = s->data_offset;
-        s->sh_addr = mem ? (mem + offset + 15) & ~15 : 0;
-        offset = (offset + length + 15) & ~15;
+        offset = (offset + 15) & ~15;
+        s->sh_addr = mem ? mem + offset : 0;
+        offset += s->data_offset;
     }
-    offset += 16;
 
     /* relocate symbols */
     relocate_syms(s1, 1);
@@ -199,6 +224,10 @@ static int tcc_relocate_ex(TCCState *s1, void *ptr)
     }
     relocate_plt(s1);
 
+#ifdef _WIN64
+    *(void**)ptr = win64_add_function_table(s1);
+#endif
+
     for(i = 1; i < s1->nb_sections; i++) {
         s = s1->sections[i];
         if (0 == (s->sh_flags & SHF_ALLOC))
@@ -214,10 +243,6 @@ static int tcc_relocate_ex(TCCState *s1, void *ptr)
         if (s->sh_flags & SHF_EXECINSTR)
             set_pages_executable(ptr, length);
     }
-
-#ifdef _WIN64
-    win64_add_function_table(s1);
-#endif
     return 0;
 }
 
@@ -247,6 +272,31 @@ static void set_pages_executable(void *ptr, unsigned long length)
 #endif
 }
 
+#ifdef _WIN64
+/* register s1->uw_pdata with the OS; return its address, or NULL if none/failed */
+static void *win64_add_function_table(TCCState *s1)
+{
+    void *p = NULL;
+    if (s1->uw_pdata) {
+        p = (void*)s1->uw_pdata->sh_addr;
+        if (!RtlAddFunctionTable(
+                (RUNTIME_FUNCTION*)p,
+                s1->uw_pdata->data_offset / sizeof (RUNTIME_FUNCTION),
+                text_section->sh_addr))
+            p = NULL; /* not registered, so there is nothing to delete later */
+        s1->uw_pdata = NULL; /* a later call then skips the registration */
+    }
+    return p;
+}
+
+/* unregister the function table at p; a NULL p is accepted and ignored */
+static void win64_del_function_table(void *p)
+{
+    if (NULL != p)
+        RtlDeleteFunctionTable((RUNTIME_FUNCTION*)p);
+}
+#endif
+
 /* ------------------------------------------------------------- */
 #ifdef CONFIG_TCC_BACKTRACE
 
@@ -685,17 +735,6 @@ static void set_exception_handler(void)
     SetUnhandledExceptionFilter(cpu_exception_handler);
 }
 
-#ifdef _WIN64
-static void win64_add_function_table(TCCState *s1)
-{
-    RtlAddFunctionTable(
-        (RUNTIME_FUNCTION*)s1->uw_pdata->sh_addr,
-        s1->uw_pdata->data_offset / sizeof (RUNTIME_FUNCTION),
-        text_section->sh_addr
-        );
-}
-#endif
-
 /* return the PC at frame level 'level'. Return non zero if not found */
 static int rt_get_caller_pc(addr_t *paddr, CONTEXT *uc, int level)
 {
diff --git a/win32/lib/kernel32.def b/win32/lib/kernel32.def
index febd1d11..f03e17ba 100644
--- a/win32/lib/kernel32.def
+++ b/win32/lib/kernel32.def
@@ -552,6 +552,7 @@ ResetNLSUserInfoCache
 ResetWriteWatch
 ResumeThread
 RtlAddFunctionTable
+RtlDeleteFunctionTable
 RtlFillMemory
 RtlInstallFunctionTableCallback
 RtlMoveMemory
diff --git a/x86_64-gen.c b/x86_64-gen.c
index fa2460c8..dd52e4aa 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -893,6 +893,22 @@ void gfunc_call(int nb_args)
     }
     
     gcall_or_jmp(0);
+    /* other compilers don't clear the upper bits when returning char/short, so extend %al/%ax here */
+    bt = vtop->type.ref->type.t & (VT_BTYPE | VT_UNSIGNED);
+    if (bt == (VT_BYTE | VT_UNSIGNED))
+        o(0xc0b60f);  /* movzbl %al, %eax */
+    else if (bt == VT_BYTE)
+        o(0xc0be0f); /* movsbl %al, %eax */
+    else if (bt == VT_SHORT)
+        o(0x98); /* cwtl */
+    else if (bt == (VT_SHORT | VT_UNSIGNED))
+        o(0xc0b70f);  /* movzwl %ax, %eax */
+#if 0 /* handled in gen_cast() */
+    else if (bt == VT_INT)
+        o(0x9848); /* cltq */
+    else if (bt == (VT_INT | VT_UNSIGNED))
+        o(0xc089); /* mov %eax,%eax */
+#endif
     vtop--;
 }