diff --git a/lib/bcheck.c b/lib/bcheck.c
index 0ec2a4b4..90590f55 100644
--- a/lib/bcheck.c
+++ b/lib/bcheck.c
@@ -423,6 +423,11 @@ void __bound_init(void)
     }
 }
 
+void __bound_exit(void) /* looked up by name and called by tcc_run() after the compiled main() returns */
+{
+    restore_malloc_hooks(); /* NOTE(review): presumably undoes the bound-checking malloc hooks -- confirm */
+}
+
 static inline void add_region(BoundEntry *e, 
                               unsigned long start, unsigned long size)
 {
diff --git a/libtcc.c b/libtcc.c
index 865c1166..e1a10bf9 100644
--- a/libtcc.c
+++ b/libtcc.c
@@ -110,6 +110,7 @@ static int tcc_ext = 1;
 #ifdef CONFIG_TCC_BACKTRACE
 int num_callers = 6;
 const char **rt_bound_error_msg;
+unsigned long rt_prog_main; /* address of the program's main (set in tcc_run); rt_error stops its backtrace there */
 #endif
 
 /* XXX: get rid of this ASAP */
@@ -1340,7 +1341,7 @@ static void asm_global_instr(void)
 #ifdef CONFIG_TCC_BACKTRACE
 /* print the position in the source file of PC value 'pc' by reading
    the stabs debug information */
-static void rt_printline(unsigned long wanted_pc)
+static unsigned long rt_printline(unsigned long wanted_pc)
 {
     Stab_Sym *sym, *sym_end;
     char func_name[128], last_func_name[128];
@@ -1438,6 +1439,7 @@ static void rt_printline(unsigned long wanted_pc)
                     wanted_pc < sym->st_value + sym->st_size) {
                     pstrcpy(last_func_name, sizeof(last_func_name),
                             strtab_section->data + sym->st_name);
+                    func_addr = sym->st_value;
                     goto found;
                 }
             }
@@ -1445,7 +1447,7 @@ static void rt_printline(unsigned long wanted_pc)
     }
     /* did not find any info: */
     fprintf(stderr, " ???\n");
-    return;
+    return 0;
  found:
     if (last_func_name[0] != '\0') {
         fprintf(stderr, " %s()", last_func_name);
@@ -1458,6 +1460,7 @@ static void rt_printline(unsigned long wanted_pc)
         fprintf(stderr, ")");
     }
     fprintf(stderr, "\n");
+    return func_addr;
 }
 
 #ifdef __i386__
@@ -1552,7 +1555,9 @@ void rt_error(ucontext_t *uc, const char *fmt, ...)
             fprintf(stderr, "at ");
         else
             fprintf(stderr, "by ");
-        rt_printline(pc);
+        pc = rt_printline(pc);
+        if (pc == rt_prog_main && pc)
+            break;
     }
     exit(255);
     va_end(ap);
@@ -1712,14 +1717,17 @@ int tcc_run(TCCState *s1, int argc, char **argv)
 #ifdef CONFIG_TCC_BCHECK
     if (s1->do_bounds_check) {
         void (*bound_init)(void);
-
+        void (*bound_exit)(void);
         /* set error function */
         rt_bound_error_msg = tcc_get_symbol_err(s1, "__bound_error_msg");
-
+        rt_prog_main = (unsigned long)prog_main;
         /* XXX: use .init section so that it also work in binary ? */
-        bound_init = (void *)tcc_get_symbol_err(s1, "__bound_init");
+        bound_init = tcc_get_symbol_err(s1, "__bound_init");
+        bound_exit = tcc_get_symbol_err(s1, "__bound_exit");
         bound_init();
-    }
+        ret = (*prog_main)(argc, argv);
+        bound_exit();
+    } else
 #endif
     ret = (*prog_main)(argc, argv);
     tcc_free(ptr);
diff --git a/tcc.c b/tcc.c
index 9746f9f7..daa10135 100644
--- a/tcc.c
+++ b/tcc.c
@@ -529,23 +529,20 @@ int main(int argc, char **argv)
     /* free all files */
     tcc_free(files);
 
-    if (ret)
-        goto the_end;
+    if (0 == ret) {
+        if (do_bench)
+            tcc_print_stats(s, getclock_us() - start_time);
 
-    if (do_bench)
-        tcc_print_stats(s, getclock_us() - start_time);
+        if (s->output_type == TCC_OUTPUT_PREPROCESS) {
+            if (outfile)
+                fclose(s->outfile);
+        } else if (s->output_type == TCC_OUTPUT_MEMORY)
+            ret = tcc_run(s, argc - optind, argv + optind);
+        else
+            ret = tcc_output_file(s, outfile) ? 1 : 0;
+    }
 
-    if (s->output_type == TCC_OUTPUT_PREPROCESS) {
-        if (outfile)
-            fclose(s->outfile);
-    } else if (s->output_type == TCC_OUTPUT_MEMORY) {
-        ret = tcc_run(s, argc - optind, argv + optind);
-    } else
-        ret = tcc_output_file(s, outfile) ? 1 : 0;
- the_end:
-    /* XXX: cannot do it with bound checking because of the malloc hooks */
-    if (!s->do_bounds_check)
-        tcc_delete(s);
+    tcc_delete(s);
 
 #ifdef MEM_DEBUG
     if (do_bench) {
@@ -554,4 +551,3 @@ int main(int argc, char **argv)
 #endif
     return ret;
 }
-
diff --git a/tests/Makefile b/tests/Makefile
index ddd49322..98e1b3e0 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -6,11 +6,10 @@
 TESTS = libtest test3
 
 # these should work too
-# TESTS += test1 test2 speed
+# TESTS += test1 test2 speed btest
 
 # these don't work as they should
-# TESTS += test4 btest asmtest
-
+# TESTS += test4 asmtest
 
 TOP = ..
 include $(TOP)/Makefile
@@ -87,20 +86,22 @@ BOUNDS_FAIL= 2 5 7 9 11 12 13
 btest: boundtest.c
 	@echo ------------ $@ ------------
 	@for i in $(BOUNDS_OK); do \
+	   echo ; echo --- boundtest $$i ---; \
 	   if $(TCC) -b -run boundtest.c $$i ; then \
-	       /bin/true ; \
+	       echo succeeded as expected; \
 	   else\
 	       echo Failed positive test $$i ; exit 1 ; \
 	   fi ;\
 	done ;\
 	for i in $(BOUNDS_FAIL); do \
+	   echo ; echo --- boundtest $$i ---; \
 	   if $(TCC) -b -run boundtest.c $$i ; then \
 	       echo Failed negative test $$i ; exit 1 ;\
 	   else\
-	       /bin/true ; \
+	       echo failed as expected; \
 	   fi ;\
 	done ;\
-	echo Bound test OK
+	echo; echo Bound test OK
 
 # speed test
 speed: ex2 ex3