diff --git a/lib/bcheck.c b/lib/bcheck.c
index 6cfdc76e..9c0a40b6 100644
--- a/lib/bcheck.c
+++ b/lib/bcheck.c
@@ -112,6 +112,12 @@ static sem_t bounds_sem;
 #define WAIT_SEM()             if (use_sem) while (sem_wait (&bounds_sem) < 0 \
                                                    && errno == EINTR)
 #define POST_SEM()             if (use_sem) sem_post (&bounds_sem)
+#elif 0
+static pthread_mutex_t bounds_mtx;
+#define INIT_SEM()             pthread_mutex_init (&bounds_mtx, NULL)
+#define EXIT_SEM()             pthread_mutex_destroy (&bounds_mtx)
+#define WAIT_SEM()             if (use_sem) pthread_mutex_lock (&bounds_mtx)
+#define POST_SEM()             if (use_sem) pthread_mutex_unlock (&bounds_mtx)
 #else
 static pthread_spinlock_t bounds_spin;
 /* about 25% faster then semaphore. */
@@ -618,6 +624,9 @@ void __bound_new_region(void *p, size_t size)
     alloca_list_type *cur;
     alloca_list_type *new;
 
+    if (no_checking)
+        return;
+
     dprintf(stderr, "%s, %s(): %p, 0x%lx\n",
             __FILE__, __FUNCTION__, p, (unsigned long)size);
     GET_CALLER_FP (fp);
@@ -638,8 +647,7 @@ void __bound_new_region(void *p, size_t size)
         last = cur;
         cur = cur->next;
     }
-    if (no_checking == 0)
-        tree = splay_insert((size_t)p, size, tree);
+    tree = splay_insert((size_t)p, size, tree);
     if (new) {
         new->fp = fp;
         new->p = p;
@@ -1188,7 +1196,7 @@ void __bound_free(void *ptr, const void *caller)
     size_t addr = (size_t) ptr;
     void *p;
 
-    if (ptr == NULL || tree == NULL || no_checking
+    if (ptr == NULL || tree == NULL
 #if MALLOC_REDIR
         || ((unsigned char *) ptr >= &initial_pool[0] &&
             (unsigned char *) ptr < &initial_pool[sizeof(initial_pool)])
@@ -1198,25 +1206,27 @@ void __bound_free(void *ptr, const void *caller)
 
     dprintf(stderr, "%s, %s(): %p\n", __FILE__, __FUNCTION__, ptr);
 
-    WAIT_SEM ();
-    INCR_COUNT(bound_free_count);
-    tree = splay (addr, tree);
-    if (tree->start == addr) {
-        if (tree->is_invalid) {
-            POST_SEM ();
-            bound_error("freeing invalid region");
-            return;
+    if (no_checking == 0) {
+        WAIT_SEM ();
+        INCR_COUNT(bound_free_count);
+        tree = splay (addr, tree);
+        if (tree->start == addr) {
+            if (tree->is_invalid) {
+                POST_SEM ();
+                bound_error("freeing invalid region");
+                return;
+            }
+            tree->is_invalid = 1;
+            memset (ptr, 0x5a, tree->size);
+            p = free_reuse_list[free_reuse_index];
+            free_reuse_list[free_reuse_index] = ptr;
+            free_reuse_index = (free_reuse_index + 1) % FREE_REUSE_SIZE;
+            if (p)
+                tree = splay_delete((size_t)p, tree);
+            ptr = p;
         }
-        tree->is_invalid = 1;
-        memset (ptr, 0x5a, tree->size);
-        p = free_reuse_list[free_reuse_index];
-        free_reuse_list[free_reuse_index] = ptr;
-        free_reuse_index = (free_reuse_index + 1) % FREE_REUSE_SIZE;
-        if (p)
-            tree = splay_delete((size_t)p, tree);
-        ptr = p;
+        POST_SEM ();
     }
-    POST_SEM ();
     BOUND_FREE (ptr);
 }
 
@@ -1340,8 +1350,7 @@ int __bound_munmap (void *start, size_t size)
 /* check that (p ... p + size - 1) lies inside 'p' region, if any */
 static void __bound_check(const void *p, size_t size, const char *function)
 {
-    if (no_checking == 0 && size != 0 &&
-        __bound_ptr_add((void *)p, size) == INVALID_POINTER) {
+    if (size != 0 && __bound_ptr_add((void *)p, size) == INVALID_POINTER) {
         bound_error("invalid pointer %p, size 0x%lx in %s",
                 p, (unsigned long)size, function);
     }
diff --git a/tcc-doc.texi b/tcc-doc.texi
index 390e764e..d7ac7178 100644
--- a/tcc-doc.texi
+++ b/tcc-doc.texi
@@ -951,34 +951,53 @@ Here are some examples of caught errors:
 
 Signal handlers are not compatible with bounds checking. The code
 below can be used to protect signal handlers.
-The call to __bound_checking(1) will disable bounds checking in the
-whole application.
+The function attribute @code{__attribute__((bound_no_checking))} prevents all
+bounds checking code generation for that function. If a signal handler calls
+another function, that function must also use @code{__attribute__((bound_no_checking))}.
 
-The BOUNDS_CHECKING_OFF and BOUNDS_CHECKING_ON can also be used to
+Calling @code{fork()} in a multi-threaded application is also a problem.
+To solve this, all bounds checking can be disabled by calling
+@code{__bound_checking(1)}. The call to @code{__bound_checking(1)} disables bounds
+checking in the whole application.
+
+The @code{BOUNDS_CHECKING_OFF} and @code{BOUNDS_CHECKING_ON} macros can also be used to
 disable bounds checking for some code. This is not recommended.
 It is better to fix the code.
 
 @example
 
-#ifdef __BOUNDS_CHECKING_ON
+#if defined(__TINYC__) && __BOUNDS_CHECKING_ON
+#undef __attribute__
 extern void __bound_checking (int no_check);
 #define BOUNDS_CHECKING_OFF __bound_checking(1)
 #define BOUNDS_CHECKING_ON  __bound_checking(-1)
+#define BOUNDS_NO_CHECKING __attribute__((bound_no_checking))
 #else
 #define BOUNDS_CHECKING_OFF
 #define BOUNDS_CHECKING_ON
+#define BOUNDS_NO_CHECKING
 #endif
 
-void real_signal_handler(int sig, siginfo_t *info, void *ucontext)
+void signal_handler(int sig, void *info, void *ucontext) BOUNDS_NO_CHECKING
 @{
-    ...
+    ... signal handler code without generated bounds checking code.
 @}
 
-void signal_handler(int sig, void *info, void *ucontext)
+void run(const char *cmd)
 @{
-    BOUNDS_CHECKING_OFF;
-    real_signal_handler(sig, info, data);
-    BOUNDS_CHECKING_ON;
+    switch (fork()) @{
+    case 0:
+      BOUNDS_CHECKING_OFF;
+      ...
+      exec...
+      exit(1);
+    case -1:
+      ...
+      break;
+    default:
+      ...
+      break;
+    @}
 @}
 
 @end example
diff --git a/tcc.h b/tcc.h
index 321ca57f..f3783d88 100644
--- a/tcc.h
+++ b/tcc.h
@@ -505,7 +505,8 @@ struct FuncAttr {
     func_dtor   : 1, /* attribute((destructor)) */
     func_args   : 8, /* PE __stdcall args */
     func_alwinl : 1, /* always_inline */
-    xxxx        :15;
+    no_bcheck   : 1, /* no bound checking */
+    xxxx        :14;
 };
 
 /* symbol management */
diff --git a/tccgen.c b/tccgen.c
index 629e07a8..7d8ad3d5 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -1501,6 +1501,8 @@ static void merge_funcattr(struct FuncAttr *fa, struct FuncAttr *fa1)
       fa->func_ctor = 1;
     if (fa1->func_dtor)
       fa->func_dtor = 1;
+    if (fa1->no_bcheck)
+      fa->no_bcheck = 1;
 }
 
 /* Merge attributes.  */
@@ -4122,6 +4124,10 @@ redo:
         case TOK_ALWAYS_INLINE2:
             ad->f.func_alwinl = 1;
             break;
+        case TOK_NO_BOUND_CHECK1:
+        case TOK_NO_BOUND_CHECK2:
+            ad->f.no_bcheck = 1;
+            break;
         case TOK_SECTION1:
         case TOK_SECTION2:
             skip('(');
@@ -8079,10 +8085,14 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
    'cur_text_section' */
 static void gen_function(Sym *sym)
 {
+    unsigned char save_bcheck = tcc_state->do_bounds_check;
     /* Initialize VLA state */
     struct scope f = { 0 };
     cur_scope = root_scope = &f;
 
+    if (sym->type.ref->f.no_bcheck)
+        tcc_state->do_bounds_check = 0;
+
     nocode_wanted = 0;
     ind = cur_text_section->data_offset;
     if (sym->a.aligned) {
@@ -8135,6 +8145,7 @@ static void gen_function(Sym *sym)
     check_vstack();
     /* do this after funcend debug info */
     next();
+    tcc_state->do_bounds_check = save_bcheck;
 }
 
 static void gen_inline_functions(TCCState *s)
diff --git a/tcctok.h b/tcctok.h
index 1e9bc4ab..d64a363b 100644
--- a/tcctok.h
+++ b/tcctok.h
@@ -134,6 +134,8 @@
      DEF(TOK_DESTRUCTOR2, "__destructor__")
      DEF(TOK_ALWAYS_INLINE1, "always_inline")
      DEF(TOK_ALWAYS_INLINE2, "__always_inline__")
+     DEF(TOK_NO_BOUND_CHECK1, "bound_no_checking")
+     DEF(TOK_NO_BOUND_CHECK2, "__bound_no_checking__")
 
      DEF(TOK_MODE, "__mode__")
      DEF(TOK_MODE_QI, "__QI__")
diff --git a/tests/tests2/103_implicit_memmove.c b/tests/tests2/103_implicit_memmove.c
index a5e53034..1592fb25 100644
--- a/tests/tests2/103_implicit_memmove.c
+++ b/tests/tests2/103_implicit_memmove.c
@@ -8,7 +8,7 @@ int foo (struct S *a, struct S *b)
   return 0;
 }
 
-void *memmove(void*,void*,long);
+void *memmove(void*,const void*,__SIZE_TYPE__);
 void foo2 (struct S *a, struct S *b)
 {
   memmove(a, b, sizeof *a);
diff --git a/tests/tests2/114_bound_signal.c b/tests/tests2/114_bound_signal.c
index e36ee639..8cdf86f2 100644
--- a/tests/tests2/114_bound_signal.c
+++ b/tests/tests2/114_bound_signal.c
@@ -8,12 +8,25 @@
 #include <errno.h>
 #include <setjmp.h>
 
+/* See tcc-doc.info */
+#if defined(__TINYC__) && __BOUNDS_CHECKING_ON
+#undef __attribute__
+extern void __bound_checking (int no_check);
+#define BOUNDS_CHECKING_OFF __bound_checking(1)
+#define BOUNDS_CHECKING_ON  __bound_checking(-1)
+#define BOUNDS_NO_CHECKING __attribute__((bound_no_checking))
+#else
+#define BOUNDS_CHECKING_OFF
+#define BOUNDS_CHECKING_ON
+#define BOUNDS_NO_CHECKING
+#endif
+
 static volatile int run = 1;
 static int dummy[10];
 static sem_t sem;
 
 static void
-add (void)
+add (void) BOUNDS_NO_CHECKING
 {
     int i;
 
@@ -41,29 +54,12 @@ do_signal (void *unused)
     return NULL;
 }
 
-/* See tcc-doc.info */
-#ifdef __BOUNDS_CHECKING_ON
-extern void __bound_checking (int no_check);
-#define BOUNDS_CHECKING_OFF __bound_checking(1)
-#define BOUNDS_CHECKING_ON  __bound_checking(-1)
-#else
-#define BOUNDS_CHECKING_OFF
-#define BOUNDS_CHECKING_ON
-#endif
-
-static void real_signal_handler(int sig)
+static void signal_handler(int sig) BOUNDS_NO_CHECKING
 {
     add();
     sem_post (&sem);
 }
 
-static void signal_handler(int sig)
-{
-    BOUNDS_CHECKING_OFF;
-    real_signal_handler(sig);
-    BOUNDS_CHECKING_ON;
-}
-
 int
 main (void)
 {