diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41dd81b4..fb0d9683 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,6 +57,8 @@ endif()
 # Use two variables to keep CMake configuration variable names consistent
 set(TCC_BCHECK OFF CACHE BOOL "Enable bounds checking")
 set(CONFIG_TCC_BCHECK ${TCC_BCHECK})
+set(TCC_ASSERT OFF CACHE BOOL "Enable assertions")
+set(CONFIG_TCC_ASSERT ${TCC_ASSERT})
 
 set(TCC_BUILD_NATIVE ON CACHE BOOL "Build native compiler")
 set(TCC_BUILD_I386 OFF CACHE BOOL "Build i386 cross compiler")
diff --git a/arm-gen.c b/arm-gen.c
index 0f70062a..a8831212 100644
--- a/arm-gen.c
+++ b/arm-gen.c
@@ -2017,6 +2017,21 @@ void ggoto(void)
   vtop--;
 }
 
+/* VLA hook: save the stack pointer. Not implemented for ARM; always reports an error */
+ST_FUNC void gen_vla_sp_save(int addr) {
+    tcc_error("variable length arrays unsupported for this target");
+}
+
+/* VLA hook: restore the stack pointer. Not implemented for ARM; always reports an error */
+ST_FUNC void gen_vla_sp_restore(int addr) {
+    tcc_error("variable length arrays unsupported for this target");
+}
+
+/* VLA hook: allocate stack space for a VLA. Not implemented for ARM; always reports an error */
+ST_FUNC void gen_vla_alloc(CType *type, int align) {
+    tcc_error("variable length arrays unsupported for this target");
+}
+
 /* end of ARM code generator */
 /*************************************************************/
 #endif
diff --git a/c67-gen.c b/c67-gen.c
index 7d559c89..0d5e33f5 100644
--- a/c67-gen.c
+++ b/c67-gen.c
@@ -2560,6 +2560,21 @@ void ggoto(void)
     vtop--;
 }
 
+/* VLA hook: save the stack pointer. Not implemented for C67; always reports an error */
+ST_FUNC void gen_vla_sp_save(int addr) {
+    tcc_error("variable length arrays unsupported for this target");
+}
+
+/* VLA hook: restore the stack pointer. Not implemented for C67; always reports an error */
+ST_FUNC void gen_vla_sp_restore(int addr) {
+    tcc_error("variable length arrays unsupported for this target");
+}
+
+/* VLA hook: allocate stack space for a VLA. Not implemented for C67; always reports an error */
+ST_FUNC void gen_vla_alloc(CType *type, int align) {
+    tcc_error("variable length arrays unsupported for this target");
+}
+
 /* end of C67 code generator */
 /*************************************************************/
 #endif
diff --git a/config.h.in b/config.h.in
index 16393e9d..c9e1a180 100644
--- a/config.h.in
+++ b/config.h.in
@@ -3,4 +3,5 @@
 
 #cmakedefine CONFIG_WIN32
 #cmakedefine CONFIG_WIN64
-#cmakedefine CONFIG_TCC_BCHECK
\ No newline at end of file
+#cmakedefine CONFIG_TCC_BCHECK
+#cmakedefine CONFIG_TCC_ASSERT
diff --git a/configure b/configure
index 72d48af1..b513cc18 100755
--- a/configure
+++ b/configure
@@ -19,6 +19,7 @@ TMPH=$TMPN.h
 # default parameters
 build_cross="no"
 use_libgcc="no"
+enable_assert="no"
 prefix=""
 execprefix=""
 bindir=""
@@ -165,6 +166,8 @@ for opt do
   ;;
   --enable-cross) build_cross="yes"
   ;;
+  --enable-assert) enable_assert="yes"
+  ;;
   --disable-static) disable_static="yes"
   ;;
   --disable-rpath) disable_rpath="yes"
@@ -274,6 +277,7 @@ Advanced options (experts only):
   --enable-mingw32         build windows version on linux with mingw32
   --enable-cygwin          build windows version on windows with cygwin
   --enable-cross           build cross compilers
+  --enable-assert          enable debug assertions
   --with-selinux           use mmap for exec mem [needs writable /tmp]
   --sysincludepaths=...    specify system include paths, colon separated
   --libpaths=...           specify system library paths, colon separated
@@ -484,6 +488,9 @@ if test "$have_selinux" = "yes" ; then
   echo "#define HAVE_SELINUX" >> $TMPH
   echo "HAVE_SELINUX=yes" >> config.mak
 fi
+if test "$enable_assert" = "yes" ; then
+  echo "#define CONFIG_TCC_ASSERT" >> $TMPH
+fi
 
 version=`head $source_path/VERSION`
 echo "VERSION=$version" >>config.mak
diff --git a/i386-gen.c b/i386-gen.c
index 044f9e60..9ff2e180 100644
--- a/i386-gen.c
+++ b/i386-gen.c
@@ -43,6 +43,7 @@ enum {
     TREG_ECX,
     TREG_EDX,
     TREG_ST0,
+    TREG_ESP = 4
 };
 
 /* return registers for function */
@@ -1117,6 +1118,44 @@ ST_FUNC void gen_bounded_ptr_deref(void)
 }
 #endif
 
+/* Save the stack pointer into the local frame slot at offset addr from %ebp */
+ST_FUNC void gen_vla_sp_save(int addr) {
+    /* mov %esp,addr(%ebp) */
+    o(0x89);
+    gen_modrm(TREG_ESP, VT_LOCAL, NULL, addr);
+}
+
+/* Restore the stack pointer from the local frame slot at offset addr from %ebp */
+ST_FUNC void gen_vla_sp_restore(int addr) {
+    o(0x8b); /* mov addr(%ebp),%esp */
+    gen_modrm(TREG_ESP, VT_LOCAL, NULL, addr);
+}
+
+/* Subtract the allocation size from the stack pointer, and push the resulting value onto the value stack */
+ST_FUNC void gen_vla_alloc(CType *type, int align) {
+#ifdef TCC_TARGET_PE
+    /* alloca does more than just adjust %esp on Windows */
+    vpush_global_sym(&func_old_type, TOK_alloca);
+    vswap(); /* Move alloca ref past allocation size */
+    gfunc_call(1);
+    vset(type, REG_IRET, 0);
+#else
+    int r;
+    r = gv(RC_INT); /* allocation size */
+    /* sub r,%esp */
+    o(0x2b);
+    o(0xe0 | r);
+    /* We always align to 16 bytes; the 'align' argument is not used */
+    /* and ~15, %esp */
+    o(0xf0e483);
+    /* mov %esp, r */
+    o(0x89);
+    o(0xe0 | r);
+    vpop();
+    vset(type, r, 0);
+#endif
+}
+
 /* end of X86 code generator */
 /*************************************************************/
 #endif
diff --git a/tcc.h b/tcc.h
index 7205f324..859d4fde 100644
--- a/tcc.h
+++ b/tcc.h
@@ -40,6 +40,13 @@
 #include <setjmp.h>
 #include <time.h>
 
+#ifdef CONFIG_TCC_ASSERT /* set by configure --enable-assert or the CMake TCC_ASSERT option */
+#include <assert.h>
+#define TCC_ASSERT(ex) assert(ex)
+#else
+#define TCC_ASSERT(ex) ((void)0) /* assertions compiled out; still a valid expression */
+#endif
+
 #ifndef _WIN32
 # include <unistd.h>
 # include <sys/time.h>
@@ -404,6 +411,12 @@ typedef struct AttributeDef {
 #define SYM_FIELD      0x20000000 /* struct/union field symbol space */
 #define SYM_FIRST_ANOM 0x10000000 /* first anonymous sym */
 
+#define VLA_SP_LOC_SET     0x01 /* Location of SP on stack has been allocated */
+#define VLA_SP_SAVED       0x02 /* SP has been saved to slot already */
+#define VLA_NEED_NEW_FRAME 0x04 /* Needs new frame for next VLA */
+#define VLA_IN_SCOPE       0x08 /* One or more VLAs are in scope */
+#define VLA_SCOPE_FLAGS    (VLA_SP_SAVED|VLA_NEED_NEW_FRAME|VLA_IN_SCOPE) /* Flags which are saved and restored upon entering and exiting a block */
+
 /* stored in 'Sym.c' field */
 #define FUNC_NEW       1 /* ansi function prototype */
 #define FUNC_OLD       2 /* old function prototype */
@@ -1201,7 +1214,7 @@ ST_FUNC void decl(int l);
 #if defined CONFIG_TCC_BCHECK || defined TCC_TARGET_C67
 ST_FUNC Sym *get_sym_ref(CType *type, Section *sec, unsigned long offset, unsigned long size);
 #endif
-#ifdef TCC_TARGET_X86_64
+#if defined TCC_TARGET_X86_64 && !defined TCC_TARGET_PE
 ST_FUNC int classify_x86_64_va_arg(CType *ty);
 #endif
 
@@ -1286,6 +1299,9 @@ ST_FUNC void o(unsigned int c);
 #ifndef TCC_TARGET_ARM
 ST_FUNC void gen_cvt_itof(int t);
 #endif
+ST_FUNC void gen_vla_sp_save(int addr);
+ST_FUNC void gen_vla_sp_restore(int addr);
+ST_FUNC void gen_vla_alloc(CType *type, int align);
 
 /* ------------ i386-gen.c ------------ */
 #if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64
diff --git a/tccgen.c b/tccgen.c
index 0d1e21b4..c5c41de3 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -55,6 +55,11 @@ ST_DATA Sym *define_stack;
 ST_DATA Sym *global_label_stack;
 ST_DATA Sym *local_label_stack;
 
+ST_DATA int vla_sp_loc_tmp; /* vla_sp_loc is set to this when the value won't be needed later */
+ST_DATA int vla_sp_root_loc; /* vla_sp_loc for SP before any VLAs were pushed */
+ST_DATA int *vla_sp_loc; /* Pointer to variable holding location to store stack pointer on the stack when modifying stack pointer */
+ST_DATA int vla_flags; /* VLA_* flags */
+
 ST_DATA SValue __vstack[1+VSTACK_SIZE], *vtop;
 
 ST_DATA int const_wanted; /* true if constant wanted */
@@ -81,6 +86,7 @@ static int decl0(int l, int is_for_loop_init);
 static void expr_eq(void);
 static void unary_type(CType *type);
 static void vla_runtime_type_size(CType *type, int *a);
+static void vla_sp_save(void);
 static int is_compatible_parameter_types(CType *type1, CType *type2);
 static void expr_type(CType *type);
 
@@ -2109,6 +2115,17 @@ ST_FUNC void vla_runtime_type_size(CType *type, int *a)
     }
 }
 
+static void vla_sp_save(void) { /* emit the SP save unless vla_flags says it is already done */
+    if (!(vla_flags & VLA_SP_LOC_SET)) {
+        *vla_sp_loc = (loc -= PTR_SIZE); /* reserve a pointer-sized local slot for the saved SP */
+        vla_flags |= VLA_SP_LOC_SET;
+    }
+    if (!(vla_flags & VLA_SP_SAVED)) {
+        gen_vla_sp_save(*vla_sp_loc); /* target hook: store SP into that slot */
+        vla_flags |= VLA_SP_SAVED;
+    }
+}
+
 /* return the pointed type of t */
 static inline CType *pointed_type(CType *type)
 {
@@ -4484,6 +4501,16 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
         frame_bottom->next = scope_stack_bottom;
         scope_stack_bottom = frame_bottom;
         llabel = local_label_stack;
+        
+        /* save VLA state */
+        int block_vla_sp_loc = *vla_sp_loc;
+        int *saved_vla_sp_loc = vla_sp_loc;
+        if (saved_vla_sp_loc != &vla_sp_root_loc)
+          vla_sp_loc = &block_vla_sp_loc;
+
+        int saved_vla_flags = vla_flags;
+        vla_flags |= VLA_NEED_NEW_FRAME;
+        
         /* handle local labels declarations */
         if (tok == TOK_LABEL) {
             next();
@@ -4527,6 +4554,16 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
         /* pop locally defined symbols */
         scope_stack_bottom = scope_stack_bottom->next;
         sym_pop(&local_stack, s);
+        
+        /* Pop VLA frames and restore stack pointer if required */
+        if (saved_vla_sp_loc != &vla_sp_root_loc)
+            *saved_vla_sp_loc = block_vla_sp_loc;
+        if (vla_sp_loc != (saved_vla_sp_loc == &vla_sp_root_loc ? &vla_sp_root_loc : &block_vla_sp_loc)) {
+            vla_sp_loc = saved_vla_sp_loc;
+            gen_vla_sp_restore(*vla_sp_loc);
+        }
+        vla_flags = (vla_flags & ~VLA_SCOPE_FLAGS) | (saved_vla_flags & VLA_SCOPE_FLAGS);
+        
         next();
     } else if (tok == TOK_RETURN) {
         next();
@@ -4731,6 +4768,13 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
                     s->r = LABEL_FORWARD;
             }
             /* label already defined */
+            if (vla_flags & VLA_IN_SCOPE) {
+                /* If VLAs are in use, save the current stack pointer and
+                   reset the stack pointer to what it was at function entry
+                   (label will restore stack pointer in inner scopes) */
+                vla_sp_save();
+                gen_vla_sp_restore(vla_sp_root_loc);
+            }
             if (s->r & LABEL_FORWARD) 
                 s->jnext = gjmp(s->jnext);
             else
@@ -4746,6 +4790,12 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
         b = is_label();
         if (b) {
             /* label case */
+            if (vla_flags & VLA_IN_SCOPE) {
+                /* save/restore stack pointer across label
+                   this is a no-op when combined with the load immediately
+                   after the label unless we arrive via goto */
+                vla_sp_save();
+            }
             s = label_find(b);
             if (s) {
                 if (s->r == LABEL_DEFINED)
@@ -4756,6 +4806,10 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
                 s = label_push(&global_label_stack, b, LABEL_DEFINED);
             }
             s->jnext = ind;
+            if (vla_flags & VLA_IN_SCOPE) {
+                gen_vla_sp_restore(*vla_sp_loc);
+                vla_flags |= VLA_NEED_NEW_FRAME;
+            }
             /* we accept this, but it is a mistake */
         block_after_label:
             if (tok == '}') {
@@ -5026,24 +5080,21 @@ static void decl_initializer(CType *type, Section *sec, unsigned long c,
     CType *t1;
 
     if (type->t & VT_VLA) {
-#if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64
         int a;
-        CValue retcval;
-
-        vpush_global_sym(&func_old_type, TOK_alloca);
+        
+        /* save current stack pointer */
+        if (vla_flags & VLA_NEED_NEW_FRAME) {
+            vla_sp_save();
+            vla_flags = VLA_IN_SCOPE;
+            vla_sp_loc = &vla_sp_loc_tmp;
+        }
+        
         vla_runtime_type_size(type, &a);
-        gfunc_call(1);
-
-        /* return value */
-        retcval.i = 0;
-        vsetc(type, REG_IRET, &retcval);
+        gen_vla_alloc(type, a);
         vset(type, VT_LOCAL|VT_LVAL, c);
         vswap();
         vstore();
         vpop();
-#else
-        tcc_error("variable length arrays unsupported for this target");
-#endif
     } else if (type->t & VT_ARRAY) {
         s = type->ref;
         n = s->c;
@@ -5585,6 +5636,9 @@ static void gen_function(Sym *sym)
     put_extern_sym(sym, cur_text_section, ind, 0);
     funcname = get_tok_str(sym->v, NULL);
     func_ind = ind;
+    /* Initialize VLA state */
+    vla_sp_loc = &vla_sp_root_loc;
+    vla_flags = VLA_NEED_NEW_FRAME;
     /* put debug symbol */
     if (tcc_state->do_debug)
         put_func_debug(sym);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index bcd37cd4..cf5caf8f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -15,11 +15,16 @@ add_test(NAME abitest-cc WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMAND abitest-c
 
 set(ABITEST_TCC abitest-tcc${CMAKE_EXECUTABLE_SUFFIX})
 get_property(LIBTCC_LIB TARGET libtcc PROPERTY LOCATION)
-add_custom_command(OUTPUT ${ABITEST_TCC} COMMAND tcc ${TCC_CFLAGS} -g ${CMAKE_CURRENT_SOURCE_DIR}/abitest.c ${LIBTCC_LDFLAGS} ${LIBTCC_LIB} -o ${ABITEST_TCC} DEPENDS tcc abitest.c)
+add_custom_command(OUTPUT ${ABITEST_TCC} COMMAND tcc ${TCC_CFLAGS} -g ${CMAKE_CURRENT_SOURCE_DIR}/abitest.c ${LIBTCC_LDFLAGS} ${LIBTCC_LIB} -o ${ABITEST_TCC} DEPENDS tcc ${CMAKE_CURRENT_SOURCE_DIR}/abitest.c)
 add_custom_target(abitest-tcc-exe ALL DEPENDS ${ABITEST_TCC})
 
 add_test(NAME abitest-tcc WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${ABITEST_TCC} lib_path=${CMAKE_BINARY_DIR} include=${CMAKE_SOURCE_DIR}/include)
 
+set(VLA_TEST vla_test${CMAKE_EXECUTABLE_SUFFIX}) # VLA test program, compiled by the freshly built tcc
+add_custom_command(OUTPUT ${VLA_TEST} COMMAND tcc ${TCC_CFLAGS} -g ${CMAKE_CURRENT_SOURCE_DIR}/vla_test.c -o ${VLA_TEST} DEPENDS tcc ${CMAKE_CURRENT_SOURCE_DIR}/vla_test.c)
+add_custom_target(vla_test-exe ALL DEPENDS ${VLA_TEST})
+add_test(vla_test ${CMAKE_CURRENT_BINARY_DIR}/${VLA_TEST}) # full path: vla_test is a custom-command output, not a CMake target
+
 add_executable(tcctest-cc tcctest.c)
 target_link_libraries(tcctest-cc libtcc)
 set_target_properties(tcctest-cc PROPERTIES COMPILE_FLAGS -std=gnu99)
@@ -41,21 +46,21 @@ if(PYTHONINTERP_FOUND)
 
   # Object + link output
   set(TEST4 test4${CMAKE_EXECUTABLE_SUFFIX})
-  add_custom_command(OUTPUT test4.o COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -c -o test4.o DEPENDS tcc tcctest.c)
+  add_custom_command(OUTPUT test4.o COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -c -o test4.o DEPENDS tcc ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c)
   add_custom_command(OUTPUT ${TEST4} COMMAND tcc ${TCC_TEST_CFLAGS} test4.o -o ${TEST4} DEPENDS tcc test4.o)
   add_custom_target(test4-exe ALL DEPENDS ${TEST4})
   add_test(test4 ${TCCTEST_PY} ${CMAKE_CURRENT_BINARY_DIR}/${TEST4})
 
   # Dynamic output
   set(TEST5 test5${CMAKE_EXECUTABLE_SUFFIX})
-  add_custom_command(OUTPUT ${TEST5} COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -o ${TEST5} DEPENDS tcc tcctest.c)
+  add_custom_command(OUTPUT ${TEST5} COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -o ${TEST5} DEPENDS tcc ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c)
   add_custom_target(test5-exe ALL DEPENDS ${TEST5})
   add_test(test5 ${TCCTEST_PY} ${CMAKE_CURRENT_BINARY_DIR}/${TEST5})
 
   if(TCC_BCHECK)
     # Dynamic output + bound check
     set(TEST6 test6${CMAKE_EXECUTABLE_SUFFIX})
-    add_custom_command(OUTPUT ${TEST5} COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -b -o ${TEST6} DEPENDS tcc tcctest.c)
+    add_custom_command(OUTPUT ${TEST6} COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -b -o ${TEST6} DEPENDS tcc ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c)
     add_custom_target(test6-exe ALL DEPENDS ${TEST6})
     add_test(test6 ${TCCTEST_PY} ${CMAKE_CURRENT_BINARY_DIR}/${TEST6})
   endif()
@@ -63,7 +68,7 @@ if(PYTHONINTERP_FOUND)
   if(0)
     # Static output
     set(TEST7 test7${CMAKE_EXECUTABLE_SUFFIX})
-    add_custom_command(OUTPUT ${TEST7} COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -static -o ${TEST7} DEPENDS tcc tcctest.c)
+    add_custom_command(OUTPUT ${TEST7} COMMAND tcc ${TCC_TEST_CFLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c -static -o ${TEST7} DEPENDS tcc ${CMAKE_CURRENT_SOURCE_DIR}/tcctest.c)
     add_custom_target(test7-exe ALL DEPENDS ${TEST7})
     add_test(test7 ${TCCTEST_PY} ${CMAKE_CURRENT_BINARY_DIR}/${TEST7})
   endif()
diff --git a/tests/Makefile b/tests/Makefile
index edb581c4..08dfa42b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -14,6 +14,7 @@ TESTS = \
  libtest \
  test3 \
  abitest \
+ vla_test-run \
  moretests
 
 # test4 -- problem with -static
@@ -31,6 +32,10 @@ endif
 ifeq ($(TARGETOS),Darwin)
  TESTS := $(filter-out hello-exe test3 btest,$(TESTS))
 endif
+ifeq ($(ARCH),i386)
+else ifneq ($(ARCH),x86-64)
+ TESTS := $(filter-out vla_test-run,$(TESTS))
+endif
 
 ifdef DISABLE_STATIC
  export LD_LIBRARY_PATH:=$(CURDIR)/..
@@ -190,6 +195,12 @@ abitest: abitest-cc$(EXESUF) abitest-tcc$(EXESUF)
 	./abitest-cc$(EXESUF) lib_path=.. include="$(top_srcdir)/include"
 	./abitest-tcc$(EXESUF) lib_path=.. include="$(top_srcdir)/include"
 
+vla_test$(EXESUF): vla_test.c
+	$(TCC) -o $@ $^ $(CPPFLAGS) $(CFLAGS)
+vla_test-run: vla_test$(EXESUF)
+	@echo ------------ $@ ------------
+	./vla_test$(EXESUF)
+
 # targets for development
 %.bin: %.c tcc
 	$(TCC) -g -o $@ $<
diff --git a/tests/vla_test.c b/tests/vla_test.c
new file mode 100644
index 00000000..3616c46d
--- /dev/null
+++ b/tests/vla_test.c
@@ -0,0 +1,84 @@
+/*
+ * Test that allocating a variable length array in a loop
+ * does not use up a linear amount of memory
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define LOOP_COUNT 1000
+#define ARRAY_SIZE 100
+
+/* Zero n bytes at p (a VLA). This will overwrite the return address if SP is incorrect */
+void smash(char *p, int n) {
+  memset(p, 0, n);
+}
+
+int test1(int n) { /* VLA declared in a loop body; returns 0 on success, 1 on failure */
+  int i;
+  char *array_ptrs[LOOP_COUNT];
+  
+  for (i = 0; i < LOOP_COUNT; ++i) {
+    char test[n]; /* new VLA on every iteration */
+    smash(test, n);
+    array_ptrs[i] = test; /* remember where this iteration's VLA was placed */
+  }
+  
+  return (array_ptrs[0]-array_ptrs[LOOP_COUNT-1] < n) ? 0 : 1; /* success if first and last addresses are less than n apart */
+}
+
+/* ensure goto does not circumvent array free; returns 0 on success, 1 on failure */
+int test2(int n) {
+  char *array_ptrs[LOOP_COUNT];
+
+  int i = 0;
+loop:; /* backward goto target; the VLA below is declared again on every pass */
+  char test[n];
+  smash(test, n);
+  if (i >= LOOP_COUNT)
+    goto end;
+  array_ptrs[i] = test; /* remember where this pass's VLA was placed */
+  ++i;
+  goto loop;
+
+end:
+  smash(test, n);
+  char test2[n]; /* a further VLA declared after the forward goto target */
+  smash(test2, n);
+  return (array_ptrs[0] - array_ptrs[LOOP_COUNT-1] < n) ? 0 : 1; /* success if first and last addresses are less than n apart */
+}
+
+int test3(int n) { /* forward goto while a VLA is live; returns 0 on success, 1 on failure */
+  char test[n];
+  smash(test, n);
+  goto label;
+label:
+  smash(test, n); /* test must still be usable after the jump */
+  char test2[n];
+  smash(test2, n);
+  return (test-test2 >= n) ? 0 : 1; /* success if test2 starts at least n bytes below test */
+}
+
+#define RUN_TEST(t) /* run t(ARRAY_SIZE) when testname is NULL or equals #t; print result, set retval on failure */ \
+  if (!testname || (strcmp(#t, testname) == 0)) { \
+    fputs(#t "... ", stdout); \
+    fflush(stdout); \
+    if (t(ARRAY_SIZE) == 0) { \
+      fputs("success\n", stdout); \
+    } else { \
+      fputs("failure\n", stdout); \
+      retval = EXIT_FAILURE; \
+    } \
+  }
+
+int main(int argc, char **argv) { /* runs all tests, or only the one named by argv[1] */
+  const char *testname = NULL; /* NULL selects every test */
+  int retval = EXIT_SUCCESS; /* RUN_TEST sets this to EXIT_FAILURE when a test fails */
+  if (argc > 1)
+    testname = argv[1];
+  RUN_TEST(test1)
+  RUN_TEST(test2)
+  RUN_TEST(test3)
+  return retval;
+}
diff --git a/x86_64-gen.c b/x86_64-gen.c
index db24cddc..24fa2c6d 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -23,7 +23,7 @@
 #ifdef TARGET_DEFS_ONLY
 
 /* number of available registers */
-#define NB_REGS         24
+#define NB_REGS         25
 #define NB_ASM_REGS     8
 
 /* a register can belong to several classes. The classes must be
@@ -57,6 +57,7 @@ enum {
     TREG_RAX = 0,
     TREG_RCX = 1,
     TREG_RDX = 2,
+    TREG_RSP = 4,
     TREG_RSI = 6,
     TREG_RDI = 7,
 
@@ -74,7 +75,7 @@ enum {
     TREG_XMM6 = 22,
     TREG_XMM7 = 23,
 
-    TREG_ST0 = 4, // SP slot won't be used
+    TREG_ST0 = 24,
 
     TREG_MEM = 0x20,
 };
@@ -125,7 +126,7 @@ ST_DATA const int reg_classes[NB_REGS] = {
     /* ecx */ RC_INT | RC_RCX,
     /* edx */ RC_INT | RC_RDX,
     0,
-    /* st0 */ RC_ST0,
+    0,
     0,
     0,
     0,
@@ -147,7 +148,8 @@ ST_DATA const int reg_classes[NB_REGS] = {
        but they are not tagged with RC_FLOAT because they are
        callee saved on Windows */
     RC_XMM6,
-    RC_XMM7 
+    RC_XMM7,
+    /* st0 */ RC_ST0
 };
 
 static unsigned long func_sub_sp_offset;
@@ -2080,6 +2082,43 @@ void ggoto(void)
     vtop--;
 }
 
+/* Save the stack pointer into the local frame slot at offset addr from %rbp (returns nothing) */
+ST_FUNC void gen_vla_sp_save(int addr) {
+    /* mov %rsp,addr(%rbp) */
+    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
+}
+
+/* Restore the stack pointer from the local frame slot at offset addr from %rbp */
+ST_FUNC void gen_vla_sp_restore(int addr) {
+    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr); /* mov addr(%rbp),%rsp */
+}
+
+/* Subtract the allocation size from the stack pointer, and push the resulting value onto the value stack */
+ST_FUNC void gen_vla_alloc(CType *type, int align) {
+#ifdef TCC_TARGET_PE
+    /* alloca does more than just adjust %rsp on Windows */
+    vpush_global_sym(&func_old_type, TOK_alloca);
+    vswap(); /* Move alloca ref past allocation size */
+    gfunc_call(1);
+    vset(type, REG_IRET, 0);
+#else
+    int r;
+    r = gv(RC_INT); /* allocation size */
+    /* sub r,%rsp */
+    o(0x2b48);
+    o(0xe0 | REG_VALUE(r));
+    /* We always align to 16 bytes; the 'align' argument is not used */
+    /* and ~15, %rsp */
+    o(0xf0e48348);
+    /* mov %rsp, r */
+    o(0x8948);
+    o(0xe0 | REG_VALUE(r));
+    vpop();
+    vset(type, r, 0);
+#endif
+}
+
+
 /* end of x86-64 code generator */
 /*************************************************************/
 #endif /* ! TARGET_DEFS_ONLY */