From 8227db3a23fd3cf11840eaa25eab5f3f5f813ac7 Mon Sep 17 00:00:00 2001 From: grischka Date: Sat, 22 Jun 2019 11:45:35 +0200 Subject: [PATCH] jump optimizations This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP with both a positive and a negative jump target list. Thus we can delay putting the non-inverted or inverted jump until we can see which one is more suitable (in most cases). example: if (a && b || c && d) e = 0; before this patch: a: 8b 45 fc mov 0xfffffffc(%ebp),%eax d: 83 f8 00 cmp $0x0,%eax 10: 0f 84 11 00 00 00 je 27 16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax 19: 83 f8 00 cmp $0x0,%eax 1c: 0f 84 05 00 00 00 je 27 22: e9 22 00 00 00 jmp 49 27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax 2a: 83 f8 00 cmp $0x0,%eax 2d: 0f 84 11 00 00 00 je 44 33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax 36: 83 f8 00 cmp $0x0,%eax 39: 0f 84 05 00 00 00 je 44 3f: e9 05 00 00 00 jmp 49 44: e9 08 00 00 00 jmp 51 49: b8 00 00 00 00 mov $0x0,%eax 4e: 89 45 ec mov %eax,0xffffffec(%ebp) 51: ... with this patch: a: 8b 45 fc mov 0xfffffffc(%ebp),%eax d: 83 f8 00 cmp $0x0,%eax 10: 0f 84 0c 00 00 00 je 22 16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax 19: 83 f8 00 cmp $0x0,%eax 1c: 0f 85 18 00 00 00 jne 3a 22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax 25: 83 f8 00 cmp $0x0,%eax 28: 0f 84 14 00 00 00 je 42 2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax 31: 83 f8 00 cmp $0x0,%eax 34: 0f 84 08 00 00 00 je 42 3a: b8 00 00 00 00 mov $0x0,%eax 3f: 89 45 ec mov %eax,0xffffffec(%ebp) 42: ... 
--- Makefile | 2 +- arm-gen.c | 77 +++++++--------- arm64-gen.c | 65 +++++++++++++- c67-gen.c | 40 ++++----- i386-gen.c | 92 +++++++------------ tcc.h | 21 +++-- tccgen.c | 228 +++++++++++++++++++++++++++++++++--------------- tests/tcctest.c | 4 +- x86_64-gen.c | 100 +++++++-------------- 9 files changed, 346 insertions(+), 283 deletions(-) diff --git a/Makefile b/Makefile index d5e10c8c..83d602ad 100644 --- a/Makefile +++ b/Makefile @@ -125,7 +125,7 @@ DEF-i386-win32 = -DTCC_TARGET_PE -DTCC_TARGET_I386 DEF-x86_64-win32= -DTCC_TARGET_PE -DTCC_TARGET_X86_64 DEF-x86_64-osx = -DTCC_TARGET_MACHO -DTCC_TARGET_X86_64 DEF-arm-wince = -DTCC_TARGET_PE -DTCC_TARGET_ARM -DTCC_ARM_EABI -DTCC_ARM_VFP -DTCC_ARM_HARDFLOAT -DEF-arm64 = -DTCC_TARGET_ARM64 +DEF-arm64 = -DTCC_TARGET_ARM64 -Wno-format DEF-c67 = -DTCC_TARGET_C67 -w # disable warnigs DEF-arm-fpa = -DTCC_TARGET_ARM DEF-arm-fpa-ld = -DTCC_TARGET_ARM -DLDOUBLE_SIZE=12 diff --git a/arm-gen.c b/arm-gen.c index 49738548..abc09426 100644 --- a/arm-gen.c +++ b/arm-gen.c @@ -1419,7 +1419,7 @@ ST_FUNC void gen_fill_nops(int bytes) } /* generate a jump to a label */ -int gjmp(int t) +ST_FUNC int gjmp(int t) { int r; if (nocode_wanted) @@ -1430,51 +1430,37 @@ int gjmp(int t) } /* generate a jump to a fixed address */ -void gjmp_addr(int a) +ST_FUNC void gjmp_addr(int a) { gjmp(a); } -/* generate a test. set 'inv' to invert test. 
Stack entry is popped */ -int gtst(int inv, int t) +ST_FUNC int gjmp_cond(int op, int t) { - int v, r; - uint32_t op; - - v = vtop->r & VT_VALMASK; + int r; + if (nocode_wanted) + return t; r=ind; + op=mapcc(op); + op|=encbranch(r,t,1); + o(op); + return r; +} - if (nocode_wanted) { - ; - } else if (v == VT_CMP) { - op=mapcc(inv?negcc(vtop->c.i):vtop->c.i); - op|=encbranch(r,t,1); - o(op); - t=r; - } else if (v == VT_JMP || v == VT_JMPI) { - if ((v & 1) == inv) { - if(!vtop->c.i) - vtop->c.i=t; - else { - uint32_t *x; - int p,lp; - if(t) { - p = vtop->c.i; - do { - p = decbranch(lp=p); - } while(p); - x = (uint32_t *)(cur_text_section->data + lp); - *x &= 0xff000000; - *x |= encbranch(lp,t,1); - } - t = vtop->c.i; - } - } else { - t = gjmp(t); - gsym(vtop->c.i); - } +ST_FUNC int gjmp_append(int n, int t) +{ + uint32_t *x; + int p,lp; + if(n) { + p = n; + do { + p = decbranch(lp=p); + } while(p); + x = (uint32_t *)(cur_text_section->data + lp); + *x &= 0xff000000; + *x |= encbranch(lp,t,1); + t = n; } - vtop--; return t; } @@ -1611,10 +1597,8 @@ void gen_opi(int op) o(opc|(r<<12)|fr); done: vtop--; - if (op >= TOK_ULT && op <= TOK_GT) { - vtop->r = VT_CMP; - vtop->c.i = op; - } + if (op >= TOK_ULT && op <= TOK_GT) + vset_VT_CMP(op); break; case 2: opc=0xE1A00000|(opc<<5); @@ -1730,9 +1714,7 @@ void gen_opf(int op) case TOK_UGE: op=TOK_GE; break; case TOK_UGT: op=TOK_GT; break; } - - vtop->r = VT_CMP; - vtop->c.i = op; + vset_VT_CMP(op); return; } r=gv(RC_FLOAT); @@ -1934,8 +1916,9 @@ void gen_opf(int op) } else { r2=fpr(gv(RC_FLOAT)); } - vtop[-1].r = VT_CMP; - vtop[-1].c.i = op; + --vtop; + vset_VT_CMP(op); + ++vtop; } else { tcc_error("unknown fp op %x!",op); return; diff --git a/arm64-gen.c b/arm64-gen.c index 40fd1a12..86672de6 100644 --- a/arm64-gen.c +++ b/arm64-gen.c @@ -91,6 +91,9 @@ static uint32_t fltr(int r) return r - TREG_F(0); } +#define dprintf(x) ((void)(tcc_state->verbose == 2 && printf x)) +//#define dprintf(x) + // Add an instruction to text 
section: ST_FUNC void o(unsigned int c) { @@ -100,6 +103,7 @@ ST_FUNC void o(unsigned int c) if (ind1 > cur_text_section->data_allocated) section_realloc(cur_text_section, ind1); write32le(cur_text_section->data + ind, c); + dprintf(("o %04x : %08x\n", ind, c)); //gr ind = ind1; } @@ -232,6 +236,7 @@ ST_FUNC void gsym_addr(int t_, int a_) tcc_error("branch out of range"); write32le(ptr, (a - t == 4 ? 0xd503201f : // nop 0x14000000 | ((a - t) >> 2 & 0x3ffffff))); // b + dprintf((". gsym TARG=%04x ADDR=%04x\n", t, a)); //gr t = next; } } @@ -440,6 +445,8 @@ static void arm64_sym(int r, Sym *sym, unsigned long addend) } } +static void arm64_load_cmp(int r, SValue *sv); + ST_FUNC void load(int r, SValue *sv) { int svtt = sv->type.t; @@ -531,6 +538,11 @@ ST_FUNC void load(int r, SValue *sv) return; } + if (svr == VT_CMP) { + arm64_load_cmp(r, sv); + return; + } + printf("load(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul); assert(0); } @@ -1284,6 +1296,7 @@ ST_FUNC void gen_fill_nops(int bytes) ST_FUNC int gjmp(int t) { int r = ind; + dprintf((". gjmp T=%04x\n", t)); //gr if (nocode_wanted) return t; o(t); @@ -1295,11 +1308,57 @@ ST_FUNC void gjmp_addr(int a) { assert(a - ind + 0x8000000 < 0x10000000); o(0x14000000 | ((a - ind) >> 2 & 0x3ffffff)); + dprintf((". gjmp_addr T=%04x\n", a)); //gr } -ST_FUNC int gtst(int inv, int t) +ST_FUNC int gjmp_append(int n, int t) +{ + void *p; + /* insert vtop->c jump list in t */ + if (n) { + uint32_t n1 = n, n2; + while ((n2 = read32le(p = cur_text_section->data + n1))) + n1 = n2; + write32le(p, t); + t = n; + } + return t; +} + +void arm64_vset_VT_CMP(int op) +{ + if (op >= TOK_ULT && op <= TOK_GT) { + vtop->cmp_r = vtop->r; + vset_VT_CMP(0x80); + dprintf((". set VT_CMP OP(%s) R=%x\n", get_tok_str(op, 0), vtop->cmp_r)); + } +} + +static void arm64_gen_opil(int op, uint32_t l); + +static void arm64_load_cmp(int r, SValue *sv) +{ + sv->r = sv->cmp_r; + dprintf((". 
load VT_CMP OP(%x), R=%x/%x\n", (int)sv->c.i, sv->r, r)); + if (sv->c.i & 1) { + vpushi(1); + arm64_gen_opil('^', 0); + } + if (r != sv->r) { + load(r, sv); + sv->r = r; + } + dprintf((". load VT_CMP done\n")); //gr +} + +ST_FUNC int gjmp_cond(int op, int t) { int bt = vtop->type.t & VT_BTYPE; + + int inv = op & 1; + vtop->r = vtop->cmp_r; + dprintf((". gjmp_cond OP(%x) R=%x T=%04x\n", op, vtop->r, t)); //gr + if (bt == VT_LDOUBLE) { uint32_t a, b, f = fltr(gv(RC_FLOAT)); a = get_reg(RC_INT); @@ -1324,7 +1383,6 @@ ST_FUNC int gtst(int inv, int t) uint32_t a = intr(gv(RC_INT)); o(0x34000040 | a | !!inv << 24 | ll << 31); // cbz/cbnz wA,.+8 } - --vtop; return gjmp(t); } @@ -1553,11 +1611,13 @@ static void arm64_gen_opil(int op, uint32_t l) ST_FUNC void gen_opi(int op) { arm64_gen_opil(op, 0); + arm64_vset_VT_CMP(op); } ST_FUNC void gen_opl(int op) { arm64_gen_opil(op, 1); + arm64_vset_VT_CMP(op); } ST_FUNC void gen_opf(int op) @@ -1657,6 +1717,7 @@ ST_FUNC void gen_opf(int op) default: assert(0); } + arm64_vset_VT_CMP(op); } // Generate sign extension from 32 to 64 bits: diff --git a/c67-gen.c b/c67-gen.c index 2d7af093..880a5720 100644 --- a/c67-gen.c +++ b/c67-gen.c @@ -2072,15 +2072,13 @@ void gjmp_addr(int a) } /* generate a test. set 'inv' to invert test. 
Stack entry is popped */ -int gtst(int inv, int t) +ST_FUNC int gjmp_cond(int op, int t) { - int ind1, n; - int v, *p; + int ind1; + int inv = op & 1; + if (nocode_wanted) + return t; - v = vtop->r & VT_VALMASK; - if (nocode_wanted) { - ; - } else if (v == VT_CMP) { /* fast case : can jump directly since flags are set */ // C67 uses B2 sort of as flags register ind1 = ind; @@ -2098,16 +2096,18 @@ int gtst(int inv, int t) C67_NOP(5); t = ind1; //return where we need to patch - } else if (v == VT_JMP || v == VT_JMPI) { - /* && or || optimization */ - if ((v & 1) == inv) { + return t; +} + +ST_FUNC int gjmp_append(int n0, int t) +{ + if (n0) { + int n = n0, *p; /* insert vtop->c jump list in t */ // I guess the idea is to traverse to the // null at the end of the list and store t // there - - n = vtop->c.i; while (n != 0) { p = (int *) (cur_text_section->data + n); @@ -2117,14 +2117,8 @@ int gtst(int inv, int t) } *p |= (t & 0xffff) << 7; *(p + 1) |= ((t >> 16) & 0xffff) << 7; - t = vtop->c.i; - - } else { - t = gjmp(t); - gsym(vtop->c.i); - } + t = n0; } - vtop--; return t; } @@ -2200,10 +2194,8 @@ void gen_opi(int op) ALWAYS_ASSERT(FALSE); vtop--; - if (op >= TOK_ULT && op <= TOK_GT) { - vtop->r = VT_CMP; - vtop->c.i = op; - } + if (op >= TOK_ULT && op <= TOK_GT) + vset_VT_CMP(0x80); break; case '-': case TOK_SUBC1: /* sub with carry generation */ @@ -2359,7 +2351,7 @@ void gen_opf(int op) } else { ALWAYS_ASSERT(FALSE); } - vtop->r = VT_CMP; // tell TCC that result is in "flags" actually B2 + vset_VT_CMP(0x80); } else { if (op == '+') { if ((ft & VT_BTYPE) == VT_DOUBLE) { diff --git a/i386-gen.c b/i386-gen.c index abe2b910..51fbf073 100644 --- a/i386-gen.c +++ b/i386-gen.c @@ -261,10 +261,10 @@ ST_FUNC void load(int r, SValue *sv) o(0xe8 + r); /* mov %ebp, r */ } } else if (v == VT_CMP) { - oad(0xb8 + r, 0); /* mov $0, r */ o(0x0f); /* setxx %br */ o(fc); o(0xc0 + r); + o(0xc0b60f + r * 0x90000); /* movzbl %al, %eax */ } else if (v == VT_JMP || v == VT_JMPI) { t = v 
& 1; oad(0xb8 + r, t); /* mov $1, r */ @@ -692,63 +692,39 @@ ST_FUNC void gjmp_addr(int a) } } -ST_FUNC void gtst_addr(int inv, int a) +#if 0 +/* generate a jump to a fixed address */ +ST_FUNC void gjmp_cond_addr(int a, int op) { - int v = vtop->r & VT_VALMASK; - if (v == VT_CMP) { - inv ^= (vtop--)->c.i; - a -= ind + 2; - if (a == (char)a) { - g(inv - 32); - g(a); - } else { - g(0x0f); - oad(inv - 16, a - 4); - } - } else if ((v & ~1) == VT_JMP) { - if ((v & 1) != inv) { - gjmp_addr(a); - gsym(vtop->c.i); - } else { - gsym(vtop->c.i); - o(0x05eb); - gjmp_addr(a); - } - vtop--; - } + int r = a - ind - 2; + if (r == (char)r) + g(op - 32), g(r); + else + g(0x0f), gjmp2(op - 16, r - 4); } +#endif -/* generate a test. set 'inv' to invert test. Stack entry is popped */ -ST_FUNC int gtst(int inv, int t) +ST_FUNC int gjmp_append(int n, int t) { - int v = vtop->r & VT_VALMASK; - if (nocode_wanted) { - ; - } else if (v == VT_CMP) { - /* fast case : can jump directly since flags are set */ - g(0x0f); - t = gjmp2((vtop->c.i - 16) ^ inv, t); - } else if (v == VT_JMP || v == VT_JMPI) { - /* && or || optimization */ - if ((v & 1) == inv) { - /* insert vtop->c jump list in t */ - uint32_t n1, n = vtop->c.i; - if (n) { - while ((n1 = read32le(cur_text_section->data + n))) - n = n1; - write32le(cur_text_section->data + n, t); - t = vtop->c.i; - } - } else { - t = gjmp(t); - gsym(vtop->c.i); - } + void *p; + /* insert vtop->c jump list in t */ + if (n) { + uint32_t n1 = n, n2; + while ((n2 = read32le(p = cur_text_section->data + n1))) + n1 = n2; + write32le(p, t); + t = n; } - vtop--; return t; } -/* generate an integer binary operation */ +ST_FUNC int gjmp_cond(int op, int t) +{ + g(0x0f); + t = gjmp2(op - 16, t); + return t; +} + ST_FUNC void gen_opi(int op) { int r, fr, opc, c; @@ -766,10 +742,9 @@ ST_FUNC void gen_opi(int op) c = vtop->c.i; if (c == (char)c) { /* generate inc and dec for smaller code */ - if (c==1 && opc==0 && op != TOK_ADDC1) { - o (0x40 | r); // inc - } else 
if (c==1 && opc==5 && op != TOK_SUBC1) { - o (0x48 | r); // dec + if ((c == 1 || c == -1) && (op == '+' || op == '-')) { + opc = (c == 1) ^ (op == '+'); + o (0x40 | (opc << 3) | r); // inc,dec } else { o(0x83); o(0xc0 | (opc << 3) | r); @@ -787,10 +762,8 @@ ST_FUNC void gen_opi(int op) o(0xc0 + r + fr * 8); } vtop--; - if (op >= TOK_ULT && op <= TOK_GT) { - vtop->r = VT_CMP; - vtop->c.i = op; - } + if (op >= TOK_ULT && op <= TOK_GT) + vset_VT_CMP(op); break; case '-': case TOK_SUBC1: /* sub with carry generation */ @@ -948,8 +921,7 @@ ST_FUNC void gen_opf(int op) op = TOK_EQ; } vtop--; - vtop->r = VT_CMP; - vtop->c.i = op; + vset_VT_CMP(op); } else { /* no memory reference possible for long double operations */ if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { diff --git a/tcc.h b/tcc.h index fdad5624..1470dc95 100644 --- a/tcc.h +++ b/tcc.h @@ -430,9 +430,15 @@ typedef struct SValue { unsigned short r; /* register + flags */ unsigned short r2; /* second register, used for 'long long' type. If not used, set to VT_CONST */ - CValue c; /* constant, if VT_CONST */ - struct Sym *sym; /* symbol, if (VT_SYM | VT_CONST), or if - result of unary() for an identifier. */ + union { + struct { int jtrue, jfalse; }; /* forward jmps */ + CValue c; /* constant, if VT_CONST */ + }; + union { + struct { unsigned short cmp_op, cmp_r; }; /* VT_CMP operation */ + struct Sym *sym; /* symbol, if (VT_SYM | VT_CONST), or if */ + }; /* result of unary() for an identifier. 
*/ + } SValue; /* symbol attributes */ @@ -1322,6 +1328,7 @@ ST_FUNC ElfSym *elfsym(Sym *); ST_FUNC void update_storage(Sym *sym); ST_FUNC Sym *external_global_sym(int v, CType *type); ST_FUNC void vset(CType *type, int r, int v); +ST_FUNC void vset_VT_CMP(int op); ST_FUNC void vswap(void); ST_FUNC void vpush_global_sym(CType *type, int v); ST_FUNC void vrote(SValue *e, int n); @@ -1490,12 +1497,8 @@ ST_FUNC void gfunc_epilog(void); ST_FUNC void gen_fill_nops(int); ST_FUNC int gjmp(int t); ST_FUNC void gjmp_addr(int a); -ST_FUNC int gtst(int inv, int t); -#if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64 -ST_FUNC void gtst_addr(int inv, int a); -#else -#define gtst_addr(inv, a) gsym_addr(gtst(inv, 0), a) -#endif +ST_FUNC int gjmp_cond(int op, int t); +ST_FUNC int gjmp_append(int n, int t); ST_FUNC void gen_opi(int op); ST_FUNC void gen_opf(int op); ST_FUNC void gen_cvt_ftoi(int t); diff --git a/tccgen.c b/tccgen.c index 6cc842d2..38dc33f1 100644 --- a/tccgen.c +++ b/tccgen.c @@ -681,13 +681,8 @@ ST_FUNC void sym_pop(Sym **ptop, Sym *b, int keep) } /* ------------------------------------------------------------------------- */ - -static void vsetc(CType *type, int r, CValue *vc) +static void vcheck_cmp(void) { - int v; - - if (vtop >= vstack + (VSTACK_SIZE - 1)) - tcc_error("memory full (vstack)"); /* cannot let cpu flags if other instruction are generated. Also avoid leaving VT_JMP anywhere except on the top of the stack because it would complicate the code generator. @@ -698,15 +693,17 @@ static void vsetc(CType *type, int r, CValue *vc) as their value might still be used for real. All values we push under nocode_wanted will eventually be popped again, so that the VT_CMP/VT_JMP value will be in vtop - when code is unsuppressed again. + when code is unsuppressed again. 
*/ - Same logic below in vswap(); */ - if (vtop >= vstack && !nocode_wanted) { - v = vtop->r & VT_VALMASK; - if (v == VT_CMP || (v & ~1) == VT_JMP) - gv(RC_INT); - } + if (vtop->r == VT_CMP && !nocode_wanted) + gv(RC_INT); +} +static void vsetc(CType *type, int r, CValue *vc) +{ + if (vtop >= vstack + (VSTACK_SIZE - 1)) + tcc_error("memory full (vstack)"); + vcheck_cmp(); vtop++; vtop->type = *type; vtop->r = r; @@ -718,12 +715,8 @@ static void vsetc(CType *type, int r, CValue *vc) ST_FUNC void vswap(void) { SValue tmp; - /* cannot vswap cpu flags. See comment at vsetc() above */ - if (vtop >= vstack && !nocode_wanted) { - int v = vtop->r & VT_VALMASK; - if (v == VT_CMP || (v & ~1) == VT_JMP) - gv(RC_INT); - } + + vcheck_cmp(); tmp = vtop[0]; vtop[0] = vtop[-1]; vtop[-1] = tmp; @@ -740,9 +733,10 @@ ST_FUNC void vpop(void) o(0xd8dd); /* fstp %st(0) */ } else #endif - if (v == VT_JMP || v == VT_JMPI) { + if (v == VT_CMP) { /* need to put correct jump if && or || without test */ - gsym(vtop->c.i); + gsym(vtop->jtrue); + gsym(vtop->jfalse); } vtop--; } @@ -823,6 +817,7 @@ ST_FUNC void vrotb(int n) int i; SValue tmp; + vcheck_cmp(); tmp = vtop[-n + 1]; for(i=-n+1;i!=0;i++) vtop[i] = vtop[i+1]; @@ -837,6 +832,7 @@ ST_FUNC void vrote(SValue *e, int n) int i; SValue tmp; + vcheck_cmp(); tmp = *e; for(i = 0;i < n - 1; i++) e[-i] = e[-i - 1]; @@ -851,6 +847,75 @@ ST_FUNC void vrott(int n) vrote(vtop, n); } +/* ------------------------------------------------------------------------- */ +/* vtop->r = VT_CMP means CPU-flags have been set from comparison or test. 
*/ + +/* called from generators to set the result from relational ops */ +ST_FUNC void vset_VT_CMP(int op) +{ + vtop->r = VT_CMP; + vtop->cmp_op = op; + vtop->jfalse = 0; + vtop->jtrue = 0; +} + +/* called once before asking generators to load VT_CMP to a register */ +static void vset_VT_JMP(void) +{ + int op = vtop->cmp_op; + if (vtop->jtrue || vtop->jfalse) { + /* we need to jump to 'mov $0,%R' or 'mov $1,%R' */ + int inv = op & (op < 2); /* small optimization */ + vseti(VT_JMP+inv, gvtst(inv, 0)); + } else { + /* otherwise convert flags (rsp. 0/1) to register */ + vtop->c.i = op; + if (op < 2) /* doesn't seem to happen */ + vtop->r = VT_CONST; + } +} + +/* Set CPU Flags, doesn't yet jump */ +static void gvtst_set(int inv, int t) +{ + int *p; + if (vtop->r != VT_CMP) { + vpushi(0); + gen_op(TOK_NE); + if (vtop->r != VT_CMP) /* must be VT_CONST then */ + vset_VT_CMP(vtop->c.i != 0); + } + p = inv ? &vtop->jfalse : &vtop->jtrue; + *p = gjmp_append(*p, t); +} + +/* Generate value test + * + * Generate a test for any value (jump, comparison and integers) */ +static int gvtst(int inv, int t) +{ + int op, u, x; + + gvtst_set(inv, t); + + t = vtop->jtrue, u = vtop->jfalse; + if (inv) + x = u, u = t, t = x; + op = vtop->cmp_op; + + /* jump to the wanted target */ + if (op > 1) + t = gjmp_cond(op ^ inv, t); + else if (op != inv) + t = gjmp(t); + /* resolve complementary jumps to here */ + gsym(u); + + vtop--; + return t; +} + +/* ------------------------------------------------------------------------- */ /* push a symbol value of TYPE */ static inline void vpushsym(CType *type, Sym *sym) { @@ -1591,6 +1656,8 @@ ST_FUNC int gv(int rc) /* restore wanted type */ vtop->type.t = t1; } else { + if (vtop->r == VT_CMP) + vset_VT_JMP(); /* one register type load */ load(r, vtop); } @@ -1608,13 +1675,10 @@ ST_FUNC int gv(int rc) /* generate vtop[-1] and vtop[0] in resp. classes rc1 and rc2 */ ST_FUNC void gv2(int rc1, int rc2) { - int v; - /* generate more generic register first. 
But VT_JMP or VT_CMP values must be generated first in all cases to avoid possible reload errors */ - v = vtop[0].r & VT_VALMASK; - if (v != VT_CMP && (v & ~1) != VT_JMP && rc1 <= rc2) { + if (vtop->r != VT_CMP && rc1 <= rc2) { vswap(); gv(rc1); vswap(); @@ -1749,26 +1813,6 @@ static void gv_dup(void) } } -/* Generate value test - * - * Generate a test for any value (jump, comparison and integers) */ -ST_FUNC int gvtst(int inv, int t) -{ - int v = vtop->r & VT_VALMASK; - if (v != VT_CMP && v != VT_JMP && v != VT_JMPI) { - vpushi(0); - gen_op(TOK_NE); - } - if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { - /* constant jmp optimization */ - if ((vtop->c.i != 0) != inv) - t = gjmp(t); - vtop--; - return t; - } - return gtst(inv, t); -} - #if PTR_SIZE == 4 /* generate CPU independent (unsigned) long long operations */ static void gen_opl(int op) @@ -1974,8 +2018,8 @@ static void gen_opl(int op) a = gvtst(1, 0); if (op != TOK_EQ) { /* generate non equal test */ - vpushi(TOK_NE); - vtop->r = VT_CMP; + vpushi(0); + vset_VT_CMP(TOK_NE); b = gvtst(0, 0); } } @@ -1990,9 +2034,12 @@ static void gen_opl(int op) else if (op1 == TOK_GE) op1 = TOK_UGE; gen_op(op1); - a = gvtst(1, a); - gsym(b); - vseti(VT_JMPI, a); +#if 0//def TCC_TARGET_I386 + if (op == TOK_NE) { gsym(b); break; } + if (op == TOK_EQ) { gsym(a); break; } +#endif + gvtst_set(1, a); + gvtst_set(0, b); break; } } @@ -5001,11 +5048,12 @@ ST_FUNC void unary(void) if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { gen_cast_s(VT_BOOL); vtop->c.i = !vtop->c.i; - } else if ((vtop->r & VT_VALMASK) == VT_CMP) - vtop->c.i ^= 1; - else { - save_regs(1); - vseti(VT_JMP, gvtst(1, 0)); + } else if (vtop->r == VT_CMP) { + vtop->cmp_op ^= 1; + n = vtop->jfalse, vtop->jfalse = vtop->jtrue, vtop->jtrue = n; + } else { + vpushi(0); + gen_op(TOK_EQ); } break; case '~': @@ -5035,7 +5083,9 @@ ST_FUNC void unary(void) next(); in_sizeof++; expr_type(&type, unary); /* Perform a in_sizeof = 0; */ - s = 
vtop[1].sym; /* hack: accessing previous vtop */ + s = NULL; + if (vtop[1].r & VT_SYM) + s = vtop[1].sym; /* hack: accessing previous vtop */ size = type_size(&type, &align); if (s && s->a.aligned) align = 1 << (s->a.aligned - 1); @@ -5653,7 +5703,7 @@ static void expr_landor(void(*e_fn)(void), int e_op, int i) gsym(t); nocode_wanted -= f; } else { - vseti(VT_JMP + i, gvtst(i, t)); + gvtst_set(i, t); } break; } @@ -5696,6 +5746,16 @@ static int condition_3way(void) return c; } +static int is_cond_bool(SValue *sv) +{ + if ((sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST + && (sv->type.t & VT_BTYPE) == VT_INT) + return (unsigned)sv->c.i < 2; + if (sv->r == VT_CMP) + return 1; + return 0; +} + static void expr_cond(void) { int tt, u, r1, r2, rc, t1, t2, bt1, bt2, islv, c, g; @@ -5741,6 +5801,12 @@ static void expr_cond(void) if (!g) gexpr(); + if (c < 0 && vtop->r == VT_CMP) { + t1 = gvtst(0, 0); + vpushi(0); + gvtst_set(0, t1); + } + if ((vtop->type.t & VT_BTYPE) == VT_FUNC) mk_pointer(&vtop->type); type1 = vtop->type; @@ -5761,6 +5827,22 @@ static void expr_cond(void) skip(':'); expr_cond(); + if (c < 0 && is_cond_bool(vtop) && is_cond_bool(&sv)) { + if (sv.r == VT_CMP) { + t1 = sv.jtrue; + t2 = u; + } else { + t1 = gvtst(0, 0); + t2 = gjmp(0); + gsym(u); + vpushv(&sv); + } + gvtst_set(0, t1); + gvtst_set(1, t2); + nocode_wanted = ncw_prev; + // tcc_warning("two conditions expr_cond"); + return; + } if ((vtop->type.t & VT_BTYPE) == VT_FUNC) mk_pointer(&vtop->type); @@ -6059,12 +6141,16 @@ static int case_cmp(const void *pa, const void *pb) return a < b ? 
-1 : a > b; } +static void gtst_addr(int t, int a) +{ + gsym_addr(gvtst(0, t), a); +} + static void gcase(struct case_t **base, int len, int *bsym) { struct case_t *p; int e; int ll = (vtop->type.t & VT_BTYPE) == VT_LLONG; - gv(RC_INT); while (len > 4) { /* binary search */ p = base[len/2]; @@ -6074,7 +6160,7 @@ static void gcase(struct case_t **base, int len, int *bsym) else vpushi(p->v2); gen_op(TOK_LE); - e = gtst(1, 0); + e = gvtst(1, 0); vdup(); if (ll) vpushll(p->v1); @@ -6084,10 +6170,6 @@ static void gcase(struct case_t **base, int len, int *bsym) gtst_addr(0, p->sym); /* v1 <= x <= v2 */ /* x < v1 */ gcase(base, len/2, bsym); - if (cur_switch->def_sym) - gjmp_addr(cur_switch->def_sym); - else - *bsym = gjmp(*bsym); /* x > v2 */ gsym(e); e = len/2 + 1; @@ -6106,7 +6188,7 @@ static void gcase(struct case_t **base, int len, int *bsym) gtst_addr(0, p->sym); } else { gen_op(TOK_LE); - e = gtst(1, 0); + e = gvtst(1, 0); vdup(); if (ll) vpushll(p->v1); @@ -6117,6 +6199,7 @@ static void gcase(struct case_t **base, int len, int *bsym) gsym(e); } } + *bsym = gjmp(*bsym); } /* call 'func' for each __attribute__((cleanup(func))) */ @@ -6367,15 +6450,17 @@ static void block(int *bsym, Sym *bcl, int *csym, Sym *ccl, int is_expr) gexpr(); skip(')'); switchval = *vtop--; - a = 0; - b = gjmp(0); /* jump to first case */ + sw.p = NULL; sw.n = 0; sw.def_sym = 0; saved = cur_switch; cur_switch = &sw; + a = 0; + b = gjmp(0); /* jump to first case */ block(&a, current_cleanups, csym, ccl, 0); a = gjmp(a); /* add implicit break */ /* case lookup */ gsym(b); + qsort(sw.p, sw.n, sizeof(void*), case_cmp); for (b = 1; b < sw.n; b++) if (sw.p[b - 1]->v2 >= sw.p[b]->v1) @@ -6385,14 +6470,17 @@ static void block(int *bsym, Sym *bcl, int *csym, Sym *ccl, int is_expr) if ((switchval.type.t & VT_BTYPE) == VT_LLONG) switchval.type.t &= ~VT_UNSIGNED; vpushv(&switchval); - gcase(sw.p, sw.n, &a); + gv(RC_INT); + d = 0, gcase(sw.p, sw.n, &d); vpop(); if (sw.def_sym) - gjmp_addr(sw.def_sym); - 
dynarray_reset(&sw.p, &sw.n); - cur_switch = saved; + gsym_addr(d, sw.def_sym); + else + gsym(d); /* break label */ gsym(a); + dynarray_reset(&sw.p, &sw.n); + cur_switch = saved; } else if (t == TOK_CASE) { struct case_t *cr = tcc_malloc(sizeof(struct case_t)); diff --git a/tests/tcctest.c b/tests/tcctest.c index c054479b..b4e201cd 100644 --- a/tests/tcctest.c +++ b/tests/tcctest.c @@ -3777,6 +3777,7 @@ void math_cmp_test(void) double one = 1.0; double two = 2.0; int comp = 0; + int v; #define bug(a,b,op,iop,part) printf("Test broken: %s %s %s %s %d\n", #a, #b, #op, #iop, part) /* This asserts that "a op b" is _not_ true, but "a iop b" is true. @@ -3798,7 +3799,8 @@ void math_cmp_test(void) if ((a iop b) || comp) \ ; \ else \ - bug (a,b,op,iop,5); + bug (a,b,op,iop,5); \ + if (v = !(a op b), !v) bug(a,b,op,iop,7); /* Equality tests. */ FCMP(nan, nan, ==, !=, 0); diff --git a/x86_64-gen.c b/x86_64-gen.c index 9a0100a0..3260546d 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -471,22 +471,22 @@ void load(int r, SValue *sv) orex(1,0,r,0x8d); /* lea xxx(%ebp), r */ gen_modrm(r, VT_LOCAL, sv->sym, fc); } else if (v == VT_CMP) { - orex(0,r,0,0); - if ((fc & ~0x100) != TOK_NE) - oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */ - else - oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */ if (fc & 0x100) { + v = vtop->cmp_r; + fc &= ~0x100; /* This was a float compare. If the parity bit is set the result was unordered, meaning false for everything except TOK_NE, and true for TOK_NE. 
*/ - fc &= ~0x100; - o(0x037a + (REX_BASE(r) << 8)); - } + orex(0, r, 0, 0xb0 + REG_VALUE(r)); /* mov $0/1,%al */ + g(v ^ fc ^ (v == TOK_NE)); + o(0x037a + (REX_BASE(r) << 8)); + } orex(0,r,0, 0x0f); /* setxx %br */ o(fc); o(0xc0 + REG_VALUE(r)); + orex(0,r,0, 0x0f); + o(0xc0b6 + REG_VALUE(r) * 0x900); /* movzbl %al, %eax */ } else if (v == VT_JMP || v == VT_JMPI) { t = v & 1; orex(0,r,0,0); @@ -1666,42 +1666,23 @@ void gjmp_addr(int a) } } -ST_FUNC void gtst_addr(int inv, int a) +ST_FUNC int gjmp_append(int n, int t) { - int v = vtop->r & VT_VALMASK; - if (v == VT_CMP) { - inv ^= (vtop--)->c.i; - a -= ind + 2; - if (a == (char)a) { - g(inv - 32); - g(a); - } else { - g(0x0f); - oad(inv - 16, a - 4); - } - } else if ((v & ~1) == VT_JMP) { - if ((v & 1) != inv) { - gjmp_addr(a); - gsym(vtop->c.i); - } else { - gsym(vtop->c.i); - o(0x05eb); - gjmp_addr(a); - } - vtop--; + void *p; + /* insert vtop->c jump list in t */ + if (n) { + uint32_t n1 = n, n2; + while ((n2 = read32le(p = cur_text_section->data + n1))) + n1 = n2; + write32le(p, t); + t = n; } + return t; } -/* generate a test. set 'inv' to invert test. Stack entry is popped */ -ST_FUNC int gtst(int inv, int t) +ST_FUNC int gjmp_cond(int op, int t) { - int v = vtop->r & VT_VALMASK; - - if (nocode_wanted) { - ; - } else if (v == VT_CMP) { - /* fast case : can jump directly since flags are set */ - if (vtop->c.i & 0x100) + if (op & 0x100) { /* This was a float compare. If the parity flag is set the result was unordered. For anything except != this @@ -1710,9 +1691,10 @@ ST_FUNC int gtst(int inv, int t) Take care about inverting the test. We need to jump to our target if the result was unordered and test wasn't NE, otherwise if unordered we don't want to jump. 
*/ - vtop->c.i &= ~0x100; - if (inv == (vtop->c.i == TOK_NE)) - o(0x067a); /* jp +6 */ + int v = vtop->cmp_r; + op &= ~0x100; + if (op ^ v ^ (v != TOK_NE)) + o(0x067a); /* jp +6 */ else { g(0x0f); @@ -1720,25 +1702,8 @@ ST_FUNC int gtst(int inv, int t) } } g(0x0f); - t = gjmp2((vtop->c.i - 16) ^ inv, t); - } else if (v == VT_JMP || v == VT_JMPI) { - /* && or || optimization */ - if ((v & 1) == inv) { - /* insert vtop->c jump list in t */ - uint32_t n1, n = vtop->c.i; - if (n) { - while ((n1 = read32le(cur_text_section->data + n))) - n = n1; - write32le(cur_text_section->data + n, t); - t = vtop->c.i; - } - } else { - t = gjmp(t); - gsym(vtop->c.i); - } - } - vtop--; - return t; + t = gjmp2(op - 16, t); + return t; } /* generate an integer binary operation */ @@ -1779,10 +1744,8 @@ void gen_opi(int op) o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); } vtop--; - if (op >= TOK_ULT && op <= TOK_GT) { - vtop->r = VT_CMP; - vtop->c.i = op; - } + if (op >= TOK_ULT && op <= TOK_GT) + vset_VT_CMP(op); break; case '-': case TOK_SUBC1: /* sub with carry generation */ @@ -1937,8 +1900,7 @@ void gen_opf(int op) op = TOK_EQ; } vtop--; - vtop->r = VT_CMP; - vtop->c.i = op; + vset_VT_CMP(op); } else { /* no memory reference possible for long double operations */ load(TREG_ST0, vtop); @@ -2016,8 +1978,8 @@ void gen_opf(int op) } vtop--; - vtop->r = VT_CMP; - vtop->c.i = op | 0x100; + vset_VT_CMP(op | 0x100); + vtop->cmp_r = op; } else { assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); switch(op) {