From 3715f1d7ee302c220d36f545107bbf808fd979d9 Mon Sep 17 00:00:00 2001 From: Ziyao Date: Sat, 9 Jul 2022 12:53:29 +0800 Subject: [PATCH] Optimize small structure copying on x86_64 --- lib/bcheck.c | 12 ++++++++++++ tcc.h | 2 ++ tccgen.c | 35 +++++++++++++++++++++++++++++++++-- tcctok.h | 1 + x86_64-gen.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 98 insertions(+), 2 deletions(-) diff --git a/lib/bcheck.c b/lib/bcheck.c index 1f974b2e..4ec586b5 100644 --- a/lib/bcheck.c +++ b/lib/bcheck.c @@ -292,6 +292,7 @@ DLL_EXPORT int __bound_strncmp(const char *s1, const char *s2, size_t n); DLL_EXPORT char *__bound_strcat(char *dest, const char *src); DLL_EXPORT char *__bound_strchr(const char *string, int ch); DLL_EXPORT char *__bound_strdup(const char *s); +DLL_EXPORT void __bound_struct_copy(void *dst, const void *src, size_t size); #if defined(__arm__) && defined(__ARM_EABI__) DLL_EXPORT void *__bound___aeabi_memcpy(void *dst, const void *src, size_t size); @@ -424,6 +425,7 @@ static unsigned long long bound_strncmp_count; static unsigned long long bound_strcat_count; static unsigned long long bound_strchr_count; static unsigned long long bound_strdup_count; +static unsigned long long bound_struct_copy_count; static unsigned long long bound_not_found; #define INCR_COUNT(x) ++x #else @@ -1765,6 +1767,16 @@ void *__bound_memset(void *s, int c, size_t n) return memset(s, c, n); }
+/* Bounds-check both operands of an inline struct copy; copies nothing. */
+void __bound_struct_copy(void *dst, const void *src, size_t size)
+{
+    dprintf(stderr, "Copy struct from %p to %p,size %lx\n",
+            src, dst, (unsigned long)size); /* cast: %lx expects unsigned long */
+    INCR_COUNT(bound_struct_copy_count);
+    __bound_check(dst, size, "struct copy destination");
+    __bound_check(src, size, "struct copy source");
+}
+
 #if defined(__arm__) && defined(__ARM_EABI__) void *__bound___aeabi_memcpy(void *dest, const void *src, size_t n) { diff --git a/tcc.h b/tcc.h index 9bc12e38..34db3728 100644 --- a/tcc.h +++ b/tcc.h @@ -1644,6 +1644,8 @@ ST_FUNC void gen_increment_tcov (SValue *sv); /* ------------ 
x86_64-gen.c ------------ */ #ifdef TCC_TARGET_X86_64 +#define TCC_TARGET_NATIVE_STRUCT_COPY +ST_FUNC void gen_struct_copy(int size); ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c); ST_FUNC void gen_opl(int op); #ifdef TCC_TARGET_PE diff --git a/tccgen.c b/tccgen.c index c170ff3f..6bfdf4b0 100644 --- a/tccgen.c +++ b/tccgen.c @@ -3474,9 +3474,38 @@ ST_FUNC void vstore(void) if (sbt == VT_STRUCT) { /* if structure, only generate pointer */ /* structure assignment : generate memcpy */ - /* XXX: optimize if small size */ size = type_size(&vtop->type, &align); +#ifdef TCC_TARGET_NATIVE_STRUCT_COPY + if (size <= (PTR_SIZE << 4)) { + vswap(); +#ifdef CONFIG_TCC_BCHECK + if (vtop->r & VT_MUSTBOUND) + gbound(); +#endif + vtop->type.t = VT_PTR; + gaddrof(); + + vpushv(vtop - 1); +#ifdef CONFIG_TCC_BCHECK + if (vtop->r & VT_MUSTBOUND) + gbound(); +#endif + vtop->type.t = VT_PTR; + gaddrof(); /* src dest src */ +#ifdef CONFIG_TCC_BCHECK + if (tcc_state->do_bounds_check) { + vpush_helper_func(TOK___bound_struct_copy); + vpushv(vtop - 2); + vpushv(vtop - 2); + vpushi(size); + gfunc_call(3); + } +#endif + + gen_struct_copy(size); + } else { +#endif /* destination */ vswap(); #ifdef CONFIG_TCC_BCHECK @@ -3510,7 +3539,9 @@ ST_FUNC void vstore(void) vpushi(size); gfunc_call(3); /* leave source on stack */ - +#ifdef TCC_TARGET_NATIVE_STRUCT_COPY + } +#endif } else if (ft & VT_BITFIELD) { /* bitfield store handling */ diff --git a/tcctok.h b/tcctok.h index d4c1ef5c..ca09063b 100644 --- a/tcctok.h +++ b/tcctok.h @@ -330,6 +330,7 @@ DEF(TOK___bound_setjmp, "__bound_setjmp") DEF(TOK___bound_longjmp, "__bound_longjmp") DEF(TOK___bound_new_region, "__bound_new_region") + DEF(TOK___bound_struct_copy,"__bound_struct_copy") # ifdef TCC_TARGET_PE # ifdef TCC_TARGET_X86_64 DEF(TOK___bound_alloca_nr, "__bound_alloca_nr") diff --git a/x86_64-gen.c b/x86_64-gen.c index 29871d57..cad4da16 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -2282,6 +2282,56 @@ ST_FUNC void gen_vla_alloc(CType 
*type, int align) { } }
+/*
+ * Emit an inline copy of a SIZE-byte structure with the string-move
+ * instructions (source address in %rsi, destination address in %rdi).
+ * On entry the value stack holds, top first: src address, dest address,
+ * src struct. Both addresses are popped; the src struct stays pushed.
+ * The emitted code relies on the direction flag being clear, which the
+ * x86-64 calling conventions guarantee at function boundaries.
+ */
+ST_FUNC void gen_struct_copy(int size)
+{
+    int nq = size >> 3; /* number of whole 8-byte units */
+
+#ifdef TCC_TARGET_PE
+    /* %rsi and %rdi are callee-saved in the Microsoft x64 convention */
+    o(0x5756); /* push %rsi; push %rdi */
+#endif
+    save_reg(TREG_RSI);
+    load(TREG_RSI, vtop);
+    vtop->r = TREG_RSI;
+    vswap(); /* top first: dest address, src address, src struct */
+    save_reg(TREG_RDI);
+    load(TREG_RDI, vtop);
+    vtop->r = TREG_RDI;
+    /* 8-byte units go first so they stay at multiples of 8 from the */
+    /* start; vstore() allows up to PTR_SIZE << 4 bytes, so nq may exceed 4 */
+    if (nq > 4) {
+        save_reg(TREG_RCX);
+        vpushi(nq);
+        load(TREG_RCX, vtop);
+        vtop->r = TREG_RCX;
+        o(0xa548f3); /* rep movsq */
+        vpop();
+    } else {
+        for (; nq > 0; nq--)
+            o(0xa548); /* movsq */
+    }
+    /* then the remaining 0..7 bytes, largest unit first */
+    if (size & 0x04)
+        o(0xa5); /* movsl */
+    if (size & 0x02)
+        o(0xa566); /* movsw */
+    if (size & 0x01)
+        o(0xa4); /* movsb */
+#ifdef TCC_TARGET_PE
+    o(0x5e5f); /* pop %rdi; pop %rsi */
+#endif
+    vpop();
+    vpop();
+}
+
 /* end of x86-64 code generator */ /*************************************************************/