Optimize small structure copying on x86_64

This commit is contained in:
Ziyao 2022-07-09 12:53:29 +08:00
parent fe7ee1105c
commit 3715f1d7ee
No known key found for this signature in database
GPG Key ID: A32FA9E5C3BB744D
5 changed files with 98 additions and 2 deletions

View File

@ -292,6 +292,7 @@ DLL_EXPORT int __bound_strncmp(const char *s1, const char *s2, size_t n);
DLL_EXPORT char *__bound_strcat(char *dest, const char *src);
DLL_EXPORT char *__bound_strchr(const char *string, int ch);
DLL_EXPORT char *__bound_strdup(const char *s);
/* bounds-check hook for struct copies: checks `size` bytes at both dst and
   src; the function itself does not copy anything */
DLL_EXPORT void __bound_struct_copy(void *dst,void *src,size_t size);
#if defined(__arm__) && defined(__ARM_EABI__)
DLL_EXPORT void *__bound___aeabi_memcpy(void *dst, const void *src, size_t size);
@ -424,6 +425,7 @@ static unsigned long long bound_strncmp_count;
static unsigned long long bound_strcat_count;
static unsigned long long bound_strchr_count;
static unsigned long long bound_strdup_count;
static unsigned long long bound_struct_copy_count;
static unsigned long long bound_not_found;
#define INCR_COUNT(x) ++x
#else
@ -1765,6 +1767,16 @@ void *__bound_memset(void *s, int c, size_t n)
return memset(s, c, n);
}
/*
 * Bounds-checking hook for a struct assignment.
 *
 * Only validates the two memory regions with __bound_check(); the copy
 * itself is not performed here and nothing is written through dst.
 *
 *   dst  - start of the destination object
 *   src  - start of the source object
 *   size - number of bytes about to be copied
 */
void __bound_struct_copy(void *dst,void *src,size_t size)
{
    /* size_t is not guaranteed to be unsigned long (e.g. on 64-bit
       Windows), so convert explicitly to match the %lx conversion */
    dprintf(stderr, "Copy struct from %p to %p,size %lx\n",
            src, dst, (unsigned long)size);
    INCR_COUNT(bound_struct_copy_count);
    __bound_check(dst, size, "struct copy destination");
    __bound_check(src, size, "struct copy source");
}
#if defined(__arm__) && defined(__ARM_EABI__)
void *__bound___aeabi_memcpy(void *dest, const void *src, size_t n)
{

2
tcc.h
View File

@ -1644,6 +1644,8 @@ ST_FUNC void gen_increment_tcov (SValue *sv);
/* ------------ x86_64-gen.c ------------ */
#ifdef TCC_TARGET_X86_64
/* this target implements gen_struct_copy(); vstore() tests this macro to
   select its inline path for structs of at most (PTR_SIZE << 4) bytes */
#define TCC_TARGET_NATIVE_STRUCT_COPY
/* emit an inline copy of `size` bytes; consumes the two topmost value
   stack entries (source address on top, destination address below it) */
ST_FUNC void gen_struct_copy(int size);
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c);
ST_FUNC void gen_opl(int op);
#ifdef TCC_TARGET_PE

View File

@ -3474,9 +3474,38 @@ ST_FUNC void vstore(void)
if (sbt == VT_STRUCT) {
/* if structure, only generate pointer */
/* structure assignment : generate memcpy */
/* XXX: optimize if small size */
size = type_size(&vtop->type, &align);
#ifdef TCC_TARGET_NATIVE_STRUCT_COPY
if (size <= (PTR_SIZE << 4)) {
vswap();
#ifdef CONFIG_TCC_BCHECK
if (vtop->r & VT_MUSTBOUND)
gbound();
#endif
vtop->type.t = VT_PTR;
gaddrof();
vpushv(vtop - 1);
#ifdef CONFIG_TCC_BCHECK
if (vtop->r & VT_MUSTBOUND)
gbound();
#endif
vtop->type.t = VT_PTR;
gaddrof(); /* src dest src */
#ifdef CONFIG_TCC_BCHECK
if (tcc_state->do_bounds_check) {
vpush_helper_func(TOK___bound_struct_copy);
vpushv(vtop - 2);
vpushv(vtop - 2);
vpushi(size);
gfunc_call(3);
}
#endif
gen_struct_copy(size);
} else {
#endif
/* destination */
vswap();
#ifdef CONFIG_TCC_BCHECK
@ -3510,7 +3539,9 @@ ST_FUNC void vstore(void)
vpushi(size);
gfunc_call(3);
/* leave source on stack */
#ifdef TCC_TARGET_NATIVE_STRUCT_COPY
}
#endif
} else if (ft & VT_BITFIELD) {
/* bitfield store handling */

View File

@ -330,6 +330,7 @@
DEF(TOK___bound_setjmp, "__bound_setjmp")
DEF(TOK___bound_longjmp, "__bound_longjmp")
DEF(TOK___bound_new_region, "__bound_new_region")
DEF(TOK___bound_struct_copy,"__bound_struct_copy")
# ifdef TCC_TARGET_PE
# ifdef TCC_TARGET_X86_64
DEF(TOK___bound_alloca_nr, "__bound_alloca_nr")

View File

@ -2282,6 +2282,56 @@ ST_FUNC void gen_vla_alloc(CType *type, int align) {
}
}
/*
 * Emit an inline copy of `size` bytes built from string-move opcodes.
 *
 * Assuming the top part of the value stack looks like below (listed
 * top first):
 *     src dest src
 * the topmost entry is loaded into RSI and the one below it into RDI.
 * Both are popped before returning, so only the remaining "src" entry
 * is left on the value stack.
 *
 * NOTE(review): the emitted moves presumably rely on the direction flag
 * being clear at this point; nothing here checks or sets it.
 */
void gen_struct_copy(int size)
{
    int nb_qwords, i;

    /* source address -> RSI */
    save_reg(TREG_RSI);
    load(TREG_RSI, vtop);
    vtop->r = TREG_RSI;

    /* destination address -> RDI */
    vswap(); /* dest src src */
    save_reg(TREG_RDI);
    load(TREG_RDI, vtop);
    vtop->r = TREG_RDI;

    /* the part of the size that is not a multiple of 8 bytes is emitted
       first: a 4-byte, then a 2-byte, then a 1-byte move as needed */
    if (size & 0x04)
        o(0xa5);        /* movsd (32-bit move) */
    if (size & 0x02)
        o(0xa566);      /* movsw */
    if (size & 0x01)
        o(0xa4);        /* movsb */

    /* the remainder is copied 8 bytes at a time.  The caller visible in
       vstore() only passes sizes up to (PTR_SIZE << 4), but any count is
       handled: up to four moves are emitted back to back, larger counts
       go through RCX and a rep prefix */
    nb_qwords = size >> 3;
    if (nb_qwords > 4) {
        save_reg(TREG_RCX);
        vpushi(nb_qwords);
        load(TREG_RCX, vtop);
        vtop->r = TREG_RCX;
        o(0xa548f3);    /* rep movsq */
        vpop();         /* drop the count */
    } else {
        for (i = 0; i < nb_qwords; i++)
            o(0xa548);  /* movsq */
    }

    /* drop the destination and source addresses */
    vpop();
    vpop();
}
/* end of x86-64 code generator */
/*************************************************************/