mirror of
https://github.com/mirror/tinycc.git
synced 2025-01-27 06:10:06 +08:00
Fixed x86-64 long double passing.
long double arguments require 16-byte alignment on the stack, which requires adjustment when the the stack offset is not an evven number of 8-byte words.
This commit is contained in:
parent
41d76e1fcb
commit
6ee366e765
@ -7,13 +7,13 @@
|
||||
typedef void *va_list;
|
||||
|
||||
va_list __va_start(void *fp);
|
||||
void *__va_arg(va_list ap, int arg_type, int size);
|
||||
void *__va_arg(va_list ap, int arg_type, int size, int align);
|
||||
va_list __va_copy(va_list src);
|
||||
void __va_end(va_list ap);
|
||||
|
||||
#define va_start(ap, last) ((ap) = __va_start(__builtin_frame_address(0)))
|
||||
#define va_arg(ap, type) \
|
||||
(*(type *)(__va_arg(ap, __builtin_va_arg_types(type), sizeof(type))))
|
||||
(*(type *)(__va_arg(ap, __builtin_va_arg_types(type), sizeof(type), __alignof__(type))))
|
||||
#define va_copy(dest, src) ((dest) = __va_copy(src))
|
||||
#define va_end(ap) __va_end(ap)
|
||||
|
||||
|
@ -645,9 +645,10 @@ void *__va_start(void *fp)
|
||||
|
||||
void *__va_arg(struct __va_list_struct *ap,
|
||||
enum __va_arg_type arg_type,
|
||||
int size)
|
||||
int size, int align)
|
||||
{
|
||||
size = (size + 7) & ~7;
|
||||
align = (align + 7) & ~7;
|
||||
switch (arg_type) {
|
||||
case __va_gen_reg:
|
||||
if (ap->gp_offset < 48) {
|
||||
@ -668,6 +669,7 @@ void *__va_arg(struct __va_list_struct *ap,
|
||||
case __va_stack:
|
||||
use_overflow_area:
|
||||
ap->overflow_arg_area += size;
|
||||
ap->overflow_arg_area = (char*)((long long)(ap->overflow_arg_area + align - 1) & -(long long)align);
|
||||
return ap->overflow_arg_area - size;
|
||||
|
||||
default:
|
||||
|
@ -389,6 +389,24 @@ static int stdarg_struct_test(void) {
|
||||
return run_callback(src, stdarg_struct_test_callback);
|
||||
}
|
||||
|
||||
/* Test that x86-64 arranges the stack correctly for arguments with alignment >8 bytes */
|
||||
|
||||
typedef LONG_DOUBLE (*arg_align_test_callback_type) (LONG_DOUBLE,int,LONG_DOUBLE,int,LONG_DOUBLE);
|
||||
|
||||
static int arg_align_test_callback(void *ptr) {
|
||||
arg_align_test_callback_type f = (arg_align_test_callback_type)ptr;
|
||||
long double x = f(12, 0, 25, 0, 37);
|
||||
return (x == 74) ? 0 : -1;
|
||||
}
|
||||
|
||||
static int arg_align_test(void) {
|
||||
const char *src =
|
||||
"long double f(long double a, int b, long double c, int d, long double e) {\n"
|
||||
" return a + c + e;\n"
|
||||
"}\n";
|
||||
return run_callback(src, arg_align_test_callback);
|
||||
}
|
||||
|
||||
#define RUN_TEST(t) \
|
||||
if (!testname || (strcmp(#t, testname) == 0)) { \
|
||||
fputs(#t "... ", stdout); \
|
||||
@ -432,5 +450,6 @@ int main(int argc, char **argv) {
|
||||
RUN_TEST(many_struct_test_2);
|
||||
RUN_TEST(stdarg_test);
|
||||
RUN_TEST(stdarg_struct_test);
|
||||
RUN_TEST(arg_align_test);
|
||||
return retval;
|
||||
}
|
||||
|
300
x86_64-gen.c
300
x86_64-gen.c
@ -96,9 +96,9 @@ enum {
|
||||
|
||||
/* long double size and alignment, in bytes */
|
||||
#define LDOUBLE_SIZE 16
|
||||
#define LDOUBLE_ALIGN 8
|
||||
#define LDOUBLE_ALIGN 16
|
||||
/* maximum alignment (for aligned attribute support) */
|
||||
#define MAX_ALIGN 8
|
||||
#define MAX_ALIGN 16
|
||||
|
||||
/******************************************************/
|
||||
/* ELF defines */
|
||||
@ -983,7 +983,7 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
|
||||
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) {
|
||||
X86_64_Mode mode;
|
||||
int size, align, ret_t;
|
||||
|
||||
@ -995,6 +995,7 @@ static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *r
|
||||
} else {
|
||||
size = type_size(ty, &align);
|
||||
*psize = (size + 7) & ~7;
|
||||
*palign = (align + 7) & ~7;
|
||||
|
||||
if (size > 16) {
|
||||
mode = x86_64_mode_memory;
|
||||
@ -1042,8 +1043,8 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) {
|
||||
enum __va_arg_type {
|
||||
__va_gen_reg, __va_float_reg, __va_stack
|
||||
};
|
||||
int size, reg_count;
|
||||
X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, ®_count);
|
||||
int size, align, reg_count;
|
||||
X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, ®_count);
|
||||
switch (mode) {
|
||||
default: return __va_stack;
|
||||
case x86_64_mode_integer: return __va_gen_reg;
|
||||
@ -1053,9 +1054,9 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) {
|
||||
|
||||
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
|
||||
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
|
||||
int size, reg_count;
|
||||
int size, align, reg_count;
|
||||
*ret_align = 1; // Never have to re-align return values for x86-64
|
||||
return (classify_x86_64_arg(vt, ret, &size, ®_count) == x86_64_mode_memory);
|
||||
return (classify_x86_64_arg(vt, ret, &size, &align, ®_count) == x86_64_mode_memory);
|
||||
}
|
||||
|
||||
#define REGN 6
|
||||
@ -1078,77 +1079,187 @@ void gfunc_call(int nb_args)
|
||||
{
|
||||
X86_64_Mode mode;
|
||||
CType type;
|
||||
int size, align, r, args_size, i, j, reg_count;
|
||||
int size, align, r, args_size, stack_adjust, run_start, run_end, i, j, reg_count;
|
||||
int nb_reg_args = 0;
|
||||
int nb_sse_args = 0;
|
||||
int sse_reg, gen_reg;
|
||||
|
||||
/* calculate the number of integer/float arguments */
|
||||
args_size = 0;
|
||||
/* calculate the number of integer/float register arguments */
|
||||
for(i = 0; i < nb_args; i++) {
|
||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, ®_count);
|
||||
switch (mode) {
|
||||
case x86_64_mode_memory:
|
||||
case x86_64_mode_x87:
|
||||
args_size += size;
|
||||
break;
|
||||
|
||||
case x86_64_mode_sse:
|
||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count);
|
||||
if (mode == x86_64_mode_sse)
|
||||
nb_sse_args += reg_count;
|
||||
if (nb_sse_args > 8) args_size += size;
|
||||
break;
|
||||
|
||||
case x86_64_mode_integer:
|
||||
else if (mode == x86_64_mode_integer)
|
||||
nb_reg_args += reg_count;
|
||||
if (nb_reg_args > REGN) args_size += size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
|
||||
and ended by a 16-byte aligned argument. This is because, from the point of view of
|
||||
the callee, argument alignment is computed from the bottom up. */
|
||||
/* for struct arguments, we need to call memcpy and the function
|
||||
call breaks register passing arguments we are preparing.
|
||||
So, we process arguments which will be passed by stack first. */
|
||||
gen_reg = nb_reg_args;
|
||||
sse_reg = nb_sse_args;
|
||||
run_start = 0;
|
||||
args_size = 0;
|
||||
while (run_start != nb_args) {
|
||||
int run_gen_reg = gen_reg, run_sse_reg = sse_reg;
|
||||
|
||||
/* adjust stack to align SSE boundary */
|
||||
if (args_size &= 15) {
|
||||
/* fetch cpu flag before the following sub will change the value */
|
||||
if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
|
||||
gv(RC_INT);
|
||||
|
||||
args_size = 16 - args_size;
|
||||
o(0x48);
|
||||
oad(0xec81, args_size); /* sub $xxx, %rsp */
|
||||
}
|
||||
|
||||
for(i = 0; i < nb_args;) {
|
||||
/* Swap argument to top, it will possibly be changed here,
|
||||
and might use more temps. At the end of the loop we keep
|
||||
in on the stack and swap it back to its original position
|
||||
if it is a register. */
|
||||
SValue tmp = vtop[0];
|
||||
vtop[0] = vtop[-i];
|
||||
vtop[-i] = tmp;
|
||||
|
||||
mode = classify_x86_64_arg(&vtop->type, NULL, &size, ®_count);
|
||||
|
||||
int arg_stored = 1;
|
||||
switch (vtop->type.t & VT_BTYPE) {
|
||||
case VT_STRUCT:
|
||||
if (mode == x86_64_mode_sse) {
|
||||
if (sse_reg > 8)
|
||||
sse_reg -= reg_count;
|
||||
run_end = nb_args;
|
||||
stack_adjust = 0;
|
||||
for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
|
||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count);
|
||||
switch (mode) {
|
||||
case x86_64_mode_memory:
|
||||
case x86_64_mode_x87:
|
||||
stack_arg:
|
||||
if (align == 16)
|
||||
run_end = i;
|
||||
else
|
||||
stack_adjust += size;
|
||||
break;
|
||||
|
||||
case x86_64_mode_sse:
|
||||
sse_reg -= reg_count;
|
||||
if (sse_reg + reg_count > 8) goto stack_arg;
|
||||
break;
|
||||
|
||||
case x86_64_mode_integer:
|
||||
gen_reg -= reg_count;
|
||||
if (gen_reg + reg_count > REGN) goto stack_arg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gen_reg = run_gen_reg;
|
||||
sse_reg = run_sse_reg;
|
||||
|
||||
/* adjust stack to align SSE boundary */
|
||||
if (stack_adjust &= 15) {
|
||||
/* fetch cpu flag before the following sub will change the value */
|
||||
if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
|
||||
gv(RC_INT);
|
||||
|
||||
stack_adjust = 16 - stack_adjust;
|
||||
o(0x48);
|
||||
oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
|
||||
args_size += stack_adjust;
|
||||
}
|
||||
|
||||
for(i = run_start; i < run_end;) {
|
||||
/* Swap argument to top, it will possibly be changed here,
|
||||
and might use more temps. At the end of the loop we keep
|
||||
in on the stack and swap it back to its original position
|
||||
if it is a register. */
|
||||
SValue tmp = vtop[0];
|
||||
vtop[0] = vtop[-i];
|
||||
vtop[-i] = tmp;
|
||||
|
||||
mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, ®_count);
|
||||
|
||||
int arg_stored = 1;
|
||||
switch (vtop->type.t & VT_BTYPE) {
|
||||
case VT_STRUCT:
|
||||
if (mode == x86_64_mode_sse) {
|
||||
if (sse_reg > 8)
|
||||
sse_reg -= reg_count;
|
||||
else
|
||||
arg_stored = 0;
|
||||
} else if (mode == x86_64_mode_integer) {
|
||||
if (gen_reg > REGN)
|
||||
gen_reg -= reg_count;
|
||||
else
|
||||
arg_stored = 0;
|
||||
}
|
||||
|
||||
if (arg_stored) {
|
||||
/* allocate the necessary size on stack */
|
||||
o(0x48);
|
||||
oad(0xec81, size); /* sub $xxx, %rsp */
|
||||
/* generate structure store */
|
||||
r = get_reg(RC_INT);
|
||||
orex(1, r, 0, 0x89); /* mov %rsp, r */
|
||||
o(0xe0 + REG_VALUE(r));
|
||||
vset(&vtop->type, r | VT_LVAL, 0);
|
||||
vswap();
|
||||
vstore();
|
||||
args_size += size;
|
||||
}
|
||||
break;
|
||||
|
||||
case VT_LDOUBLE:
|
||||
assert(0);
|
||||
break;
|
||||
|
||||
case VT_FLOAT:
|
||||
case VT_DOUBLE:
|
||||
assert(mode == x86_64_mode_sse);
|
||||
if (sse_reg > 8) {
|
||||
--sse_reg;
|
||||
r = gv(RC_FLOAT);
|
||||
o(0x50); /* push $rax */
|
||||
/* movq %xmmN, (%rsp) */
|
||||
o(0xd60f66);
|
||||
o(0x04 + REG_VALUE(r)*8);
|
||||
o(0x24);
|
||||
args_size += size;
|
||||
} else {
|
||||
arg_stored = 0;
|
||||
} else if (mode == x86_64_mode_integer) {
|
||||
if (gen_reg > REGN)
|
||||
gen_reg -= reg_count;
|
||||
else
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(mode == x86_64_mode_integer);
|
||||
/* simple type */
|
||||
/* XXX: implicit cast ? */
|
||||
if (gen_reg > REGN) {
|
||||
--gen_reg;
|
||||
r = gv(RC_INT);
|
||||
orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
|
||||
args_size += size;
|
||||
} else {
|
||||
arg_stored = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* And swap the argument back to it's original position. */
|
||||
tmp = vtop[0];
|
||||
vtop[0] = vtop[-i];
|
||||
vtop[-i] = tmp;
|
||||
|
||||
if (arg_stored) {
|
||||
vrotb(i+1);
|
||||
assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
|
||||
vpop();
|
||||
--nb_args;
|
||||
--run_end;
|
||||
} else {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
/* handle 16 byte aligned arguments at end of run */
|
||||
run_start = i = run_end;
|
||||
while (i < nb_args) {
|
||||
/* Rotate argument to top since it will always be popped */
|
||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count);
|
||||
if (align != 16)
|
||||
break;
|
||||
|
||||
vrotb(i+1);
|
||||
|
||||
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
|
||||
gv(RC_ST0);
|
||||
oad(0xec8148, size); /* sub $xxx, %rsp */
|
||||
o(0x7cdb); /* fstpt 0(%rsp) */
|
||||
g(0x24);
|
||||
g(0x00);
|
||||
args_size += size;
|
||||
} else {
|
||||
assert(mode == x86_64_mode_memory);
|
||||
|
||||
/* allocate the necessary size on stack */
|
||||
o(0x48);
|
||||
oad(0xec81, size); /* sub $xxx, %rsp */
|
||||
@ -1161,62 +1272,9 @@ void gfunc_call(int nb_args)
|
||||
vstore();
|
||||
args_size += size;
|
||||
}
|
||||
break;
|
||||
|
||||
case VT_LDOUBLE:
|
||||
gv(RC_ST0);
|
||||
size = LDOUBLE_SIZE;
|
||||
oad(0xec8148, size); /* sub $xxx, %rsp */
|
||||
o(0x7cdb); /* fstpt 0(%rsp) */
|
||||
g(0x24);
|
||||
g(0x00);
|
||||
args_size += size;
|
||||
break;
|
||||
|
||||
case VT_FLOAT:
|
||||
case VT_DOUBLE:
|
||||
assert(mode == x86_64_mode_sse);
|
||||
if (sse_reg > 8) {
|
||||
--sse_reg;
|
||||
r = gv(RC_FLOAT);
|
||||
o(0x50); /* push $rax */
|
||||
/* movq %xmmN, (%rsp) */
|
||||
o(0xd60f66);
|
||||
o(0x04 + REG_VALUE(r)*8);
|
||||
o(0x24);
|
||||
args_size += size;
|
||||
} else {
|
||||
arg_stored = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(mode == x86_64_mode_integer);
|
||||
/* simple type */
|
||||
/* XXX: implicit cast ? */
|
||||
if (gen_reg > REGN) {
|
||||
--gen_reg;
|
||||
r = gv(RC_INT);
|
||||
orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
|
||||
args_size += size;
|
||||
} else {
|
||||
arg_stored = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* And swap the argument back to it's original position. */
|
||||
tmp = vtop[0];
|
||||
vtop[0] = vtop[-i];
|
||||
vtop[-i] = tmp;
|
||||
|
||||
if (arg_stored) {
|
||||
vrotb(i+1);
|
||||
assert(vtop->type.t == tmp.type.t);
|
||||
vpop();
|
||||
--nb_args;
|
||||
} else {
|
||||
++i;
|
||||
vpop();
|
||||
--nb_args;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1230,7 +1288,7 @@ void gfunc_call(int nb_args)
|
||||
assert(gen_reg <= REGN);
|
||||
assert(sse_reg <= 8);
|
||||
for(i = 0; i < nb_args; i++) {
|
||||
mode = classify_x86_64_arg(&vtop->type, &type, &size, ®_count);
|
||||
mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count);
|
||||
/* Alter stack entry type so that gv() knows how to treat it */
|
||||
vtop->type = type;
|
||||
if (mode == x86_64_mode_sse) {
|
||||
@ -1324,10 +1382,11 @@ void gfunc_prolog(CType *func_type)
|
||||
sym = func_type->ref;
|
||||
while ((sym = sym->next) != NULL) {
|
||||
type = &sym->type;
|
||||
mode = classify_x86_64_arg(type, NULL, &size, ®_count);
|
||||
mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count);
|
||||
switch (mode) {
|
||||
default:
|
||||
seen_stack_size += size;
|
||||
stack_arg:
|
||||
seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
|
||||
break;
|
||||
|
||||
case x86_64_mode_integer:
|
||||
@ -1335,7 +1394,7 @@ void gfunc_prolog(CType *func_type)
|
||||
seen_reg_num += reg_count;
|
||||
} else {
|
||||
seen_reg_num = 8;
|
||||
seen_stack_size += size;
|
||||
goto stack_arg;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -1344,7 +1403,7 @@ void gfunc_prolog(CType *func_type)
|
||||
seen_sse_num += reg_count;
|
||||
} else {
|
||||
seen_sse_num = 8;
|
||||
seen_stack_size += size;
|
||||
goto stack_arg;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1383,7 +1442,7 @@ void gfunc_prolog(CType *func_type)
|
||||
/* if the function returns a structure, then add an
|
||||
implicit pointer parameter */
|
||||
func_vt = sym->type;
|
||||
mode = classify_x86_64_arg(&func_vt, NULL, &size, ®_count);
|
||||
mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, ®_count);
|
||||
if (mode == x86_64_mode_memory) {
|
||||
push_arg_reg(reg_param_index);
|
||||
func_vc = loc;
|
||||
@ -1392,7 +1451,7 @@ void gfunc_prolog(CType *func_type)
|
||||
/* define parameters */
|
||||
while ((sym = sym->next) != NULL) {
|
||||
type = &sym->type;
|
||||
mode = classify_x86_64_arg(type, NULL, &size, ®_count);
|
||||
mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count);
|
||||
switch (mode) {
|
||||
case x86_64_mode_sse:
|
||||
if (sse_param_index + reg_count <= 8) {
|
||||
@ -1405,6 +1464,7 @@ void gfunc_prolog(CType *func_type)
|
||||
++sse_param_index;
|
||||
}
|
||||
} else {
|
||||
addr = (addr + align - 1) & -align;
|
||||
param_addr = addr;
|
||||
addr += size;
|
||||
sse_param_index += reg_count;
|
||||
@ -1413,6 +1473,7 @@ void gfunc_prolog(CType *func_type)
|
||||
|
||||
case x86_64_mode_memory:
|
||||
case x86_64_mode_x87:
|
||||
addr = (addr + align - 1) & -align;
|
||||
param_addr = addr;
|
||||
addr += size;
|
||||
break;
|
||||
@ -1427,6 +1488,7 @@ void gfunc_prolog(CType *func_type)
|
||||
++reg_param_index;
|
||||
}
|
||||
} else {
|
||||
addr = (addr + align - 1) & -align;
|
||||
param_addr = addr;
|
||||
addr += size;
|
||||
reg_param_index += reg_count;
|
||||
|
Loading…
Reference in New Issue
Block a user