mirror of
https://github.com/mirror/tinycc.git
synced 2025-01-29 06:10:09 +08:00
Fixed x86-64 long double passing.
long double arguments require 16-byte alignment on the stack, which requires adjustment when the stack offset is not an even number of 8-byte words.
This commit is contained in:
parent
41d76e1fcb
commit
6ee366e765
@ -7,13 +7,13 @@
|
|||||||
typedef void *va_list;
|
typedef void *va_list;
|
||||||
|
|
||||||
va_list __va_start(void *fp);
|
va_list __va_start(void *fp);
|
||||||
void *__va_arg(va_list ap, int arg_type, int size);
|
void *__va_arg(va_list ap, int arg_type, int size, int align);
|
||||||
va_list __va_copy(va_list src);
|
va_list __va_copy(va_list src);
|
||||||
void __va_end(va_list ap);
|
void __va_end(va_list ap);
|
||||||
|
|
||||||
#define va_start(ap, last) ((ap) = __va_start(__builtin_frame_address(0)))
|
#define va_start(ap, last) ((ap) = __va_start(__builtin_frame_address(0)))
|
||||||
#define va_arg(ap, type) \
|
#define va_arg(ap, type) \
|
||||||
(*(type *)(__va_arg(ap, __builtin_va_arg_types(type), sizeof(type))))
|
(*(type *)(__va_arg(ap, __builtin_va_arg_types(type), sizeof(type), __alignof__(type))))
|
||||||
#define va_copy(dest, src) ((dest) = __va_copy(src))
|
#define va_copy(dest, src) ((dest) = __va_copy(src))
|
||||||
#define va_end(ap) __va_end(ap)
|
#define va_end(ap) __va_end(ap)
|
||||||
|
|
||||||
|
@ -645,9 +645,10 @@ void *__va_start(void *fp)
|
|||||||
|
|
||||||
void *__va_arg(struct __va_list_struct *ap,
|
void *__va_arg(struct __va_list_struct *ap,
|
||||||
enum __va_arg_type arg_type,
|
enum __va_arg_type arg_type,
|
||||||
int size)
|
int size, int align)
|
||||||
{
|
{
|
||||||
size = (size + 7) & ~7;
|
size = (size + 7) & ~7;
|
||||||
|
align = (align + 7) & ~7;
|
||||||
switch (arg_type) {
|
switch (arg_type) {
|
||||||
case __va_gen_reg:
|
case __va_gen_reg:
|
||||||
if (ap->gp_offset < 48) {
|
if (ap->gp_offset < 48) {
|
||||||
@ -668,6 +669,7 @@ void *__va_arg(struct __va_list_struct *ap,
|
|||||||
case __va_stack:
|
case __va_stack:
|
||||||
use_overflow_area:
|
use_overflow_area:
|
||||||
ap->overflow_arg_area += size;
|
ap->overflow_arg_area += size;
|
||||||
|
ap->overflow_arg_area = (char*)((long long)(ap->overflow_arg_area + align - 1) & -(long long)align);
|
||||||
return ap->overflow_arg_area - size;
|
return ap->overflow_arg_area - size;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -389,6 +389,24 @@ static int stdarg_struct_test(void) {
|
|||||||
return run_callback(src, stdarg_struct_test_callback);
|
return run_callback(src, stdarg_struct_test_callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Test that x86-64 arranges the stack correctly for arguments with alignment >8 bytes */
|
||||||
|
|
||||||
|
typedef LONG_DOUBLE (*arg_align_test_callback_type) (LONG_DOUBLE,int,LONG_DOUBLE,int,LONG_DOUBLE);
|
||||||
|
|
||||||
|
static int arg_align_test_callback(void *ptr) {
|
||||||
|
arg_align_test_callback_type f = (arg_align_test_callback_type)ptr;
|
||||||
|
long double x = f(12, 0, 25, 0, 37);
|
||||||
|
return (x == 74) ? 0 : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int arg_align_test(void) {
|
||||||
|
const char *src =
|
||||||
|
"long double f(long double a, int b, long double c, int d, long double e) {\n"
|
||||||
|
" return a + c + e;\n"
|
||||||
|
"}\n";
|
||||||
|
return run_callback(src, arg_align_test_callback);
|
||||||
|
}
|
||||||
|
|
||||||
#define RUN_TEST(t) \
|
#define RUN_TEST(t) \
|
||||||
if (!testname || (strcmp(#t, testname) == 0)) { \
|
if (!testname || (strcmp(#t, testname) == 0)) { \
|
||||||
fputs(#t "... ", stdout); \
|
fputs(#t "... ", stdout); \
|
||||||
@ -432,5 +450,6 @@ int main(int argc, char **argv) {
|
|||||||
RUN_TEST(many_struct_test_2);
|
RUN_TEST(many_struct_test_2);
|
||||||
RUN_TEST(stdarg_test);
|
RUN_TEST(stdarg_test);
|
||||||
RUN_TEST(stdarg_struct_test);
|
RUN_TEST(stdarg_struct_test);
|
||||||
|
RUN_TEST(arg_align_test);
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
152
x86_64-gen.c
152
x86_64-gen.c
@ -96,9 +96,9 @@ enum {
|
|||||||
|
|
||||||
/* long double size and alignment, in bytes */
|
/* long double size and alignment, in bytes */
|
||||||
#define LDOUBLE_SIZE 16
|
#define LDOUBLE_SIZE 16
|
||||||
#define LDOUBLE_ALIGN 8
|
#define LDOUBLE_ALIGN 16
|
||||||
/* maximum alignment (for aligned attribute support) */
|
/* maximum alignment (for aligned attribute support) */
|
||||||
#define MAX_ALIGN 8
|
#define MAX_ALIGN 16
|
||||||
|
|
||||||
/******************************************************/
|
/******************************************************/
|
||||||
/* ELF defines */
|
/* ELF defines */
|
||||||
@ -983,7 +983,7 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) {
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
|
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) {
|
||||||
X86_64_Mode mode;
|
X86_64_Mode mode;
|
||||||
int size, align, ret_t;
|
int size, align, ret_t;
|
||||||
|
|
||||||
@ -995,6 +995,7 @@ static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *r
|
|||||||
} else {
|
} else {
|
||||||
size = type_size(ty, &align);
|
size = type_size(ty, &align);
|
||||||
*psize = (size + 7) & ~7;
|
*psize = (size + 7) & ~7;
|
||||||
|
*palign = (align + 7) & ~7;
|
||||||
|
|
||||||
if (size > 16) {
|
if (size > 16) {
|
||||||
mode = x86_64_mode_memory;
|
mode = x86_64_mode_memory;
|
||||||
@ -1042,8 +1043,8 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) {
|
|||||||
enum __va_arg_type {
|
enum __va_arg_type {
|
||||||
__va_gen_reg, __va_float_reg, __va_stack
|
__va_gen_reg, __va_float_reg, __va_stack
|
||||||
};
|
};
|
||||||
int size, reg_count;
|
int size, align, reg_count;
|
||||||
X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, ®_count);
|
X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, ®_count);
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
default: return __va_stack;
|
default: return __va_stack;
|
||||||
case x86_64_mode_integer: return __va_gen_reg;
|
case x86_64_mode_integer: return __va_gen_reg;
|
||||||
@ -1053,9 +1054,9 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) {
|
|||||||
|
|
||||||
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
|
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
|
||||||
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
|
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
|
||||||
int size, reg_count;
|
int size, align, reg_count;
|
||||||
*ret_align = 1; // Never have to re-align return values for x86-64
|
*ret_align = 1; // Never have to re-align return values for x86-64
|
||||||
return (classify_x86_64_arg(vt, ret, &size, ®_count) == x86_64_mode_memory);
|
return (classify_x86_64_arg(vt, ret, &size, &align, ®_count) == x86_64_mode_memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define REGN 6
|
#define REGN 6
|
||||||
@ -1078,51 +1079,75 @@ void gfunc_call(int nb_args)
|
|||||||
{
|
{
|
||||||
X86_64_Mode mode;
|
X86_64_Mode mode;
|
||||||
CType type;
|
CType type;
|
||||||
int size, align, r, args_size, i, j, reg_count;
|
int size, align, r, args_size, stack_adjust, run_start, run_end, i, j, reg_count;
|
||||||
int nb_reg_args = 0;
|
int nb_reg_args = 0;
|
||||||
int nb_sse_args = 0;
|
int nb_sse_args = 0;
|
||||||
int sse_reg, gen_reg;
|
int sse_reg, gen_reg;
|
||||||
|
|
||||||
/* calculate the number of integer/float arguments */
|
/* calculate the number of integer/float register arguments */
|
||||||
args_size = 0;
|
|
||||||
for(i = 0; i < nb_args; i++) {
|
for(i = 0; i < nb_args; i++) {
|
||||||
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, ®_count);
|
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count);
|
||||||
switch (mode) {
|
if (mode == x86_64_mode_sse)
|
||||||
case x86_64_mode_memory:
|
|
||||||
case x86_64_mode_x87:
|
|
||||||
args_size += size;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case x86_64_mode_sse:
|
|
||||||
nb_sse_args += reg_count;
|
nb_sse_args += reg_count;
|
||||||
if (nb_sse_args > 8) args_size += size;
|
else if (mode == x86_64_mode_integer)
|
||||||
break;
|
|
||||||
|
|
||||||
case x86_64_mode_integer:
|
|
||||||
nb_reg_args += reg_count;
|
nb_reg_args += reg_count;
|
||||||
if (nb_reg_args > REGN) args_size += size;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
|
||||||
|
and ended by a 16-byte aligned argument. This is because, from the point of view of
|
||||||
|
the callee, argument alignment is computed from the bottom up. */
|
||||||
/* for struct arguments, we need to call memcpy and the function
|
/* for struct arguments, we need to call memcpy and the function
|
||||||
call breaks register passing arguments we are preparing.
|
call breaks register passing arguments we are preparing.
|
||||||
So, we process arguments which will be passed by stack first. */
|
So, we process arguments which will be passed by stack first. */
|
||||||
gen_reg = nb_reg_args;
|
gen_reg = nb_reg_args;
|
||||||
sse_reg = nb_sse_args;
|
sse_reg = nb_sse_args;
|
||||||
|
run_start = 0;
|
||||||
|
args_size = 0;
|
||||||
|
while (run_start != nb_args) {
|
||||||
|
int run_gen_reg = gen_reg, run_sse_reg = sse_reg;
|
||||||
|
|
||||||
|
run_end = nb_args;
|
||||||
|
stack_adjust = 0;
|
||||||
|
for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
|
||||||
|
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count);
|
||||||
|
switch (mode) {
|
||||||
|
case x86_64_mode_memory:
|
||||||
|
case x86_64_mode_x87:
|
||||||
|
stack_arg:
|
||||||
|
if (align == 16)
|
||||||
|
run_end = i;
|
||||||
|
else
|
||||||
|
stack_adjust += size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case x86_64_mode_sse:
|
||||||
|
sse_reg -= reg_count;
|
||||||
|
if (sse_reg + reg_count > 8) goto stack_arg;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case x86_64_mode_integer:
|
||||||
|
gen_reg -= reg_count;
|
||||||
|
if (gen_reg + reg_count > REGN) goto stack_arg;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gen_reg = run_gen_reg;
|
||||||
|
sse_reg = run_sse_reg;
|
||||||
|
|
||||||
/* adjust stack to align SSE boundary */
|
/* adjust stack to align SSE boundary */
|
||||||
if (args_size &= 15) {
|
if (stack_adjust &= 15) {
|
||||||
/* fetch cpu flag before the following sub will change the value */
|
/* fetch cpu flag before the following sub will change the value */
|
||||||
if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
|
if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
|
||||||
gv(RC_INT);
|
gv(RC_INT);
|
||||||
|
|
||||||
args_size = 16 - args_size;
|
stack_adjust = 16 - stack_adjust;
|
||||||
o(0x48);
|
o(0x48);
|
||||||
oad(0xec81, args_size); /* sub $xxx, %rsp */
|
oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
|
||||||
|
args_size += stack_adjust;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i = 0; i < nb_args;) {
|
for(i = run_start; i < run_end;) {
|
||||||
/* Swap argument to top, it will possibly be changed here,
|
/* Swap argument to top, it will possibly be changed here,
|
||||||
and might use more temps. At the end of the loop we keep
|
and might use more temps. At the end of the loop we keep
|
||||||
in on the stack and swap it back to its original position
|
in on the stack and swap it back to its original position
|
||||||
@ -1131,7 +1156,7 @@ void gfunc_call(int nb_args)
|
|||||||
vtop[0] = vtop[-i];
|
vtop[0] = vtop[-i];
|
||||||
vtop[-i] = tmp;
|
vtop[-i] = tmp;
|
||||||
|
|
||||||
mode = classify_x86_64_arg(&vtop->type, NULL, &size, ®_count);
|
mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, ®_count);
|
||||||
|
|
||||||
int arg_stored = 1;
|
int arg_stored = 1;
|
||||||
switch (vtop->type.t & VT_BTYPE) {
|
switch (vtop->type.t & VT_BTYPE) {
|
||||||
@ -1164,13 +1189,7 @@ void gfunc_call(int nb_args)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case VT_LDOUBLE:
|
case VT_LDOUBLE:
|
||||||
gv(RC_ST0);
|
assert(0);
|
||||||
size = LDOUBLE_SIZE;
|
|
||||||
oad(0xec8148, size); /* sub $xxx, %rsp */
|
|
||||||
o(0x7cdb); /* fstpt 0(%rsp) */
|
|
||||||
g(0x24);
|
|
||||||
g(0x00);
|
|
||||||
args_size += size;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VT_FLOAT:
|
case VT_FLOAT:
|
||||||
@ -1212,14 +1231,53 @@ void gfunc_call(int nb_args)
|
|||||||
|
|
||||||
if (arg_stored) {
|
if (arg_stored) {
|
||||||
vrotb(i+1);
|
vrotb(i+1);
|
||||||
assert(vtop->type.t == tmp.type.t);
|
assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
|
||||||
vpop();
|
vpop();
|
||||||
--nb_args;
|
--nb_args;
|
||||||
|
--run_end;
|
||||||
} else {
|
} else {
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* handle 16 byte aligned arguments at end of run */
|
||||||
|
run_start = i = run_end;
|
||||||
|
while (i < nb_args) {
|
||||||
|
/* Rotate argument to top since it will always be popped */
|
||||||
|
mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count);
|
||||||
|
if (align != 16)
|
||||||
|
break;
|
||||||
|
|
||||||
|
vrotb(i+1);
|
||||||
|
|
||||||
|
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
|
||||||
|
gv(RC_ST0);
|
||||||
|
oad(0xec8148, size); /* sub $xxx, %rsp */
|
||||||
|
o(0x7cdb); /* fstpt 0(%rsp) */
|
||||||
|
g(0x24);
|
||||||
|
g(0x00);
|
||||||
|
args_size += size;
|
||||||
|
} else {
|
||||||
|
assert(mode == x86_64_mode_memory);
|
||||||
|
|
||||||
|
/* allocate the necessary size on stack */
|
||||||
|
o(0x48);
|
||||||
|
oad(0xec81, size); /* sub $xxx, %rsp */
|
||||||
|
/* generate structure store */
|
||||||
|
r = get_reg(RC_INT);
|
||||||
|
orex(1, r, 0, 0x89); /* mov %rsp, r */
|
||||||
|
o(0xe0 + REG_VALUE(r));
|
||||||
|
vset(&vtop->type, r | VT_LVAL, 0);
|
||||||
|
vswap();
|
||||||
|
vstore();
|
||||||
|
args_size += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
vpop();
|
||||||
|
--nb_args;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* XXX This should be superfluous. */
|
/* XXX This should be superfluous. */
|
||||||
save_regs(0); /* save used temporary registers */
|
save_regs(0); /* save used temporary registers */
|
||||||
|
|
||||||
@ -1230,7 +1288,7 @@ void gfunc_call(int nb_args)
|
|||||||
assert(gen_reg <= REGN);
|
assert(gen_reg <= REGN);
|
||||||
assert(sse_reg <= 8);
|
assert(sse_reg <= 8);
|
||||||
for(i = 0; i < nb_args; i++) {
|
for(i = 0; i < nb_args; i++) {
|
||||||
mode = classify_x86_64_arg(&vtop->type, &type, &size, ®_count);
|
mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count);
|
||||||
/* Alter stack entry type so that gv() knows how to treat it */
|
/* Alter stack entry type so that gv() knows how to treat it */
|
||||||
vtop->type = type;
|
vtop->type = type;
|
||||||
if (mode == x86_64_mode_sse) {
|
if (mode == x86_64_mode_sse) {
|
||||||
@ -1324,10 +1382,11 @@ void gfunc_prolog(CType *func_type)
|
|||||||
sym = func_type->ref;
|
sym = func_type->ref;
|
||||||
while ((sym = sym->next) != NULL) {
|
while ((sym = sym->next) != NULL) {
|
||||||
type = &sym->type;
|
type = &sym->type;
|
||||||
mode = classify_x86_64_arg(type, NULL, &size, ®_count);
|
mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count);
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
default:
|
default:
|
||||||
seen_stack_size += size;
|
stack_arg:
|
||||||
|
seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case x86_64_mode_integer:
|
case x86_64_mode_integer:
|
||||||
@ -1335,7 +1394,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
seen_reg_num += reg_count;
|
seen_reg_num += reg_count;
|
||||||
} else {
|
} else {
|
||||||
seen_reg_num = 8;
|
seen_reg_num = 8;
|
||||||
seen_stack_size += size;
|
goto stack_arg;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -1344,7 +1403,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
seen_sse_num += reg_count;
|
seen_sse_num += reg_count;
|
||||||
} else {
|
} else {
|
||||||
seen_sse_num = 8;
|
seen_sse_num = 8;
|
||||||
seen_stack_size += size;
|
goto stack_arg;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1383,7 +1442,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
/* if the function returns a structure, then add an
|
/* if the function returns a structure, then add an
|
||||||
implicit pointer parameter */
|
implicit pointer parameter */
|
||||||
func_vt = sym->type;
|
func_vt = sym->type;
|
||||||
mode = classify_x86_64_arg(&func_vt, NULL, &size, ®_count);
|
mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, ®_count);
|
||||||
if (mode == x86_64_mode_memory) {
|
if (mode == x86_64_mode_memory) {
|
||||||
push_arg_reg(reg_param_index);
|
push_arg_reg(reg_param_index);
|
||||||
func_vc = loc;
|
func_vc = loc;
|
||||||
@ -1392,7 +1451,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
/* define parameters */
|
/* define parameters */
|
||||||
while ((sym = sym->next) != NULL) {
|
while ((sym = sym->next) != NULL) {
|
||||||
type = &sym->type;
|
type = &sym->type;
|
||||||
mode = classify_x86_64_arg(type, NULL, &size, ®_count);
|
mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count);
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case x86_64_mode_sse:
|
case x86_64_mode_sse:
|
||||||
if (sse_param_index + reg_count <= 8) {
|
if (sse_param_index + reg_count <= 8) {
|
||||||
@ -1405,6 +1464,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
++sse_param_index;
|
++sse_param_index;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
addr = (addr + align - 1) & -align;
|
||||||
param_addr = addr;
|
param_addr = addr;
|
||||||
addr += size;
|
addr += size;
|
||||||
sse_param_index += reg_count;
|
sse_param_index += reg_count;
|
||||||
@ -1413,6 +1473,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
|
|
||||||
case x86_64_mode_memory:
|
case x86_64_mode_memory:
|
||||||
case x86_64_mode_x87:
|
case x86_64_mode_x87:
|
||||||
|
addr = (addr + align - 1) & -align;
|
||||||
param_addr = addr;
|
param_addr = addr;
|
||||||
addr += size;
|
addr += size;
|
||||||
break;
|
break;
|
||||||
@ -1427,6 +1488,7 @@ void gfunc_prolog(CType *func_type)
|
|||||||
++reg_param_index;
|
++reg_param_index;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
addr = (addr + align - 1) & -align;
|
||||||
param_addr = addr;
|
param_addr = addr;
|
||||||
addr += size;
|
addr += size;
|
||||||
reg_param_index += reg_count;
|
reg_param_index += reg_count;
|
||||||
|
Loading…
Reference in New Issue
Block a user