2015-02-14 02:58:31 +08:00
|
|
|
/*
|
|
|
|
* A64 code generator for TCC
|
|
|
|
*
|
|
|
|
* Copyright (c) 2014-2015 Edmund Grimley Evans
|
|
|
|
*
|
|
|
|
* Copying and distribution of this file, with or without modification,
|
|
|
|
* are permitted in any medium without royalty provided the copyright
|
|
|
|
* notice and this notice are preserved. This file is offered as-is,
|
|
|
|
* without any warranty.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef TARGET_DEFS_ONLY
|
|
|
|
|
|
|
|
// Number of registers available to allocator:
#define NB_REGS 28 // x0-x18, x30, v0-v7

// Allocator register indices. Arguments are parenthesized so that any
// expression can be passed safely.
#define TREG_R(x) (x) // x = 0..18
#define TREG_R30 19
#define TREG_F(x) ((x) + 20) // x = 0..7

// Register classes sorted from more general to more precise:
#define RC_INT (1 << 0)
#define RC_FLOAT (1 << 1)
#define RC_R(x) (1 << (2 + (x))) // x = 0..18
#define RC_R30 (1 << 21)
#define RC_F(x) (1 << (22 + (x))) // x = 0..7

#define RC_IRET (RC_R(0)) // int return register class
#define RC_FRET (RC_F(0)) // float return register class

#define REG_IRET (TREG_R(0)) // int return register number
#define REG_FRET (TREG_F(0)) // float return register number

#define PTR_SIZE 8

#define LDOUBLE_SIZE 16
#define LDOUBLE_ALIGN 16

#define MAX_ALIGN 16

#ifndef TCC_TARGET_MACHO
#define CHAR_IS_UNSIGNED
#endif
|
2015-02-14 02:58:31 +08:00
|
|
|
|
Adjust return value promotion for some archs
this is a bit complicated: for i386 and x86-64 we really need to
extend return values ourselves, as the common code now does. For arm64
this at least preserves old behaviour. For riscv64 we don't have to
extend ourselves but can expect things to be extended up to int (this
matters for var-args tests, when the sign-extension to int64 needs to
happen explicitly). As the extensions are useless, don't do them.
And for arm32 we actually can't express GCC behaviour: the callee side
expects the return value to be correctly extended to int32, but
remembers the original type. In case the ultimate target type for the
call result is only int, no further extension is done. But in case
the target type is e.g. int64 an extension happens, but not from int32
but from the original type. We don't know the ultimate target type,
so we have to choose a type to put into vtop:
* original type (plus VT_MUSTCAST) - this loses when the ultimate
target is int (GCC: no cast, TCC: a cast)
* int (without MUSTCAST) - this loses when the ultimate target is
int64 (GCC: cast from original type, TCC: cast from int)
This difference can only be seen with undefined sources, like the
testcases, so it doesn't seem worthwhile to try and make it work, just
disable the test on arm and choose the second variant as that generates
less code.
2019-12-17 08:46:06 +08:00
|
|
|
/* define if return values need to be extended explicitly
   at caller side (for interfacing with non-TCC compilers) */
|
|
|
|
#define PROMOTE_RET
|
2015-02-14 02:58:31 +08:00
|
|
|
/******************************************************/
|
|
|
|
#else /* ! TARGET_DEFS_ONLY */
|
|
|
|
/******************************************************/
|
2019-12-11 07:37:18 +08:00
|
|
|
#define USING_GLOBALS
|
2015-07-30 04:53:57 +08:00
|
|
|
#include "tcc.h"
|
2015-02-14 02:58:31 +08:00
|
|
|
#include <assert.h>
|
|
|
|
|
2021-02-01 22:10:58 +08:00
|
|
|
/* Predefined target macro names, stored as consecutive NUL-terminated
   strings. When built with __APPLE__ defined, "__arm64__" is listed in
   addition to "__aarch64__". */
ST_DATA const char * const target_machine_defs =
#if defined(__APPLE__)
    "__aarch64__\0"
    "__arm64__\0"
#else
    "__aarch64__\0"
#endif
    ;
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
ST_DATA const int reg_classes[NB_REGS] = {
|
|
|
|
RC_INT | RC_R(0),
|
|
|
|
RC_INT | RC_R(1),
|
|
|
|
RC_INT | RC_R(2),
|
|
|
|
RC_INT | RC_R(3),
|
|
|
|
RC_INT | RC_R(4),
|
|
|
|
RC_INT | RC_R(5),
|
|
|
|
RC_INT | RC_R(6),
|
|
|
|
RC_INT | RC_R(7),
|
|
|
|
RC_INT | RC_R(8),
|
|
|
|
RC_INT | RC_R(9),
|
|
|
|
RC_INT | RC_R(10),
|
|
|
|
RC_INT | RC_R(11),
|
|
|
|
RC_INT | RC_R(12),
|
|
|
|
RC_INT | RC_R(13),
|
|
|
|
RC_INT | RC_R(14),
|
|
|
|
RC_INT | RC_R(15),
|
|
|
|
RC_INT | RC_R(16),
|
|
|
|
RC_INT | RC_R(17),
|
|
|
|
RC_INT | RC_R(18),
|
|
|
|
RC_R30, // not in RC_INT as we make special use of x30
|
|
|
|
RC_FLOAT | RC_F(0),
|
|
|
|
RC_FLOAT | RC_F(1),
|
|
|
|
RC_FLOAT | RC_F(2),
|
|
|
|
RC_FLOAT | RC_F(3),
|
|
|
|
RC_FLOAT | RC_F(4),
|
|
|
|
RC_FLOAT | RC_F(5),
|
|
|
|
RC_FLOAT | RC_F(6),
|
|
|
|
RC_FLOAT | RC_F(7)
|
|
|
|
};
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* State only present when bounds checking support is compiled in.
   NOTE(review): the code that reads and writes these is outside this
   excerpt; their exact roles should be confirmed there. */
#if defined(CONFIG_TCC_BCHECK)
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
ST_DATA int func_bound_add_epilog;
#endif
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
// True when allocator register index x is at or above TREG_F(0),
// i.e. it lies in the range used for the v registers.
#define IS_FREG(x) ((x) >= TREG_F(0))
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static uint32_t intr(int r)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
assert(TREG_R(0) <= r && r <= TREG_R30);
|
|
|
|
return r < TREG_R30 ? r : 30;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert an allocator index in the range TREG_F(0)..TREG_F(7) to the
   register number used in instruction encodings (0..7).
   Asserts that r is within that range. */
static uint32_t fltr(int r)
{
    assert(TREG_F(0) <= r && r <= TREG_F(7));
    return r - TREG_F(0);
}
|
|
|
|
|
|
|
|
// Add an instruction to text section:
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void o(unsigned int c)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
int ind1 = ind + 4;
|
|
|
|
if (nocode_wanted)
|
2016-12-19 00:23:33 +08:00
|
|
|
return;
|
2015-02-14 02:58:31 +08:00
|
|
|
if (ind1 > cur_text_section->data_allocated)
|
2021-10-22 13:39:54 +08:00
|
|
|
section_realloc(cur_text_section, ind1);
|
|
|
|
write32le(cur_text_section->data + ind, c);
|
|
|
|
ind = ind1;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Try to encode the 64-bit value x as an A64 bitmask ("logical")
   immediate.

   Returns -1 when x cannot be encoded (this includes 0 and ~0).
   Otherwise returns a 13-bit field: bit 12 is set when the repeating
   element is 64 bits wide, bits 6..11 hold the rotation, and bits 0..5
   combine the element-size marker with (run length - 1). Callers place
   this field at bit 10 of the instruction word. */
static int arm64_encode_bimm64(uint64_t x)
{
    int neg = x & 1;
    int rep, pos, len;

    /* Work on a value whose bit 0 is clear; undo the inversion at the end. */
    if (neg)
        x = ~x;
    if (!x)
        return -1;

    /* Find the smallest element width (2..64) that x is a repetition of,
       and keep just one element. */
    if (x >> 2 == (x & (((uint64_t)1 << (64 - 2)) - 1)))
        rep = 2, x &= ((uint64_t)1 << 2) - 1;
    else if (x >> 4 == (x & (((uint64_t)1 << (64 - 4)) - 1)))
        rep = 4, x &= ((uint64_t)1 << 4) - 1;
    else if (x >> 8 == (x & (((uint64_t)1 << (64 - 8)) - 1)))
        rep = 8, x &= ((uint64_t)1 << 8) - 1;
    else if (x >> 16 == (x & (((uint64_t)1 << (64 - 16)) - 1)))
        rep = 16, x &= ((uint64_t)1 << 16) - 1;
    else if (x >> 32 == (x & (((uint64_t)1 << (64 - 32)) - 1)))
        rep = 32, x &= ((uint64_t)1 << 32) - 1;
    else
        rep = 64;

    /* pos = number of trailing zero bits (binary search). */
    pos = 0;
    if (!(x & (((uint64_t)1 << 32) - 1))) x >>= 32, pos += 32;
    if (!(x & (((uint64_t)1 << 16) - 1))) x >>= 16, pos += 16;
    if (!(x & (((uint64_t)1 <<  8) - 1))) x >>=  8, pos +=  8;
    if (!(x & (((uint64_t)1 <<  4) - 1))) x >>=  4, pos +=  4;
    if (!(x & (((uint64_t)1 <<  2) - 1))) x >>=  2, pos +=  2;
    if (!(x & (((uint64_t)1 <<  1) - 1))) x >>=  1, pos +=  1;

    /* len = length of the run of one bits that follows. */
    len = 0;
    if (!(~x & (((uint64_t)1 << 32) - 1))) x >>= 32, len += 32;
    if (!(~x & (((uint64_t)1 << 16) - 1))) x >>= 16, len += 16;
    if (!(~x & (((uint64_t)1 <<  8) - 1))) x >>=  8, len +=  8;
    if (!(~x & (((uint64_t)1 <<  4) - 1))) x >>=  4, len +=  4;
    if (!(~x & (((uint64_t)1 <<  2) - 1))) x >>=  2, len +=  2;
    if (!(~x & (((uint64_t)1 <<  1) - 1))) x >>=  1, len +=  1;

    /* Anything left means the element is not a single run of ones. */
    if (x)
        return -1;
    if (neg) {
        /* Describe the complementary run of the original value. */
        pos = (pos + len) & (rep - 1);
        len = rep - len;
    }
    return ((0x1000 & rep << 6) | (((rep - 1) ^ 31) << 1 & 63) |
            ((rep - pos) & (rep - 1)) << 6 | (len - 1));
}
|
|
|
|
|
|
|
|
/* Build a single instruction that sets register r to the constant x.

   Tries, in order: MOVZ with one non-zero 16-bit chunk, MOVN (32-bit
   then 64-bit forms) with one non-0xffff chunk, and finally a bitmask
   immediate obtained from arm64_encode_bimm64().

   Returns the instruction word, or 0 when no single instruction can
   produce x. r is OR-ed into the low bits as the destination register. */
static uint32_t arm64_movi(int r, uint64_t x)
{
    uint64_t m = 0xffff;
    int e;
    if (!(x & ~m))
        return 0x52800000 | r | x << 5; // movz w(r),#(x)
    if (!(x & ~(m << 16)))
        return 0x52a00000 | r | x >> 11; // movz w(r),#(x >> 16),lsl #16
    if (!(x & ~(m << 32)))
        return 0xd2c00000 | r | x >> 27; // movz x(r),#(x >> 32),lsl #32
    if (!(x & ~(m << 48)))
        return 0xd2e00000 | r | x >> 43; // movz x(r),#(x >> 48),lsl #48
    if ((x & ~m) == m << 16)
        return (0x12800000 | r |
                (~x << 5 & 0x1fffe0)); // movn w(r),#(~x)
    if ((x & ~(m << 16)) == m)
        return (0x12a00000 | r |
                (~x >> 11 & 0x1fffe0)); // movn w(r),#(~x >> 16),lsl #16
    if (!~(x | m))
        return (0x92800000 | r |
                (~x << 5 & 0x1fffe0)); // movn x(r),#(~x)
    if (!~(x | m << 16))
        return (0x92a00000 | r |
                (~x >> 11 & 0x1fffe0)); // movn x(r),#(~x >> 16),lsl #16
    if (!~(x | m << 32))
        return (0x92c00000 | r |
                (~x >> 27 & 0x1fffe0)); // movn x(r),#(~x >> 32),lsl #32
    if (!~(x | m << 48))
        return (0x92e00000 | r |
                (~x >> 43 & 0x1fffe0)); // movn x(r),#(~x >> 48),lsl #48
    if (!(x >> 32) && (e = arm64_encode_bimm64(x | x << 32)) >= 0)
        return 0x320003e0 | r | (uint32_t)e << 10; // movi w(r),#(x)
    if ((e = arm64_encode_bimm64(x)) >= 0)
        return 0xb20003e0 | r | (uint32_t)e << 10; // movi x(r),#(x)
    return 0;
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit code that sets register r to the 64-bit constant x.

   Uses one instruction when arm64_movi() finds one; otherwise emits a
   MOVZ or MOVN for the lowest chunk that needs setting, followed by a
   MOVK for each remaining 16-bit chunk that differs from the fill value. */
static void arm64_movimm(int r, uint64_t x)
{
    uint32_t i;
    if ((i = arm64_movi(r, x)))
        o(i); // a single MOV
    else {
        // MOVZ/MOVN and 1-3 MOVKs
        int z = 0, m = 0;
        uint32_t mov1 = 0xd2800000; // movz
        uint64_t x1 = x;
        /* Count all-zero chunks (z) and all-one chunks (m). */
        for (i = 0; i < 64; i += 16) {
            z += !(x >> i & 0xffff);
            m += !(~x >> i & 0xffff);
        }
        /* More 0xffff chunks than zero chunks: start from MOVN instead. */
        if (m > z) {
            x1 = ~x;
            mov1 = 0x92800000; // movn
        }
        for (i = 0; i < 64; i += 16)
            if (x1 >> i & 0xffff) {
                o(mov1 | r | (x1 >> i & 0xffff) << 5 | i << 17);
                // movz/movn x(r),#(*),lsl #(i)
                break;
            }
        /* x1 selects which chunks need a MOVK; the value written always
           comes from the original x. */
        for (i += 16; i < 64; i += 16)
            if (x1 >> i & 0xffff)
                o(0xf2800000 | r | (x >> i & 0xffff) << 5 | i << 17);
                // movk x(r),#(*),lsl #(i)
    }
}
|
|
|
|
|
|
|
|
// Patch all branches in list pointed to by t to branch to a:
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gsym_addr(int t_, int a_)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
uint32_t t = t_;
|
|
|
|
uint32_t a = a_;
|
|
|
|
while (t) {
|
2015-11-20 02:21:14 +08:00
|
|
|
unsigned char *ptr = cur_text_section->data + t;
|
|
|
|
uint32_t next = read32le(ptr);
|
2015-02-14 02:58:31 +08:00
|
|
|
if (a - t + 0x8000000 >= 0x10000000)
|
2021-10-22 13:39:54 +08:00
|
|
|
tcc_error("branch out of range");
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr, (a - t == 4 ? 0xd503201f : // nop
|
|
|
|
0x14000000 | ((a - t) >> 2 & 0x3ffffff))); // b
|
2015-02-14 02:58:31 +08:00
|
|
|
t = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int arm64_type_size(int t)
|
|
|
|
{
|
2020-03-22 15:26:03 +08:00
|
|
|
/*
|
|
|
|
* case values are in increasing order (from 1 to 11).
|
|
|
|
* which 'may' help compiler optimizers. See tcc.h
|
|
|
|
*/
|
2015-02-14 02:58:31 +08:00
|
|
|
switch (t & VT_BTYPE) {
|
|
|
|
case VT_BYTE: return 0;
|
|
|
|
case VT_SHORT: return 1;
|
2020-03-22 15:26:03 +08:00
|
|
|
case VT_INT: return 2;
|
|
|
|
case VT_LLONG: return 3;
|
2015-02-14 02:58:31 +08:00
|
|
|
case VT_PTR: return 3;
|
|
|
|
case VT_FUNC: return 3;
|
2020-03-22 15:26:03 +08:00
|
|
|
case VT_STRUCT: return 3;
|
2015-02-14 02:58:31 +08:00
|
|
|
case VT_FLOAT: return 2;
|
|
|
|
case VT_DOUBLE: return 3;
|
|
|
|
case VT_LDOUBLE: return 4;
|
|
|
|
case VT_BOOL: return 0;
|
|
|
|
}
|
|
|
|
assert(0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit x(reg) = sp + off, where off is interpreted as a signed 64-bit
   value (bit 63 set selects SUB with the negated offset). Magnitudes
   below 4096 use the immediate form; larger ones are first loaded into
   x30, which is clobbered. */
static void arm64_spoff(int reg, uint64_t off)
{
    uint32_t sub = off >> 63;
    if (sub)
        off = -off;
    if (off < 4096)
        o(0x910003e0 | sub << 30 | reg | off << 10);
        // (add|sub) x(reg),sp,#(off)
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x8b3e63e0 | sub << 30 | reg); // (add|sub) x(reg),sp,x30
    }
}
|
|
|
|
|
2020-07-30 15:26:20 +08:00
|
|
|
/* Split the offset off (size code sz_) into a part that goes into the
   base address and a part that a load/store can take as its immediate.
   The two parts always add up to off.

   invert 0: return the part to add to the base (passed to arm64_sym)
   invert 1: return the part to use as the load/store offset

   An offset that already fits the scaled unsigned 12-bit form, or lies
   within -256..255, is returned whole as the load/store part. The mask
   is computed in 64 bits so that the high bits of off (for example of a
   negative offset) are kept in the base part. */
static uint64_t arm64_check_offset(int invert, int sz_, uint64_t off)
{
    uint32_t sz = sz_;
    uint64_t mask = (uint64_t)0xfff << sz; /* bits of a scaled imm12 */

    if (!(off & ~mask) ||
        (off < 256 || -off <= 256))
        return invert ? off : 0ul;
    else if ((off & mask))
        return invert ? off & mask
                      : off & ~mask;
    else if (off & 0x1ff)
        return invert ? off & 0x1ff : off & ~(uint64_t)0x1ff;
    else
        return invert ? 0ul : off;
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit an integer load of size code sz_ (0..3) into x(dst) from
   [x(bas) + off].

   sg requests a sign-extending load; it is ignored for size codes >= 2.
   Chooses the scaled unsigned-immediate form, the unscaled form for
   offsets in -256..255, or loads off into x30 (clobbering it) and uses
   the register-offset form. The fit test is done in 64 bits so that an
   offset with high bits set is never mistaken for a small one. */
static void arm64_ldrx(int sg, int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    if (sz >= 2)
        sg = 0;
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x39400000 | dst | bas << 5 | off << (10 - sz) |
          (uint32_t)!!sg << 23 | sz << 30); // ldr(*) x(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x38400000 | dst | bas << 5 | (off & 511) << 12 |
          (uint32_t)!!sg << 23 | sz << 30); // ldur(*) x(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x38206800 | dst | bas << 5 | (uint32_t)30 << 16 |
          (uint32_t)(!!sg + 1) << 22 | sz << 30); // ldr(*) x(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit a SIMD/FP register load of size code sz_ (2 = s, 3 = d, 4 = q)
   into v(dst) from [x(bas) + off].

   Chooses the scaled unsigned-immediate form, the unscaled form for
   offsets in -256..255, or loads off into x30 (clobbering it) and uses
   the register-offset form. The fit test is done in 64 bits so that an
   offset with high bits set is never mistaken for a small one. */
static void arm64_ldrv(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x3d400000 | dst | bas << 5 | off << (10 - sz) |
          (sz & 4) << 21 | (sz & 3) << 30); // ldr (s|d|q)(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x3c400000 | dst | bas << 5 | (off & 511) << 12 |
          (sz & 4) << 21 | (sz & 3) << 30); // ldur (s|d|q)(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x3c606800 | dst | bas << 5 | (uint32_t)30 << 16 |
          sz << 30 | (sz & 4) << 21); // ldr (s|d|q)(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit code that loads `size` bytes (0..16) from the address held in
   x(reg). Up to 8 bytes end up in x(reg); for 9..16 bytes the second
   half goes into x(reg+1). Sizes that are not a power of two are
   assembled from accesses that stay inside the `size` bytes, followed by
   shifts/ORs. x30 is clobbered for sizes 3, 5, 6 and 7. Size 0 emits
   nothing; any other size asserts. */
static void arm64_ldrs(int reg_, int size)
{
    uint32_t reg = reg_;
    // Use x30 for intermediate value in some cases.
    switch (size) {
    default: assert(0); break;
    case 0:
        /* Can happen with zero size structs */
        break;
    case 1:
        arm64_ldrx(0, 0, reg, reg, 0);
        break;
    case 2:
        arm64_ldrx(0, 1, reg, reg, 0);
        break;
    case 3:
        arm64_ldrx(0, 1, 30, reg, 0);
        arm64_ldrx(0, 0, reg, reg, 2);
        o(0x2a0043c0 | reg | reg << 16); // orr w(reg),w30,w(reg),lsl #16
        break;
    case 4:
        arm64_ldrx(0, 2, reg, reg, 0);
        break;
    case 5:
        arm64_ldrx(0, 2, 30, reg, 0);
        arm64_ldrx(0, 0, reg, reg, 4);
        o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32
        break;
    case 6:
        arm64_ldrx(0, 2, 30, reg, 0);
        arm64_ldrx(0, 1, reg, reg, 4);
        o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32
        break;
    case 7:
        arm64_ldrx(0, 2, 30, reg, 0);
        arm64_ldrx(0, 2, reg, reg, 3);
        o(0x53087c00 | reg | reg << 5); // lsr w(reg), w(reg), #8
        o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32
        break;
    case 8:
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 9:
        arm64_ldrx(0, 0, reg + 1, reg, 8);
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 10:
        arm64_ldrx(0, 1, reg + 1, reg, 8);
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 11:
        arm64_ldrx(0, 2, reg + 1, reg, 7);
        o(0x53087c00 | (reg+1) | (reg+1) << 5); // lsr w(reg+1), w(reg+1), #8
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 12:
        arm64_ldrx(0, 2, reg + 1, reg, 8);
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 13:
        arm64_ldrx(0, 3, reg + 1, reg, 5);
        o(0xd358fc00 | (reg+1) | (reg+1) << 5); // lsr x(reg+1), x(reg+1), #24
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 14:
        arm64_ldrx(0, 3, reg + 1, reg, 6);
        o(0xd350fc00 | (reg+1) | (reg+1) << 5); // lsr x(reg+1), x(reg+1), #16
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 15:
        arm64_ldrx(0, 3, reg + 1, reg, 7);
        o(0xd348fc00 | (reg+1) | (reg+1) << 5); // lsr x(reg+1), x(reg+1), #8
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 16:
        o(0xa9400000 | reg | (reg+1) << 10 | reg << 5);
        // ldp x(reg),x(reg+1),[x(reg)]
        break;
    }
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit an integer store of size code sz_ (0..3) of x(dst) to
   [x(bas) + off].

   Chooses the scaled unsigned-immediate form, the unscaled form for
   offsets in -256..255, or loads off into x30 (clobbering it) and uses
   the register-offset form. The fit test is done in 64 bits so that an
   offset with high bits set is never mistaken for a small one. */
static void arm64_strx(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x39000000 | dst | bas << 5 | off << (10 - sz) | sz << 30);
        // str(*) x(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x38000000 | dst | bas << 5 | (off & 511) << 12 | sz << 30);
        // stur(*) x(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x38206800 | dst | bas << 5 | (uint32_t)30 << 16 | sz << 30);
        // str(*) x(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit a SIMD/FP register store of size code sz_ (2 = s, 3 = d, 4 = q)
   of v(dst) to [x(bas) + off].

   Chooses the scaled unsigned-immediate form, the unscaled form for
   offsets in -256..255, or loads off into x30 (clobbering it) and uses
   the register-offset form. The fit test is done in 64 bits so that an
   offset with high bits set is never mistaken for a small one. */
static void arm64_strv(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x3d000000 | dst | bas << 5 | off << (10 - sz) |
          (sz & 4) << 21 | (sz & 3) << 30); // str (s|d|q)(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x3c000000 | dst | bas << 5 | (off & 511) << 12 |
          (sz & 4) << 21 | (sz & 3) << 30); // stur (s|d|q)(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x3c206800 | dst | bas << 5 | (uint32_t)30 << 16 |
          sz << 30 | (sz & 4) << 21); // str (s|d|q)(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit code that sets x(r) to the address of sym plus addend.

   The symbol address is fetched through the GOT (ADRP + LDR with the
   matching relocations). The addend is then added in up to three steps:
   bits 0..11 and bits 12..23 as ADD immediates, and any higher bits via
   a scratch register that is saved to and restored from the stack.
   The scratch register is x0, or x1 when r is 0. */
static void arm64_sym(int r, Sym *sym, unsigned long addend)
{
    greloca(cur_text_section, sym, ind, R_AARCH64_ADR_GOT_PAGE, 0);
    o(0x90000000 | r);            // adrp xr, #sym
    greloca(cur_text_section, sym, ind, R_AARCH64_LD64_GOT_LO12_NC, 0);
    o(0xf9400000 | r | (r << 5)); // ld xr,[xr, #sym]
    if (addend) {
        // add xr, xr, #addend
        if (addend & 0xffful)
            o(0x91000000 | r | r << 5 | (addend & 0xfff) << 10);
        if (addend > 0xffful) {
            // add xr, xr, #addend, lsl #12
            if (addend & 0xfff000ul)
                o(0x91400000 | r | r << 5 | ((addend >> 12) & 0xfff) << 10);
            if (addend > 0xfffffful) {
                /* very unlikely */
                int t = r ? 0 : 1;
                o(0xf81f0fe0 | t);            /* str xt, [sp, #-16]! */
                arm64_movimm(t, addend & ~0xfffffful); // use xt for addend
                /* Add the high part to xr. The previous `add xr, xt, #0`
                   replaced xr with xt and lost the symbol address. */
                o(0x8b000000 | r | r << 5 | t << 16); /* add xr, xr, xt */
                o(0xf84107e0 | t);            /* ldr xt, [sp], #16 */
            }
        }
    }
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Forward declaration: load() calls this for values whose location is
   VT_CMP; the definition is further down in the file. */
static void arm64_load_cmp(int r, SValue *sv);
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
This way we can delay putting the non-inverted or inverted jump
until we can see which one is more suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/* Emit code that loads the value described by sv into allocator
   register r (an integer or a floating-point register index).

   The location kind is taken from sv->r with VT_BOUNDED and VT_NONCONST
   masked off. The constant sv->c.i is used as a sign-extended 32-bit
   value (svcul), except where the full 64-bit value is passed on
   explicitly. x29 is the base for VT_LOCAL accesses and x30 is used as
   a scratch register for addresses and large offsets. An unhandled
   location prints a diagnostic and asserts. */
ST_FUNC void load(int r, SValue *sv)
{
    int svtt = sv->type.t;
    int svr = sv->r & ~(VT_BOUNDED | VT_NONCONST);
    int svrv = svr & VT_VALMASK;
    uint64_t svcul = (uint32_t)sv->c.i;
    svcul = svcul >> 31 & 1 ? svcul - ((uint64_t)1 << 32) : svcul;

    /* Local variable: load from [x29 + offset]. */
    if (svr == (VT_LOCAL | VT_LVAL)) {
        if (IS_FREG(r))
            arm64_ldrv(arm64_type_size(svtt), fltr(r), 29, svcul);
        else
            arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
                       intr(r), 29, svcul);
        return;
    }

    /* Lvalue at a constant address, optionally relative to a symbol. */
    if (svr == (VT_CONST | VT_LVAL)) {
        uint64_t off;
        if (sv->sym) {
            /* Base part goes into x30, the rest into the load offset. */
            arm64_sym(30, sv->sym, // use x30 for address
                      arm64_check_offset(0, arm64_type_size(svtt), sv->c.i));
            off = arm64_check_offset(1, arm64_type_size(svtt), sv->c.i);
        } else {
            /* x30 already holds the complete address, so the load must
               not add the constant a second time. */
            arm64_movimm (30, sv->c.i);
            off = 0;
        }
        if (IS_FREG(r))
            arm64_ldrv(arm64_type_size(svtt), fltr(r), 30, off);
        else
            arm64_ldrx(!(svtt&VT_UNSIGNED), arm64_type_size(svtt), intr(r), 30,
                       off);
        return;
    }

    /* Lvalue whose address is in a register. */
    if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) {
        if ((svtt & VT_BTYPE) != VT_VOID) {
            if (IS_FREG(r))
                arm64_ldrv(arm64_type_size(svtt), fltr(r), intr(svrv), 0);
            else
                arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
                           intr(r), intr(svrv), 0);
        }
        return;
    }

    /* Lvalue at symbol + offset. */
    if (svr == (VT_CONST | VT_LVAL | VT_SYM)) {
        arm64_sym(30, sv->sym, // use x30 for address
                  arm64_check_offset(0, arm64_type_size(svtt), svcul));
        if (IS_FREG(r))
            arm64_ldrv(arm64_type_size(svtt), fltr(r), 30,
                       arm64_check_offset(1, arm64_type_size(svtt), svcul));
        else
            arm64_ldrx(!(svtt&VT_UNSIGNED), arm64_type_size(svtt), intr(r), 30,
                       arm64_check_offset(1, arm64_type_size(svtt), svcul));
        return;
    }

    /* Address of symbol + offset. */
    if (svr == (VT_CONST | VT_SYM)) {
        arm64_sym(intr(r), sv->sym, svcul);
        return;
    }

    /* Plain constant: 64-bit for size code 3, otherwise the low 32 bits. */
    if (svr == VT_CONST) {
        if ((svtt & VT_BTYPE) != VT_VOID)
            arm64_movimm(intr(r), arm64_type_size(svtt) == 3 ?
                         sv->c.i : (uint32_t)svcul);
        return;
    }

    /* Register-to-register move within the same register file. */
    if (svr < VT_CONST) {
        if (IS_FREG(r) && IS_FREG(svr)) {
            if (svtt == VT_LDOUBLE)
                o(0x4ea01c00 | fltr(r) | fltr(svr) << 5);
                    // mov v(r).16b,v(svr).16b
            else
                o(0x1e604000 | fltr(r) | fltr(svr) << 5); // fmov d(r),d(svr)
        }
        else if (!IS_FREG(r) && !IS_FREG(svr))
            o(0xaa0003e0 | intr(r) | intr(svr) << 16); // mov x(r),x(svr)
        else
            assert(0);
        return;
    }

    /* Address of a local: x29 minus the negated offset. */
    if (svr == VT_LOCAL) {
        if (-svcul < 0x1000)
            o(0xd10003a0 | intr(r) | -svcul << 10); // sub x(r),x29,#...
        else {
            arm64_movimm(30, -svcul); // use x30 for offset
            o(0xcb0003a0 | intr(r) | (uint32_t)30 << 16); // sub x(r),x29,x30
        }
        return;
    }

    /* Pending jump list: materialize 0/1 depending on which path is taken. */
    if (svr == VT_JMP || svr == VT_JMPI) {
        int t = (svr == VT_JMPI);
        arm64_movimm(intr(r), t);
        o(0x14000002); // b .+8
        gsym(svcul);
        arm64_movimm(intr(r), t ^ 1);
        return;
    }

    /* Pointer stored in a local slot: fetch it, then load through it. */
    if (svr == (VT_LLOCAL | VT_LVAL)) {
        arm64_ldrx(0, 3, 30, 29, svcul); // use x30 for offset
        if (IS_FREG(r))
            arm64_ldrv(arm64_type_size(svtt), fltr(r), 30, 0);
        else
            arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
                       intr(r), 30, 0);
        return;
    }

    if (svr == VT_CMP) {
        arm64_load_cmp(r, sv);
        return;
    }

    printf("load(%x, (%x, %x, %lx))\n", r, svtt, sv->r, (unsigned long)svcul);
    assert(0);
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/*
 * Emit code that stores register `r` into the lvalue described by `sv`.
 *
 * A vector store (arm64_strv) is used when IS_FREG(r) holds, an integer
 * store (arm64_strx) otherwise.  Four addressing forms are handled:
 *   - VT_LOCAL | VT_LVAL           : base x29 plus the sign-extended
 *                                    low 32 bits of sv->c.i
 *   - VT_CONST | VT_LVAL           : address built in x30 from sv->sym or
 *                                    from the constant sv->c.i
 *   - <register> | VT_LVAL         : address held in a register
 *   - VT_CONST | VT_LVAL | VT_SYM  : symbol address built in x30
 * Any other form prints a diagnostic and asserts.
 */
ST_FUNC void store(int r, SValue *sv)
{
    int vt = sv->type.t;
    int kind = sv->r & ~VT_BOUNDED;
    int addr_reg = kind & VT_VALMASK;
    uint64_t disp = (uint32_t)sv->c.i;
    uint64_t off;
    int base;

    // Sign-extend the low 32 bits of the constant to 64 bits.
    if (disp >> 31 & 1)
        disp -= (uint64_t)1 << 32;

    if (kind == (VT_LOCAL | VT_LVAL)) {
        base = 29;
        off = disp;
    }
    else if (kind == (VT_CONST | VT_LVAL)) {
        // NOTE(review): without a symbol the whole constant is loaded into
        // x30 and an offset is still passed to the store below -- confirm
        // against arm64_check_offset() that the two do not add up twice.
        if (sv->sym)
            arm64_sym(30, sv->sym, // use x30 for address
                      arm64_check_offset(0, arm64_type_size(vt), sv->c.i));
        else
            arm64_movimm (30, sv->c.i);
        base = 30;
        off = arm64_check_offset(1, arm64_type_size(vt), sv->c.i);
    }
    else if ((kind & ~VT_VALMASK) == VT_LVAL && addr_reg < VT_CONST) {
        base = intr(addr_reg);
        off = 0;
    }
    else if (kind == (VT_CONST | VT_LVAL | VT_SYM)) {
        arm64_sym(30, sv->sym, // use x30 for address
                  arm64_check_offset(0, arm64_type_size(vt), disp));
        base = 30;
        off = arm64_check_offset(1, arm64_type_size(vt), disp);
    }
    else {
        printf("store(%x, (%x, %x, %lx))\n", r, vt, sv->r, (long)disp);
        assert(0);
        return;
    }

    if (IS_FREG(r))
        arm64_strv(arm64_type_size(vt), fltr(r), base, off);
    else
        arm64_strx(arm64_type_size(vt), intr(r), base, off);
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void arm64_gen_bl_or_b(int b)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && (vtop->r & VT_SYM)) {
|
|
|
|
greloca(cur_text_section, vtop->sym, ind,
|
2020-07-05 20:01:50 +08:00
|
|
|
b ? R_AARCH64_JUMP26 : R_AARCH64_CALL26, 0);
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x14000000 | (uint32_t)!b << 31); // b/bl .
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
2020-06-16 13:39:48 +08:00
|
|
|
else {
|
|
|
|
#ifdef CONFIG_TCC_BCHECK
|
2021-10-22 13:39:54 +08:00
|
|
|
vtop->r &= ~VT_MUSTBOUND;
|
2020-06-16 13:39:48 +08:00
|
|
|
#endif
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xd61f0000 | (uint32_t)!b << 21 | intr(gv(RC_R30)) << 5); // br/blr
|
2020-06-16 13:39:48 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(CONFIG_TCC_BCHECK)
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void gen_bounds_call(int v)
|
2020-06-16 13:39:48 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
Sym *sym = external_helper_sym(v);
|
2020-06-16 13:39:48 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
greloca(cur_text_section, sym, ind, R_AARCH64_CALL26, 0);
|
|
|
|
o(0x94000000); // bl
|
2020-06-16 13:39:48 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void gen_bounds_prolog(void)
|
2020-06-16 13:39:48 +08:00
|
|
|
{
|
|
|
|
/* leave some room for bound checking code */
|
2021-10-22 13:39:54 +08:00
|
|
|
func_bound_offset = lbounds_section->data_offset;
|
|
|
|
func_bound_ind = ind;
|
|
|
|
func_bound_add_epilog = 0;
|
|
|
|
o(0xd503201f); /* nop -> mov x0, lbound section pointer */
|
|
|
|
o(0xd503201f);
|
|
|
|
o(0xd503201f);
|
|
|
|
o(0xd503201f); /* nop -> call __bound_local_new */
|
2020-06-16 13:39:48 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void gen_bounds_epilog(void)
|
2020-06-16 13:39:48 +08:00
|
|
|
{
|
|
|
|
addr_t saved_ind;
|
|
|
|
addr_t *bounds_ptr;
|
|
|
|
Sym *sym_data;
|
2021-10-22 13:39:54 +08:00
|
|
|
int offset_modified = func_bound_offset != lbounds_section->data_offset;
|
2020-06-16 13:39:48 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
if (!offset_modified && !func_bound_add_epilog)
|
2020-06-16 13:39:48 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
/* add end of table info */
|
2021-10-22 13:39:54 +08:00
|
|
|
bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
|
2020-06-16 13:39:48 +08:00
|
|
|
*bounds_ptr = 0;
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
|
Add dwarf support
The new gcc12 release does not support stabs any more.
This was a good reason to add support for dwarf.
The stabs code still works and is used if configure option --dwarf
is not used.
Tested on x86_64, i386, arm, arm64, riscv64 with dwarf-5.
Some debuggers may not support dwarf-5. Try using older dwarf versions
i that case.
The tccmacho.c code probably need some support for dwarf.
arm-gen.c, arm64-gen.c, i386-gen.c, riscv64-gen.c, x86_64-gen.
- fix get_sym_ref symbol size
arm-link.c, arm64-link.c, i386-link.c, riscv64-link.c, x86_64-link.c
- add R_DATA_32U
libtcc.c:
- parse -gdwarf option
tcc.c:
- add dwarf option
tcc.h:
- add dwarf option and sections
tccelf.c:
- init dwarf sections
- avoid adding sh_addr for dwarf sections
- remove dwarf relocs for output dll
- add dwarf sections for tccrun
tccgen.c:
- add dwarf defines + global data
- add dwarf_* functions
- mix dwarf code with stabs code
- a trick is used to emit function name in .debug_line section so
only this section has to be parsed instead of .debug_info and
.debug_abbrev.
- fix init debug_modes
tccrun.c:
- add dwarf sections in rt_context
- init them in tcc_run
- add new dwarf code rt_printline_dwarf to find file/function
dwarf.h:
- New file
tcc-doc.texi:
- document dwarf
configure:
- add dwarf option
lib/Makefile
- change -gstabs into -gdwarf
lib/bt-exe.c, tests/tests2/Makefile, tests/tests2/126_bound_global:
- Add __bound_init call
- Add new testcase to test it
2022-05-05 15:10:37 +08:00
|
|
|
func_bound_offset, PTR_SIZE);
|
2020-06-16 13:39:48 +08:00
|
|
|
|
|
|
|
/* generate bound local allocation */
|
|
|
|
if (offset_modified) {
|
2021-10-22 13:39:54 +08:00
|
|
|
saved_ind = ind;
|
|
|
|
ind = func_bound_ind;
|
|
|
|
greloca(cur_text_section, sym_data, ind, R_AARCH64_ADR_GOT_PAGE, 0);
|
|
|
|
o(0x90000000 | 0); // adrp x0, #sym_data
|
|
|
|
greloca(cur_text_section, sym_data, ind, R_AARCH64_LD64_GOT_LO12_NC, 0);
|
|
|
|
o(0xf9400000 | 0 | (0 << 5)); // ld x0,[x0, #sym_data]
|
|
|
|
gen_bounds_call(TOK___bound_local_new);
|
|
|
|
ind = saved_ind;
|
2020-06-16 13:39:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* generate bound check local freeing */
|
2022-04-16 02:11:18 +08:00
|
|
|
o(0xa9bf07e0); /* stp x0, x1, [sp, #-16]! */
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x3c9f0fe0); /* str q0, [sp, #-16]! */
|
|
|
|
greloca(cur_text_section, sym_data, ind, R_AARCH64_ADR_GOT_PAGE, 0);
|
|
|
|
o(0x90000000 | 0); // adrp x0, #sym_data
|
|
|
|
greloca(cur_text_section, sym_data, ind, R_AARCH64_LD64_GOT_LO12_NC, 0);
|
|
|
|
o(0xf9400000 | 0 | (0 << 5)); // ld x0,[x0, #sym_data]
|
|
|
|
gen_bounds_call(TOK___bound_local_delete);
|
|
|
|
o(0x3cc107e0); /* ldr q0, [sp], #16 */
|
2022-04-16 02:11:18 +08:00
|
|
|
o(0xa8c107e0); /* ldp x0, x1, [sp], #16 */
|
2020-06-16 13:39:48 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
/*
 * Recursive helper for arm64_hfa(): count the floating-point elements
 * that `type` contributes to a homogeneous floating-point aggregate.
 *
 *   type   type to examine (float, struct/union, or array)
 *   fsize  in/out: common element size in bytes; 0 until the first
 *          floating-point element has been seen
 *   num    number of elements counted before `type`
 *
 * Returns the element count after `type`, or -1 when the type cannot be
 * part of such an aggregate (mixed element sizes, more than four
 * elements, padding, or a non-floating-point member).
 */
static int arm64_hfa_aux(CType *type, int *fsize, int num)
{
    if (is_float(type->t)) {
        int align, size = type_size(type, &align);

        if (num >= 4 || (*fsize && *fsize != size))
            return -1;
        *fsize = size;
        return num + 1;
    }

    if ((type->t & VT_BTYPE) == VT_STRUCT) {
        int first = num;
        int has_offsets = 0; // a member at a non-zero offset: struct, not union
        Sym *field;

        for (field = type->ref->next; field; field = field->next)
            if (field->c) {
                has_offsets = 1;
                break;
            }

        for (field = type->ref->next; field; field = field->next) {
            if (has_offsets) {
                // struct: each member must start right after the elements
                // counted so far
                if (field->c != (num - first) * *fsize)
                    return -1;
                num = arm64_hfa_aux(&field->type, fsize, num);
                if (num == -1)
                    return -1;
            }
            else {
                // union: every member starts at `first`; keep the largest
                int count = arm64_hfa_aux(&field->type, fsize, first);

                if (count == -1)
                    return -1;
                if (count > num)
                    num = count;
            }
        }

        // The aggregate must be exactly as large as its counted elements.
        if (type->ref->c != (num - first) * *fsize)
            return -1;
        return num;
    }

    if ((type->t & VT_ARRAY) && ((type->t & VT_BTYPE) != VT_PTR)) {
        int after;

        if (!type->ref->c)
            return num;
        after = arm64_hfa_aux(&type->ref->type, fsize, num);
        if (after == -1 || (after != num && type->ref->c > 4))
            return -1;
        // Scale the per-element count by the array length.
        after = num + type->ref->c * (after - num);
        return after > 4 ? -1 : after;
    }

    return -1;
}
|
|
|
|
|
2020-06-27 23:22:04 +08:00
|
|
|
/*
 * Decide whether `type` is a homogeneous floating-point aggregate.
 *
 * Returns the number of elements (1..4) and, when `fsize` is not NULL,
 * stores the element size in bytes through it.  Returns 0, leaving
 * *fsize untouched, for every other type.
 */
static int arm64_hfa(CType *type, unsigned *fsize)
{
    int elem_size = 0;
    int count;

    // Only structs/unions and (non-pointer) arrays are candidates.
    if ((type->t & VT_BTYPE) != VT_STRUCT &&
        !((type->t & VT_ARRAY) && ((type->t & VT_BTYPE) != VT_PTR)))
        return 0;

    count = arm64_hfa_aux(type, &elem_size, 0);
    if (count <= 0 || count > 4)
        return 0;

    if (fsize)
        *fsize = elem_size;
    return count;
}
|
|
|
|
|
2022-11-17 02:52:51 +08:00
|
|
|
/*
 * Assign a location to each of the `n` values described by `type[]`,
 * applying the rules tagged B.2 .. C.15 below in order.
 *
 * Encoding written to a[i]:
 *   0..15   integer register x(a[i] / 2); bit 0 set means the value was
 *           replaced by a pointer to a copy
 *   16..31  vector register v(a[i] / 2 - 8)
 *   >= 32   stack offset a[i] - 32 (bit 0 again marks a pointer)
 *
 * `variadic` is only consulted when built with __APPLE__ defined: from
 * argument index `variadic` onwards everything is forced onto the stack.
 *
 * Returns the number of stack bytes used.
 */
static unsigned long arm64_pcs_aux(int variadic, int n, CType **type, unsigned long *a)
{
    int gr = 0;                // next integer register
    int vr = 0;                // next vector register
    unsigned long sp_off = 32; // next stack offset
    int i;

    for (i = 0; i < n; i++) {
        int hfa = arm64_hfa(type[i], 0);
        int bt = type[i]->t & VT_BTYPE;
        int size, align;

        if ((type[i]->t & VT_ARRAY) || bt == VT_FUNC)
            size = align = 8;
        else
            size = type_size(type[i], &align);

#if defined(__APPLE__)
        if (variadic && i == variadic) {
            gr = 8;
            vr = 8;
        }
#endif

        // B.2: an HFA is left unchanged
        if (!hfa) {
            if (size > 16) {
                // B.3: replace with pointer
                if (gr < 8) {
                    a[i] = gr << 1 | 1;
                    gr++;
                }
                else {
                    sp_off = (sp_off + 7) & ~7;
                    a[i] = sp_off | 1;
                    sp_off += 8;
                }
                continue;
            }
            if (bt == VT_STRUCT)
                // B.4
                size = (size + 7) & ~7;
        }

        // C.1
        if (is_float(type[i]->t) && vr < 8) {
            a[i] = 16 + (vr << 1);
            vr++;
            continue;
        }

        // C.2
        if (hfa && vr + hfa <= 8) {
            a[i] = 16 + (vr << 1);
            vr += hfa;
            continue;
        }

        // C.3
        if (hfa) {
            vr = 8;
            size = (size + 7) & ~7;
        }

        // C.4
        if (hfa || bt == VT_LDOUBLE) {
            sp_off = (sp_off + 7) & ~7;
            sp_off = (sp_off + align - 1) & -align;
        }

        // C.5
        if (bt == VT_FLOAT)
            size = 8;

        // C.6
        if (hfa || is_float(type[i]->t)) {
            a[i] = sp_off;
            sp_off += size;
            continue;
        }

        // C.7
        if (bt != VT_STRUCT && size <= 8 && gr < 8) {
            a[i] = gr << 1;
            gr++;
            continue;
        }

        // C.8
        if (align == 16)
            gr = (gr + 1) & ~1;

        // C.9
        if (bt != VT_STRUCT && size == 16 && gr < 7) {
            a[i] = gr << 1;
            gr += 2;
            continue;
        }

        // C.10
        if (bt == VT_STRUCT && size <= (8 - gr) * 8) {
            a[i] = gr << 1;
            gr += (size + 7) >> 3;
            continue;
        }

        // C.11
        gr = 8;

        // C.12
        sp_off = (sp_off + 7) & ~7;
        sp_off = (sp_off + align - 1) & -align;

        // C.13
        if (bt == VT_STRUCT) {
            a[i] = sp_off;
            sp_off += size;
            continue;
        }

        // C.14
        if (size < 8)
            size = 8;

        // C.15
        a[i] = sp_off;
        sp_off += size;
    }

    return sp_off - 32;
}
|
|
|
|
|
2022-11-17 02:52:51 +08:00
|
|
|
/*
 * Classify a function's return value and its `n` arguments.
 *
 * type[0] is the return type and type[1..n] the argument types; the
 * matching entries of a[] receive the locations (see arm64_pcs_aux for
 * the encoding).  a[0] is (unsigned long)-1 for a void return, and 1
 * means the result is returned through the pointer in x8.
 *
 * Returns the number of stack bytes needed for the arguments.
 */
static unsigned long arm64_pcs(int variadic, int n, CType **type, unsigned long *a)
{
    unsigned long stack;

    // Return type:
    if ((type[0]->t & VT_BTYPE) != VT_VOID) {
        arm64_pcs_aux(0, 1, type, a);
        assert(a[0] == 0 || a[0] == 1 || a[0] == 16);
    }
    else {
        a[0] = -1;
    }

    // Argument types:
    stack = arm64_pcs_aux(variadic, n, type + 1, a + 1);

    // Debug dump of the assignment; disabled.
    if (0) {
        int i;

        for (i = 0; i <= n; i++) {
            const char *ptr = a[i] & 1 ? " pointer" : "";

            if (i)
                printf("arm64_pcs arg %d: ", i);
            else
                printf("arm64_pcs return: ");

            if (a[i] == (unsigned long)-1)
                printf("void\n");
            else if (a[i] == 1 && !i)
                printf("X8 pointer\n");
            else if (a[i] < 16)
                printf("X%lu%s\n", a[i] / 2, ptr);
            else if (a[i] < 32)
                printf("V%lu\n", a[i] / 2 - 8);
            else
                printf("stack %lu%s\n", (a[i] - 32) & ~1, ptr);
        }
    }

    return stack;
}
|
|
|
|
|
2022-11-17 02:52:51 +08:00
|
|
|
/*
 * Count the parameters declared by function type `type`, i.e. the
 * entries chained after type->ref.
 */
static int n_func_args(CType *type)
{
    Sym *param = type->ref->next;
    int count = 0;

    while (param) {
        count++;
        param = param->next;
    }
    return count;
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gfunc_call(int nb_args)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
CType *return_type;
|
|
|
|
CType **t;
|
|
|
|
unsigned long *a, *a1;
|
|
|
|
unsigned long stack;
|
|
|
|
int i;
|
2022-11-17 02:52:51 +08:00
|
|
|
int variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
|
|
|
|
int var_nb_arg = n_func_args(&vtop[-nb_args].type);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2020-06-16 13:39:48 +08:00
|
|
|
#ifdef CONFIG_TCC_BCHECK
|
2021-10-22 13:39:54 +08:00
|
|
|
if (tcc_state->do_bounds_check)
|
|
|
|
gbound_args(nb_args);
|
2020-06-16 13:39:48 +08:00
|
|
|
#endif
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
return_type = &vtop[-nb_args].type.ref->type;
|
2015-02-14 02:58:31 +08:00
|
|
|
if ((return_type->t & VT_BTYPE) == VT_STRUCT)
|
|
|
|
--nb_args;
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
t = tcc_malloc((nb_args + 1) * sizeof(*t));
|
|
|
|
a = tcc_malloc((nb_args + 1) * sizeof(*a));
|
|
|
|
a1 = tcc_malloc((nb_args + 1) * sizeof(*a1));
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
t[0] = return_type;
|
|
|
|
for (i = 0; i < nb_args; i++)
|
2021-10-22 13:39:54 +08:00
|
|
|
t[nb_args - i] = &vtop[-i].type;
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2022-11-17 02:52:51 +08:00
|
|
|
stack = arm64_pcs(variadic ? var_nb_arg : 0, nb_args, t, a);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
// Allocate space for structs replaced by pointer:
|
|
|
|
for (i = nb_args; i; i--)
|
|
|
|
if (a[i] & 1) {
|
2021-10-22 13:39:54 +08:00
|
|
|
SValue *arg = &vtop[i - nb_args];
|
2015-02-14 02:58:31 +08:00
|
|
|
int align, size = type_size(&arg->type, &align);
|
|
|
|
assert((arg->type.t & VT_BTYPE) == VT_STRUCT);
|
|
|
|
stack = (stack + align - 1) & -align;
|
|
|
|
a1[i] = stack;
|
|
|
|
stack += size;
|
|
|
|
}
|
|
|
|
|
|
|
|
stack = (stack + 15) >> 4 << 4;
|
|
|
|
|
2021-06-22 13:38:39 +08:00
|
|
|
/* fetch cpu flag before generating any code */
|
2021-10-22 13:39:54 +08:00
|
|
|
if ((vtop->r & VT_VALMASK) == VT_CMP)
|
|
|
|
gv(RC_INT);
|
2021-06-22 13:38:39 +08:00
|
|
|
|
2020-07-05 20:01:50 +08:00
|
|
|
if (stack >= 0x1000000) // 16Mb
|
2021-10-22 13:39:54 +08:00
|
|
|
tcc_error("stack size too big %lu", stack);
|
2020-07-05 20:01:50 +08:00
|
|
|
if (stack & 0xfff)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xd10003ff | (stack & 0xfff) << 10); // sub sp,sp,#(n)
|
2020-07-05 20:01:50 +08:00
|
|
|
if (stack >> 12)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xd14003ff | (stack >> 12) << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
// First pass: set all values on stack
|
|
|
|
for (i = nb_args; i; i--) {
|
2021-10-22 13:39:54 +08:00
|
|
|
vpushv(vtop - nb_args + i);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
if (a[i] & 1) {
|
|
|
|
// struct replaced by pointer
|
2021-10-22 13:39:54 +08:00
|
|
|
int r = get_reg(RC_INT);
|
|
|
|
arm64_spoff(intr(r), a1[i]);
|
|
|
|
vset(&vtop->type, r | VT_LVAL, 0);
|
|
|
|
vswap();
|
|
|
|
vstore();
|
2015-02-14 02:58:31 +08:00
|
|
|
if (a[i] >= 32) {
|
|
|
|
// pointer on stack
|
2021-10-22 13:39:54 +08:00
|
|
|
r = get_reg(RC_INT);
|
|
|
|
arm64_spoff(intr(r), a1[i]);
|
|
|
|
arm64_strx(3, intr(r), 31, (a[i] - 32) >> 1 << 1);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (a[i] >= 32) {
|
|
|
|
// value on stack
|
2021-10-22 13:39:54 +08:00
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
int r = get_reg(RC_INT);
|
|
|
|
arm64_spoff(intr(r), a[i] - 32);
|
|
|
|
vset(&vtop->type, r | VT_LVAL, 0);
|
|
|
|
vswap();
|
|
|
|
vstore();
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
else if (is_float(vtop->type.t)) {
|
|
|
|
gv(RC_FLOAT);
|
|
|
|
arm64_strv(arm64_type_size(vtop[0].type.t),
|
|
|
|
fltr(vtop[0].r), 31, a[i] - 32);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else {
|
2021-10-22 13:39:54 +08:00
|
|
|
gv(RC_INT);
|
2022-11-17 02:52:51 +08:00
|
|
|
arm64_strx(3, // arm64_type_size(vtop[0].type.t),
|
2021-10-22 13:39:54 +08:00
|
|
|
intr(vtop[0].r), 31, a[i] - 32);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
--vtop;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Second pass: assign values to registers
|
2021-10-22 13:39:54 +08:00
|
|
|
for (i = nb_args; i; i--, vtop--) {
|
2015-02-14 02:58:31 +08:00
|
|
|
if (a[i] < 16 && !(a[i] & 1)) {
|
|
|
|
// value in general-purpose registers
|
2021-10-22 13:39:54 +08:00
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
int align, size = type_size(&vtop->type, &align);
|
2020-09-17 14:42:28 +08:00
|
|
|
if (size) {
|
2021-10-22 13:39:54 +08:00
|
|
|
vtop->type.t = VT_PTR;
|
|
|
|
gaddrof();
|
|
|
|
gv(RC_R(a[i] / 2));
|
|
|
|
arm64_ldrs(a[i] / 2, size);
|
2020-09-17 14:42:28 +08:00
|
|
|
}
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else
|
2021-10-22 13:39:54 +08:00
|
|
|
gv(RC_R(a[i] / 2));
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else if (a[i] < 16)
|
|
|
|
// struct replaced by pointer in register
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_spoff(a[i] / 2, a1[i]);
|
2015-02-14 02:58:31 +08:00
|
|
|
else if (a[i] < 32) {
|
|
|
|
// value in floating-point registers
|
2021-10-22 13:39:54 +08:00
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
uint32_t j, sz, n = arm64_hfa(&vtop->type, &sz);
|
|
|
|
vtop->type.t = VT_PTR;
|
|
|
|
gaddrof();
|
|
|
|
gv(RC_R30);
|
2015-02-14 02:58:31 +08:00
|
|
|
for (j = 0; j < n; j++)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x3d4003c0 |
|
2015-02-14 02:58:31 +08:00
|
|
|
(sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 |
|
|
|
|
(a[i] / 2 - 8 + j) |
|
|
|
|
j << 10); // ldr ([sdq])(*),[x30,#(j * sz)]
|
|
|
|
}
|
|
|
|
else
|
2021-10-22 13:39:54 +08:00
|
|
|
gv(RC_F(a[i] / 2 - 8));
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((return_type->t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
if (a[0] == 1) {
|
|
|
|
// indirect return: set x8 and discard the stack value
|
2021-10-22 13:39:54 +08:00
|
|
|
gv(RC_R(8));
|
|
|
|
--vtop;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
// return in registers: keep the address for after the call
|
2021-10-22 13:39:54 +08:00
|
|
|
vswap();
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
save_regs(0);
|
|
|
|
arm64_gen_bl_or_b(0);
|
|
|
|
--vtop;
|
2020-07-05 20:01:50 +08:00
|
|
|
if (stack & 0xfff)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x910003ff | (stack & 0xfff) << 10); // add sp,sp,#(n)
|
2020-07-05 20:01:50 +08:00
|
|
|
if (stack >> 12)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x914003ff | (stack >> 12) << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
{
|
|
|
|
int rt = return_type->t;
|
|
|
|
int bt = rt & VT_BTYPE;
|
2019-09-23 23:45:39 +08:00
|
|
|
if (bt == VT_STRUCT && !(a[0] & 1)) {
|
2015-02-14 02:58:31 +08:00
|
|
|
// A struct was returned in registers, so write it out:
|
2021-10-22 13:39:54 +08:00
|
|
|
gv(RC_R(8));
|
|
|
|
--vtop;
|
2015-02-14 02:58:31 +08:00
|
|
|
if (a[0] == 0) {
|
|
|
|
int align, size = type_size(return_type, &align);
|
|
|
|
assert(size <= 16);
|
|
|
|
if (size > 8)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xa9000500); // stp x0,x1,[x8]
|
2015-02-14 02:58:31 +08:00
|
|
|
else if (size)
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_strx(size > 4 ? 3 : size > 2 ? 2 : size > 1, 0, 8, 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
else if (a[0] == 16) {
|
2020-06-27 23:22:04 +08:00
|
|
|
uint32_t j, sz, n = arm64_hfa(return_type, &sz);
|
2015-02-14 02:58:31 +08:00
|
|
|
for (j = 0; j < n; j++)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x3d000100 |
|
2015-02-14 02:58:31 +08:00
|
|
|
(sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 |
|
|
|
|
(a[i] / 2 - 8 + j) |
|
|
|
|
j << 10); // str ([sdq])(*),[x8,#(j * sz)]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
tcc_free(a1);
|
|
|
|
tcc_free(a);
|
|
|
|
tcc_free(t);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// State recorded by gfunc_prolog() for the function being compiled.

// Stack bytes used by the named arguments, as returned by arm64_pcs();
// gen_va_start() adds it to the frame size to find the first anonymous
// stack argument.
static unsigned long arm64_func_va_list_stack;
// Initial offset values that gen_va_start() stores at byte offsets 24 and
// 28 of the va_list object.
static int arm64_func_va_list_gr_offs;
static int arm64_func_va_list_vr_offs;
// Code offset of the two nops emitted after "mov x29,sp"; gfunc_epilog
// replaces them with the code that decrements SP.
static int arm64_func_sub_sp_offset;
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/*
 * Emit the prolog of the function described by `func_sym`.
 *
 * Frame layout created here (offsets from the new sp, which is also
 * copied into x29):
 *     0  saved x29, x30
 *    16  q0..q7   (only as many pairs as are needed)
 *   144  x8       (when needed)
 *   160  x0..x7   (only as many pairs as are needed)
 *   224  incoming stack arguments
 *
 * Each parameter is pushed as a local symbol at the slot matching the
 * location assigned by arm64_pcs(), and the state consumed by
 * gen_va_start() is recorded along the way.
 */
ST_FUNC void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    int n = 0;
    int i = 0;
    Sym *sym;
    CType **t;
    unsigned long *a;
    int use_x8 = 0;
    int last_int = 0;   // number of x-register pairs to save
    int last_float = 0; // number of q-register pairs to save
    int variadic = func_sym->type.ref->f.func_type == FUNC_ELLIPSIS;
    int var_nb_arg = n_func_args(&func_sym->type);

    func_vc = 144; // offset of where x8 is stored

    // Entry 0 is the return type, followed by one entry per parameter.
    for (sym = func_type->ref; sym; sym = sym->next)
        ++n;
    t = n ? tcc_malloc(n * sizeof(*t)) : NULL;
    a = n ? tcc_malloc(n * sizeof(*a)) : NULL;

    for (sym = func_type->ref; sym; sym = sym->next)
        t[i++] = &sym->type;

    arm64_func_va_list_stack = arm64_pcs(variadic ? var_nb_arg : 0, n - 1, t, a);

#if !defined(__APPLE__)
    // A variadic function saves every argument register.
    if (variadic) {
        use_x8 = 1;
        last_int = 4;
        last_float = 4;
    }
#endif
    if (a && a[0] == 1)
        use_x8 = 1;

    // Find the highest register pair that holds a parameter.
    for (i = 1, sym = func_type->ref->next; sym; i++, sym = sym->next) {
        if (a[i] < 16) {
            int last, align, size = type_size(&sym->type, &align);

            last = a[i] / 4 + 1 + (size - 1) / 8;
            if (last > last_int)
                last_int = last;
        }
        else if (a[i] < 32) {
            int last, hfa = arm64_hfa(&sym->type, 0);

            last = a[i] / 4 - 3 + (hfa ? hfa - 1 : 0);
            if (last > last_float)
                last_float = last;
        }
    }

    if (last_int > 4)
        last_int = 4;
    if (last_float > 4)
        last_float = 4;

    o(0xa9b27bfd); // stp x29,x30,[sp,#-224]!
    // stp q0,q1,[sp,#16], stp q2,q3,[sp,#48]
    // stp q4,q5,[sp,#80], stp q6,q7,[sp,#112]
    for (i = 0; i < last_float; i++)
        o(0xad0087e0 + i * 0x10000 + (i << 11) + (i << 1));
    if (use_x8)
        o(0xa90923e8); // stp x8,x8,[sp,#144]
    // stp x0,x1,[sp,#160], stp x2,x3,[sp,#176]
    // stp x4,x5,[sp,#192], stp x6,x7,[sp,#208]
    for (i = 0; i < last_int; i++)
        o(0xa90a07e0 + i * 0x10000 + (i << 11) + (i << 1));

    arm64_func_va_list_gr_offs = -64;
    arm64_func_va_list_vr_offs = -128;

    for (i = 1, sym = func_type->ref->next; sym; i++, sym = sym->next) {
        int off;

        // Frame offset of the slot holding this parameter.
        if (a[i] < 16)
            off = 160 + a[i] / 2 * 8;
        else if (a[i] < 32)
            off = 16 + (a[i] - 16) / 2 * 16;
        else
            off = 224 + ((a[i] - 32) >> 1 << 1);

        sym_push(sym->v & ~SYM_FIELD, &sym->type,
                 (a[i] & 1 ? VT_LLOCAL : VT_LOCAL) | VT_LVAL,
                 off);

        if (a[i] < 16) {
            int align, size = type_size(&sym->type, &align);

            arm64_func_va_list_gr_offs = (a[i] / 2 - 7 +
                                          (!(a[i] & 1) && size > 8)) * 8;
        }
        else if (a[i] < 32) {
            uint32_t hfa = arm64_hfa(&sym->type, 0);

            arm64_func_va_list_vr_offs = (a[i] / 2 - 16 +
                                          (hfa ? hfa : 1)) * 16;
        }

        // HFAs of float and double need to be written differently:
        if (16 <= a[i] && a[i] < 32 && (sym->type.t & VT_BTYPE) == VT_STRUCT) {
            uint32_t j, sz, k = arm64_hfa(&sym->type, &sz);

            if (sz < 16) {
                for (j = 0; j < k; j++) {
                    // str ([sdq])(*),[sp,#(j * sz)]
                    o(0x3d0003e0 | -(sz & 8) << 27 | (sz & 4) << 29 |
                      ((a[i] - 16) / 2 + j) | (off / sz + j) << 10);
                }
            }
        }
    }

    tcc_free(a);
    tcc_free(t);

    o(0x910003fd); // mov x29,sp
    arm64_func_sub_sp_offset = ind;
    // In gfunc_epilog these will be replaced with code to decrement SP:
    o(0xd503201f); // nop
    o(0xd503201f); // nop
    loc = 0;
#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_va_start(void)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
int r;
|
2021-10-22 13:39:54 +08:00
|
|
|
--vtop; // we don't need the "arg"
|
|
|
|
gaddrof();
|
|
|
|
r = intr(gv(RC_INT));
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
if (arm64_func_va_list_stack) {
|
|
|
|
//xx could use add (immediate) here
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_movimm(30, arm64_func_va_list_stack + 224);
|
|
|
|
o(0x8b1e03be); // add x30,x29,x30
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x910383be); // add x30,x29,#224
|
|
|
|
o(0xf900001e | r << 5); // str x30,[x(r)]
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2022-11-17 02:52:51 +08:00
|
|
|
#if !defined(__APPLE__)
|
2015-02-14 02:58:31 +08:00
|
|
|
if (arm64_func_va_list_gr_offs) {
|
|
|
|
if (arm64_func_va_list_stack)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x910383be); // add x30,x29,#224
|
|
|
|
o(0xf900041e | r << 5); // str x30,[x(r),#8]
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (arm64_func_va_list_vr_offs) {
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x910243be); // add x30,x29,#144
|
|
|
|
o(0xf900081e | r << 5); // str x30,[x(r),#16]
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_movimm(30, arm64_func_va_list_gr_offs);
|
|
|
|
o(0xb900181e | r << 5); // str w30,[x(r),#24]
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_movimm(30, arm64_func_va_list_vr_offs);
|
|
|
|
o(0xb9001c1e | r << 5); // str w30,[x(r),#28]
|
2022-11-17 02:52:51 +08:00
|
|
|
#endif
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
--vtop;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_va_arg(CType *t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
int align, size = type_size(t, &align);
|
2020-06-27 23:22:04 +08:00
|
|
|
unsigned fsize, hfa = arm64_hfa(t, &fsize);
|
2015-02-26 06:38:56 +08:00
|
|
|
uint32_t r0, r1;
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
if (is_float(t->t)) {
|
|
|
|
hfa = 1;
|
|
|
|
fsize = size;
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
gaddrof();
|
|
|
|
r0 = intr(gv(RC_INT));
|
|
|
|
r1 = get_reg(RC_INT);
|
|
|
|
vtop[0].r = r1 | VT_LVAL;
|
|
|
|
r1 = intr(r1);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
if (!hfa) {
|
|
|
|
uint32_t n = size > 16 ? 8 : (size + 7) & -8;
|
2022-11-17 02:52:51 +08:00
|
|
|
#if !defined(__APPLE__)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xb940181e | r0 << 5); // ldr w30,[x(r0),#24] // __gr_offs
|
2015-02-26 06:38:56 +08:00
|
|
|
if (align == 16) {
|
|
|
|
assert(0); // this path untested but needed for __uint128_t
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x11003fde); // add w30,w30,#15
|
|
|
|
o(0x121c6fde); // and w30,w30,#-16
|
2015-02-26 06:38:56 +08:00
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x310003c0 | r1 | n << 10); // adds w(r1),w30,#(n)
|
|
|
|
o(0x540000ad); // b.le .+20
|
2022-11-17 02:52:51 +08:00
|
|
|
#endif
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // __stack
|
|
|
|
o(0x9100001e | r1 << 5 | n << 10); // add x30,x(r1),#(n)
|
|
|
|
o(0xf900001e | r0 << 5); // str x30,[x(r0)] // __stack
|
2022-11-17 02:52:51 +08:00
|
|
|
#if !defined(__APPLE__)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x14000004); // b .+16
|
|
|
|
o(0xb9001800 | r1 | r0 << 5); // str w(r1),[x(r0),#24] // __gr_offs
|
|
|
|
o(0xf9400400 | r1 | r0 << 5); // ldr x(r1),[x(r0),#8] // __gr_top
|
|
|
|
o(0x8b3ec000 | r1 | r1 << 5); // add x(r1),x(r1),w30,sxtw
|
2022-11-17 02:52:51 +08:00
|
|
|
#endif
|
2015-02-14 02:58:31 +08:00
|
|
|
if (size > 16)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xf9400000 | r1 | r1 << 5); // ldr x(r1),[x(r1)]
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
uint32_t ssz = (size + 7) & -(uint32_t)8;
|
2022-11-17 02:52:51 +08:00
|
|
|
#if !defined(__APPLE__)
|
|
|
|
uint32_t rsz = hfa << 4;
|
2015-02-26 06:38:56 +08:00
|
|
|
uint32_t b1, b2;
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xb9401c1e | r0 << 5); // ldr w30,[x(r0),#28] // __vr_offs
|
|
|
|
o(0x310003c0 | r1 | rsz << 10); // adds w(r1),w30,#(rsz)
|
|
|
|
b1 = ind; o(0x5400000d); // b.le lab1
|
2022-11-17 02:52:51 +08:00
|
|
|
#endif
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // __stack
|
2015-02-14 02:58:31 +08:00
|
|
|
if (fsize == 16) {
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x91003c00 | r1 | r1 << 5); // add x(r1),x(r1),#15
|
|
|
|
o(0x927cec00 | r1 | r1 << 5); // and x(r1),x(r1),#-16
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x9100001e | r1 << 5 | ssz << 10); // add x30,x(r1),#(ssz)
|
|
|
|
o(0xf900001e | r0 << 5); // str x30,[x(r0)] // __stack
|
2022-11-17 02:52:51 +08:00
|
|
|
#if !defined(__APPLE__)
|
2021-10-22 13:39:54 +08:00
|
|
|
b2 = ind; o(0x14000000); // b lab2
|
2015-02-14 02:58:31 +08:00
|
|
|
// lab1:
|
2021-10-22 13:39:54 +08:00
|
|
|
write32le(cur_text_section->data + b1, 0x5400000d | (ind - b1) << 3);
|
|
|
|
o(0xb9001c00 | r1 | r0 << 5); // str w(r1),[x(r0),#28] // __vr_offs
|
|
|
|
o(0xf9400800 | r1 | r0 << 5); // ldr x(r1),[x(r0),#16] // __vr_top
|
2015-02-26 06:38:56 +08:00
|
|
|
if (hfa == 1 || fsize == 16)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x8b3ec000 | r1 | r1 << 5); // add x(r1),x(r1),w30,sxtw
|
2015-02-26 06:38:56 +08:00
|
|
|
else {
|
|
|
|
// We need to change the layout of this HFA.
|
|
|
|
// Get some space on the stack using global variable "loc":
|
2021-10-22 13:39:54 +08:00
|
|
|
loc = (loc - size) & -(uint32_t)align;
|
|
|
|
o(0x8b3ec000 | 30 | r1 << 5); // add x30,x(r1),w30,sxtw
|
|
|
|
arm64_movimm(r1, loc);
|
|
|
|
o(0x8b0003a0 | r1 | r1 << 16); // add x(r1),x29,x(r1)
|
|
|
|
o(0x4c402bdc | (uint32_t)fsize << 7 |
|
2015-02-26 06:38:56 +08:00
|
|
|
(uint32_t)(hfa == 2) << 15 |
|
|
|
|
(uint32_t)(hfa == 3) << 14); // ld1 {v28.(4s|2d),...},[x30]
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x0d00801c | r1 << 5 | (fsize == 8) << 10 |
|
2015-02-26 06:38:56 +08:00
|
|
|
(uint32_t)(hfa != 2) << 13 |
|
|
|
|
(uint32_t)(hfa != 3) << 21); // st(hfa) {v28.(s|d),...}[0],[x(r1)]
|
|
|
|
}
|
|
|
|
// lab2:
|
2021-10-22 13:39:54 +08:00
|
|
|
write32le(cur_text_section->data + b2, 0x14000000 | (ind - b2) >> 2);
|
2022-11-17 02:52:51 +08:00
|
|
|
#endif
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Struct-return hook of the code generator interface.
// This backend ignores every argument and always answers 0.
// NOTE(review): presumably 0 tells the generic code that no struct is
// returned through this hook and that gfunc_return() places struct results
// itself -- confirm against the caller.
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret,
                       int *align, int *regsize)
{
    return 0;
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
// Move the value on top of the value stack to where the function result is
// expected, then pop it from the value stack.
//
// func_type: return type of the function being compiled.
//
// arm64_pcs() classifies the return type into a location code:
//   -1  nothing is emitted
//    0  integer result: x0 (a struct is loaded from memory, addressed via x0)
//    1  result is stored through the pointer held in the local slot func_vc
//   16  floating-point result: v0 (members of a struct are loaded into
//       v0, v1, ... from memory addressed via x0)
// Any other code is a bug (assert).
ST_FUNC void gfunc_return(CType *func_type)
{
    CType *ret_type = func_type;
    unsigned long where;

    arm64_pcs(0, 0, &ret_type, &where);

    if (where == (unsigned long)-1) {
        // No code needed for this return type.
    }
    else if (where == 0) {
        if ((func_type->t & VT_BTYPE) != VT_STRUCT)
            gv(RC_IRET);
        else {
            int align;
            int size = type_size(func_type, &align);
            // Get the struct's address into x0, then load its contents.
            gaddrof();
            gv(RC_R(0));
            arm64_ldrs(0, size);
        }
    }
    else if (where == 1) {
        // Store the value through the pointer saved at func_vc.
        CType ptr_type = *func_type;
        mk_pointer(&ptr_type);
        vset(&ptr_type, VT_LOCAL | VT_LVAL, func_vc);
        indir();
        vswap();
        vstore();
    }
    else if (where == 16) {
        if ((func_type->t & VT_BTYPE) != VT_STRUCT)
            gv(RC_FRET);
        else {
            uint32_t k, sz, n = arm64_hfa(&vtop->type, &sz);
            // Get the struct's address into x0, then load each member.
            gaddrof();
            gv(RC_R(0));
            for (k = 0; k < n; k++)
                o(0x3d400000 |
                  (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 |
                  k | k << 10); // ldr ([sdq])(k),[x0,#(k * sz)]
        }
    }
    else
        assert(0);

    vtop--;
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gfunc_epilog(void)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2020-06-16 13:39:48 +08:00
|
|
|
#ifdef CONFIG_TCC_BCHECK
|
2021-10-22 13:39:54 +08:00
|
|
|
if (tcc_state->do_bounds_check)
|
|
|
|
gen_bounds_epilog();
|
2020-06-16 13:39:48 +08:00
|
|
|
#endif
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
if (loc) {
|
2015-02-14 02:58:31 +08:00
|
|
|
// Insert instructions to subtract size of stack frame from SP.
|
2015-11-20 02:21:14 +08:00
|
|
|
unsigned char *ptr = cur_text_section->data + arm64_func_sub_sp_offset;
|
2021-10-22 13:39:54 +08:00
|
|
|
uint64_t diff = (-loc + 15) & ~15;
|
2015-02-14 02:58:31 +08:00
|
|
|
if (!(diff >> 24)) {
|
|
|
|
if (diff & 0xfff) // sub sp,sp,#(diff & 0xfff)
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr, 0xd10003ff | (diff & 0xfff) << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
if (diff >> 12) // sub sp,sp,#(diff >> 12),lsl #12
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr + 4, 0xd14003ff | (diff >> 12) << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
// In this case we may subtract more than necessary,
|
|
|
|
// but always less than 17/16 of what we were aiming for.
|
|
|
|
int i = 0;
|
|
|
|
int j = 0;
|
|
|
|
while (diff >> 20) {
|
|
|
|
diff = (diff + 0xffff) >> 16;
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
while (diff >> 16) {
|
|
|
|
diff = (diff + 1) >> 1;
|
|
|
|
++j;
|
|
|
|
}
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr, 0xd2800010 | diff << 5 | i << 21);
|
2015-02-14 02:58:31 +08:00
|
|
|
// mov x16,#(diff),lsl #(16 * i)
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr + 4, 0xcb3063ff | j << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
// sub sp,sp,x16,lsl #(j)
|
|
|
|
}
|
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x910003bf); // mov sp,x29
|
|
|
|
o(0xa8ce7bfd); // ldp x29,x30,[sp],#224
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0xd65f03c0); // ret
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
// Emit "bytes" bytes worth of nop instructions.
// bytes: padding size; it must be a multiple of 4 (one instruction word),
//        otherwise an error is reported through tcc_error().
ST_FUNC void gen_fill_nops(int bytes)
{
    if (bytes & 3)
        tcc_error("alignment of code section not multiple of 4");
    for (; bytes > 0; bytes -= 4)
        o(0xd503201f); // nop
}
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
// Generate forward branch to label:
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC int gjmp(int t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
int r = ind;
|
|
|
|
if (nocode_wanted)
|
2016-12-19 00:23:33 +08:00
|
|
|
return t;
|
2021-10-22 13:39:54 +08:00
|
|
|
o(t);
|
2015-02-14 02:58:31 +08:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate branch to known address:
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gjmp_addr(int a)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
assert(a - ind + 0x8000000 < 0x10000000);
|
|
|
|
o(0x14000000 | ((a - ind) >> 2 & 0x3ffffff));
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC int gjmp_append(int n, int t)
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
{
|
|
|
|
void *p;
|
|
|
|
/* insert vtop->c jump list in t */
|
|
|
|
if (n) {
|
|
|
|
uint32_t n1 = n, n2;
|
|
|
|
while ((n2 = read32le(p = cur_text_section->data + n1)))
|
|
|
|
n1 = n2;
|
|
|
|
write32le(p, t);
|
|
|
|
t = n;
|
|
|
|
}
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
void arm64_vset_VT_CMP(int op)
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
{
|
|
|
|
if (op >= TOK_ULT && op <= TOK_GT) {
|
2021-10-22 13:39:54 +08:00
|
|
|
vtop->cmp_r = vtop->r;
|
|
|
|
vset_VT_CMP(0x80);
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
}
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void arm64_gen_opil(int op, uint32_t l);
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void arm64_load_cmp(int r, SValue *sv)
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
{
|
|
|
|
sv->r = sv->cmp_r;
|
|
|
|
if (sv->c.i & 1) {
|
2021-10-22 13:39:54 +08:00
|
|
|
vpushi(1);
|
|
|
|
arm64_gen_opil('^', 0);
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
}
|
|
|
|
if (r != sv->r) {
|
2021-10-22 13:39:54 +08:00
|
|
|
load(r, sv);
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
sv->r = r;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC int gjmp_cond(int op, int t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
int bt = vtop->type.t & VT_BTYPE;
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
|
|
|
|
int inv = op & 1;
|
2021-10-22 13:39:54 +08:00
|
|
|
vtop->r = vtop->cmp_r;
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
if (bt == VT_LDOUBLE) {
|
2021-10-22 13:39:54 +08:00
|
|
|
uint32_t a, b, f = fltr(gv(RC_FLOAT));
|
|
|
|
a = get_reg(RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop[0].r = a;
|
|
|
|
b = get_reg(RC_INT);
|
|
|
|
a = intr(a);
|
|
|
|
b = intr(b);
|
|
|
|
o(0x4e083c00 | a | f << 5); // mov x(a),v(f).d[0]
|
|
|
|
o(0x4e183c00 | b | f << 5); // mov x(b),v(f).d[1]
|
|
|
|
o(0xaa000400 | a | a << 5 | b << 16); // orr x(a),x(a),x(b),lsl #1
|
|
|
|
o(0xb4000040 | a | !!inv << 24); // cbz/cbnz x(a),.+8
|
|
|
|
--vtop;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else if (bt == VT_FLOAT || bt == VT_DOUBLE) {
|
2021-10-22 13:39:54 +08:00
|
|
|
uint32_t a = fltr(gv(RC_FLOAT));
|
|
|
|
o(0x1e202008 | a << 5 | (bt != VT_FLOAT) << 22); // fcmp
|
|
|
|
o(0x54000040 | !!inv); // b.eq/b.ne .+8
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t ll = (bt == VT_PTR || bt == VT_LLONG);
|
2021-10-22 13:39:54 +08:00
|
|
|
uint32_t a = intr(gv(RC_INT));
|
|
|
|
o(0x34000040 | a | !!inv << 24 | ll << 31); // cbz/cbnz wA,.+8
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
return gjmp(t);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2015-03-08 01:03:51 +08:00
|
|
|
// Test whether sv is a plain integer constant (VT_CONST, not an lvalue and
// without a symbol) and optionally fetch its value.
//
// val: if not NULL, receives the constant. Pointer and long long values are
//      taken as they are; everything else is reduced to its low 32 bits,
//      which are sign-extended to 64 bits unless the type is unsigned.
// Returns 1 if sv is such a constant, 0 otherwise (*val is then untouched).
static int arm64_iconst(uint64_t *val, SValue *sv)
{
    int t, bt;

    if ((sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
        return 0;
    if (!val)
        return 1;

    t = sv->type.t;
    bt = t & VT_BTYPE;
    if (bt == VT_LLONG || bt == VT_PTR)
        *val = sv->c.i;
    else {
        uint64_t v = (uint32_t)sv->c.i;
        if (!(t & VT_UNSIGNED))
            v |= -(sv->c.i & 0x80000000); // copy bit 31 into bits 32..63
        *val = v;
    }
    return 1;
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static int arm64_gen_opic(int op, uint32_t l, int rev, uint64_t val,
|
2015-03-08 01:03:51 +08:00
|
|
|
uint32_t x, uint32_t a)
|
|
|
|
{
|
|
|
|
if (op == '-' && !rev) {
|
|
|
|
val = -val;
|
|
|
|
op = '+';
|
|
|
|
}
|
|
|
|
val = l ? val : (uint32_t)val;
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
|
|
|
|
case '+': {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t s = l ? val >> 63 : val >> 31;
|
2015-03-08 01:03:51 +08:00
|
|
|
val = s ? -val : val;
|
|
|
|
val = l ? val : (uint32_t)val;
|
|
|
|
if (!(val & ~(uint64_t)0xfff))
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x11000000 | l << 31 | s << 30 | x | a << 5 | val << 10);
|
2015-03-08 01:03:51 +08:00
|
|
|
else if (!(val & ~(uint64_t)0xfff000))
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x11400000 | l << 31 | s << 30 | x | a << 5 | val >> 12 << 10);
|
2015-03-08 01:03:51 +08:00
|
|
|
else {
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_movimm(30, val); // use x30
|
|
|
|
o(0x0b1e0000 | l << 31 | s << 30 | x | a << 5);
|
2015-03-08 01:03:51 +08:00
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '-':
|
|
|
|
if (!val)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x4b0003e0 | l << 31 | x | a << 16); // neg
|
2015-03-08 01:03:51 +08:00
|
|
|
else if (val == (l ? (uint64_t)-1 : (uint32_t)-1))
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x2a2003e0 | l << 31 | x | a << 16); // mvn
|
2015-03-08 01:03:51 +08:00
|
|
|
else {
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_movimm(30, val); // use x30
|
|
|
|
o(0x4b0003c0 | l << 31 | x | a << 16); // sub
|
2015-03-08 01:03:51 +08:00
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
case '^':
|
|
|
|
if (val == -1 || (val == 0xffffffff && !l)) {
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x2a2003e0 | l << 31 | x | a << 16); // mvn
|
2015-03-08 01:03:51 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
// fall through
|
|
|
|
case '&':
|
|
|
|
case '|': {
|
|
|
|
int e = arm64_encode_bimm64(l ? val : val | val << 32);
|
|
|
|
if (e < 0)
|
|
|
|
return 0;
|
2021-10-22 13:39:54 +08:00
|
|
|
o((op == '&' ? 0x12000000 :
|
2015-03-08 01:03:51 +08:00
|
|
|
op == '|' ? 0x32000000 : 0x52000000) |
|
|
|
|
l << 31 | x | a << 5 | (uint32_t)e << 10);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
case TOK_SAR:
|
|
|
|
case TOK_SHL:
|
|
|
|
case TOK_SHR: {
|
|
|
|
uint32_t n = 32 << l;
|
|
|
|
val = val & (n - 1);
|
|
|
|
if (rev)
|
|
|
|
return 0;
|
2020-07-05 20:01:50 +08:00
|
|
|
if (!val) {
|
|
|
|
// tcc_warning("shift count >= width of type");
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x2a0003e0 | l << 31 | a << 16);
|
2020-07-05 20:01:50 +08:00
|
|
|
return 1;
|
|
|
|
}
|
2015-03-08 01:03:51 +08:00
|
|
|
else if (op == TOK_SHL)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x53000000 | l << 31 | l << 22 | x | a << 5 |
|
2015-03-08 01:03:51 +08:00
|
|
|
(n - val) << 16 | (n - 1 - val) << 10); // lsl
|
|
|
|
else
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x13000000 | (op == TOK_SHR) << 30 | l << 31 | l << 22 |
|
2015-03-08 01:03:51 +08:00
|
|
|
x | a << 5 | val << 16 | (n - 1) << 10); // lsr/asr
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
static void arm64_gen_opil(int op, uint32_t l)
|
2015-03-08 01:03:51 +08:00
|
|
|
{
|
|
|
|
uint32_t x, a, b;
|
|
|
|
|
|
|
|
// Special treatment for operations with a constant operand:
|
|
|
|
{
|
|
|
|
uint64_t val;
|
|
|
|
int rev = 1;
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
if (arm64_iconst(0, &vtop[0])) {
|
|
|
|
vswap();
|
2015-03-08 01:03:51 +08:00
|
|
|
rev = 0;
|
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
if (arm64_iconst(&val, &vtop[-1])) {
|
|
|
|
gv(RC_INT);
|
|
|
|
a = intr(vtop[0].r);
|
|
|
|
--vtop;
|
|
|
|
x = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
if (arm64_gen_opic(op, l, rev, val, intr(x), a)) {
|
|
|
|
vtop[0].r = x;
|
|
|
|
vswap();
|
|
|
|
--vtop;
|
2015-03-08 01:03:51 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!rev)
|
2021-10-22 13:39:54 +08:00
|
|
|
vswap();
|
2015-03-08 01:03:51 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
gv2(RC_INT, RC_INT);
|
|
|
|
assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST);
|
|
|
|
a = intr(vtop[-1].r);
|
|
|
|
b = intr(vtop[0].r);
|
|
|
|
vtop -= 2;
|
|
|
|
x = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
|
|
|
x = intr(x);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case '%':
|
|
|
|
// Use x30 for quotient:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac00c00 | l << 31 | 30 | a << 5 | b << 16); // sdiv
|
|
|
|
o(0x1b008000 | l << 31 | x | (uint32_t)30 << 5 |
|
2015-11-10 07:06:05 +08:00
|
|
|
b << 16 | a << 10); // msub
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '&':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x0a000000 | l << 31 | x | a << 5 | b << 16); // and
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '*':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1b007c00 | l << 31 | x | a << 5 | b << 16); // mul
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '+':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x0b000000 | l << 31 | x | a << 5 | b << 16); // add
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '-':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x4b000000 | l << 31 | x | a << 5 | b << 16); // sub
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '/':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac00c00 | l << 31 | x | a << 5 | b << 16); // sdiv
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '^':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x4a000000 | l << 31 | x | a << 5 | b << 16); // eor
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '|':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x2a000000 | l << 31 | x | a << 5 | b << 16); // orr
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_EQ:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f17e0 | x); // cset wA,eq
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_GE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fb7e0 | x); // cset wA,ge
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_GT:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fd7e0 | x); // cset wA,gt
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_LE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fc7e0 | x); // cset wA,le
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_LT:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fa7e0 | x); // cset wA,lt
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_NE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f07e0 | x); // cset wA,ne
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_SAR:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac02800 | l << 31 | x | a << 5 | b << 16); // asr
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_SHL:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac02000 | l << 31 | x | a << 5 | b << 16); // lsl
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_SHR:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac02400 | l << 31 | x | a << 5 | b << 16); // lsr
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_UDIV:
|
|
|
|
case TOK_PDIV:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac00800 | l << 31 | x | a << 5 | b << 16); // udiv
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_UGE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f37e0 | x); // cset wA,cs
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_UGT:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f97e0 | x); // cset wA,hi
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_ULT:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f27e0 | x); // cset wA,cc
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_ULE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f87e0 | x); // cset wA,ls
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_UMOD:
|
|
|
|
// Use x30 for quotient:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1ac00800 | l << 31 | 30 | a << 5 | b << 16); // udiv
|
|
|
|
o(0x1b008000 | l << 31 | x | (uint32_t)30 << 5 |
|
2015-11-10 07:06:05 +08:00
|
|
|
b << 16 | a << 10); // msub
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/*
 * Generate code for the integer binary operation 'op'.
 *
 * Delegates instruction selection to arm64_gen_opil() with a second
 * argument of 0, then hands 'op' to arm64_vset_VT_CMP().
 * NOTE(review): the 0 presumably selects 32-bit operand width (gen_opl
 * passes 1 instead) -- confirm against arm64_gen_opil().
 */
ST_FUNC void gen_opi(int op)
{
    const uint32_t width_flag = 0;

    arm64_gen_opil(op, width_flag);
    arm64_vset_VT_CMP(op);
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/*
 * Generate code for the integer binary operation 'op', long variant.
 *
 * Delegates instruction selection to arm64_gen_opil() with a second
 * argument of 1, then hands 'op' to arm64_vset_VT_CMP().
 * NOTE(review): the 1 presumably selects 64-bit operand width (gen_opi
 * passes 0 instead) -- confirm against arm64_gen_opil().
 */
ST_FUNC void gen_opl(int op)
{
    const uint32_t width_flag = 1;

    arm64_gen_opil(op, width_flag);
    arm64_vset_VT_CMP(op);
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_opf(int op)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t x, a, b, dbl;
|
2015-02-14 02:58:31 +08:00
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
if (vtop[0].type.t == VT_LDOUBLE) {
|
|
|
|
CType type = vtop[0].type;
|
2015-02-14 02:58:31 +08:00
|
|
|
int func = 0;
|
|
|
|
int cond = -1;
|
|
|
|
switch (op) {
|
|
|
|
case '*': func = TOK___multf3; break;
|
|
|
|
case '+': func = TOK___addtf3; break;
|
|
|
|
case '-': func = TOK___subtf3; break;
|
|
|
|
case '/': func = TOK___divtf3; break;
|
|
|
|
case TOK_EQ: func = TOK___eqtf2; cond = 1; break;
|
|
|
|
case TOK_NE: func = TOK___netf2; cond = 0; break;
|
|
|
|
case TOK_LT: func = TOK___lttf2; cond = 10; break;
|
|
|
|
case TOK_GE: func = TOK___getf2; cond = 11; break;
|
|
|
|
case TOK_LE: func = TOK___letf2; cond = 12; break;
|
|
|
|
case TOK_GT: func = TOK___gttf2; cond = 13; break;
|
|
|
|
default: assert(0); break;
|
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
vpush_helper_func(func);
|
|
|
|
vrott(3);
|
|
|
|
gfunc_call(2);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = cond < 0 ? REG_FRET : REG_IRET;
|
2015-02-14 02:58:31 +08:00
|
|
|
if (cond < 0)
|
2021-10-22 13:39:54 +08:00
|
|
|
vtop->type = type;
|
2015-02-14 02:58:31 +08:00
|
|
|
else {
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x7100001f); // cmp w0,#0
|
|
|
|
o(0x1a9f07e0 | (uint32_t)cond << 12); // cset w0,(cond)
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
dbl = vtop[0].type.t != VT_FLOAT;
|
|
|
|
gv2(RC_FLOAT, RC_FLOAT);
|
|
|
|
assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST);
|
|
|
|
a = fltr(vtop[-1].r);
|
|
|
|
b = fltr(vtop[0].r);
|
|
|
|
vtop -= 2;
|
2015-02-14 02:58:31 +08:00
|
|
|
switch (op) {
|
|
|
|
case TOK_EQ: case TOK_NE:
|
|
|
|
case TOK_LT: case TOK_GE: case TOK_LE: case TOK_GT:
|
2021-10-22 13:39:54 +08:00
|
|
|
x = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
|
|
|
x = intr(x);
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
default:
|
2021-10-22 13:39:54 +08:00
|
|
|
x = get_reg(RC_FLOAT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
2015-02-14 02:58:31 +08:00
|
|
|
x = fltr(x);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case '*':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e200800 | dbl << 22 | x | a << 5 | b << 16); // fmul
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '+':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202800 | dbl << 22 | x | a << 5 | b << 16); // fadd
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '-':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e203800 | dbl << 22 | x | a << 5 | b << 16); // fsub
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '/':
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e201800 | dbl << 22 | x | a << 5 | b << 16); // fdiv
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_EQ:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f17e0 | x); // cset w(x),eq
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_GE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9fb7e0 | x); // cset w(x),ge
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_GT:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9fd7e0 | x); // cset w(x),gt
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_LE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f87e0 | x); // cset w(x),ls
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_LT:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f57e0 | x); // cset w(x),mi
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case TOK_NE:
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f07e0 | x); // cset w(x),ne
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_vset_VT_CMP(op);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Generate sign extension from 32 to 64 bits:
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_cvt_sxtw(void)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
uint32_t r = intr(gv(RC_INT));
|
|
|
|
o(0x93407c00 | r | r << 5); // sxtw x(r),w(r)
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2019-12-17 01:51:28 +08:00
|
|
|
/* char/short to int conversion */
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_cvt_csti(int t)
|
2019-12-17 01:51:28 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
int r = intr(gv(RC_INT));
|
|
|
|
o(0x13001c00
|
2019-12-17 01:51:28 +08:00
|
|
|
| ((t & VT_BTYPE) == VT_SHORT) << 13
|
|
|
|
| (uint32_t)!!(t & VT_UNSIGNED) << 30
|
|
|
|
| r | r << 5); // [su]xt[bh] w(r),w(r)
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_cvt_itof(int t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
if (t == VT_LDOUBLE) {
|
2021-10-22 13:39:54 +08:00
|
|
|
int f = vtop->type.t;
|
2015-02-14 02:58:31 +08:00
|
|
|
int func = (f & VT_BTYPE) == VT_LLONG ?
|
|
|
|
(f & VT_UNSIGNED ? TOK___floatunditf : TOK___floatditf) :
|
|
|
|
(f & VT_UNSIGNED ? TOK___floatunsitf : TOK___floatsitf);
|
2021-10-22 13:39:54 +08:00
|
|
|
vpush_helper_func(func);
|
|
|
|
vrott(2);
|
|
|
|
gfunc_call(1);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->type.t = t;
|
|
|
|
vtop->r = REG_FRET;
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
2021-10-22 13:39:54 +08:00
|
|
|
int d, n = intr(gv(RC_INT));
|
|
|
|
int s = !(vtop->type.t & VT_UNSIGNED);
|
|
|
|
uint32_t l = ((vtop->type.t & VT_BTYPE) == VT_LLONG);
|
|
|
|
--vtop;
|
|
|
|
d = get_reg(RC_FLOAT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = d;
|
|
|
|
o(0x1e220000 | (uint32_t)!s << 16 |
|
2015-11-10 07:06:05 +08:00
|
|
|
(uint32_t)(t != VT_FLOAT) << 22 | fltr(d) |
|
2015-02-14 02:58:31 +08:00
|
|
|
l << 31 | n << 5); // [us]cvtf [sd](d),[wx](n)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_cvt_ftoi(int t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
|
2015-02-14 02:58:31 +08:00
|
|
|
int func = (t & VT_BTYPE) == VT_LLONG ?
|
|
|
|
(t & VT_UNSIGNED ? TOK___fixunstfdi : TOK___fixtfdi) :
|
|
|
|
(t & VT_UNSIGNED ? TOK___fixunstfsi : TOK___fixtfsi);
|
2021-10-22 13:39:54 +08:00
|
|
|
vpush_helper_func(func);
|
|
|
|
vrott(2);
|
|
|
|
gfunc_call(1);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->type.t = t;
|
|
|
|
vtop->r = REG_IRET;
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
2021-10-22 13:39:54 +08:00
|
|
|
int d, n = fltr(gv(RC_FLOAT));
|
|
|
|
uint32_t l = ((vtop->type.t & VT_BTYPE) != VT_FLOAT);
|
|
|
|
--vtop;
|
|
|
|
d = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = d;
|
|
|
|
o(0x1e380000 |
|
2015-11-10 07:06:05 +08:00
|
|
|
(uint32_t)!!(t & VT_UNSIGNED) << 16 |
|
2021-10-22 13:39:54 +08:00
|
|
|
(uint32_t)((t & VT_BTYPE) == VT_LLONG) << 31 | intr(d) |
|
2015-02-14 02:58:31 +08:00
|
|
|
l << 22 | n << 5); // fcvtz[su] [wx](d),[sd](n)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_cvt_ftof(int t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
int f = vtop[0].type.t & VT_BTYPE;
|
2015-02-14 02:58:31 +08:00
|
|
|
assert(t == VT_FLOAT || t == VT_DOUBLE || t == VT_LDOUBLE);
|
|
|
|
assert(f == VT_FLOAT || f == VT_DOUBLE || f == VT_LDOUBLE);
|
|
|
|
if (t == f)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (t == VT_LDOUBLE || f == VT_LDOUBLE) {
|
|
|
|
int func = (t == VT_LDOUBLE) ?
|
|
|
|
(f == VT_FLOAT ? TOK___extendsftf2 : TOK___extenddftf2) :
|
|
|
|
(t == VT_FLOAT ? TOK___trunctfsf2 : TOK___trunctfdf2);
|
2021-10-22 13:39:54 +08:00
|
|
|
vpush_helper_func(func);
|
|
|
|
vrott(2);
|
|
|
|
gfunc_call(1);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->type.t = t;
|
|
|
|
vtop->r = REG_FRET;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
int x, a;
|
2021-10-22 13:39:54 +08:00
|
|
|
gv(RC_FLOAT);
|
|
|
|
assert(vtop[0].r < VT_CONST);
|
|
|
|
a = fltr(vtop[0].r);
|
|
|
|
--vtop;
|
|
|
|
x = get_reg(RC_FLOAT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
2015-02-14 02:58:31 +08:00
|
|
|
x = fltr(x);
|
|
|
|
|
|
|
|
if (f == VT_FLOAT)
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e22c000 | x | a << 5); // fcvt d(x),s(a)
|
2015-02-14 02:58:31 +08:00
|
|
|
else
|
2021-10-22 13:39:54 +08:00
|
|
|
o(0x1e624000 | x | a << 5); // fcvt s(x),d(a)
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-24 01:17:38 +08:00
|
|
|
/* increment tcov counter */
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_increment_tcov (SValue *sv)
|
2021-01-24 01:17:38 +08:00
|
|
|
{
|
|
|
|
int r1, r2;
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
vpushv(sv);
|
|
|
|
vtop->r = r1 = get_reg(RC_INT);
|
|
|
|
r2 = get_reg(RC_INT);
|
|
|
|
greloca(cur_text_section, sv->sym, ind, R_AARCH64_ADR_GOT_PAGE, 0);
|
|
|
|
o(0x90000000 | r1); // adrp r1, #sym
|
|
|
|
greloca(cur_text_section, sv->sym, ind, R_AARCH64_LD64_GOT_LO12_NC, 0);
|
|
|
|
o(0xf9400000 | r1 | (r1 << 5)); // ld xr,[xr, #sym]
|
|
|
|
o(0xf9400000 | (intr(r1)<<5) | intr(r2)); // ldr r2, [r1]
|
|
|
|
o(0x91000400 | (intr(r2)<<5) | intr(r2)); // add r2, r2, #1
|
|
|
|
o(0xf9000000 | (intr(r1)<<5) | intr(r2)); // str r2, [r1]
|
|
|
|
vpop();
|
2021-01-24 01:17:38 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void ggoto(void)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
2021-10-22 13:39:54 +08:00
|
|
|
arm64_gen_bl_or_b(1);
|
|
|
|
--vtop;
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_clear_cache(void)
|
2015-03-08 08:10:44 +08:00
|
|
|
{
|
|
|
|
uint32_t beg, end, dsz, isz, p, lab1, b1;
|
2021-10-22 13:39:54 +08:00
|
|
|
gv2(RC_INT, RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = get_reg(RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = get_reg(RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = get_reg(RC_INT);
|
|
|
|
beg = intr(vtop[-4].r); // x0
|
|
|
|
end = intr(vtop[-3].r); // x1
|
|
|
|
dsz = intr(vtop[-2].r); // x2
|
|
|
|
isz = intr(vtop[-1].r); // x3
|
|
|
|
p = intr(vtop[0].r); // x4
|
|
|
|
vtop -= 5;
|
|
|
|
|
|
|
|
o(0xd53b0020 | isz); // mrs x(isz),ctr_el0
|
|
|
|
o(0x52800080 | p); // mov w(p),#4
|
|
|
|
o(0x53104c00 | dsz | isz << 5); // ubfx w(dsz),w(isz),#16,#4
|
|
|
|
o(0x1ac02000 | dsz | p << 5 | dsz << 16); // lsl w(dsz),w(p),w(dsz)
|
|
|
|
o(0x12000c00 | isz | isz << 5); // and w(isz),w(isz),#15
|
|
|
|
o(0x1ac02000 | isz | p << 5 | isz << 16); // lsl w(isz),w(p),w(isz)
|
|
|
|
o(0x51000400 | p | dsz << 5); // sub w(p),w(dsz),#1
|
|
|
|
o(0x8a240004 | p | beg << 5 | p << 16); // bic x(p),x(beg),x(p)
|
|
|
|
b1 = ind; o(0x14000000); // b
|
|
|
|
lab1 = ind;
|
|
|
|
o(0xd50b7b20 | p); // dc cvau,x(p)
|
|
|
|
o(0x8b000000 | p | p << 5 | dsz << 16); // add x(p),x(p),x(dsz)
|
|
|
|
write32le(cur_text_section->data + b1, 0x14000000 | (ind - b1) >> 2);
|
|
|
|
o(0xeb00001f | p << 5 | end << 16); // cmp x(p),x(end)
|
|
|
|
o(0x54ffffa3 | ((lab1 - ind) << 3 & 0xffffe0)); // b.cc lab1
|
|
|
|
o(0xd5033b9f); // dsb ish
|
|
|
|
o(0x51000400 | p | isz << 5); // sub w(p),w(isz),#1
|
|
|
|
o(0x8a240004 | p | beg << 5 | p << 16); // bic x(p),x(beg),x(p)
|
|
|
|
b1 = ind; o(0x14000000); // b
|
|
|
|
lab1 = ind;
|
|
|
|
o(0xd50b7520 | p); // ic ivau,x(p)
|
|
|
|
o(0x8b000000 | p | p << 5 | isz << 16); // add x(p),x(p),x(isz)
|
|
|
|
write32le(cur_text_section->data + b1, 0x14000000 | (ind - b1) >> 2);
|
|
|
|
o(0xeb00001f | p << 5 | end << 16); // cmp x(p),x(end)
|
|
|
|
o(0x54ffffa3 | ((lab1 - ind) << 3 & 0xffffe0)); // b.cc lab1
|
|
|
|
o(0xd5033b9f); // dsb ish
|
|
|
|
o(0xd5033fdf); // isb
|
2015-03-08 08:10:44 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
ST_FUNC void gen_vla_sp_save(int addr) {
|
|
|
|
uint32_t r = intr(get_reg(RC_INT));
|
|
|
|
o(0x910003e0 | r); // mov x(r),sp
|
|
|
|
arm64_strx(3, r, 29, addr);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/*
 * Restore the stack pointer: load via arm64_ldrx() with base register
 * 29 and offset 'addr' into x30, then move x30 into sp.
 */
ST_FUNC void gen_vla_sp_restore(int addr) {
    // x30 is hard-wired here instead of asking get_reg() for a scratch
    // register: this function can be called while x0 holds a live
    // return value that is not represented on the value stack, so
    // get_reg() would be free to hand out x0 and clobber it.
    const uint32_t scratch = 30;

    arm64_ldrx(0, 3, scratch, 29, addr);
    o(0x9100001f | scratch << 5); // mov sp,x(scratch)
}
|
|
|
|
|
2021-10-22 13:39:54 +08:00
|
|
|
/*
 * Allocate a variable-length array on the stack.
 *
 * The byte size is taken from the top of the value stack, rounded to a
 * multiple of 16 and subtracted from sp; the size entry is then popped.
 * The 'type' and 'align' parameters are not used by this implementation.
 *
 * With CONFIG_TCC_BCHECK and bounds checking enabled, the size entry is
 * duplicated first, 16 rather than 15 is added before rounding, and
 * after the allocation __bound_new_region is called with the new sp
 * and the saved size.
 */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    uint32_t r;
    uint32_t pad = 15; // added before masking to a multiple of 16

#if defined(CONFIG_TCC_BCHECK)
    if (tcc_state->do_bounds_check)
        vpushv(vtop); // keep a copy of the size for the helper call
#endif
    r = intr(gv(RC_INT));
#if defined(CONFIG_TCC_BCHECK)
    if (tcc_state->do_bounds_check)
        pad = 15 + 1;
#endif
    o(0x91000000 | pad << 10 | r | r << 5); // add x(r),x(r),#pad
    o(0x927cec00 | r | r << 5); // bic x(r),x(r),#15
    o(0xcb2063ff | r << 16); // sub sp,sp,x(r)
    vpop();
#if defined(CONFIG_TCC_BCHECK)
    if (tcc_state->do_bounds_check) {
        // Call __bound_new_region(sp, size).
        vpushi(0);
        vtop->r = TREG_R(0);
        o(0x910003e0 | vtop->r); // mov r0,sp
        vswap();
        vpush_helper_func(TOK___bound_new_region);
        vrott(3);
        gfunc_call(2);
        func_bound_add_epilog = 1;
    }
#endif
}
|
|
|
|
|
|
|
|
/* end of A64 code generator */
|
|
|
|
/*************************************************************/
|
|
|
|
#endif
|
|
|
|
/*************************************************************/
|