2015-02-14 02:58:31 +08:00
|
|
|
/*
|
|
|
|
* A64 code generator for TCC
|
|
|
|
*
|
|
|
|
* Copyright (c) 2014-2015 Edmund Grimley Evans
|
|
|
|
*
|
|
|
|
* Copying and distribution of this file, with or without modification,
|
|
|
|
* are permitted in any medium without royalty provided the copyright
|
|
|
|
* notice and this notice are preserved. This file is offered as-is,
|
|
|
|
* without any warranty.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef TARGET_DEFS_ONLY
|
|
|
|
|
|
|
|
// Number of registers available to allocator:
#define NB_REGS 28 // x0-x18, x30, v0-v7

// Allocator register numbers: 0..18 name x0-x18, 19 names x30 and
// 20..27 name v0-v7.
#define TREG_R(x) (x) // x = 0..18
#define TREG_R30 19
// The argument is parenthesized so that an expression argument such as
// TREG_F(a ? b : c) expands with the intended precedence.
#define TREG_F(x) ((x) + 20) // x = 0..7

// Register classes sorted from more general to more precise:
#define RC_INT (1 << 0)
#define RC_FLOAT (1 << 1)
#define RC_R(x) (1 << (2 + (x))) // x = 0..18
#define RC_R30 (1 << 21)
#define RC_F(x) (1 << (22 + (x))) // x = 0..7

#define RC_IRET (RC_R(0)) // int return register class
#define RC_FRET (RC_F(0)) // float return register class

#define REG_IRET (TREG_R(0)) // int return register number
#define REG_FRET (TREG_F(0)) // float return register number

#define PTR_SIZE 8

#define LDOUBLE_SIZE 16
#define LDOUBLE_ALIGN 16

#define MAX_ALIGN 16

#define CHAR_IS_UNSIGNED
|
|
|
|
|
Adjust return value promotion for some archs
this is a bit complicated: for i386 and x86-64 we really need to
extend return values ourself, as the common code now does. For arm64
this at least preserves old behaviour. For riscv64 we don't have to
extend ourself but can expect things to be extended up to int (this
matters for var-args tests, when the sign-extension to int64 needs to
happen explicitly). As the extensions are useless, don't do them.
And for arm32 we actually can't express GCC behaviour: the callee side
expects the return value to be correctly extended to int32, but
remembers the original type. In case the ultimate target type for the
call result is only int, no further extension is done. But in case
the target type is e.g. int64 an extension happens, but not from int32
but from the original type. We don't know the ultimate target type,
so we have to choose a type to put into vtop:
* original type (plus VT_MUSTCAST) - this loses when the ultimate
target is int (GCC: no cast, TCC: a cast)
* int (without MUSTCAST) - this loses when the ultimate target is
int64 (GCC: cast from original type, TCC: cast from int)
This difference can only be seen with undefined sources, like the
testcases, so it doesn't seem worthwhile to try and make it work, just
disable the test on arm and choose the second variant as that generates
less code.
2019-12-17 08:46:06 +08:00
|
|
|
/* Define if return values need to be extended explicitly
   at the caller side (for interfacing with non-TCC compilers). */
|
|
|
|
#define PROMOTE_RET
|
2015-02-14 02:58:31 +08:00
|
|
|
/******************************************************/
|
|
|
|
#else /* ! TARGET_DEFS_ONLY */
|
|
|
|
/******************************************************/
|
2019-12-11 07:37:18 +08:00
|
|
|
#define USING_GLOBALS
|
2015-07-30 04:53:57 +08:00
|
|
|
#include "tcc.h"
|
2015-02-14 02:58:31 +08:00
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
ST_DATA const int reg_classes[NB_REGS] = {
|
|
|
|
RC_INT | RC_R(0),
|
|
|
|
RC_INT | RC_R(1),
|
|
|
|
RC_INT | RC_R(2),
|
|
|
|
RC_INT | RC_R(3),
|
|
|
|
RC_INT | RC_R(4),
|
|
|
|
RC_INT | RC_R(5),
|
|
|
|
RC_INT | RC_R(6),
|
|
|
|
RC_INT | RC_R(7),
|
|
|
|
RC_INT | RC_R(8),
|
|
|
|
RC_INT | RC_R(9),
|
|
|
|
RC_INT | RC_R(10),
|
|
|
|
RC_INT | RC_R(11),
|
|
|
|
RC_INT | RC_R(12),
|
|
|
|
RC_INT | RC_R(13),
|
|
|
|
RC_INT | RC_R(14),
|
|
|
|
RC_INT | RC_R(15),
|
|
|
|
RC_INT | RC_R(16),
|
|
|
|
RC_INT | RC_R(17),
|
|
|
|
RC_INT | RC_R(18),
|
|
|
|
RC_R30, // not in RC_INT as we make special use of x30
|
|
|
|
RC_FLOAT | RC_F(0),
|
|
|
|
RC_FLOAT | RC_F(1),
|
|
|
|
RC_FLOAT | RC_F(2),
|
|
|
|
RC_FLOAT | RC_F(3),
|
|
|
|
RC_FLOAT | RC_F(4),
|
|
|
|
RC_FLOAT | RC_F(5),
|
|
|
|
RC_FLOAT | RC_F(6),
|
|
|
|
RC_FLOAT | RC_F(7)
|
|
|
|
};
|
|
|
|
|
|
|
|
// True when allocator register number x is at or above TREG_F(0), i.e. it
// names one of the v registers rather than an integer register.
#define IS_FREG(x) ((x) >= TREG_F(0))
|
|
|
|
|
|
|
|
static uint32_t intr(int r)
|
|
|
|
{
|
|
|
|
assert(TREG_R(0) <= r && r <= TREG_R30);
|
|
|
|
return r < TREG_R30 ? r : 30;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Translate an allocator floating-point register number
// (TREG_F(0)..TREG_F(7)) into the hardware v-register number 0..7.
static uint32_t fltr(int r)
{
    const int first = TREG_F(0);

    assert(r >= first && r <= TREG_F(7));
    return r - first;
}
|
|
|
|
|
|
|
|
// Add an instruction to text section:
|
|
|
|
ST_FUNC void o(unsigned int c)
|
|
|
|
{
|
|
|
|
int ind1 = ind + 4;
|
2016-12-19 00:23:33 +08:00
|
|
|
if (nocode_wanted)
|
|
|
|
return;
|
2015-02-14 02:58:31 +08:00
|
|
|
if (ind1 > cur_text_section->data_allocated)
|
|
|
|
section_realloc(cur_text_section, ind1);
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(cur_text_section->data + ind, c);
|
2015-02-14 02:58:31 +08:00
|
|
|
ind = ind1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to encode x as an A64 "bitmask immediate" (the operand form of the
// logical-immediate instructions).
//
// Returns the 13-bit field N:immr:imms (N in bit 12, immr in bits 6..11,
// imms in bits 0..5), or -1 when x is not representable (this includes
// x == 0 and x == ~0).
static int arm64_encode_bimm64(uint64_t x)
{
    const int neg = x & 1;
    int rep = 64, pos = 0, len = 0;
    int w;

    // Work on a pattern whose bit 0 is clear; undo the inversion at the end.
    if (neg)
        x = ~x;
    if (!x)
        return -1;

    // Find the smallest element width (2..32) at which x repeats, and keep
    // a single element. If none repeats, the element is the whole 64 bits.
    for (w = 2; w < 64; w <<= 1) {
        if (x >> w == (x & (((uint64_t)1 << (64 - w)) - 1))) {
            rep = w;
            x &= ((uint64_t)1 << w) - 1;
            break;
        }
    }

    // pos = number of trailing zero bits (binary search), shifted out of x.
    for (w = 32; w > 0; w >>= 1) {
        if (!(x & (((uint64_t)1 << w) - 1))) {
            x >>= w;
            pos += w;
        }
    }

    // len = length of the run of ones that follows, also shifted out of x.
    for (w = 32; w > 0; w >>= 1) {
        if (!(~x & (((uint64_t)1 << w) - 1))) {
            x >>= w;
            len += w;
        }
    }

    // Anything left means the element had more than one run of ones.
    if (x)
        return -1;

    if (neg) {
        // Convert the run of the inverted value back to the original's run.
        pos = (pos + len) & (rep - 1);
        len = rep - len;
    }

    {
        int n_bit = (rep << 6) & 0x1000;             // set only for rep == 64
        int size_bits = (((rep - 1) ^ 31) << 1) & 63; // element-size marker in imms
        int immr = ((rep - pos) & (rep - 1)) << 6;   // rotate-right amount

        return n_bit | size_bits | immr | (len - 1);
    }
}
|
|
|
|
|
|
|
|
// Build a single instruction that sets integer register r (hardware
// number) to the constant x, if one exists.
//
// Tries, in order: MOVZ with one non-zero halfword, 32-bit MOVN, 64-bit
// MOVN, then a 32-bit and a 64-bit bitmask-immediate move.
// Returns the instruction word, or 0 when no single instruction works.
static uint32_t arm64_movi(int r, uint64_t x)
{
    const uint64_t m = 0xffff;
    const uint64_t nx = ~x;
    int enc;

    // Exactly one halfword may be non-zero: MOVZ.
    if (!(x & ~m))
        return 0x52800000 | r | x << 5; // movz w(r),#(x)
    if (!(x & ~(m << 16)))
        return 0x52a00000 | r | x >> 11; // movz w(r),#(x >> 16),lsl #16
    if (!(x & ~(m << 32)))
        return 0xd2c00000 | r | x >> 27; // movz x(r),#(x >> 32),lsl #32
    if (!(x & ~(m << 48)))
        return 0xd2e00000 | r | x >> 43; // movz x(r),#(x >> 48),lsl #48

    // Upper 32 bits zero and one of the low halfwords all ones: 32-bit MOVN.
    if ((x & ~m) == m << 16)
        return (0x12800000 | r |
                (nx << 5 & 0x1fffe0)); // movn w(r),#(~x)
    if ((x & ~(m << 16)) == m)
        return (0x12a00000 | r |
                (nx >> 11 & 0x1fffe0)); // movn w(r),#(~x >> 16),lsl #16

    // All bits set outside one halfword: 64-bit MOVN.
    if (!~(x | m))
        return (0x92800000 | r |
                (nx << 5 & 0x1fffe0)); // movn x(r),#(~x)
    if (!~(x | m << 16))
        return (0x92a00000 | r |
                (nx >> 11 & 0x1fffe0)); // movn x(r),#(~x >> 16),lsl #16
    if (!~(x | m << 32))
        return (0x92c00000 | r |
                (nx >> 27 & 0x1fffe0)); // movn x(r),#(~x >> 32),lsl #32
    if (!~(x | m << 48))
        return (0x92e00000 | r |
                (nx >> 43 & 0x1fffe0)); // movn x(r),#(~x >> 48),lsl #48

    // Bitmask immediates: the 32-bit form needs the value replicated into
    // both halves before asking the 64-bit encoder.
    if (!(x >> 32)) {
        enc = arm64_encode_bimm64(x | x << 32);
        if (enc >= 0)
            return 0x320003e0 | r | (uint32_t)enc << 10; // movi w(r),#(x)
    }
    enc = arm64_encode_bimm64(x);
    if (enc >= 0)
        return 0xb20003e0 | r | (uint32_t)enc << 10; // movi x(r),#(x)

    return 0;
}
|
|
|
|
|
|
|
|
// Emit code that sets integer register r (hardware number) to x.
// Uses a single instruction when arm64_movi() finds one; otherwise a
// MOVZ or MOVN followed by one MOVK per remaining halfword that differs.
static void arm64_movimm(int r, uint64_t x)
{
    uint32_t single = arm64_movi(r, x);
    uint32_t sh;
    uint32_t first_op = 0xd2800000; // movz
    uint64_t pattern = x;
    int zero_hw = 0, ones_hw = 0;

    if (single) {
        o(single); // a single MOV
        return;
    }

    // MOVZ/MOVN and 1-3 MOVKs.
    // Count all-zero and all-one halfwords to pick the cheaper start.
    for (sh = 0; sh < 64; sh += 16) {
        zero_hw += !(x >> sh & 0xffff);
        ones_hw += !(~x >> sh & 0xffff);
    }
    if (ones_hw > zero_hw) {
        pattern = ~x;
        first_op = 0x92800000; // movn
    }

    // The lowest halfword that needs setting goes into the MOVZ/MOVN.
    for (sh = 0; sh < 64; sh += 16) {
        if (pattern >> sh & 0xffff) {
            o(first_op | r | (pattern >> sh & 0xffff) << 5 | sh << 17);
            // movz/movn x(r),#(*),lsl #(sh)
            break;
        }
    }

    // Every further halfword not already right gets a MOVK of the real value.
    for (sh += 16; sh < 64; sh += 16) {
        if (pattern >> sh & 0xffff)
            o(0xf2800000 | r | (x >> sh & 0xffff) << 5 | sh << 17);
            // movk x(r),#(*),lsl #(sh)
    }
}
|
|
|
|
|
|
|
|
// Patch all branches in list pointed to by t to branch to a:
|
|
|
|
ST_FUNC void gsym_addr(int t_, int a_)
|
|
|
|
{
|
|
|
|
uint32_t t = t_;
|
|
|
|
uint32_t a = a_;
|
|
|
|
while (t) {
|
2015-11-20 02:21:14 +08:00
|
|
|
unsigned char *ptr = cur_text_section->data + t;
|
|
|
|
uint32_t next = read32le(ptr);
|
2015-02-14 02:58:31 +08:00
|
|
|
if (a - t + 0x8000000 >= 0x10000000)
|
|
|
|
tcc_error("branch out of range");
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr, (a - t == 4 ? 0xd503201f : // nop
|
|
|
|
0x14000000 | ((a - t) >> 2 & 0x3ffffff))); // b
|
2015-02-14 02:58:31 +08:00
|
|
|
t = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int arm64_type_size(int t)
|
|
|
|
{
|
2020-03-22 15:26:03 +08:00
|
|
|
/*
|
|
|
|
* case values are in increasing order (from 1 to 11).
|
|
|
|
* which 'may' help compiler optimizers. See tcc.h
|
|
|
|
*/
|
2015-02-14 02:58:31 +08:00
|
|
|
switch (t & VT_BTYPE) {
|
|
|
|
case VT_BYTE: return 0;
|
|
|
|
case VT_SHORT: return 1;
|
2020-03-22 15:26:03 +08:00
|
|
|
case VT_INT: return 2;
|
|
|
|
case VT_LLONG: return 3;
|
2015-02-14 02:58:31 +08:00
|
|
|
case VT_PTR: return 3;
|
|
|
|
case VT_FUNC: return 3;
|
2020-03-22 15:26:03 +08:00
|
|
|
case VT_STRUCT: return 3;
|
2015-02-14 02:58:31 +08:00
|
|
|
case VT_FLOAT: return 2;
|
|
|
|
case VT_DOUBLE: return 3;
|
|
|
|
case VT_LDOUBLE: return 4;
|
|
|
|
case VT_BOOL: return 0;
|
|
|
|
}
|
|
|
|
assert(0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-03-03 04:45:58 +08:00
|
|
|
// Emit code computing x(reg) = sp + off, where off is treated as a signed
// 64-bit value. Small magnitudes use the immediate form; larger ones are
// materialized in x30 first, which is therefore clobbered.
static void arm64_spoff(int reg, uint64_t off)
{
    const uint32_t negative = off >> 63; // 1 selects SUB instead of ADD
    const uint64_t mag = negative ? -off : off;

    if (mag < 4096) {
        o(0x910003e0 | negative << 30 | reg | mag << 10);
        // (add|sub) x(reg),sp,#(mag)
        return;
    }
    arm64_movimm(30, mag); // use x30 for offset
    o(0x8b3e63e0 | negative << 30 | reg); // (add|sub) x(reg),sp,x30
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Emit an integer load: x(dst) = [x(bas) + off].
//
//   sg   non-zero requests a sign-extending load; it is only honoured for
//        sz_ < 2 (byte and halfword), wider loads always use the plain form
//   sz_  log2 of the access size in bytes (0..3)
//   dst  hardware number of the destination register
//   bas  hardware number of the base register
//   off  byte offset, interpreted as a signed 64-bit value
//
// Picks the scaled unsigned-immediate form when off is a multiple of the
// access size in [0, 0xfff << sz], the unscaled (ldur) form for
// -256..255, and otherwise loads off into x30 (clobbering it) and uses
// the register-offset form.
static void arm64_ldrx(int sg, int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    if (sz >= 2)
        sg = 0;
    // The mask must be 64 bits wide: a 32-bit mask is zero-extended when
    // combined with off, which would leave bits 32..63 of off unchecked.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x39400000 | dst | bas << 5 | off << (10 - sz) |
          (uint32_t)!!sg << 23 | sz << 30); // ldr(*) x(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x38400000 | dst | bas << 5 | (off & 511) << 12 |
          (uint32_t)!!sg << 23 | sz << 30); // ldur(*) x(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x38206800 | dst | bas << 5 | (uint32_t)30 << 16 |
          (uint32_t)(!!sg + 1) << 22 | sz << 30); // ldr(*) x(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Emit a floating-point/SIMD load: (s|d|q)(dst) = [x(bas) + off].
//
//   sz_  log2 of the access size in bytes (2 = s, 3 = d, 4 = q)
//   dst  hardware number of the destination v register
//   bas  hardware number of the base register
//   off  byte offset, interpreted as a signed 64-bit value
//
// Form selection is the same as for the integer loads: scaled immediate,
// then unscaled (ldur) for -256..255, else register offset via x30
// (which is clobbered).
static void arm64_ldrv(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    // The mask must be 64 bits wide: a 32-bit mask is zero-extended when
    // combined with off, which would leave bits 32..63 of off unchecked.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x3d400000 | dst | bas << 5 | off << (10 - sz) |
          (sz & 4) << 21 | (sz & 3) << 30); // ldr (s|d|q)(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x3c400000 | dst | bas << 5 | (off & 511) << 12 |
          (sz & 4) << 21 | (sz & 3) << 30); // ldur (s|d|q)(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x3c606800 | dst | bas << 5 | (uint32_t)30 << 16 |
          sz << 30 | (sz & 4) << 21); // ldr (s|d|q)(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Emit code that loads `size` bytes (1..16) from the address held in
// x(reg_) into x(reg_), and for sizes above 8 also into x(reg_ + 1).
// Sizes that are not a power of two are assembled from several loads;
// sizes 3, 5, 6 and 7 use x30 as scratch. Any other size trips the
// assertion.
static void arm64_ldrs(int reg_, int size)
{
    const uint32_t reg = reg_;
    const uint32_t hi = reg + 1;

    // Sizes 9..15: fetch the upper part into x(reg+1) first, because the
    // final 8-byte load overwrites the address held in x(reg).
    if (size >= 9 && size <= 15) {
        switch (size) {
        case 9:
            arm64_ldrx(0, 0, hi, reg, 8);
            break;
        case 10:
            arm64_ldrx(0, 1, hi, reg, 8);
            break;
        case 11:
            arm64_ldrx(0, 2, hi, reg, 7);
            o(0x53087c00 | hi | hi << 5); // lsr w(reg+1), w(reg+1), #8
            break;
        case 12:
            arm64_ldrx(0, 2, hi, reg, 8);
            break;
        case 13:
            arm64_ldrx(0, 3, hi, reg, 5);
            o(0xd358fc00 | hi | hi << 5); // lsr x(reg+1), x(reg+1), #24
            break;
        case 14:
            arm64_ldrx(0, 3, hi, reg, 6);
            o(0xd350fc00 | hi | hi << 5); // lsr x(reg+1), x(reg+1), #16
            break;
        case 15:
            arm64_ldrx(0, 3, hi, reg, 7);
            o(0xd348fc00 | hi | hi << 5); // lsr x(reg+1), x(reg+1), #8
            break;
        }
        arm64_ldrx(0, 3, reg, reg, 0);
        return;
    }

    // Use x30 for intermediate value in some cases.
    switch (size) {
    case 1:
        arm64_ldrx(0, 0, reg, reg, 0);
        break;
    case 2:
        arm64_ldrx(0, 1, reg, reg, 0);
        break;
    case 3:
        arm64_ldrx(0, 1, 30, reg, 0);
        arm64_ldrx(0, 0, reg, reg, 2);
        o(0x2a0043c0 | reg | reg << 16); // orr w(reg),w30,w(reg),lsl #16
        break;
    case 4:
        arm64_ldrx(0, 2, reg, reg, 0);
        break;
    case 5:
        arm64_ldrx(0, 2, 30, reg, 0);
        arm64_ldrx(0, 0, reg, reg, 4);
        o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32
        break;
    case 6:
        arm64_ldrx(0, 2, 30, reg, 0);
        arm64_ldrx(0, 1, reg, reg, 4);
        o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32
        break;
    case 7:
        arm64_ldrx(0, 2, 30, reg, 0);
        arm64_ldrx(0, 2, reg, reg, 3);
        o(0x53087c00 | reg | reg << 5); // lsr w(reg), w(reg), #8
        o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32
        break;
    case 8:
        arm64_ldrx(0, 3, reg, reg, 0);
        break;
    case 16:
        o(0xa9400000 | reg | hi << 10 | reg << 5);
        // ldp x(reg),x(reg+1),[x(reg)]
        break;
    default:
        assert(0);
        break;
    }
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Emit an integer store: [x(bas) + off] = x(dst).
//
//   sz_  log2 of the access size in bytes (0..3)
//   dst  hardware number of the register being stored
//   bas  hardware number of the base register
//   off  byte offset, interpreted as a signed 64-bit value
//
// Uses the scaled unsigned-immediate form when off fits, the unscaled
// (stur) form for -256..255, and otherwise puts off in x30 (clobbering
// it) and uses the register-offset form.
static void arm64_strx(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    // The mask must be 64 bits wide: a 32-bit mask is zero-extended when
    // combined with off, which would leave bits 32..63 of off unchecked.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x39000000 | dst | bas << 5 | off << (10 - sz) | sz << 30);
        // str(*) x(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x38000000 | dst | bas << 5 | (off & 511) << 12 | sz << 30);
        // stur(*) x(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x38206800 | dst | bas << 5 | (uint32_t)30 << 16 | sz << 30);
        // str(*) x(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Emit a floating-point/SIMD store: [x(bas) + off] = (s|d|q)(dst).
//
//   sz_  log2 of the access size in bytes (2 = s, 3 = d, 4 = q)
//   dst  hardware number of the v register being stored
//   bas  hardware number of the base register
//   off  byte offset, interpreted as a signed 64-bit value
//
// Form selection: scaled immediate, then unscaled (stur) for -256..255,
// else register offset via x30 (which is clobbered).
static void arm64_strv(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    // The mask must be 64 bits wide: a 32-bit mask is zero-extended when
    // combined with off, which would leave bits 32..63 of off unchecked.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x3d000000 | dst | bas << 5 | off << (10 - sz) |
          (sz & 4) << 21 | (sz & 3) << 30); // str (s|d|q)(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x3c000000 | dst | bas << 5 | (off & 511) << 12 |
          (sz & 4) << 21 | (sz & 3) << 30); // stur (s|d|q)(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x3c206800 | dst | bas << 5 | (uint32_t)30 << 16 |
          sz << 30 | (sz & 4) << 21); // str (s|d|q)(dst),[x(bas),x30]
    }
}
|
|
|
|
|
2015-03-03 04:45:58 +08:00
|
|
|
// Emit code that loads the address (sym + addend) into x(r), together
// with the relocations that fill in the address.
static void arm64_sym(int r, Sym *sym, unsigned long addend)
{
    // Currently TCC's linker does not generate COPY relocations for
    // STT_OBJECTs when tcc is invoked with "-run". This typically
    // results in "R_AARCH64_ADR_PREL_PG_HI21 relocation failed" when
    // a program refers to stdin. A workaround is to avoid that
    // relocation and use only relocations with unlimited range.
    int avoid_adrp = 1;

    if (!avoid_adrp && !sym->a.weak) {
        // Page-relative pair: first the page address, then the low 12 bits.
        greloca(cur_text_section, sym, ind, R_AARCH64_ADR_PREL_PG_HI21, addend);
        o(0x90000000 | r);
        greloca(cur_text_section, sym, ind, R_AARCH64_ADD_ABS_LO12_NC, addend);
        o(0x91000000 | r | r << 5);
        return;
    }

    // Absolute 64-bit address built 16 bits at a time.
    // (GCC uses a R_AARCH64_ABS64 in this case.)
    greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G0_NC, addend);
    o(0xd2800000 | r); // mov x(rt),#0,lsl #0
    greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G1_NC, addend);
    o(0xf2a00000 | r); // movk x(rt),#0,lsl #16
    greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G2_NC, addend);
    o(0xf2c00000 | r); // movk x(rt),#0,lsl #32
    greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G3, addend);
    o(0xf2e00000 | r); // movk x(rt),#0,lsl #48
}
|
|
|
|
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
This way we can delay emitting the non-inverted or inverted jump
until we can see which one is more suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
// Forward declaration; load() hands VT_CMP values to this routine. Its
// definition is not in this part of the file.
static void arm64_load_cmp(int r, SValue *sv);
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
ST_FUNC void load(int r, SValue *sv)
|
|
|
|
{
|
|
|
|
int svtt = sv->type.t;
|
2019-12-17 01:48:31 +08:00
|
|
|
int svr = sv->r;
|
2015-02-14 02:58:31 +08:00
|
|
|
int svrv = svr & VT_VALMASK;
|
2015-11-18 03:09:35 +08:00
|
|
|
uint64_t svcul = (uint32_t)sv->c.i;
|
2015-11-10 07:06:05 +08:00
|
|
|
svcul = svcul >> 31 & 1 ? svcul - ((uint64_t)1 << 32) : svcul;
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
if (svr == (VT_LOCAL | VT_LVAL)) {
|
|
|
|
if (IS_FREG(r))
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrv(arm64_type_size(svtt), fltr(r), 29, svcul);
|
2015-02-14 02:58:31 +08:00
|
|
|
else
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
|
|
|
|
intr(r), 29, svcul);
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) {
|
|
|
|
if (IS_FREG(r))
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrv(arm64_type_size(svtt), fltr(r), intr(svrv), 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
else
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
|
|
|
|
intr(r), intr(svrv), 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr == (VT_CONST | VT_LVAL | VT_SYM)) {
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_sym(30, sv->sym, svcul); // use x30 for address
|
2015-02-14 02:58:31 +08:00
|
|
|
if (IS_FREG(r))
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrv(arm64_type_size(svtt), fltr(r), 30, 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
else
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
|
|
|
|
intr(r), 30, 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr == (VT_CONST | VT_SYM)) {
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_sym(intr(r), sv->sym, svcul);
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr == VT_CONST) {
|
|
|
|
if ((svtt & VT_BTYPE) != VT_VOID)
|
2015-03-08 01:29:54 +08:00
|
|
|
arm64_movimm(intr(r), arm64_type_size(svtt) == 3 ?
|
2015-11-18 03:09:35 +08:00
|
|
|
sv->c.i : (uint32_t)svcul);
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr < VT_CONST) {
|
|
|
|
if (IS_FREG(r) && IS_FREG(svr))
|
|
|
|
if (svtt == VT_LDOUBLE)
|
|
|
|
o(0x4ea01c00 | fltr(r) | fltr(svr) << 5);
|
|
|
|
// mov v(r).16b,v(svr).16b
|
|
|
|
else
|
|
|
|
o(0x1e604000 | fltr(r) | fltr(svr) << 5); // fmov d(r),d(svr)
|
|
|
|
else if (!IS_FREG(r) && !IS_FREG(svr))
|
|
|
|
o(0xaa0003e0 | intr(r) | intr(svr) << 16); // mov x(r),x(svr)
|
|
|
|
else
|
|
|
|
assert(0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr == VT_LOCAL) {
|
|
|
|
if (-svcul < 0x1000)
|
|
|
|
o(0xd10003a0 | intr(r) | -svcul << 10); // sub x(r),x29,#...
|
|
|
|
else {
|
|
|
|
arm64_movimm(30, -svcul); // use x30 for offset
|
2015-11-10 07:06:05 +08:00
|
|
|
o(0xcb0003a0 | intr(r) | (uint32_t)30 << 16); // sub x(r),x29,x30
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr == VT_JMP || svr == VT_JMPI) {
|
|
|
|
int t = (svr == VT_JMPI);
|
|
|
|
arm64_movimm(intr(r), t);
|
|
|
|
o(0x14000002); // b .+8
|
|
|
|
gsym(svcul);
|
|
|
|
arm64_movimm(intr(r), t ^ 1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (svr == (VT_LLOCAL | VT_LVAL)) {
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrx(0, 3, 30, 29, svcul); // use x30 for offset
|
2015-02-14 02:58:31 +08:00
|
|
|
if (IS_FREG(r))
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrv(arm64_type_size(svtt), fltr(r), 30, 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
else
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt),
|
|
|
|
intr(r), 30, 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
if (svr == VT_CMP) {
|
|
|
|
arm64_load_cmp(r, sv);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
printf("load(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul);
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit a store of allocator register r, as a value of type svtt, to
// [x(bas) + off], choosing the FP/SIMD or the integer store according to r.
static void arm64_st_lval(int r, int svtt, int bas, uint64_t off)
{
    if (IS_FREG(r))
        arm64_strv(arm64_type_size(svtt), fltr(r), bas, off);
    else
        arm64_strx(arm64_type_size(svtt), intr(r), bas, off);
}

// Generate code that stores allocator register r into the lvalue
// described by sv. x30 is used as scratch for symbol addresses and large
// offsets. An unhandled storage kind prints a diagnostic and trips the
// assertion.
ST_FUNC void store(int r, SValue *sv)
{
    const int svtt = sv->type.t;
    const int svr = sv->r;
    const int svrv = svr & VT_VALMASK;
    uint64_t svcul = (uint32_t)sv->c.i;

    // Sign-extend the low 32 bits of the constant to 64 bits.
    if (svcul >> 31 & 1)
        svcul -= (uint64_t)1 << 32;

    // Local variable: frame-pointer (x29) relative.
    if (svr == (VT_LOCAL | VT_LVAL)) {
        arm64_st_lval(r, svtt, 29, svcul);
        return;
    }

    // Lvalue whose address is in a register.
    if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) {
        arm64_st_lval(r, svtt, intr(svrv), 0);
        return;
    }

    // Lvalue at a symbol address.
    if (svr == (VT_CONST | VT_LVAL | VT_SYM)) {
        arm64_sym(30, sv->sym, svcul); // use x30 for address
        arm64_st_lval(r, svtt, 30, 0);
        return;
    }

    printf("store(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul);
    assert(0);
}
|
|
|
|
|
|
|
|
static void arm64_gen_bl_or_b(int b)
|
|
|
|
{
|
2018-07-02 07:54:45 +08:00
|
|
|
if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && (vtop->r & VT_SYM)) {
|
|
|
|
assert(!b);
|
2017-05-13 14:59:06 +08:00
|
|
|
greloca(cur_text_section, vtop->sym, ind, R_AARCH64_CALL26, 0);
|
2016-10-14 23:32:10 +08:00
|
|
|
o(0x94000000); // bl .
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else
|
2015-11-10 07:06:05 +08:00
|
|
|
o(0xd61f0000 | (uint32_t)!b << 21 | intr(gv(RC_R30)) << 5); // br/blr
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Recursive worker for arm64_hfa(): walk `type` and count its
// floating-point members, checking that they all have the same size and
// are laid out without gaps.
//
//   type   type being examined
//   fsize  in/out: size of the floating-point member type seen so far
//          (0 while none has been seen)
//   num    number of members counted before this type
//
// Returns the updated member count, or -1 when the type disqualifies the
// aggregate (more than 4 members, mixed sizes, a non-float member, or a
// layout that does not match count * member size).
static int arm64_hfa_aux(CType *type, int *fsize, int num)
{
    // Scalar floating-point member.
    if (is_float(type->t)) {
        int a, n = type_size(type, &a);

        if (num >= 4 || (*fsize && *fsize != n))
            return -1;
        *fsize = n;
        return num + 1;
    }

    // Struct or union.
    if ((type->t & VT_BTYPE) == VT_STRUCT) {
        const int base = num;
        int is_struct = 0; // rather than union
        Sym *f;

        // Any member at a non-zero offset means this is a struct.
        for (f = type->ref->next; f && !is_struct; f = f->next)
            is_struct = f->c != 0;

        for (f = type->ref->next; f; f = f->next) {
            if (is_struct) {
                // Members must follow one another without padding.
                if (f->c != (num - base) * *fsize)
                    return -1;
                num = arm64_hfa_aux(&f->type, fsize, num);
                if (num == -1)
                    return -1;
            } else {
                // Union: every member starts at `base`; keep the largest count.
                int n1 = arm64_hfa_aux(&f->type, fsize, base);

                if (n1 == -1)
                    return -1;
                if (n1 > num)
                    num = n1;
            }
        }

        // The aggregate's total size must equal count * member size.
        if (type->ref->c != (num - base) * *fsize)
            return -1;
        return num;
    }

    // Array: element count times the members of one element.
    if (type->t & VT_ARRAY) {
        int per_elem_end, total;

        if (!type->ref->c)
            return num;
        per_elem_end = arm64_hfa_aux(&type->ref->type, fsize, num);
        if (per_elem_end == -1 ||
            (per_elem_end != num && type->ref->c > 4))
            return -1;
        total = num + type->ref->c * (per_elem_end - num);
        if (total > 4)
            return -1;
        return total;
    }

    return -1;
}
|
|
|
|
|
|
|
|
// Decide whether `type` is a homogeneous floating-point aggregate.
// Returns the number of floating-point members (1..4) when it is, else 0.
// When it is and fsize is non-NULL, *fsize receives the member size.
static int arm64_hfa(CType *type, int *fsize)
{
    int sz = 0;
    int n;

    // Only structs/unions and arrays can qualify.
    if ((type->t & VT_BTYPE) != VT_STRUCT && !(type->t & VT_ARRAY))
        return 0;

    n = arm64_hfa_aux(type, &sz, 0);
    if (n <= 0 || n > 4)
        return 0;

    if (fsize)
        *fsize = sz;
    return n;
}
|
|
|
|
|
|
|
|
// Assign a location to each of the "n" values described by type[0..n-1].
//
// On return a[i] encodes where value i lives:
//   a[i] < 16         general-purpose register X(a[i] / 2)
//   16 <= a[i] < 32   vector register V(a[i] / 2 - 8)
//   a[i] >= 32        stack, at byte offset (a[i] - 32) with bit 0 cleared
//   bit 0 set         the value was replaced by a pointer to a copy
//
// The return value is the number of stack bytes consumed. The stage labels
// (B.x, C.x) are kept from the original comments; they presumably refer to
// the parameter-passing rules of the AArch64 procedure call standard.
static unsigned long arm64_pcs_aux(int n, CType **type, unsigned long *a)
{
    int gpr = 0;                 // next free X register
    int vreg = 0;                // next free V register
    unsigned long sp_off = 32;   // next stack offset, biased by 32
    int i;

    for (i = 0; i < n; i++) {
        CType *ty = type[i];
        int bt = ty->t & VT_BTYPE;
        int hfa = arm64_hfa(ty, 0);
        int size, align;
        int fp;

        // Arrays and functions are passed as 8-byte pointers.
        if ((ty->t & VT_ARRAY) || bt == VT_FUNC) {
            size = 8;
            align = 8;
        } else {
            size = type_size(ty, &align);
        }

        // B.2: an HFA is left as it is.
        if (!hfa) {
            if (size > 16) {
                // B.3: replace with pointer
                if (gpr < 8) {
                    a[i] = gpr << 1 | 1;
                    gpr++;
                } else {
                    sp_off = (sp_off + 7) & ~7;
                    a[i] = sp_off | 1;
                    sp_off += 8;
                }
                continue;
            }
            if (bt == VT_STRUCT) {
                // B.4: round the size up to a multiple of 8
                size = (size + 7) & ~7;
            }
        }

        fp = is_float(ty->t);

        // C.1: floating-point scalar in the next vector register
        if (fp && vreg < 8) {
            a[i] = 16 + (vreg << 1);
            vreg++;
            continue;
        }

        // C.2: HFA in consecutive vector registers, if all members fit
        if (hfa && vreg + hfa <= 8) {
            a[i] = 16 + (vreg << 1);
            vreg += hfa;
            continue;
        }

        // C.3: an HFA that does not fit uses up the vector registers
        if (hfa) {
            vreg = 8;
            size = (size + 7) & ~7;
        }

        // C.4: align the stack offset for HFAs and long doubles
        if (hfa || bt == VT_LDOUBLE) {
            sp_off = (sp_off + 7) & ~7;
            sp_off = (sp_off + align - 1) & -align;
        }

        // C.5: a float takes a full 8-byte stack slot
        if (bt == VT_FLOAT) {
            size = 8;
        }

        // C.6: HFA or floating-point scalar on the stack
        if (hfa || fp) {
            a[i] = sp_off;
            sp_off += size;
            continue;
        }

        // C.7: small non-struct value in the next general register
        if (bt != VT_STRUCT && size <= 8 && gpr < 8) {
            a[i] = gpr << 1;
            gpr++;
            continue;
        }

        // C.8: 16-byte aligned values start at an even register
        if (align == 16) {
            gpr = (gpr + 1) & ~1;
        }

        // C.9: 16-byte non-struct value in a register pair
        if (bt != VT_STRUCT && size == 16 && gpr < 7) {
            a[i] = gpr << 1;
            gpr += 2;
            continue;
        }

        // C.10: struct in the remaining general registers, if it fits
        if (bt == VT_STRUCT && size <= (8 - gpr) * 8) {
            a[i] = gpr << 1;
            gpr += (size + 7) >> 3;
            continue;
        }

        // C.11: general registers are exhausted from here on
        gpr = 8;

        // C.12: align the stack offset
        sp_off = (sp_off + 7) & ~7;
        sp_off = (sp_off + align - 1) & -align;

        // C.13: struct on the stack
        if (bt == VT_STRUCT) {
            a[i] = sp_off;
            sp_off += size;
            continue;
        }

        // C.14: scalars take at least 8 bytes of stack
        if (size < 8) {
            size = 8;
        }

        // C.15: scalar on the stack
        a[i] = sp_off;
        sp_off += size;
    }

    return sp_off - 32;
}
|
|
|
|
|
|
|
|
// Compute the locations of a function's return value and its arguments.
//
//   n     number of arguments
//   type  n + 1 entries: type[0] is the return type, type[1..n] the
//         argument types
//   a     n + 1 entries receiving the locations, in the encoding produced
//         by arm64_pcs_aux(); a[0] is (unsigned long)-1 for a void return
//
// Returns the number of stack bytes used by the arguments.
static unsigned long arm64_pcs(int n, CType **type, unsigned long *a)
{
    unsigned long stack_bytes;

    // Return type: classified as if it were a sole first argument.
    if ((type[0]->t & VT_BTYPE) != VT_VOID) {
        arm64_pcs_aux(1, type, a);
        // Only X0, "replaced by pointer" (printed as X8 below) and V0
        // are possible here.
        assert(a[0] == 0 || a[0] == 1 || a[0] == 16);
    } else {
        a[0] = -1;
    }

    // Argument types:
    stack_bytes = arm64_pcs_aux(n, type + 1, a + 1);

    // Disabled debugging aid: dump every assigned location.
    if (0) {
        int k;

        for (k = 0; k <= n; k++) {
            const char *ptr_tag = a[k] & 1 ? " pointer" : "";

            if (!k)
                printf("arm64_pcs return: ");
            else
                printf("arm64_pcs arg %d: ", k);

            if (a[k] == (unsigned long)-1) {
                printf("void\n");
            } else if (a[k] == 1 && !k) {
                printf("X8 pointer\n");
            } else if (a[k] < 16) {
                printf("X%lu%s\n", a[k] / 2, ptr_tag);
            } else if (a[k] < 32) {
                printf("V%lu\n", a[k] / 2 - 8);
            } else {
                printf("stack %lu%s\n", (a[k] - 32) & ~1, ptr_tag);
            }
        }
    }

    return stack_bytes;
}
|
|
|
|
|
|
|
|
ST_FUNC void gfunc_call(int nb_args)
|
|
|
|
{
|
|
|
|
CType *return_type;
|
|
|
|
CType **t;
|
|
|
|
unsigned long *a, *a1;
|
|
|
|
unsigned long stack;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
return_type = &vtop[-nb_args].type.ref->type;
|
|
|
|
if ((return_type->t & VT_BTYPE) == VT_STRUCT)
|
|
|
|
--nb_args;
|
|
|
|
|
|
|
|
t = tcc_malloc((nb_args + 1) * sizeof(*t));
|
|
|
|
a = tcc_malloc((nb_args + 1) * sizeof(*a));
|
|
|
|
a1 = tcc_malloc((nb_args + 1) * sizeof(*a1));
|
|
|
|
|
|
|
|
t[0] = return_type;
|
|
|
|
for (i = 0; i < nb_args; i++)
|
|
|
|
t[nb_args - i] = &vtop[-i].type;
|
|
|
|
|
|
|
|
stack = arm64_pcs(nb_args, t, a);
|
|
|
|
|
|
|
|
// Allocate space for structs replaced by pointer:
|
|
|
|
for (i = nb_args; i; i--)
|
|
|
|
if (a[i] & 1) {
|
|
|
|
SValue *arg = &vtop[i - nb_args];
|
|
|
|
int align, size = type_size(&arg->type, &align);
|
|
|
|
assert((arg->type.t & VT_BTYPE) == VT_STRUCT);
|
|
|
|
stack = (stack + align - 1) & -align;
|
|
|
|
a1[i] = stack;
|
|
|
|
stack += size;
|
|
|
|
}
|
|
|
|
|
|
|
|
stack = (stack + 15) >> 4 << 4;
|
|
|
|
|
|
|
|
assert(stack < 0x1000);
|
|
|
|
if (stack)
|
|
|
|
o(0xd10003ff | stack << 10); // sub sp,sp,#(n)
|
|
|
|
|
|
|
|
// First pass: set all values on stack
|
|
|
|
for (i = nb_args; i; i--) {
|
|
|
|
vpushv(vtop - nb_args + i);
|
|
|
|
|
|
|
|
if (a[i] & 1) {
|
|
|
|
// struct replaced by pointer
|
|
|
|
int r = get_reg(RC_INT);
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_spoff(intr(r), a1[i]);
|
2015-02-14 02:58:31 +08:00
|
|
|
vset(&vtop->type, r | VT_LVAL, 0);
|
|
|
|
vswap();
|
|
|
|
vstore();
|
|
|
|
if (a[i] >= 32) {
|
|
|
|
// pointer on stack
|
|
|
|
r = get_reg(RC_INT);
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_spoff(intr(r), a1[i]);
|
|
|
|
arm64_strx(3, intr(r), 31, (a[i] - 32) >> 1 << 1);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (a[i] >= 32) {
|
|
|
|
// value on stack
|
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
int r = get_reg(RC_INT);
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_spoff(intr(r), a[i] - 32);
|
2015-02-14 02:58:31 +08:00
|
|
|
vset(&vtop->type, r | VT_LVAL, 0);
|
|
|
|
vswap();
|
|
|
|
vstore();
|
|
|
|
}
|
|
|
|
else if (is_float(vtop->type.t)) {
|
|
|
|
gv(RC_FLOAT);
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_strv(arm64_type_size(vtop[0].type.t),
|
2015-02-14 02:58:31 +08:00
|
|
|
fltr(vtop[0].r), 31, a[i] - 32);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
gv(RC_INT);
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_strx(arm64_type_size(vtop[0].type.t),
|
|
|
|
intr(vtop[0].r), 31, a[i] - 32);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
--vtop;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Second pass: assign values to registers
|
|
|
|
for (i = nb_args; i; i--, vtop--) {
|
|
|
|
if (a[i] < 16 && !(a[i] & 1)) {
|
|
|
|
// value in general-purpose registers
|
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
int align, size = type_size(&vtop->type, &align);
|
|
|
|
vtop->type.t = VT_PTR;
|
|
|
|
gaddrof();
|
|
|
|
gv(RC_R(a[i] / 2));
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_ldrs(a[i] / 2, size);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
gv(RC_R(a[i] / 2));
|
|
|
|
}
|
|
|
|
else if (a[i] < 16)
|
|
|
|
// struct replaced by pointer in register
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_spoff(a[i] / 2, a1[i]);
|
2015-02-14 02:58:31 +08:00
|
|
|
else if (a[i] < 32) {
|
|
|
|
// value in floating-point registers
|
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t j, sz, n = arm64_hfa(&vtop->type, &sz);
|
2015-02-14 02:58:31 +08:00
|
|
|
vtop->type.t = VT_PTR;
|
|
|
|
gaddrof();
|
|
|
|
gv(RC_R30);
|
|
|
|
for (j = 0; j < n; j++)
|
|
|
|
o(0x3d4003c0 |
|
|
|
|
(sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 |
|
|
|
|
(a[i] / 2 - 8 + j) |
|
|
|
|
j << 10); // ldr ([sdq])(*),[x30,#(j * sz)]
|
|
|
|
}
|
|
|
|
else
|
|
|
|
gv(RC_F(a[i] / 2 - 8));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((return_type->t & VT_BTYPE) == VT_STRUCT) {
|
|
|
|
if (a[0] == 1) {
|
|
|
|
// indirect return: set x8 and discard the stack value
|
|
|
|
gv(RC_R(8));
|
|
|
|
--vtop;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
// return in registers: keep the address for after the call
|
|
|
|
vswap();
|
|
|
|
}
|
|
|
|
|
2019-01-12 01:52:52 +08:00
|
|
|
save_regs(0);
|
2015-02-14 02:58:31 +08:00
|
|
|
arm64_gen_bl_or_b(0);
|
|
|
|
--vtop;
|
|
|
|
if (stack)
|
|
|
|
o(0x910003ff | stack << 10); // add sp,sp,#(n)
|
|
|
|
|
|
|
|
{
|
|
|
|
int rt = return_type->t;
|
|
|
|
int bt = rt & VT_BTYPE;
|
2019-09-23 23:45:39 +08:00
|
|
|
if (bt == VT_STRUCT && !(a[0] & 1)) {
|
2015-02-14 02:58:31 +08:00
|
|
|
// A struct was returned in registers, so write it out:
|
|
|
|
gv(RC_R(8));
|
|
|
|
--vtop;
|
|
|
|
if (a[0] == 0) {
|
|
|
|
int align, size = type_size(return_type, &align);
|
|
|
|
assert(size <= 16);
|
|
|
|
if (size > 8)
|
|
|
|
o(0xa9000500); // stp x0,x1,[x8]
|
|
|
|
else if (size)
|
2015-03-03 04:45:58 +08:00
|
|
|
arm64_strx(size > 4 ? 3 : size > 2 ? 2 : size > 1, 0, 8, 0);
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
else if (a[0] == 16) {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t j, sz, n = arm64_hfa(return_type, &sz);
|
2015-02-14 02:58:31 +08:00
|
|
|
for (j = 0; j < n; j++)
|
|
|
|
o(0x3d000100 |
|
|
|
|
(sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 |
|
|
|
|
(a[i] / 2 - 8 + j) |
|
|
|
|
j << 10); // str ([sdq])(*),[x8,#(j * sz)]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tcc_free(a1);
|
|
|
|
tcc_free(a);
|
|
|
|
tcc_free(t);
|
|
|
|
}
|
|
|
|
|
|
|
|
// State of the function currently being compiled; set by gfunc_prolog().

// Stack bytes used by the named arguments, as returned by arm64_pcs().
// gen_va_start() adds 224 to it to form the first va_list field.
static unsigned long arm64_func_va_list_stack;
// Value gen_va_start() stores at offset 24 of the va_list (the field that
// gen_va_arg() labels __gr_offs).
static int arm64_func_va_list_gr_offs;
// Value gen_va_start() stores at offset 28 of the va_list (the field that
// gen_va_arg() labels __vr_offs).
static int arm64_func_va_list_vr_offs;
// Code offset ("ind") of the two nop placeholders emitted by gfunc_prolog();
// gfunc_epilog() overwrites them with the stack-frame allocation.
static int arm64_func_sub_sp_offset;
|
|
|
|
|
2019-12-10 15:07:25 +08:00
|
|
|
// Generate the function prologue for "func_sym".
//
// Every function gets the same 224-byte save area below the caller's SP
// (offsets as in the stp comments below):
//     0   x29, x30
//    16   q0..q7
//   144   x8 (stored twice); func_vc is set to this offset
//   160   x0..x7
// Arguments passed on the stack start at offset 224.
//
// Each parameter is then pushed as a local symbol that refers to its slot
// in that area, and the va_list bookkeeping variables
// (arm64_func_va_list_*) are updated for gen_va_start().
ST_FUNC void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    int n = 0;
    int i = 0;
    Sym *sym;
    CType **t;
    unsigned long *a;

    // Why doesn't the caller (gen_function) set func_vt?
    func_vt = func_type->ref->type;
    func_vc = 144; // offset of where x8 is stored

    // The list starts with the entry carrying the return type, so n ends
    // up as one more than the number of parameters.
    for (sym = func_type->ref; sym; sym = sym->next)
        ++n;
    t = tcc_malloc(n * sizeof(*t));
    a = tcc_malloc(n * sizeof(*a));

    for (sym = func_type->ref; sym; sym = sym->next)
        t[i++] = &sym->type;

    arm64_func_va_list_stack = arm64_pcs(n - 1, t, a);

    o(0xa9b27bfd); // stp x29,x30,[sp,#-224]!
    o(0xad0087e0); // stp q0,q1,[sp,#16]
    o(0xad018fe2); // stp q2,q3,[sp,#48]
    o(0xad0297e4); // stp q4,q5,[sp,#80]
    o(0xad039fe6); // stp q6,q7,[sp,#112]
    o(0xa90923e8); // stp x8,x8,[sp,#144]
    o(0xa90a07e0); // stp x0,x1,[sp,#160]
    o(0xa90b0fe2); // stp x2,x3,[sp,#176]
    o(0xa90c17e4); // stp x4,x5,[sp,#192]
    o(0xa90d1fe6); // stp x6,x7,[sp,#208]

    // Defaults when no named argument uses a general/vector register:
    // minus the size of the whole x0..x7 (64) and q0..q7 (128) areas.
    arm64_func_va_list_gr_offs = -64;
    arm64_func_va_list_vr_offs = -128;

    for (i = 1, sym = func_type->ref->next; sym; i++, sym = sym->next) {
        // Frame offset of the parameter, depending on whether it arrived
        // in an X register, a V register or on the stack.
        int off = (a[i] < 16 ? 160 + a[i] / 2 * 8 :
                   a[i] < 32 ? 16 + (a[i] - 16) / 2 * 16 :
                   224 + ((a[i] - 32) >> 1 << 1));
        // Bit 0 of a[i] marks a value replaced by a pointer (VT_LLOCAL).
        sym_push(sym->v & ~SYM_FIELD, &sym->type,
                 (a[i] & 1 ? VT_LLOCAL : VT_LOCAL) | VT_LVAL,
                 off);

        if (a[i] < 16) {
            int align, size = type_size(&sym->type, &align);
            // Values larger than 8 bytes that are not passed by pointer
            // use two registers.
            arm64_func_va_list_gr_offs = (a[i] / 2 - 7 +
                                          (!(a[i] & 1) && size > 8)) * 8;
        }
        else if (a[i] < 32) {
            uint32_t hfa = arm64_hfa(&sym->type, 0);
            arm64_func_va_list_vr_offs = (a[i] / 2 - 16 +
                                          (hfa ? hfa : 1)) * 16;
        }

        // HFAs of float and double need to be written differently:
        // the q-register stores above leave the members 16 bytes apart,
        // so store them again one after another starting at "off".
        if (16 <= a[i] && a[i] < 32 && (sym->type.t & VT_BTYPE) == VT_STRUCT) {
            // arm64_hfa() writes through an "int *"; fetch the size into
            // an int and do the bit arithmetic in uint32_t.
            int member_size = 0;
            uint32_t j, sz, k = arm64_hfa(&sym->type, &member_size);
            sz = member_size;
            if (sz < 16)
                for (j = 0; j < k; j++) {
                    o(0x3d0003e0 | -(sz & 8) << 27 | (sz & 4) << 29 |
                      ((a[i] - 16) / 2 + j) | (off / sz + j) << 10);
                    // str ([sdq])(*),[sp,#(j * sz)]
                }
        }
    }

    tcc_free(a);
    tcc_free(t);

    o(0x910003fd); // mov x29,sp
    arm64_func_sub_sp_offset = ind;
    // In gfunc_epilog these will be replaced with code to decrement SP:
    o(0xd503201f); // nop
    o(0xd503201f); // nop
    loc = 0;
}
|
|
|
|
|
|
|
|
ST_FUNC void gen_va_start(void)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
--vtop; // we don't need the "arg"
|
|
|
|
gaddrof();
|
|
|
|
r = intr(gv(RC_INT));
|
|
|
|
|
|
|
|
if (arm64_func_va_list_stack) {
|
|
|
|
//xx could use add (immediate) here
|
|
|
|
arm64_movimm(30, arm64_func_va_list_stack + 224);
|
|
|
|
o(0x8b1e03be); // add x30,x29,x30
|
|
|
|
}
|
|
|
|
else
|
|
|
|
o(0x910383be); // add x30,x29,#224
|
|
|
|
o(0xf900001e | r << 5); // str x30,[x(r)]
|
|
|
|
|
|
|
|
if (arm64_func_va_list_gr_offs) {
|
|
|
|
if (arm64_func_va_list_stack)
|
|
|
|
o(0x910383be); // add x30,x29,#224
|
|
|
|
o(0xf900041e | r << 5); // str x30,[x(r),#8]
|
|
|
|
}
|
|
|
|
|
|
|
|
if (arm64_func_va_list_vr_offs) {
|
|
|
|
o(0x910243be); // add x30,x29,#144
|
|
|
|
o(0xf900081e | r << 5); // str x30,[x(r),#16]
|
|
|
|
}
|
|
|
|
|
|
|
|
arm64_movimm(30, arm64_func_va_list_gr_offs);
|
|
|
|
o(0xb900181e | r << 5); // str w30,[x(r),#24]
|
|
|
|
|
|
|
|
arm64_movimm(30, arm64_func_va_list_vr_offs);
|
|
|
|
o(0xb9001c1e | r << 5); // str w30,[x(r),#28]
|
|
|
|
|
|
|
|
--vtop;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate code for va_arg(ap, t). On entry the top of the value stack is
// the va_list lvalue; on exit it is an lvalue whose address is held in a
// newly allocated register and which designates the fetched argument.
// x30 is the scratch register.
//
// Floating-point scalars and HFAs are fetched through __vr_offs/__vr_top,
// everything else through __gr_offs/__gr_top; in both cases the emitted
// code falls back to __stack when the updated offset becomes positive.
ST_FUNC void gen_va_arg(CType *t)
{
    int align, size = type_size(t, &align);
    int fsize, hfa = arm64_hfa(t, &fsize);
    uint32_t ap, dst;

    // A floating-point scalar is handled like an HFA with one member.
    if (is_float(t->t)) {
        hfa = 1;
        fsize = size;
    }

    gaddrof();
    ap = intr(gv(RC_INT));
    dst = get_reg(RC_INT);
    vtop[0].r = dst | VT_LVAL;
    dst = intr(dst);

    if (hfa) {
        uint32_t reg_bytes = hfa << 4;                  // 16 bytes per member
        uint32_t stk_bytes = (size + 7) & -(uint32_t)8; // size on the stack
        uint32_t to_regs, to_done;                      // branches to patch

        o(0xb9401c1e | ap << 5); // ldr w30,[x(r0),#28] // __vr_offs
        o(0x310003c0 | dst | reg_bytes << 10); // adds w(r1),w30,#(rsz)
        to_regs = ind;
        o(0x5400000d); // b.le lab1

        // Argument is on the stack.
        o(0xf9400000 | dst | ap << 5); // ldr x(r1),[x(r0)] // __stack
        if (fsize == 16) {
            o(0x91003c00 | dst | dst << 5); // add x(r1),x(r1),#15
            o(0x927cec00 | dst | dst << 5); // and x(r1),x(r1),#-16
        }
        o(0x9100001e | dst << 5 | stk_bytes << 10); // add x30,x(r1),#(ssz)
        o(0xf900001e | ap << 5); // str x30,[x(r0)] // __stack
        to_done = ind;
        o(0x14000000); // b lab2

        // lab1: argument is in the vector register save area.
        write32le(cur_text_section->data + to_regs,
                  0x5400000d | (ind - to_regs) << 3);
        o(0xb9001c00 | dst | ap << 5); // str w(r1),[x(r0),#28] // __vr_offs
        o(0xf9400800 | dst | ap << 5); // ldr x(r1),[x(r0),#16] // __vr_top
        if (hfa != 1 && fsize != 16) {
            // We need to change the layout of this HFA.
            // Get some space on the stack using global variable "loc":
            loc = (loc - size) & -(uint32_t)align;
            o(0x8b3ec000 | 30 | dst << 5); // add x30,x(r1),w30,sxtw
            arm64_movimm(dst, loc);
            o(0x8b0003a0 | dst | dst << 16); // add x(r1),x29,x(r1)
            o(0x4c402bdc | (uint32_t)fsize << 7 |
              (uint32_t)(hfa == 2) << 15 |
              (uint32_t)(hfa == 3) << 14); // ld1 {v28.(4s|2d),...},[x30]
            o(0x0d00801c | dst << 5 | (fsize == 8) << 10 |
              (uint32_t)(hfa != 2) << 13 |
              (uint32_t)(hfa != 3) << 21); // st(hfa) {v28.(s|d),...}[0],[x(r1)]
        } else {
            o(0x8b3ec000 | dst | dst << 5); // add x(r1),x(r1),w30,sxtw
        }

        // lab2:
        write32le(cur_text_section->data + to_done,
                  0x14000000 | (ind - to_done) >> 2);
    } else {
        // Values larger than 16 bytes are fetched through an 8-byte pointer.
        uint32_t slot = size > 16 ? 8 : (size + 7) & -8;

        o(0xb940181e | ap << 5); // ldr w30,[x(r0),#24] // __gr_offs
        if (align == 16) {
            assert(0); // this path untested but needed for __uint128_t
            o(0x11003fde); // add w30,w30,#15
            o(0x121c6fde); // and w30,w30,#-16
        }
        o(0x310003c0 | dst | slot << 10); // adds w(r1),w30,#(n)
        o(0x540000ad); // b.le .+20
        o(0xf9400000 | dst | ap << 5); // ldr x(r1),[x(r0)] // __stack
        o(0x9100001e | dst << 5 | slot << 10); // add x30,x(r1),#(n)
        o(0xf900001e | ap << 5); // str x30,[x(r0)] // __stack
        o(0x14000004); // b .+16
        o(0xb9001800 | dst | ap << 5); // str w(r1),[x(r0),#24] // __gr_offs
        o(0xf9400400 | dst | ap << 5); // ldr x(r1),[x(r0),#8] // __gr_top
        o(0x8b3ec000 | dst | dst << 5); // add x(r1),x(r1),w30,sxtw
        if (size > 16) {
            o(0xf9400000 | dst | dst << 5); // ldr x(r1),[x(r1)]
        }
    }
}
|
|
|
|
|
2015-11-10 07:06:05 +08:00
|
|
|
// Struct-return query. This backend always answers 0 and leaves "ret",
// "align" and "regsize" untouched; gfunc_call() and gfunc_return() place
// struct return values themselves, based on arm64_pcs().
ST_FUNC int gfunc_sret(CType *vt,
                       int variadic,
                       CType *ret,
                       int *align,
                       int *regsize)
{
    return 0;
}
|
|
|
|
|
2017-02-09 02:45:31 +08:00
|
|
|
// Move the value on top of the value stack to where a function returning
// "func_type" must leave it, then pop it.
//
// The location comes from arm64_pcs():
//   -1  void: nothing to do
//    0  x0 (and x1): scalar, or small struct loaded from its address
//    1  indirect: store the struct through the pointer that the prologue
//       saved at frame offset func_vc
//   16  v0...: floating-point scalar, or HFA loaded member by member
ST_FUNC void gfunc_return(CType *func_type)
{
    CType *t = func_type;
    unsigned long a;

    arm64_pcs(0, &t, &a);
    switch (a) {
    case -1:
        break;
    case 0:
        if ((func_type->t & VT_BTYPE) == VT_STRUCT) {
            int align, size = type_size(func_type, &align);
            gaddrof();
            gv(RC_R(0));
            arm64_ldrs(0, size);
        }
        else
            gv(RC_IRET);
        break;
    case 1: {
        CType type = *func_type;
        mk_pointer(&type);
        vset(&type, VT_LOCAL | VT_LVAL, func_vc);
        indir();
        vswap();
        vstore();
        break;
    }
    case 16:
        if ((func_type->t & VT_BTYPE) == VT_STRUCT) {
            // arm64_hfa() writes through an "int *"; fetch the size into
            // an int and do the bit arithmetic in uint32_t.
            int member_size = 0;
            uint32_t j, sz, n = arm64_hfa(&vtop->type, &member_size);
            sz = member_size;
            gaddrof();
            gv(RC_R(0));
            for (j = 0; j < n; j++)
                o(0x3d400000 |
                  (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 |
                  j | j << 10); // ldr ([sdq])(*),[x0,#(j * sz)]
        }
        else
            gv(RC_FRET);
        break;
    default:
        assert(0);
    }
    vtop--;
}
|
|
|
|
|
|
|
|
ST_FUNC void gfunc_epilog(void)
|
|
|
|
{
|
|
|
|
if (loc) {
|
|
|
|
// Insert instructions to subtract size of stack frame from SP.
|
2015-11-20 02:21:14 +08:00
|
|
|
unsigned char *ptr = cur_text_section->data + arm64_func_sub_sp_offset;
|
2015-02-14 02:58:31 +08:00
|
|
|
uint64_t diff = (-loc + 15) & ~15;
|
|
|
|
if (!(diff >> 24)) {
|
|
|
|
if (diff & 0xfff) // sub sp,sp,#(diff & 0xfff)
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr, 0xd10003ff | (diff & 0xfff) << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
if (diff >> 12) // sub sp,sp,#(diff >> 12),lsl #12
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr + 4, 0xd14003ff | (diff >> 12) << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
// In this case we may subtract more than necessary,
|
|
|
|
// but always less than 17/16 of what we were aiming for.
|
|
|
|
int i = 0;
|
|
|
|
int j = 0;
|
|
|
|
while (diff >> 20) {
|
|
|
|
diff = (diff + 0xffff) >> 16;
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
while (diff >> 16) {
|
|
|
|
diff = (diff + 1) >> 1;
|
|
|
|
++j;
|
|
|
|
}
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr, 0xd2800010 | diff << 5 | i << 21);
|
2015-02-14 02:58:31 +08:00
|
|
|
// mov x16,#(diff),lsl #(16 * i)
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(ptr + 4, 0xcb3063ff | j << 10);
|
2015-02-14 02:58:31 +08:00
|
|
|
// sub sp,sp,x16,lsl #(j)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
o(0x910003bf); // mov sp,x29
|
|
|
|
o(0xa8ce7bfd); // ldp x29,x30,[sp],#224
|
|
|
|
|
|
|
|
o(0xd65f03c0); // ret
|
|
|
|
}
|
|
|
|
|
2018-04-07 05:01:45 +08:00
|
|
|
// Emit nop instructions to fill "bytes" bytes of code. A count that is not
// a multiple of the 4-byte instruction size is reported via tcc_error().
ST_FUNC void gen_fill_nops(int bytes)
{
    if (bytes & 3) {
        tcc_error("alignment of code section not multiple of 4");
    }
    for (; bytes > 0; bytes -= 4) {
        o(0xd503201f); // nop
    }
}
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
// Generate forward branch to label:
|
|
|
|
ST_FUNC int gjmp(int t)
|
|
|
|
{
|
|
|
|
int r = ind;
|
2016-12-19 00:23:33 +08:00
|
|
|
if (nocode_wanted)
|
|
|
|
return t;
|
2015-02-14 02:58:31 +08:00
|
|
|
o(t);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate branch to known address:
|
|
|
|
ST_FUNC void gjmp_addr(int a)
|
|
|
|
{
|
|
|
|
assert(a - ind + 0x8000000 < 0x10000000);
|
|
|
|
o(0x14000000 | ((a - ind) >> 2 & 0x3ffffff));
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ST_FUNC int gjmp_append(int n, int t)
|
|
|
|
{
|
|
|
|
void *p;
|
|
|
|
/* insert vtop->c jump list in t */
|
|
|
|
if (n) {
|
|
|
|
uint32_t n1 = n, n2;
|
|
|
|
while ((n2 = read32le(p = cur_text_section->data + n1)))
|
|
|
|
n1 = n2;
|
|
|
|
write32le(p, t);
|
|
|
|
t = n;
|
|
|
|
}
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
|
|
|
void arm64_vset_VT_CMP(int op)
|
|
|
|
{
|
|
|
|
if (op >= TOK_ULT && op <= TOK_GT) {
|
|
|
|
vtop->cmp_r = vtop->r;
|
|
|
|
vset_VT_CMP(0x80);
|
|
|
|
}
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
This way we can delay emitting the non-inverted or inverted jump
until we can see which one is more suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
// Forward declaration: arm64_load_cmp() below calls this before it is
// defined. Generates an integer operation "op"; "l" selects the 64-bit form.
static void arm64_gen_opil(int op, uint32_t l);
|
|
|
|
|
|
|
|
// Materialise the comparison result described by "sv" in register "r".
// The value is first taken from the register remembered in sv->cmp_r. If
// bit 0 of sv->c.i is set, the value on top of the value stack is XORed
// with 1. Finally the value is loaded into "r" unless it is already there.
static void arm64_load_cmp(int r, SValue *sv)
{
    int inverted;

    sv->r = sv->cmp_r;

    inverted = (sv->c.i & 1) != 0;
    if (inverted) {
        vpushi(1);
        arm64_gen_opil('^', 0);
    }

    if (sv->r == r) {
        return;
    }
    load(r, sv);
    sv->r = r;
}
|
|
|
|
|
|
|
|
ST_FUNC int gjmp_cond(int op, int t)
|
2015-02-14 02:58:31 +08:00
|
|
|
{
|
|
|
|
int bt = vtop->type.t & VT_BTYPE;
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
|
|
|
|
int inv = op & 1;
|
|
|
|
vtop->r = vtop->cmp_r;
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
if (bt == VT_LDOUBLE) {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t a, b, f = fltr(gv(RC_FLOAT));
|
2015-02-14 02:58:31 +08:00
|
|
|
a = get_reg(RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop[0].r = a;
|
|
|
|
b = get_reg(RC_INT);
|
|
|
|
a = intr(a);
|
|
|
|
b = intr(b);
|
|
|
|
o(0x4e083c00 | a | f << 5); // mov x(a),v(f).d[0]
|
|
|
|
o(0x4e183c00 | b | f << 5); // mov x(b),v(f).d[1]
|
|
|
|
o(0xaa000400 | a | a << 5 | b << 16); // orr x(a),x(a),x(b),lsl #1
|
|
|
|
o(0xb4000040 | a | !!inv << 24); // cbz/cbnz x(a),.+8
|
|
|
|
--vtop;
|
|
|
|
}
|
|
|
|
else if (bt == VT_FLOAT || bt == VT_DOUBLE) {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t a = fltr(gv(RC_FLOAT));
|
2015-02-14 02:58:31 +08:00
|
|
|
o(0x1e202008 | a << 5 | (bt != VT_FLOAT) << 22); // fcmp
|
|
|
|
o(0x54000040 | !!inv); // b.eq/b.ne .+8
|
|
|
|
}
|
|
|
|
else {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t ll = (bt == VT_PTR || bt == VT_LLONG);
|
|
|
|
uint32_t a = intr(gv(RC_INT));
|
2015-02-14 02:58:31 +08:00
|
|
|
o(0x34000040 | a | !!inv << 24 | ll << 31); // cbz/cbnz wA,.+8
|
|
|
|
}
|
|
|
|
return gjmp(t);
|
|
|
|
}
|
|
|
|
|
2015-03-08 01:03:51 +08:00
|
|
|
// Test whether "sv" is a plain integer constant (VT_CONST without VT_LVAL
// or VT_SYM). Returns 1 if so, else 0.
//
// If "val" is non-null and "sv" is a constant, *val receives its value:
// unchanged for long long and pointer types; otherwise the low 32 bits,
// zero-extended for unsigned types and sign-extended from bit 31 for
// signed ones.
static int arm64_iconst(uint64_t *val, SValue *sv)
{
    int t, bt;

    if ((sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST) {
        return 0;
    }
    if (!val) {
        return 1;
    }

    t = sv->type.t;
    bt = t & VT_BTYPE;
    if (bt == VT_LLONG || bt == VT_PTR) {
        *val = sv->c.i;
    } else if (t & VT_UNSIGNED) {
        *val = (uint32_t)sv->c.i;
    } else {
        *val = (uint32_t)sv->c.i | -(sv->c.i & 0x80000000);
    }
    return 1;
}
|
|
|
|
|
|
|
|
static int arm64_gen_opic(int op, uint32_t l, int rev, uint64_t val,
|
|
|
|
uint32_t x, uint32_t a)
|
|
|
|
{
|
|
|
|
if (op == '-' && !rev) {
|
|
|
|
val = -val;
|
|
|
|
op = '+';
|
|
|
|
}
|
|
|
|
val = l ? val : (uint32_t)val;
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
|
|
|
|
case '+': {
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t s = l ? val >> 63 : val >> 31;
|
2015-03-08 01:03:51 +08:00
|
|
|
val = s ? -val : val;
|
|
|
|
val = l ? val : (uint32_t)val;
|
|
|
|
if (!(val & ~(uint64_t)0xfff))
|
|
|
|
o(0x11000000 | l << 31 | s << 30 | x | a << 5 | val << 10);
|
|
|
|
else if (!(val & ~(uint64_t)0xfff000))
|
|
|
|
o(0x11400000 | l << 31 | s << 30 | x | a << 5 | val >> 12 << 10);
|
|
|
|
else {
|
|
|
|
arm64_movimm(30, val); // use x30
|
|
|
|
o(0x0b1e0000 | l << 31 | s << 30 | x | a << 5);
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '-':
|
|
|
|
if (!val)
|
|
|
|
o(0x4b0003e0 | l << 31 | x | a << 16); // neg
|
|
|
|
else if (val == (l ? (uint64_t)-1 : (uint32_t)-1))
|
|
|
|
o(0x2a2003e0 | l << 31 | x | a << 16); // mvn
|
|
|
|
else {
|
|
|
|
arm64_movimm(30, val); // use x30
|
|
|
|
o(0x4b0003c0 | l << 31 | x | a << 16); // sub
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
case '^':
|
|
|
|
if (val == -1 || (val == 0xffffffff && !l)) {
|
|
|
|
o(0x2a2003e0 | l << 31 | x | a << 16); // mvn
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
// fall through
|
|
|
|
case '&':
|
|
|
|
case '|': {
|
|
|
|
int e = arm64_encode_bimm64(l ? val : val | val << 32);
|
|
|
|
if (e < 0)
|
|
|
|
return 0;
|
|
|
|
o((op == '&' ? 0x12000000 :
|
|
|
|
op == '|' ? 0x32000000 : 0x52000000) |
|
|
|
|
l << 31 | x | a << 5 | (uint32_t)e << 10);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
case TOK_SAR:
|
|
|
|
case TOK_SHL:
|
|
|
|
case TOK_SHR: {
|
|
|
|
uint32_t n = 32 << l;
|
|
|
|
val = val & (n - 1);
|
|
|
|
if (rev)
|
|
|
|
return 0;
|
|
|
|
if (!val)
|
|
|
|
assert(0);
|
|
|
|
else if (op == TOK_SHL)
|
|
|
|
o(0x53000000 | l << 31 | l << 22 | x | a << 5 |
|
|
|
|
(n - val) << 16 | (n - 1 - val) << 10); // lsl
|
|
|
|
else
|
|
|
|
o(0x13000000 | (op == TOK_SHR) << 30 | l << 31 | l << 22 |
|
|
|
|
x | a << 5 | val << 16 | (n - 1) << 10); // lsr/asr
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void arm64_gen_opil(int op, uint32_t l)
|
|
|
|
{
|
|
|
|
uint32_t x, a, b;
|
|
|
|
|
|
|
|
// Special treatment for operations with a constant operand:
|
|
|
|
{
|
|
|
|
uint64_t val;
|
|
|
|
int rev = 1;
|
|
|
|
|
|
|
|
if (arm64_iconst(0, &vtop[0])) {
|
|
|
|
vswap();
|
|
|
|
rev = 0;
|
|
|
|
}
|
|
|
|
if (arm64_iconst(&val, &vtop[-1])) {
|
|
|
|
gv(RC_INT);
|
|
|
|
a = intr(vtop[0].r);
|
|
|
|
--vtop;
|
|
|
|
x = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
if (arm64_gen_opic(op, l, rev, val, intr(x), a)) {
|
|
|
|
vtop[0].r = x;
|
|
|
|
vswap();
|
|
|
|
--vtop;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!rev)
|
|
|
|
vswap();
|
|
|
|
}
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
gv2(RC_INT, RC_INT);
|
|
|
|
assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST);
|
|
|
|
a = intr(vtop[-1].r);
|
|
|
|
b = intr(vtop[0].r);
|
|
|
|
vtop -= 2;
|
|
|
|
x = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
|
|
|
x = intr(x);
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case '%':
|
|
|
|
// Use x30 for quotient:
|
|
|
|
o(0x1ac00c00 | l << 31 | 30 | a << 5 | b << 16); // sdiv
|
2015-11-10 07:06:05 +08:00
|
|
|
o(0x1b008000 | l << 31 | x | (uint32_t)30 << 5 |
|
|
|
|
b << 16 | a << 10); // msub
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
case '&':
|
|
|
|
o(0x0a000000 | l << 31 | x | a << 5 | b << 16); // and
|
|
|
|
break;
|
|
|
|
case '*':
|
|
|
|
o(0x1b007c00 | l << 31 | x | a << 5 | b << 16); // mul
|
|
|
|
break;
|
|
|
|
case '+':
|
|
|
|
o(0x0b000000 | l << 31 | x | a << 5 | b << 16); // add
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
o(0x4b000000 | l << 31 | x | a << 5 | b << 16); // sub
|
|
|
|
break;
|
|
|
|
case '/':
|
|
|
|
o(0x1ac00c00 | l << 31 | x | a << 5 | b << 16); // sdiv
|
|
|
|
break;
|
|
|
|
case '^':
|
|
|
|
o(0x4a000000 | l << 31 | x | a << 5 | b << 16); // eor
|
|
|
|
break;
|
|
|
|
case '|':
|
|
|
|
o(0x2a000000 | l << 31 | x | a << 5 | b << 16); // orr
|
|
|
|
break;
|
|
|
|
case TOK_EQ:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f17e0 | x); // cset wA,eq
|
|
|
|
break;
|
|
|
|
case TOK_GE:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fb7e0 | x); // cset wA,ge
|
|
|
|
break;
|
|
|
|
case TOK_GT:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fd7e0 | x); // cset wA,gt
|
|
|
|
break;
|
|
|
|
case TOK_LE:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fc7e0 | x); // cset wA,le
|
|
|
|
break;
|
|
|
|
case TOK_LT:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9fa7e0 | x); // cset wA,lt
|
|
|
|
break;
|
|
|
|
case TOK_NE:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f07e0 | x); // cset wA,ne
|
|
|
|
break;
|
|
|
|
case TOK_SAR:
|
|
|
|
o(0x1ac02800 | l << 31 | x | a << 5 | b << 16); // asr
|
|
|
|
break;
|
|
|
|
case TOK_SHL:
|
|
|
|
o(0x1ac02000 | l << 31 | x | a << 5 | b << 16); // lsl
|
|
|
|
break;
|
|
|
|
case TOK_SHR:
|
|
|
|
o(0x1ac02400 | l << 31 | x | a << 5 | b << 16); // lsr
|
|
|
|
break;
|
|
|
|
case TOK_UDIV:
|
|
|
|
case TOK_PDIV:
|
|
|
|
o(0x1ac00800 | l << 31 | x | a << 5 | b << 16); // udiv
|
|
|
|
break;
|
|
|
|
case TOK_UGE:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f37e0 | x); // cset wA,cs
|
|
|
|
break;
|
|
|
|
case TOK_UGT:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f97e0 | x); // cset wA,hi
|
|
|
|
break;
|
|
|
|
case TOK_ULT:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f27e0 | x); // cset wA,cc
|
|
|
|
break;
|
|
|
|
case TOK_ULE:
|
|
|
|
o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp
|
|
|
|
o(0x1a9f87e0 | x); // cset wA,ls
|
|
|
|
break;
|
|
|
|
case TOK_UMOD:
|
|
|
|
// Use x30 for quotient:
|
|
|
|
o(0x1ac00800 | l << 31 | 30 | a << 5 | b << 16); // udiv
|
2015-11-10 07:06:05 +08:00
|
|
|
o(0x1b008000 | l << 31 | x | (uint32_t)30 << 5 |
|
|
|
|
b << 16 | a << 10); // msub
|
2015-02-14 02:58:31 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Integer operation entry point: emits the operation through
// arm64_gen_opil with l = 0, then hands `op` to arm64_vset_VT_CMP.
ST_FUNC void gen_opi(int op)
{
    const uint32_t width_bit = 0;

    arm64_gen_opil(op, width_bit);
    arm64_vset_VT_CMP(op);
}
|
|
|
|
|
|
|
|
// Long operation entry point: emits the operation through
// arm64_gen_opil with l = 1, then hands `op` to arm64_vset_VT_CMP.
ST_FUNC void gen_opl(int op)
{
    const uint32_t width_bit = 1;

    arm64_gen_opil(op, width_bit);
    arm64_vset_VT_CMP(op);
}
|
|
|
|
|
|
|
|
ST_FUNC void gen_opf(int op)
|
|
|
|
{
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t x, a, b, dbl;
|
2015-02-14 02:58:31 +08:00
|
|
|
|
|
|
|
if (vtop[0].type.t == VT_LDOUBLE) {
|
|
|
|
CType type = vtop[0].type;
|
|
|
|
int func = 0;
|
|
|
|
int cond = -1;
|
|
|
|
switch (op) {
|
|
|
|
case '*': func = TOK___multf3; break;
|
|
|
|
case '+': func = TOK___addtf3; break;
|
|
|
|
case '-': func = TOK___subtf3; break;
|
|
|
|
case '/': func = TOK___divtf3; break;
|
|
|
|
case TOK_EQ: func = TOK___eqtf2; cond = 1; break;
|
|
|
|
case TOK_NE: func = TOK___netf2; cond = 0; break;
|
|
|
|
case TOK_LT: func = TOK___lttf2; cond = 10; break;
|
|
|
|
case TOK_GE: func = TOK___getf2; cond = 11; break;
|
|
|
|
case TOK_LE: func = TOK___letf2; cond = 12; break;
|
|
|
|
case TOK_GT: func = TOK___gttf2; cond = 13; break;
|
|
|
|
default: assert(0); break;
|
|
|
|
}
|
|
|
|
vpush_global_sym(&func_old_type, func);
|
|
|
|
vrott(3);
|
|
|
|
gfunc_call(2);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = cond < 0 ? REG_FRET : REG_IRET;
|
|
|
|
if (cond < 0)
|
|
|
|
vtop->type = type;
|
|
|
|
else {
|
|
|
|
o(0x7100001f); // cmp w0,#0
|
2015-11-10 07:06:05 +08:00
|
|
|
o(0x1a9f07e0 | (uint32_t)cond << 12); // cset w0,(cond)
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
dbl = vtop[0].type.t != VT_FLOAT;
|
|
|
|
gv2(RC_FLOAT, RC_FLOAT);
|
|
|
|
assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST);
|
|
|
|
a = fltr(vtop[-1].r);
|
|
|
|
b = fltr(vtop[0].r);
|
|
|
|
vtop -= 2;
|
|
|
|
switch (op) {
|
|
|
|
case TOK_EQ: case TOK_NE:
|
|
|
|
case TOK_LT: case TOK_GE: case TOK_LE: case TOK_GT:
|
|
|
|
x = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
|
|
|
x = intr(x);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
x = get_reg(RC_FLOAT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
|
|
|
x = fltr(x);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case '*':
|
|
|
|
o(0x1e200800 | dbl << 22 | x | a << 5 | b << 16); // fmul
|
|
|
|
break;
|
|
|
|
case '+':
|
|
|
|
o(0x1e202800 | dbl << 22 | x | a << 5 | b << 16); // fadd
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
o(0x1e203800 | dbl << 22 | x | a << 5 | b << 16); // fsub
|
|
|
|
break;
|
|
|
|
case '/':
|
|
|
|
o(0x1e201800 | dbl << 22 | x | a << 5 | b << 16); // fdiv
|
|
|
|
break;
|
|
|
|
case TOK_EQ:
|
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f17e0 | x); // cset w(x),eq
|
|
|
|
break;
|
|
|
|
case TOK_GE:
|
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9fb7e0 | x); // cset w(x),ge
|
|
|
|
break;
|
|
|
|
case TOK_GT:
|
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9fd7e0 | x); // cset w(x),gt
|
|
|
|
break;
|
|
|
|
case TOK_LE:
|
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f87e0 | x); // cset w(x),ls
|
|
|
|
break;
|
|
|
|
case TOK_LT:
|
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f57e0 | x); // cset w(x),mi
|
|
|
|
break;
|
|
|
|
case TOK_NE:
|
|
|
|
o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp
|
|
|
|
o(0x1a9f07e0 | x); // cset w(x),ne
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
jump optimizations
This unifies VT_CMP with VT_JMP(i) by using mostly VT_CMP
with both a positive and a negative jump target list.
Such we can delay putting the non-inverted or inverted jump
until we can see which one is nore suitable (in most cases).
example:
if (a && b || c && d)
e = 0;
before this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 11 00 00 00 je 27 <main+0x27>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 84 05 00 00 00 je 27 <main+0x27>
22: e9 22 00 00 00 jmp 49 <main+0x49>
27: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
2a: 83 f8 00 cmp $0x0,%eax
2d: 0f 84 11 00 00 00 je 44 <main+0x44>
33: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
36: 83 f8 00 cmp $0x0,%eax
39: 0f 84 05 00 00 00 je 44 <main+0x44>
3f: e9 05 00 00 00 jmp 49 <main+0x49>
44: e9 08 00 00 00 jmp 51 <main+0x51>
49: b8 00 00 00 00 mov $0x0,%eax
4e: 89 45 ec mov %eax,0xffffffec(%ebp)
51: ...
with this patch:
a: 8b 45 fc mov 0xfffffffc(%ebp),%eax
d: 83 f8 00 cmp $0x0,%eax
10: 0f 84 0c 00 00 00 je 22 <main+0x22>
16: 8b 45 f8 mov 0xfffffff8(%ebp),%eax
19: 83 f8 00 cmp $0x0,%eax
1c: 0f 85 18 00 00 00 jne 3a <main+0x3a>
22: 8b 45 f4 mov 0xfffffff4(%ebp),%eax
25: 83 f8 00 cmp $0x0,%eax
28: 0f 84 14 00 00 00 je 42 <main+0x42>
2e: 8b 45 f0 mov 0xfffffff0(%ebp),%eax
31: 83 f8 00 cmp $0x0,%eax
34: 0f 84 08 00 00 00 je 42 <main+0x42>
3a: b8 00 00 00 00 mov $0x0,%eax
3f: 89 45 ec mov %eax,0xffffffec(%ebp)
42: ...
2019-06-22 17:45:35 +08:00
|
|
|
arm64_vset_VT_CMP(op);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Generate sign extension from 32 to 64 bits:
|
|
|
|
ST_FUNC void gen_cvt_sxtw(void)
|
|
|
|
{
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t r = intr(gv(RC_INT));
|
2015-02-14 02:58:31 +08:00
|
|
|
o(0x93407c00 | r | r << 5); // sxtw x(r),w(r)
|
|
|
|
}
|
|
|
|
|
2019-12-17 01:51:28 +08:00
|
|
|
/* char/short to int conversion */
|
|
|
|
ST_FUNC void gen_cvt_csti(int t)
|
|
|
|
{
|
|
|
|
int r = intr(gv(RC_INT));
|
|
|
|
o(0x13001c00
|
|
|
|
| ((t & VT_BTYPE) == VT_SHORT) << 13
|
|
|
|
| (uint32_t)!!(t & VT_UNSIGNED) << 30
|
|
|
|
| r | r << 5); // [su]xt[bh] w(r),w(r)
|
|
|
|
}
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
ST_FUNC void gen_cvt_itof(int t)
|
|
|
|
{
|
|
|
|
if (t == VT_LDOUBLE) {
|
|
|
|
int f = vtop->type.t;
|
|
|
|
int func = (f & VT_BTYPE) == VT_LLONG ?
|
|
|
|
(f & VT_UNSIGNED ? TOK___floatunditf : TOK___floatditf) :
|
|
|
|
(f & VT_UNSIGNED ? TOK___floatunsitf : TOK___floatsitf);
|
|
|
|
vpush_global_sym(&func_old_type, func);
|
|
|
|
vrott(2);
|
|
|
|
gfunc_call(1);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->type.t = t;
|
|
|
|
vtop->r = REG_FRET;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int d, n = intr(gv(RC_INT));
|
|
|
|
int s = !(vtop->type.t & VT_UNSIGNED);
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t l = ((vtop->type.t & VT_BTYPE) == VT_LLONG);
|
2015-02-14 02:58:31 +08:00
|
|
|
--vtop;
|
|
|
|
d = get_reg(RC_FLOAT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = d;
|
2015-11-10 07:06:05 +08:00
|
|
|
o(0x1e220000 | (uint32_t)!s << 16 |
|
|
|
|
(uint32_t)(t != VT_FLOAT) << 22 | fltr(d) |
|
2015-02-14 02:58:31 +08:00
|
|
|
l << 31 | n << 5); // [us]cvtf [sd](d),[wx](n)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ST_FUNC void gen_cvt_ftoi(int t)
|
|
|
|
{
|
|
|
|
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
|
|
|
|
int func = (t & VT_BTYPE) == VT_LLONG ?
|
|
|
|
(t & VT_UNSIGNED ? TOK___fixunstfdi : TOK___fixtfdi) :
|
|
|
|
(t & VT_UNSIGNED ? TOK___fixunstfsi : TOK___fixtfsi);
|
|
|
|
vpush_global_sym(&func_old_type, func);
|
|
|
|
vrott(2);
|
|
|
|
gfunc_call(1);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->type.t = t;
|
|
|
|
vtop->r = REG_IRET;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int d, n = fltr(gv(RC_FLOAT));
|
2015-11-10 07:06:05 +08:00
|
|
|
uint32_t l = ((vtop->type.t & VT_BTYPE) != VT_FLOAT);
|
2015-02-14 02:58:31 +08:00
|
|
|
--vtop;
|
|
|
|
d = get_reg(RC_INT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = d;
|
|
|
|
o(0x1e380000 |
|
2015-11-10 07:06:05 +08:00
|
|
|
(uint32_t)!!(t & VT_UNSIGNED) << 16 |
|
|
|
|
(uint32_t)((t & VT_BTYPE) == VT_LLONG) << 31 | intr(d) |
|
2015-02-14 02:58:31 +08:00
|
|
|
l << 22 | n << 5); // fcvtz[su] [wx](d),[sd](n)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ST_FUNC void gen_cvt_ftof(int t)
|
|
|
|
{
|
2019-11-08 14:55:05 +08:00
|
|
|
int f = vtop[0].type.t & VT_BTYPE;
|
2015-02-14 02:58:31 +08:00
|
|
|
assert(t == VT_FLOAT || t == VT_DOUBLE || t == VT_LDOUBLE);
|
|
|
|
assert(f == VT_FLOAT || f == VT_DOUBLE || f == VT_LDOUBLE);
|
|
|
|
if (t == f)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (t == VT_LDOUBLE || f == VT_LDOUBLE) {
|
|
|
|
int func = (t == VT_LDOUBLE) ?
|
|
|
|
(f == VT_FLOAT ? TOK___extendsftf2 : TOK___extenddftf2) :
|
|
|
|
(t == VT_FLOAT ? TOK___trunctfsf2 : TOK___trunctfdf2);
|
|
|
|
vpush_global_sym(&func_old_type, func);
|
|
|
|
vrott(2);
|
|
|
|
gfunc_call(1);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->type.t = t;
|
|
|
|
vtop->r = REG_FRET;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int x, a;
|
|
|
|
gv(RC_FLOAT);
|
|
|
|
assert(vtop[0].r < VT_CONST);
|
|
|
|
a = fltr(vtop[0].r);
|
|
|
|
--vtop;
|
|
|
|
x = get_reg(RC_FLOAT);
|
|
|
|
++vtop;
|
|
|
|
vtop[0].r = x;
|
|
|
|
x = fltr(x);
|
|
|
|
|
|
|
|
if (f == VT_FLOAT)
|
|
|
|
o(0x1e22c000 | x | a << 5); // fcvt d(x),s(a)
|
|
|
|
else
|
|
|
|
o(0x1e624000 | x | a << 5); // fcvt s(x),d(a)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ST_FUNC void ggoto(void)
|
|
|
|
{
|
|
|
|
arm64_gen_bl_or_b(1);
|
|
|
|
--vtop;
|
|
|
|
}
|
|
|
|
|
2015-03-08 08:10:44 +08:00
|
|
|
ST_FUNC void gen_clear_cache(void)
|
|
|
|
{
|
|
|
|
uint32_t beg, end, dsz, isz, p, lab1, b1;
|
|
|
|
gv2(RC_INT, RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = get_reg(RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = get_reg(RC_INT);
|
|
|
|
vpushi(0);
|
|
|
|
vtop->r = get_reg(RC_INT);
|
|
|
|
beg = intr(vtop[-4].r); // x0
|
|
|
|
end = intr(vtop[-3].r); // x1
|
|
|
|
dsz = intr(vtop[-2].r); // x2
|
|
|
|
isz = intr(vtop[-1].r); // x3
|
|
|
|
p = intr(vtop[0].r); // x4
|
|
|
|
vtop -= 5;
|
|
|
|
|
|
|
|
o(0xd53b0020 | isz); // mrs x(isz),ctr_el0
|
|
|
|
o(0x52800080 | p); // mov w(p),#4
|
|
|
|
o(0x53104c00 | dsz | isz << 5); // ubfx w(dsz),w(isz),#16,#4
|
|
|
|
o(0x1ac02000 | dsz | p << 5 | dsz << 16); // lsl w(dsz),w(p),w(dsz)
|
|
|
|
o(0x12000c00 | isz | isz << 5); // and w(isz),w(isz),#15
|
|
|
|
o(0x1ac02000 | isz | p << 5 | isz << 16); // lsl w(isz),w(p),w(isz)
|
|
|
|
o(0x51000400 | p | dsz << 5); // sub w(p),w(dsz),#1
|
|
|
|
o(0x8a240004 | p | beg << 5 | p << 16); // bic x(p),x(beg),x(p)
|
|
|
|
b1 = ind; o(0x14000000); // b
|
|
|
|
lab1 = ind;
|
|
|
|
o(0xd50b7b20 | p); // dc cvau,x(p)
|
|
|
|
o(0x8b000000 | p | p << 5 | dsz << 16); // add x(p),x(p),x(dsz)
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(cur_text_section->data + b1, 0x14000000 | (ind - b1) >> 2);
|
2015-03-08 08:10:44 +08:00
|
|
|
o(0xeb00001f | p << 5 | end << 16); // cmp x(p),x(end)
|
|
|
|
o(0x54ffffa3 | ((lab1 - ind) << 3 & 0xffffe0)); // b.cc lab1
|
|
|
|
o(0xd5033b9f); // dsb ish
|
|
|
|
o(0x51000400 | p | isz << 5); // sub w(p),w(isz),#1
|
|
|
|
o(0x8a240004 | p | beg << 5 | p << 16); // bic x(p),x(beg),x(p)
|
|
|
|
b1 = ind; o(0x14000000); // b
|
|
|
|
lab1 = ind;
|
|
|
|
o(0xd50b7520 | p); // ic ivau,x(p)
|
|
|
|
o(0x8b000000 | p | p << 5 | isz << 16); // add x(p),x(p),x(isz)
|
2015-11-20 02:21:14 +08:00
|
|
|
write32le(cur_text_section->data + b1, 0x14000000 | (ind - b1) >> 2);
|
2015-03-08 08:10:44 +08:00
|
|
|
o(0xeb00001f | p << 5 | end << 16); // cmp x(p),x(end)
|
|
|
|
o(0x54ffffa3 | ((lab1 - ind) << 3 & 0xffffe0)); // b.cc lab1
|
|
|
|
o(0xd5033b9f); // dsb ish
|
|
|
|
o(0xd5033fdf); // isb
|
|
|
|
}
|
|
|
|
|
2015-02-14 02:58:31 +08:00
|
|
|
ST_FUNC void gen_vla_sp_save(int addr) {
|
2015-10-31 19:04:52 +08:00
|
|
|
uint32_t r = intr(get_reg(RC_INT));
|
|
|
|
o(0x910003e0 | r); // mov x(r),sp
|
|
|
|
arm64_strx(3, r, 29, addr);
|
2015-02-14 02:58:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Reload the saved stack pointer with arm64_ldrx (base register 29,
// offset `addr`) and move it back into sp.
ST_FUNC void gen_vla_sp_restore(int addr)
{
    // x30 is used as the scratch register instead of asking get_reg:
    // this can run while x0 holds a live return value that is not
    // represented on the value stack, so get_reg might return x0.
    const uint32_t scratch = 30;
    uint32_t mov_to_sp = 0x9100001f; // mov sp,x(r)

    arm64_ldrx(0, 3, scratch, 29, addr);
    o(mov_to_sp | scratch << 5);
}
|
|
|
|
|
|
|
|
// Allocate a variable-length array on the stack: the size on top of the
// value stack is rounded up to a multiple of 16 and subtracted from sp,
// then the size entry is popped.  `type` and `align` are not used here.
ST_FUNC void gen_vla_alloc(CType *type, int align)
{
    uint32_t size_reg = intr(gv(RC_INT));
    uint32_t same_reg = size_reg | size_reg << 5; // Rd and Rn fields

    o(0x91003c00 | same_reg); // add x(r),x(r),#15
    o(0x927cec00 | same_reg); // bic x(r),x(r),#15
    o(0xcb2063ff | size_reg << 16); // sub sp,sp,x(r)
    vpop();
}
|
|
|
|
|
|
|
|
/* end of A64 code generator */
|
|
|
|
/*************************************************************/
|
|
|
|
#endif
|
|
|
|
/*************************************************************/
|