diff --git a/Makefile b/Makefile index feb5e127..7ddc81c8 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,7 @@ NATIVE_DEFINES_$(CONFIG_arm) += -DTCC_TARGET_ARM NATIVE_DEFINES_$(CONFIG_arm_eabihf) += -DTCC_ARM_EABI -DTCC_ARM_HARDFLOAT NATIVE_DEFINES_$(CONFIG_arm_eabi) += -DTCC_ARM_EABI NATIVE_DEFINES_$(CONFIG_arm_vfp) += -DTCC_ARM_VFP +NATIVE_DEFINES_$(CONFIG_arm64) += -DTCC_TARGET_ARM64 NATIVE_DEFINES += $(NATIVE_DEFINES_yes) ifeq ($(TOP),.) @@ -86,6 +87,7 @@ ARM_VFP_CROSS = arm-linux-gnu-tcc$(EXESUF) ARM_EABI_CROSS = arm-linux-gnueabi-tcc$(EXESUF) ARM_EABIHF_CROSS = arm-linux-gnueabihf-tcc$(EXESUF) ARM_CROSS = $(ARM_FPA_CROSS) $(ARM_FPA_LD_CROSS) $(ARM_VFP_CROSS) $(ARM_EABI_CROSS) +ARM64_CROSS = arm64-tcc$(EXESUF) C67_CROSS = c67-tcc$(EXESUF) # Legacy symlinks for cross compilers @@ -107,33 +109,39 @@ WIN64_FILES = $(CORE_FILES) x86_64-gen.c i386-asm.c x86_64-asm.h tccpe.c WINCE_FILES = $(CORE_FILES) arm-gen.c tccpe.c X86_64_FILES = $(CORE_FILES) x86_64-gen.c i386-asm.c x86_64-asm.h ARM_FILES = $(CORE_FILES) arm-gen.c +ARM64_FILES = $(CORE_FILES) arm64-gen.c C67_FILES = $(CORE_FILES) c67-gen.c tcccoff.c ifdef CONFIG_WIN64 PROGS+=tiny_impdef$(EXESUF) tiny_libmaker$(EXESUF) NATIVE_FILES=$(WIN64_FILES) -PROGS_CROSS=$(WIN32_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS) +PROGS_CROSS=$(WIN32_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS) LIBTCC1_CROSS=lib/i386-win32/libtcc1.a LIBTCC1=libtcc1.a else ifdef CONFIG_WIN32 PROGS+=tiny_impdef$(EXESUF) tiny_libmaker$(EXESUF) NATIVE_FILES=$(WIN32_FILES) -PROGS_CROSS=$(WIN64_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS) +PROGS_CROSS=$(WIN64_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS) LIBTCC1_CROSS=lib/x86_64-win32/libtcc1.a LIBTCC1=libtcc1.a else ifeq ($(ARCH),i386) NATIVE_FILES=$(I386_FILES) -PROGS_CROSS=$(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS) +PROGS_CROSS=$(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS) LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a LIBTCC1=libtcc1.a else ifeq ($(ARCH),x86-64) NATIVE_FILES=$(X86_64_FILES) -PROGS_CROSS=$(I386_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS) +PROGS_CROSS=$(I386_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS) LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a LIBTCC1=libtcc1.a else ifeq ($(ARCH),arm) NATIVE_FILES=$(ARM_FILES) -PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(C67_CROSS) $(WINCE_CROSS) +PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS) +LIBTCC1=libtcc1.a +LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a +else ifeq ($(ARCH),arm64) +NATIVE_FILES=$(ARM64_FILES) +PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS) LIBTCC1=libtcc1.a LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a endif @@ -181,6 +189,7 @@ $(ARM_FPA_CROSS): DEFINES = -DTCC_TARGET_ARM $(ARM_FPA_LD_CROSS)$(EXESUF): DEFINES = -DTCC_TARGET_ARM -DLDOUBLE_SIZE=12 $(ARM_VFP_CROSS): DEFINES = -DTCC_TARGET_ARM -DTCC_ARM_VFP -DCONFIG_MULTIARCHDIR="\"arm-linux-gnu\"" $(ARM_EABI_CROSS): DEFINES = -DTCC_TARGET_ARM -DTCC_ARM_EABI -DTCC_ARM_VFP -DCONFIG_MULTIARCHDIR="\"arm-linux-gnueabi\"" +$(ARM64_CROSS): DEFINES = -DTCC_TARGET_ARM64 $(I386_CROSS): $(I386_FILES) $(X64_CROSS): $(X86_64_FILES) @@ -189,6 +198,7 @@ $(WIN64_CROSS): $(WIN64_FILES) $(WINCE_CROSS): $(WINCE_FILES) $(C67_CROSS): $(C67_FILES) $(ARM_FPA_CROSS) $(ARM_FPA_LD_CROSS) $(ARM_VFP_CROSS) $(ARM_EABI_CROSS): $(ARM_FILES) +$(ARM64_CROSS): $(ARM64_FILES) # libtcc generation and test ifndef ONE_SOURCE diff --git a/arm64-gen.c b/arm64-gen.c new file mode 100644 index 00000000..09e74167 --- /dev/null +++ b/arm64-gen.c @@ -0,0 +1,1621 @@ +/* + * A64 code generator for TCC + * + * Copyright (c) 2014-2015 Edmund Grimley Evans + * + * Copying and distribution of this file, with or without modification, + * are permitted in any medium without royalty provided the copyright + * notice and this notice are preserved. This file is offered as-is, + * without any warranty. + */ + +#ifdef TARGET_DEFS_ONLY + +// Number of registers available to allocator: +#define NB_REGS 28 // x0-x18, x30, v0-v7 + +#define TREG_R(x) (x) // x = 0..18 +#define TREG_R30 19 +#define TREG_F(x) (x + 20) // x = 0..7 + +// Register classes sorted from more general to more precise: +#define RC_INT (1 << 0) +#define RC_FLOAT (1 << 1) +#define RC_R(x) (1 << (2 + (x))) // x = 0..18 +#define RC_R30 (1 << 21) +#define RC_F(x) (1 << (22 + (x))) // x = 0..7 + +#define RC_IRET (RC_R(0)) // int return register class +#define RC_FRET (RC_F(0)) // float return register class + +#define REG_IRET (TREG_R(0)) // int return register number +#define REG_FRET (TREG_F(0)) // float return register number + +#define PTR_SIZE 8 + +#define LDOUBLE_SIZE 16 +#define LDOUBLE_ALIGN 16 + +#define MAX_ALIGN 16 + +#define CHAR_IS_UNSIGNED + +/******************************************************/ +/* ELF defines */ + +#define EM_TCC_TARGET EM_AARCH64 + +#define R_DATA_32 R_AARCH64_ABS32 +#define R_DATA_PTR R_AARCH64_ABS64 +#define R_JMP_SLOT R_AARCH64_JUMP_SLOT +#define R_COPY R_AARCH64_COPY + +#define ELF_START_ADDR 0x00400000 +#define ELF_PAGE_SIZE 0x1000 + +/******************************************************/ +#else /* ! TARGET_DEFS_ONLY */ +/******************************************************/ +#include "tcc.h" +#include + +ST_DATA const int reg_classes[NB_REGS] = { + RC_INT | RC_R(0), + RC_INT | RC_R(1), + RC_INT | RC_R(2), + RC_INT | RC_R(3), + RC_INT | RC_R(4), + RC_INT | RC_R(5), + RC_INT | RC_R(6), + RC_INT | RC_R(7), + RC_INT | RC_R(8), + RC_INT | RC_R(9), + RC_INT | RC_R(10), + RC_INT | RC_R(11), + RC_INT | RC_R(12), + RC_INT | RC_R(13), + RC_INT | RC_R(14), + RC_INT | RC_R(15), + RC_INT | RC_R(16), + RC_INT | RC_R(17), + RC_INT | RC_R(18), + RC_R30, // not in RC_INT as we make special use of x30 + RC_FLOAT | RC_F(0), + RC_FLOAT | RC_F(1), + RC_FLOAT | RC_F(2), + RC_FLOAT | RC_F(3), + RC_FLOAT | RC_F(4), + RC_FLOAT | RC_F(5), + RC_FLOAT | RC_F(6), + RC_FLOAT | RC_F(7) +}; + +#define IS_FREG(x) ((x) >= TREG_F(0)) + +static uint32_t intr(int r) +{ + assert(TREG_R(0) <= r && r <= TREG_R30); + return r < TREG_R30 ? r : 30; +} + +static uint32_t fltr(int r) +{ + assert(TREG_F(0) <= r && r <= TREG_F(7)); + return r - TREG_F(0); +} + +// Add an instruction to text section: +ST_FUNC void o(unsigned int c) +{ + int ind1 = ind + 4; + if (ind1 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind1); + *(uint32_t *)(cur_text_section->data + ind) = c; + ind = ind1; +} + +static int arm64_encode_bimm64(uint64_t x) +{ + int neg = x & 1; + int rep, pos, len; + + if (neg) + x = ~x; + if (!x) + return -1; + + if (x >> 2 == (x & (((uint64_t)1 << (64 - 2)) - 1))) + rep = 2, x &= ((uint64_t)1 << 2) - 1; + else if (x >> 4 == (x & (((uint64_t)1 << (64 - 4)) - 1))) + rep = 4, x &= ((uint64_t)1 << 4) - 1; + else if (x >> 8 == (x & (((uint64_t)1 << (64 - 8)) - 1))) + rep = 8, x &= ((uint64_t)1 << 8) - 1; + else if (x >> 16 == (x & (((uint64_t)1 << (64 - 16)) - 1))) + rep = 16, x &= ((uint64_t)1 << 16) - 1; + else if (x >> 32 == (x & (((uint64_t)1 << (64 - 32)) - 1))) + rep = 32, x &= ((uint64_t)1 << 32) - 1; + else + rep = 64; + + pos = 0; + if (!(x & (((uint64_t)1 << 32) - 1))) x >>= 32, pos += 32; + if (!(x & (((uint64_t)1 << 16) - 1))) x >>= 16, pos += 16; + if (!(x & (((uint64_t)1 << 8) - 1))) x >>= 8, pos += 8; + if (!(x & (((uint64_t)1 << 4) - 1))) x >>= 4, pos += 4; + if (!(x & (((uint64_t)1 << 2) - 1))) x >>= 2, pos += 2; + if (!(x & (((uint64_t)1 << 1) - 1))) x >>= 1, pos += 1; + + len = 0; + if (!(~x & (((uint64_t)1 << 32) - 1))) x >>= 32, len += 32; + if (!(~x & (((uint64_t)1 << 16) - 1))) x >>= 16, len += 16; + if (!(~x & (((uint64_t)1 << 8) - 1))) x >>= 8, len += 8; + if (!(~x & (((uint64_t)1 << 4) - 1))) x >>= 4, len += 4; + if (!(~x & (((uint64_t)1 << 2) - 1))) x >>= 2, len += 2; + if (!(~x & (((uint64_t)1 << 1) - 1))) x >>= 1, len += 1; + + if (x) + return -1; + if (neg) { + pos = (pos + len) & (rep - 1); + len = rep - len; + } + return ((0x1000 & rep << 6) | (((rep - 1) ^ 31) << 1 & 63) | + ((rep - pos) & (rep - 1)) << 6 | (len - 1)); +} + +static uint32_t arm64_movi(int r, uint64_t x) +{ + uint64_t m = 0xffff; + int e; + if (!(x & ~m)) + return 0x52800000 | r | x << 5; // movz w(r),#(x) + if (!(x & ~(m << 16))) + return 0x52a00000 | r | x >> 11; // movz w(r),#(x >> 16),lsl #16 + if (!(x & ~(m << 32))) + return 0xd2c00000 | r | x >> 27; // movz x(r),#(x >> 32),lsl #32 + if (!(x & ~(m << 48))) + return 0xd2e00000 | r | x >> 43; // movz x(r),#(x >> 48),lsl #48 + if ((x & ~m) == m << 16) + return (0x12800000 | r | + (~x << 5 & 0x1fffe0)); // movn w(r),#(~x) + if ((x & ~(m << 16)) == m) + return (0x12a00000 | r | + (~x >> 11 & 0x1fffe0)); // movn w(r),#(~x >> 16),lsl #16 + if (!~(x | m)) + return (0x92800000 | r | + (~x << 5 & 0x1fffe0)); // movn x(r),#(~x) + if (!~(x | m << 16)) + return (0x92a00000 | r | + (~x >> 11 & 0x1fffe0)); // movn x(r),#(~x >> 16),lsl #16 + if (!~(x | m << 32)) + return (0x92c00000 | r | + (~x >> 27 & 0x1fffe0)); // movn x(r),#(~x >> 32),lsl #32 + if (!~(x | m << 48)) + return (0x92e00000 | r | + (~x >> 43 & 0x1fffe0)); // movn x(r),#(~x >> 32),lsl #32 + if (!(x >> 32) && (e = arm64_encode_bimm64(x | x << 32)) >= 0) + return 0x320003e0 | r | (uint32_t)e << 10; // movi w(r),#(x) + if ((e = arm64_encode_bimm64(x)) >= 0) + return 0xb20003e0 | r | (uint32_t)e << 10; // movi x(r),#(x) + return 0; +} + +static void arm64_movimm(int r, uint64_t x) +{ + uint32_t i; + if ((i = arm64_movi(r, x))) + o(i); + else { + // This could be improved: + o(0x52800000 | r | (x & 0xffff) << 5); // movz w(r),#(x & 0xffff) + for (i = 1; i < 4; i++) + if (x >> 16 * i & 0xffff) { + o(0xf2800000 | r | (x >> 16 * i & 0xffff) << 5 | i << 21); + // movk w(r),#(*),lsl #(*) + } + + } +} + +// Patch all branches in list pointed to by t to branch to a: +ST_FUNC void gsym_addr(int t_, int a_) +{ + uint32_t t = t_; + uint32_t a = a_; + while (t) { + uint32_t *ptr = (uint32_t *)(cur_text_section->data + t); + uint32_t next = *ptr; + if (a - t + 0x8000000 >= 0x10000000) + tcc_error("branch out of range"); + *ptr = (a - t == 4 ? 0xd503201f : // nop + 0x14000000 | ((a - t) >> 2 & 0x3ffffff)); // b + t = next; + } +} + +// Patch all branches in list pointed to by t to branch to current location: +ST_FUNC void gsym(int t) +{ + gsym_addr(t, ind); +} + +static int arm64_type_size(int t) +{ + switch (t & VT_BTYPE) { + case VT_INT: return 2; + case VT_BYTE: return 0; + case VT_SHORT: return 1; + case VT_PTR: return 3; + case VT_ENUM: return 2; + case VT_FUNC: return 3; + case VT_FLOAT: return 2; + case VT_DOUBLE: return 3; + case VT_LDOUBLE: return 4; + case VT_BOOL: return 0; + case VT_LLONG: return 3; + } + assert(0); + return 0; +} + +static void gen_stack_addr(int reg, uint64_t off) +{ + arm64_movimm(30, off); // use x30 for offset + o(0x8b3e63e0 | reg); +} + +static void gen_load(int sg, int sz, int dst, int bas, uint64_t off) +{ + if (sz >= 2) + sg = 0; + if (!(off & ~(0xfff << sz))) + o(0x39400000 | dst | bas << 5 | off << (10 - sz) | + !!sg << 23 | sz << 30); + else if (off < 256 || -off <= 256) + o(0x38400000 | dst | bas << 5 | (off & 511) << 12 | + !!sg << 23 | sz << 30); + else { + arm64_movimm(30, off); // use x30 for offset + o(0x38206800 | dst | bas << 5 | 30 << 16 | + (!!sg + 1) << 22 | sz << 30); + } +} + +static void gen_fload(int sz, int dst, int bas, uint64_t off) +{ + if (!(off & ~(0xfff << sz))) + o(0x3d400000 | dst | bas << 5 | off << (10 - sz) | + (sz & 4) << 21 | (sz & 3) << 30); + else if (off < 256 || -off <= 256) + o(0x3c400000 | dst | bas << 5 | (off & 511) << 12 | + (sz & 4) << 21 | (sz & 3) << 30); + else { + arm64_movimm(30, off); // use x30 for offset + o(0x3c606800 | dst | bas << 5 | 30 << 16 | sz << 30 | (sz & 4) << 21); + } +} + +static void gen_sload(int reg, int size) +{ + // Use x30 for intermediate value in some cases. + switch (size) { + default: assert(0); break; + case 1: + gen_load(0, 0, reg, reg, 0); + break; + case 2: + gen_load(0, 1, reg, reg, 0); + break; + case 3: + gen_load(0, 1, 30, reg, 0); + gen_load(0, 0, reg, reg, 2); + o(0x2a0043c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #16 + break; + case 4: + gen_load(0, 2, reg, reg, 0); + break; + case 5: + gen_load(0, 2, 30, reg, 0); + gen_load(0, 0, reg, reg, 4); + o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32 + break; + case 6: + gen_load(0, 2, 30, reg, 0); + gen_load(0, 1, reg, reg, 4); + o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32 + break; + case 7: + gen_load(0, 2, 30, reg, 0); + gen_load(0, 2, reg, reg, 3); + o(0x53087c00 | reg | reg << 5); // lsr w(reg), w(reg), #8 + o(0xaa0083c0 | reg | reg << 16); // orr x(reg),x30,x(reg),lsl #32 + break; + case 8: + gen_load(0, 3, reg, reg, 0); + break; + case 9: + gen_load(0, 0, reg + 1, reg, 8); + gen_load(0, 3, reg, reg, 0); + break; + case 10: + gen_load(0, 1, reg + 1, reg, 8); + gen_load(0, 3, reg, reg, 0); + break; + case 11: + gen_load(0, 2, reg + 1, reg, 7); + o(0x53087c00 | (reg+1) | (reg+1) << 5); // lsr w(reg+1), w(reg+1), #8 + gen_load(0, 3, reg, reg, 0); + break; + case 12: + gen_load(0, 2, reg + 1, reg, 8); + gen_load(0, 3, reg, reg, 0); + break; + case 13: + gen_load(0, 3, reg + 1, reg, 5); + o(0xd358fc00 | (reg+1) | (reg+1) << 5); // lsr x(reg+1), x(reg+1), #24 + gen_load(0, 3, reg, reg, 0); + break; + case 14: + gen_load(0, 3, reg + 1, reg, 6); + o(0xd350fc00 | (reg+1) | (reg+1) << 5); // lsr x(reg+1), x(reg+1), #16 + gen_load(0, 3, reg, reg, 0); + break; + case 15: + gen_load(0, 3, reg + 1, reg, 7); + o(0xd348fc00 | (reg+1) | (reg+1) << 5); // lsr x(reg+1), x(reg+1), #8 + gen_load(0, 3, reg, reg, 0); + break; + case 16: + o(0xa9400000 | reg | (reg+1) << 10 | reg << 5); + // ldp x(reg),x(reg+1),[x(reg)] + break; + } +} + +static void gen_store(int sz, int dst, int bas, uint64_t off) +{ + if (!(off & ~(0xfff << sz))) + o(0x39000000 | dst | bas << 5 | off << (10 - sz) | sz << 30); + else if (off < 256 || -off <= 256) + o(0x38000000 | dst | bas << 5 | (off & 511) << 12 | sz << 30); + else { + arm64_movimm(30, off); // use x30 for offset + o(0x38206800 | dst | bas << 5 | 30 << 16 | sz << 30); + } +} + +static void gen_fstore(int sz, int dst, int bas, uint64_t off) +{ + if (!(off & ~(0xfff << sz))) + o(0x3d000000 | dst | bas << 5 | off << (10 - sz) | + (sz & 4) << 21 | (sz & 3) << 30); + else if (off < 256 || -off <= 256) + o(0x3c000000 | dst | bas << 5 | (off & 511) << 12 | + (sz & 4) << 21 | (sz & 3) << 30); + else { + arm64_movimm(30, off); // use x30 for offset + o(0x3c206800 | dst | bas << 5 | 30 << 16 | sz << 30 | (sz & 4) << 21); + } +} + +static void gen_addr(int r, Sym *sym, unsigned long addend) +{ +#if 0 + // This is normally the right way to do it, I think, + // but it does not work with "-run" when stdin or stderr is + // used by the program: "R_AARCH64_ADR_PREL_PG_HI21 relocation failed". + greloca(cur_text_section, sym, ind, R_AARCH64_ADR_PREL_PG_HI21, addend); + o(0x90000000 | r); + greloca(cur_text_section, sym, ind, R_AARCH64_ADD_ABS_LO12_NC, addend); + o(0x91000000 | r | r << 5); +#else + // This seems to work in all cases, unless you try to use an old buggy + // GCC for linking, which says: "unresolvable R_AARCH64_MOVW_UABS_G0_NC + // relocation against symbol `stderr@@GLIBC_2.17'". + greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G0_NC, addend); + o(0xf2800000 | r); // movk x(rt),#...,lsl #0 + greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G1_NC, addend); + o(0xf2a00000 | r); // movk x(rt),#...,lsl #16 + greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G2_NC, addend); + o(0xf2c00000 | r); // movk x(rt),#...,lsl #32 + greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G3, addend); + o(0xf2e00000 | r); // movk x(rt),#...,lsl #48 +#endif +} + +ST_FUNC void load(int r, SValue *sv) +{ + int svtt = sv->type.t; + int svr = sv->r & ~VT_LVAL_TYPE; + int svrv = svr & VT_VALMASK; + uint64_t svcul = (int32_t)sv->c.ul; + + if (svr == (VT_LOCAL | VT_LVAL)) { + if (IS_FREG(r)) + gen_fload(arm64_type_size(svtt), fltr(r), 29, svcul); + else + gen_load(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), + intr(r), 29, svcul); + return; + } + + if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) { + if (IS_FREG(r)) + gen_fload(arm64_type_size(svtt), + fltr(r), intr(svrv), 0); + else + gen_load(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), + intr(r), intr(svrv), 0); + return; + } + + if (svr == (VT_CONST | VT_LVAL | VT_SYM)) { + gen_addr(30, sv->sym, svcul); // use x30 for address + if (IS_FREG(r)) + gen_fload(arm64_type_size(svtt), fltr(r), 30, 0); + else + gen_load(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), + intr(r), 30, 0); + return; + } + + if (svr == (VT_CONST | VT_SYM)) { + gen_addr(intr(r), sv->sym, svcul); + return; + } + + if (svr == VT_CONST) { + if ((svtt & VT_BTYPE) != VT_VOID) + arm64_movimm(intr(r), + arm64_type_size(svtt) == 3 ? sv->c.ull : svcul); + return; + } + + if (svr < VT_CONST) { + if (IS_FREG(r) && IS_FREG(svr)) + if (svtt == VT_LDOUBLE) + o(0x4ea01c00 | fltr(r) | fltr(svr) << 5); + // mov v(r).16b,v(svr).16b + else + o(0x1e604000 | fltr(r) | fltr(svr) << 5); // fmov d(r),d(svr) + else if (!IS_FREG(r) && !IS_FREG(svr)) + o(0xaa0003e0 | intr(r) | intr(svr) << 16); // mov x(r),x(svr) + else + assert(0); + return; + } + + if (svr == VT_LOCAL) { + if (-svcul < 0x1000) + o(0xd10003a0 | intr(r) | -svcul << 10); // sub x(r),x29,#... + else { + arm64_movimm(30, -svcul); // use x30 for offset + o(0xcb0003a0 | intr(r) | 30 << 16); // sub x(r),x29,x30 + } + return; + } + + if (svr == VT_JMP || svr == VT_JMPI) { + int t = (svr == VT_JMPI); + arm64_movimm(intr(r), t); + o(0x14000002); // b .+8 + gsym(svcul); + arm64_movimm(intr(r), t ^ 1); + return; + } + + if (svr == (VT_LLOCAL | VT_LVAL)) { + gen_load(0, 3, 30, 29, svcul); // use x30 for offset + if (IS_FREG(r)) + gen_fload(arm64_type_size(svtt), fltr(r), 30, 0); + else + gen_load(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), + intr(r), 30, 0); + return; + } + + printf("load(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul); + assert(0); +} + +ST_FUNC void store(int r, SValue *sv) +{ + int svtt = sv->type.t; + int svr = sv->r & ~VT_LVAL_TYPE; + int svrv = svr & VT_VALMASK; + uint64_t svcul = (int32_t)sv->c.ul; + + if (svr == (VT_LOCAL | VT_LVAL)) { + if (IS_FREG(r)) + gen_fstore(arm64_type_size(svtt), fltr(r), 29, svcul); + else + gen_store(arm64_type_size(svtt), intr(r), 29, svcul); + return; + } + + if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) { + if (IS_FREG(r)) + gen_fstore(arm64_type_size(svtt), fltr(r), intr(svrv), 0); + else + gen_store(arm64_type_size(svtt), intr(r), intr(svrv), 0); + return; + } + + if (svr == (VT_CONST | VT_LVAL | VT_SYM)) { + gen_addr(30, sv->sym, svcul); // use x30 for address + if (IS_FREG(r)) + gen_fstore(arm64_type_size(svtt), fltr(r), 30, 0); + else + gen_store(arm64_type_size(svtt), intr(r), 30, 0); + return; + } + + printf("store(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul); + assert(0); +} + +static void arm64_gen_bl_or_b(int b) +{ + if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { + assert(!b); + if (vtop->r & VT_SYM) + greloc(cur_text_section, vtop->sym, ind, R_AARCH64_CALL26); + else + assert(0); + o(0x94000000); // bl . + } + else + o(0xd61f0000 | !b << 21 | intr(gv(RC_R30)) << 5); // br/blr +} + +static int arm64_hfa_aux(CType *type, int *fsize, int num) +{ + if (is_float(type->t)) { + int a, n = type_size(type, &a); + if (num >= 4 || (*fsize && *fsize != n)) + return -1; + *fsize = n; + return num + 1; + } + else if ((type->t & VT_BTYPE) == VT_STRUCT) { + int is_struct = 0; // rather than union + Sym *field; + for (field = type->ref->next; field; field = field->next) + if (field->c) { + is_struct = 1; + break; + } + if (is_struct) { + int num0 = num; + for (field = type->ref->next; field; field = field->next) { + if (field->c != (num - num0) * *fsize) + return -1; + num = arm64_hfa_aux(&field->type, fsize, num); + if (num == -1) + return -1; + } + if (type->ref->c != (num - num0) * *fsize) + return -1; + return num; + } + else { // union + int num0 = num; + for (field = type->ref->next; field; field = field->next) { + int num1 = arm64_hfa_aux(&field->type, fsize, num0); + if (num1 == -1) + return -1; + num = num1 < num ? num : num1; + } + if (type->ref->c != (num - num0) * *fsize) + return -1; + return num; + } + } + else if (type->t & VT_ARRAY) { + int num1; + if (!type->ref->c) + return num; + num1 = arm64_hfa_aux(&type->ref->type, fsize, num); + if (num1 == -1 || (num1 != num && type->ref->c > 4)) + return -1; + num1 = num + type->ref->c * (num1 - num); + if (num1 > 4) + return -1; + return num1; + } + return -1; +} + +static int arm64_hfa(CType *type, int *fsize) +{ + if ((type->t & VT_BTYPE) == VT_STRUCT || (type->t & VT_ARRAY)) { + int sz = 0; + int n = arm64_hfa_aux(type, &sz, 0); + if (0 < n && n <= 4) { + if (fsize) + *fsize = sz; + return n; + } + } + return 0; +} + +static unsigned long arm64_pcs_aux(int n, CType **type, unsigned long *a) +{ + int nx = 0; // next integer register + int nv = 0; // next vector register + unsigned long ns = 32; // next stack offset + int i; + + for (i = 0; i < n; i++) { + int hfa = arm64_hfa(type[i], 0); + int size, align; + + if ((type[i]->t & VT_ARRAY) || + (type[i]->t & VT_BTYPE) == VT_FUNC) + size = align = 8; + else + size = type_size(type[i], &align); + + if (hfa) + // B.2 + ; + else if (size > 16) { + // B.3: replace with pointer + if (nx < 8) + a[i] = nx++ << 1 | 1; + else { + ns = (ns + 7) & ~7; + a[i] = ns | 1; + ns += 8; + } + continue; + } + else if ((type[i]->t & VT_BTYPE) == VT_STRUCT) + // B.4 + size = (size + 7) & ~7; + + // C.1 + if (is_float(type[i]->t) && nv < 8) { + a[i] = 16 + (nv++ << 1); + continue; + } + + // C.2 + if (hfa && nv + hfa <= 8) { + a[i] = 16 + (nv << 1); + nv += hfa; + continue; + } + + // C.3 + if (hfa) { + nv = 8; + size = (size + 7) & ~7; + } + + // C.4 + if (hfa || (type[i]->t & VT_BTYPE) == VT_LDOUBLE) { + ns = (ns + 7) & ~7; + ns = (ns + align - 1) & -align; + } + + // C.5 + if ((type[i]->t & VT_BTYPE) == VT_FLOAT) + size = 8; + + // C.6 + if (hfa || is_float(type[i]->t)) { + a[i] = ns; + ns += size; + continue; + } + + // C.7 + if ((type[i]->t & VT_BTYPE) != VT_STRUCT && size <= 8 && nx < 8) { + a[i] = nx++ << 1; + continue; + } + + // C.8 + if (align == 16) + nx = (nx + 1) & ~1; + + // C.9 + if ((type[i]->t & VT_BTYPE) != VT_STRUCT && size == 16 && nx < 7) { + a[i] = nx << 1; + nx += 2; + continue; + } + + // C.10 + if ((type[i]->t & VT_BTYPE) == VT_STRUCT && size <= (8 - nx) * 8) { + a[i] = nx << 1; + nx += (size + 7) >> 3; + continue; + } + + // C.11 + nx = 8; + + // C.12 + ns = (ns + 7) & ~7; + ns = (ns + align - 1) & -align; + + // C.13 + if ((type[i]->t & VT_BTYPE) == VT_STRUCT) { + a[i] = ns; + ns += size; + continue; + } + + // C.14 + if (size < 8) + size = 8; + + // C.15 + a[i] = ns; + ns += size; + } + + return ns - 32; +} + +static unsigned long arm64_pcs(int n, CType **type, unsigned long *a) +{ + unsigned long stack; + + // Return type: + if ((type[0]->t & VT_BTYPE) == VT_VOID) + a[0] = -1; + else { + arm64_pcs_aux(1, type, a); + assert(a[0] == 0 || a[0] == 1 || a[0] == 16); + } + + // Argument types: + stack = arm64_pcs_aux(n, type + 1, a + 1); + + if (0) { + int i; + for (i = 0; i <= n; i++) { + if (!i) + printf("arm64_pcs return: "); + else + printf("arm64_pcs arg %d: ", i); + if (a[i] == (unsigned long)-1) + printf("void\n"); + else if (a[i] == 1 && !i) + printf("X8 pointer\n"); + else if (a[i] < 16) + printf("X%lu%s\n", a[i] / 2, a[i] & 1 ? " pointer" : ""); + else if (a[i] < 32) + printf("V%lu\n", a[i] / 2 - 8); + else + printf("stack %lu%s\n", + (a[i] - 32) & ~1, a[i] & 1 ? " pointer" : ""); + } + } + + return stack; +} + +ST_FUNC void gfunc_call(int nb_args) +{ + CType *return_type; + CType **t; + unsigned long *a, *a1; + unsigned long stack; + int i; + + return_type = &vtop[-nb_args].type.ref->type; + if ((return_type->t & VT_BTYPE) == VT_STRUCT) + --nb_args; + + t = tcc_malloc((nb_args + 1) * sizeof(*t)); + a = tcc_malloc((nb_args + 1) * sizeof(*a)); + a1 = tcc_malloc((nb_args + 1) * sizeof(*a1)); + + t[0] = return_type; + for (i = 0; i < nb_args; i++) + t[nb_args - i] = &vtop[-i].type; + + stack = arm64_pcs(nb_args, t, a); + + // Allocate space for structs replaced by pointer: + for (i = nb_args; i; i--) + if (a[i] & 1) { + SValue *arg = &vtop[i - nb_args]; + int align, size = type_size(&arg->type, &align); + assert((arg->type.t & VT_BTYPE) == VT_STRUCT); + stack = (stack + align - 1) & -align; + a1[i] = stack; + stack += size; + } + + stack = (stack + 15) >> 4 << 4; + + assert(stack < 0x1000); + if (stack) + o(0xd10003ff | stack << 10); // sub sp,sp,#(n) + + // First pass: set all values on stack + for (i = nb_args; i; i--) { + vpushv(vtop - nb_args + i); + + if (a[i] & 1) { + // struct replaced by pointer + int r = get_reg(RC_INT); + gen_stack_addr(intr(r), a1[i]); + vset(&vtop->type, r | VT_LVAL, 0); + vswap(); + vstore(); + if (a[i] >= 32) { + // pointer on stack + r = get_reg(RC_INT); + gen_stack_addr(intr(r), a1[i]); + gen_store(3, intr(r), 31, (a[i] - 32) >> 1 << 1); + } + } + else if (a[i] >= 32) { + // value on stack + if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { + int r = get_reg(RC_INT); + gen_stack_addr(intr(r), a[i] - 32); + vset(&vtop->type, r | VT_LVAL, 0); + vswap(); + vstore(); + } + else if (is_float(vtop->type.t)) { + gv(RC_FLOAT); + gen_fstore(arm64_type_size(vtop[0].type.t), + fltr(vtop[0].r), 31, a[i] - 32); + } + else { + gv(RC_INT); + gen_store(arm64_type_size(vtop[0].type.t), + intr(vtop[0].r), 31, a[i] - 32); + } + } + + --vtop; + } + + // Second pass: assign values to registers + for (i = nb_args; i; i--, vtop--) { + if (a[i] < 16 && !(a[i] & 1)) { + // value in general-purpose registers + if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { + int align, size = type_size(&vtop->type, &align); + vtop->type.t = VT_PTR; + gaddrof(); + gv(RC_R(a[i] / 2)); + gen_sload(a[i] / 2, size); + } + else + gv(RC_R(a[i] / 2)); + } + else if (a[i] < 16) + // struct replaced by pointer in register + gen_stack_addr(a[i] / 2, a1[i]); + else if (a[i] < 32) { + // value in floating-point registers + if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { + int j, sz, n = arm64_hfa(&vtop->type, &sz); + vtop->type.t = VT_PTR; + gaddrof(); + gv(RC_R30); + for (j = 0; j < n; j++) + o(0x3d4003c0 | + (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | + (a[i] / 2 - 8 + j) | + j << 10); // ldr ([sdq])(*),[x30,#(j * sz)] + } + else + gv(RC_F(a[i] / 2 - 8)); + } + } + + if ((return_type->t & VT_BTYPE) == VT_STRUCT) { + if (a[0] == 1) { + // indirect return: set x8 and discard the stack value + gv(RC_R(8)); + --vtop; + } + else + // return in registers: keep the address for after the call + vswap(); + } + + save_regs(0); + arm64_gen_bl_or_b(0); + --vtop; + if (stack) + o(0x910003ff | stack << 10); // add sp,sp,#(n) + + { + int rt = return_type->t; + int bt = rt & VT_BTYPE; + if (bt == VT_BYTE || bt == VT_SHORT) + // Promote small integers: + o(0x13001c00 | (bt == VT_SHORT) << 13 | + !!(rt & VT_UNSIGNED) << 30); // [su]xt[bh] w0,w0 + else if (bt == VT_STRUCT && !(a[0] & 1)) { + // A struct was returned in registers, so write it out: + gv(RC_R(8)); + --vtop; + if (a[0] == 0) { + int align, size = type_size(return_type, &align); + assert(size <= 16); + if (size > 8) + o(0xa9000500); // stp x0,x1,[x8] + else if (size) + gen_store(size > 4 ? 3 : size > 2 ? 2 : size > 1, + 0, 8, 0); + + } + else if (a[0] == 16) { + int j, sz, n = arm64_hfa(return_type, &sz); + for (j = 0; j < n; j++) + o(0x3d000100 | + (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | + (a[i] / 2 - 8 + j) | + j << 10); // str ([sdq])(*),[x8,#(j * sz)] + } + } + } + + tcc_free(a1); + tcc_free(a); + tcc_free(t); +} + +static unsigned long arm64_func_va_list_stack; +static int arm64_func_va_list_gr_offs; +static int arm64_func_va_list_vr_offs; +static int arm64_func_sub_sp_offset; + +ST_FUNC void gfunc_prolog(CType *func_type) +{ + int n = 0; + int i = 0; + Sym *sym; + CType **t; + unsigned long *a; + + // Why doesn't the caller (gen_function) set func_vt? + func_vt = func_type->ref->type; + func_vc = 144; // offset of where x8 is stored + + for (sym = func_type->ref; sym; sym = sym->next) + ++n; + t = tcc_malloc(n * sizeof(*t)); + a = tcc_malloc(n * sizeof(*a)); + + for (sym = func_type->ref; sym; sym = sym->next) + t[i++] = &sym->type; + + arm64_func_va_list_stack = arm64_pcs(n - 1, t, a); + + o(0xa9b27bfd); // stp x29,x30,[sp,#-224]! + o(0xad0087e0); // stp q0,q1,[sp,#16] + o(0xad018fe2); // stp q2,q3,[sp,#48] + o(0xad0297e4); // stp q4,q5,[sp,#80] + o(0xad039fe6); // stp q6,q7,[sp,#112] + o(0xa90923e8); // stp x8,x8,[sp,#144] + o(0xa90a07e0); // stp x0,x1,[sp,#160] + o(0xa90b0fe2); // stp x2,x3,[sp,#176] + o(0xa90c17e4); // stp x4,x5,[sp,#192] + o(0xa90d1fe6); // stp x6,x7,[sp,#208] + + arm64_func_va_list_gr_offs = -64; + arm64_func_va_list_vr_offs = -128; + + for (i = 1, sym = func_type->ref->next; sym; i++, sym = sym->next) { + int off = (a[i] < 16 ? 160 + a[i] / 2 * 8 : + a[i] < 32 ? 16 + (a[i] - 16) / 2 * 16 : + 224 + ((a[i] - 32) >> 1 << 1)); + sym_push(sym->v & ~SYM_FIELD, &sym->type, + (a[i] & 1 ? VT_LLOCAL : VT_LOCAL) | lvalue_type(sym->type.t), + off); + + if (a[i] < 16) { + int align, size = type_size(&sym->type, &align); + arm64_func_va_list_gr_offs = (a[i] / 2 - 7 + + (!(a[i] & 1) && size > 8)) * 8; + } + else if (a[i] < 32) { + int hfa = arm64_hfa(&sym->type, 0); + arm64_func_va_list_vr_offs = (a[i] / 2 - 16 + + (hfa ? hfa : 1)) * 16; + } + + // HFAs of float and double need to be written differently: + if (16 <= a[i] && a[i] < 32 && (sym->type.t & VT_BTYPE) == VT_STRUCT) { + int j, sz, k = arm64_hfa(&sym->type, &sz); + if (sz < 16) + for (j = 0; j < k; j++) { + o(0x3d0003e0 | -(sz & 8) << 27 | (sz & 4) << 29 | + ((a[i] - 16) / 2 + j) | (off / sz + j) << 10); + // str ([sdq])(*),[sp,#(j * sz)] + } + } + } + + tcc_free(a); + tcc_free(t); + + o(0x910003fd); // mov x29,sp + arm64_func_sub_sp_offset = ind; + // In gfunc_epilog these will be replaced with code to decrement SP: + o(0xd503201f); // nop + o(0xd503201f); // nop + loc = 0; +} + +ST_FUNC void gen_va_start(void) +{ + int r; + --vtop; // we don't need the "arg" + gaddrof(); + r = intr(gv(RC_INT)); + + if (arm64_func_va_list_stack) { + //xx could use add (immediate) here + arm64_movimm(30, arm64_func_va_list_stack + 224); + o(0x8b1e03be); // add x30,x29,x30 + } + else + o(0x910383be); // add x30,x29,#224 + o(0xf900001e | r << 5); // str x30,[x(r)] + + if (arm64_func_va_list_gr_offs) { + if (arm64_func_va_list_stack) + o(0x910383be); // add x30,x29,#224 + o(0xf900041e | r << 5); // str x30,[x(r),#8] + } + + if (arm64_func_va_list_vr_offs) { + o(0x910243be); // add x30,x29,#144 + o(0xf900081e | r << 5); // str x30,[x(r),#16] + } + + arm64_movimm(30, arm64_func_va_list_gr_offs); + o(0xb900181e | r << 5); // str w30,[x(r),#24] + + arm64_movimm(30, arm64_func_va_list_vr_offs); + o(0xb9001c1e | r << 5); // str w30,[x(r),#28] + + --vtop; +} + +ST_FUNC void gen_va_arg(CType *t) +{ + int align, size = type_size(t, &align); + int fsize, hfa = arm64_hfa(t, &fsize); + int r0, r1; + + if (is_float(t->t)) { + hfa = 1; + fsize = size; + } + + gaddrof(); + r0 = intr(gv(RC_INT)); + r1 = get_reg(RC_INT); + vtop[0].r = r1 | lvalue_type(t->t); + r1 = intr(r1); + + if (!hfa) { + uint32_t n = size > 16 ? 8 : (size + 7) & -8; + if (size == 16 && align == 16) + tcc_error("va_arg(ap, __uint128_t) unimplemented"); + o(0xb940181e | r0 << 5); // ldr w30,[x(r0),#24] // __gr_offs + o(0x310003c0 | r1 | n << 10); // adds w(r1),w30,#(n) + o(0x540000ad); // b.le .+20 + o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // __stack + o(0x9100001e | r1 << 5 | n << 10); // add x30,x(r1),#(n) + o(0xf900001e | r0 << 5); // str x30,[x(r0)] // __stack + o(0x14000004); // b .+16 + o(0xb9001800 | r1 | r0 << 5); // str w(r1),[x(r0),#24] // __gr_offs + o(0xf9400400 | r1 | r0 << 5); // ldr x(r1),[x(r0),#8] // __gr_top + o(0x8b3ec000 | r1 | r1 << 5); // add x(r1),x(r1),w30,sxtw + if (size > 16) + o(0xf9400000 | r1 | r1 << 5); // ldr x(r1),[x(r1)] + } + else { + uint32_t rsz = hfa << 4; + uint32_t ssz = (size + 7) & -(uint32_t)8; + uint32_t b1; + if (hfa > 1 && fsize < 16) + // We may need to change the layout of this HFA + tcc_error("va_arg(ap, HFA) unimplemented"); + o(0xb9401c1e | r0 << 5); // ldr w30,[x(r0),#28] // __vr_offs + o(0x310003c0 | r1 | rsz << 10); // adds w(r1),w30,#(rsz) + b1 = ind; o(0x5400000d); // b.le lab1 + o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // __stack + if (fsize == 16) { + o(0x91003c00 | r1 | r1 << 5); // add x(r1),x(r1),#15 + o(0x927cec00 | r1 | r1 << 5); // and x(r1),x(r1),#-16 + } + o(0x9100001e | r1 << 5 | ssz << 10); // add x30,x(r1),#(ssz) + o(0xf900001e | r0 << 5); // str x30,[x(r0)] // __stack + o(0x14000004); // b .+16 + // lab1: + *(uint32_t *)(cur_text_section->data + b1) = + (0x5400000d | (ind - b1) << 3); + o(0xb9001c00 | r1 | r0 << 5); // str w(r1),[x(r0),#28] // __vr_offs + o(0xf9400800 | r1 | r0 << 5); // ldr x(r1),[x(r0),#16] // __vr_top + o(0x8b3ec000 | r1 | r1 << 5); // add x(r1),x(r1),w30,sxtw + } +} + +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *align) +{ + return 0; +} + +ST_FUNC void greturn(void) +{ + CType *t = &func_vt; + unsigned long a; + + arm64_pcs(0, &t, &a); + switch (a) { + case -1: + break; + case 0: + if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { + int align, size = type_size(&func_vt, &align); + gaddrof(); + gv(RC_R(0)); + gen_sload(0, size); + } + else + gv(RC_IRET); + break; + case 1: { + CType type = func_vt; + mk_pointer(&type); + vset(&type, VT_LOCAL | VT_LVAL, func_vc); + indir(); + vswap(); + vstore(); + break; + } + case 16: + if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { + int j, sz, n = arm64_hfa(&vtop->type, &sz); + gaddrof(); + gv(RC_R(0)); + for (j = 0; j < n; j++) + o(0x3d400000 | + (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | + j | j << 10); // ldr ([sdq])(*),[x0,#(j * sz)] + } + else + gv(RC_FRET); + break; + default: + assert(0); + } +} + +ST_FUNC void gfunc_epilog(void) +{ + if (loc) { + // Insert instructions to subtract size of stack frame from SP. + uint32_t *ptr = + (uint32_t *)(cur_text_section->data + arm64_func_sub_sp_offset); + uint64_t diff = (-loc + 15) & ~15; + if (!(diff >> 24)) { + if (diff & 0xfff) // sub sp,sp,#(diff & 0xfff) + ptr[0] = 0xd10003ff | (diff & 0xfff) << 10; + if (diff >> 12) // sub sp,sp,#(diff >> 12),lsl #12 + ptr[1] = 0xd14003ff | (diff >> 12) << 10; + } + else { + // In this case we may subtract more than necessary, + // but always less than 17/16 of what we were aiming for. + int i = 0; + int j = 0; + while (diff >> 20) { + diff = (diff + 0xffff) >> 16; + ++i; + } + while (diff >> 16) { + diff = (diff + 1) >> 1; + ++j; + } + ptr[0] = 0xd2800010 | diff << 5 | i << 21; + // mov x16,#(diff),lsl #(16 * i) + ptr[1] = 0xcb3063ff | j << 10; + // sub sp,sp,x16,lsl #(j) + } + } + o(0x910003bf); // mov sp,x29 + o(0xa8ce7bfd); // ldp x29,x30,[sp],#224 + + o(0xd65f03c0); // ret +} + +// Generate forward branch to label: +ST_FUNC int gjmp(int t) +{ + int r = ind; + o(t); + return r; +} + +// Generate branch to known address: +ST_FUNC void gjmp_addr(int a) +{ + assert(a - ind + 0x8000000 < 0x10000000); + o(0x14000000 | ((a - ind) >> 2 & 0x3ffffff)); +} + +ST_FUNC int gtst(int inv, int t) +{ + int bt = vtop->type.t & VT_BTYPE; + if (bt == VT_LDOUBLE) { + int a, b, f = fltr(gv(RC_FLOAT)); + a = get_reg(RC_INT); + vpushi(0); + vtop[0].r = a; + b = get_reg(RC_INT); + a = intr(a); + b = intr(b); + o(0x4e083c00 | a | f << 5); // mov x(a),v(f).d[0] + o(0x4e183c00 | b | f << 5); // mov x(b),v(f).d[1] + o(0xaa000400 | a | a << 5 | b << 16); // orr x(a),x(a),x(b),lsl #1 + o(0xb4000040 | a | !!inv << 24); // cbz/cbnz x(a),.+8 + --vtop; + } + else if (bt == VT_FLOAT || bt == VT_DOUBLE) { + int a = fltr(gv(RC_FLOAT)); + o(0x1e202008 | a << 5 | (bt != VT_FLOAT) << 22); // fcmp + o(0x54000040 | !!inv); // b.eq/b.ne .+8 + } + else { + int ll = (bt == VT_PTR || bt == VT_LLONG); + int a = intr(gv(RC_INT)); + o(0x34000040 | a | !!inv << 24 | ll << 31); // cbz/cbnz wA,.+8 + } + --vtop; + return gjmp(t); +} + +static void arm64_gen_opil(int op, int l) +{ + int x, a, b; + gv2(RC_INT, RC_INT); + assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST); + a = intr(vtop[-1].r); + b = intr(vtop[0].r); + vtop -= 2; + x = get_reg(RC_INT); + ++vtop; + vtop[0].r = x; + x = intr(x); + + switch (op) { + case '%': + // Use x30 for quotient: + o(0x1ac00c00 | l << 31 | 30 | a << 5 | b << 16); // sdiv + o(0x1b008000 | l << 31 | x | 30 << 5 | b << 16 | a << 10); // msub + break; + case '&': + o(0x0a000000 | l << 31 | x | a << 5 | b << 16); // and + break; + case '*': + o(0x1b007c00 | l << 31 | x | a << 5 | b << 16); // mul + break; + case '+': + o(0x0b000000 | l << 31 | x | a << 5 | b << 16); // add + break; + case '-': + o(0x4b000000 | l << 31 | x | a << 5 | b << 16); // sub + break; + case '/': + o(0x1ac00c00 | l << 31 | x | a << 5 | b << 16); // sdiv + break; + case '^': + o(0x4a000000 | l << 31 | x | a << 5 | b << 16); // eor + break; + case '|': + o(0x2a000000 | l << 31 | x | a << 5 | b << 16); // orr + break; + case TOK_EQ: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9f17e0 | x); // cset wA,eq + break; + case TOK_GE: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9fb7e0 | x); // cset wA,ge + break; + case TOK_GT: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9fd7e0 | x); // cset wA,gt + break; + case TOK_LE: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9fc7e0 | x); // cset wA,le + break; + case TOK_LT: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9fa7e0 | x); // cset wA,lt + break; + case TOK_NE: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9f07e0 | x); // cset wA,ne + break; + case TOK_SAR: + o(0x1ac02800 | l << 31 | x | a << 5 | b << 16); // asr + break; + case TOK_SHL: + o(0x1ac02000 | l << 31 | x | a << 5 | b << 16); // lsl + break; + case TOK_SHR: + o(0x1ac02400 | l << 31 | x | a << 5 | b << 16); // lsr + break; + case TOK_UDIV: + case TOK_PDIV: + o(0x1ac00800 | l << 31 | x | a << 5 | b << 16); // udiv + break; + case TOK_UGE: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9f37e0 | x); // cset wA,cs + break; + case TOK_UGT: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9f97e0 | x); // cset wA,hi + break; + case TOK_ULT: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9f27e0 | x); // cset wA,cc + break; + case TOK_ULE: + o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp + o(0x1a9f87e0 | x); // cset wA,ls + break; + case TOK_UMOD: + // Use x30 for quotient: + o(0x1ac00800 | l << 31 | 30 | a << 5 | b << 16); // udiv + o(0x1b008000 | l << 31 | x | 30 << 5 | b << 16 | a << 10); // msub + break; + default: + assert(0); + } +} + +ST_FUNC void gen_opi(int op) +{ + arm64_gen_opil(op, 0); +} + +ST_FUNC void gen_opl(int op) +{ + arm64_gen_opil(op, 1); +} + +ST_FUNC void gen_opf(int op) +{ + int x, a, b, dbl; + + if (vtop[0].type.t == VT_LDOUBLE) { + CType type = vtop[0].type; + int func = 0; + int cond = -1; + switch (op) { + case '*': func = TOK___multf3; break; + case '+': func = TOK___addtf3; break; + case '-': func = TOK___subtf3; break; + case '/': func = TOK___divtf3; break; + case TOK_EQ: func = TOK___eqtf2; cond = 1; break; + case TOK_NE: func = TOK___netf2; cond = 0; break; + case TOK_LT: func = TOK___lttf2; cond = 10; break; + case TOK_GE: func = TOK___getf2; cond = 11; break; + case TOK_LE: func = TOK___letf2; cond = 12; break; + case TOK_GT: func = TOK___gttf2; cond = 13; break; + default: assert(0); break; + } + vpush_global_sym(&func_old_type, func); + vrott(3); + gfunc_call(2); + vpushi(0); + vtop->r = cond < 0 ? REG_FRET : REG_IRET; + if (cond < 0) + vtop->type = type; + else { + o(0x7100001f); // cmp w0,#0 + o(0x1a9f07e0 | cond << 12); // cset w0,(cond) + } + return; + } + + dbl = vtop[0].type.t != VT_FLOAT; + gv2(RC_FLOAT, RC_FLOAT); + assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST); + a = fltr(vtop[-1].r); + b = fltr(vtop[0].r); + vtop -= 2; + switch (op) { + case TOK_EQ: case TOK_NE: + case TOK_LT: case TOK_GE: case TOK_LE: case TOK_GT: + x = get_reg(RC_INT); + ++vtop; + vtop[0].r = x; + x = intr(x); + break; + default: + x = get_reg(RC_FLOAT); + ++vtop; + vtop[0].r = x; + x = fltr(x); + break; + } + + switch (op) { + case '*': + o(0x1e200800 | dbl << 22 | x | a << 5 | b << 16); // fmul + break; + case '+': + o(0x1e202800 | dbl << 22 | x | a << 5 | b << 16); // fadd + break; + case '-': + o(0x1e203800 | dbl << 22 | x | a << 5 | b << 16); // fsub + break; + case '/': + o(0x1e201800 | dbl << 22 | x | a << 5 | b << 16); // fdiv + break; + case TOK_EQ: + o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp + o(0x1a9f17e0 | x); // cset w(x),eq + break; + case TOK_GE: + o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp + o(0x1a9fb7e0 | x); // cset w(x),ge + break; + case TOK_GT: + o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp + o(0x1a9fd7e0 | x); // cset w(x),gt + break; + case TOK_LE: + o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp + o(0x1a9f87e0 | x); // cset w(x),ls + break; + case TOK_LT: + o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp + o(0x1a9f57e0 | x); // cset w(x),mi + break; + case TOK_NE: + o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp + o(0x1a9f07e0 | x); // cset w(x),ne + break; + default: + assert(0); + } +} + +// Generate sign extension from 32 to 64 bits: +ST_FUNC void gen_cvt_sxtw(void) +{ + int r = intr(gv(RC_INT)); + o(0x93407c00 | r | r << 5); // sxtw x(r),w(r) +} + +ST_FUNC void gen_cvt_itof(int t) +{ + if (t == VT_LDOUBLE) { + int f = vtop->type.t; + int func = (f & VT_BTYPE) == VT_LLONG ? + (f & VT_UNSIGNED ? TOK___floatunditf : TOK___floatditf) : + (f & VT_UNSIGNED ? TOK___floatunsitf : TOK___floatsitf); + vpush_global_sym(&func_old_type, func); + vrott(2); + gfunc_call(1); + vpushi(0); + vtop->type.t = t; + vtop->r = REG_FRET; + return; + } + else { + int d, n = intr(gv(RC_INT)); + int s = !(vtop->type.t & VT_UNSIGNED); + int l = ((vtop->type.t & VT_BTYPE) == VT_LLONG); + --vtop; + d = get_reg(RC_FLOAT); + ++vtop; + vtop[0].r = d; + o(0x1e220000 | !s << 16 | (t != VT_FLOAT) << 22 | fltr(d) | + l << 31 | n << 5); // [us]cvtf [sd](d),[wx](n) + } +} + +ST_FUNC void gen_cvt_ftoi(int t) +{ + if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + int func = (t & VT_BTYPE) == VT_LLONG ? + (t & VT_UNSIGNED ? TOK___fixunstfdi : TOK___fixtfdi) : + (t & VT_UNSIGNED ? TOK___fixunstfsi : TOK___fixtfsi); + vpush_global_sym(&func_old_type, func); + vrott(2); + gfunc_call(1); + vpushi(0); + vtop->type.t = t; + vtop->r = REG_IRET; + return; + } + else { + int d, n = fltr(gv(RC_FLOAT)); + int l = ((vtop->type.t & VT_BTYPE) != VT_FLOAT); + --vtop; + d = get_reg(RC_INT); + ++vtop; + vtop[0].r = d; + o(0x1e380000 | + !!(t & VT_UNSIGNED) << 16 | + ((t & VT_BTYPE) == VT_LLONG) << 31 | intr(d) | + l << 22 | n << 5); // fcvtz[su] [wx](d),[sd](n) + } +} + +ST_FUNC void gen_cvt_ftof(int t) +{ + int f = vtop[0].type.t; + assert(t == VT_FLOAT || t == VT_DOUBLE || t == VT_LDOUBLE); + assert(f == VT_FLOAT || f == VT_DOUBLE || f == VT_LDOUBLE); + if (t == f) + return; + + if (t == VT_LDOUBLE || f == VT_LDOUBLE) { + int func = (t == VT_LDOUBLE) ? + (f == VT_FLOAT ? TOK___extendsftf2 : TOK___extenddftf2) : + (t == VT_FLOAT ? TOK___trunctfsf2 : TOK___trunctfdf2); + vpush_global_sym(&func_old_type, func); + vrott(2); + gfunc_call(1); + vpushi(0); + vtop->type.t = t; + vtop->r = REG_FRET; + } + else { + int x, a; + gv(RC_FLOAT); + assert(vtop[0].r < VT_CONST); + a = fltr(vtop[0].r); + --vtop; + x = get_reg(RC_FLOAT); + ++vtop; + vtop[0].r = x; + x = fltr(x); + + if (f == VT_FLOAT) + o(0x1e22c000 | x | a << 5); // fcvt d(x),s(a) + else + o(0x1e624000 | x | a << 5); // fcvt s(x),d(a) + } +} + +ST_FUNC void ggoto(void) +{ + arm64_gen_bl_or_b(1); + --vtop; +} + +ST_FUNC void gen_vla_sp_save(int addr) { + tcc_error("variable length arrays unsupported for this target"); +} + +ST_FUNC void gen_vla_sp_restore(int addr) { + tcc_error("variable length arrays unsupported for this target"); +} + +ST_FUNC void gen_vla_alloc(CType *type, int align) { + tcc_error("variable length arrays unsupported for this target"); +} + +/* end of A64 code generator */ +/*************************************************************/ +#endif +/*************************************************************/ diff --git a/configure b/configure index f6df9ada..c75d4601 100755 --- a/configure +++ b/configure @@ -99,6 +99,9 @@ classify_cpu () esac cpu="armv4l" ;; + aarch64) + cpu="aarch64" + ;; alpha) cpu="alpha" ;; @@ -435,6 +438,9 @@ elif test "$cpu" = "armv4l" ; then echo "ARCH=arm" >> config.mak echo "#define HOST_ARM 1" >> $TMPH echo "#define TCC_ARM_VERSION $cpuver" >> $TMPH +elif test "$cpu" = "aarch64" ; then + echo "ARCH=arm64" >> config.mak + echo "#define HOST_ARM64 1" >> $TMPH elif test "$cpu" = "powerpc" ; then echo "ARCH=ppc" >> config.mak echo "#define HOST_PPC 1" >> $TMPH diff --git a/conftest.c b/conftest.c index ddb7a20b..fa07a1b5 100644 --- a/conftest.c +++ b/conftest.c @@ -7,6 +7,8 @@ # define TRIPLET_ARCH "x86_64" #elif defined(__arm__) # define TRIPLET_ARCH "arm" +#elif defined(__aarch64__) +# define TRIPLET_ARCH "aarch64" #else # define TRIPLET_ARCH "unknown" #endif diff --git a/elf.h b/elf.h index a3597f99..a40c736c 100644 --- a/elf.h +++ b/elf.h @@ -2336,6 +2336,117 @@ typedef Elf32_Addr Elf32_Conflict; #define R_AARCH64_NONE 0 /* No relocation. */ #define R_AARCH64_ABS64 257 /* Direct 64 bit. */ #define R_AARCH64_ABS32 258 /* Direct 32 bit. */ +#define R_AARCH64_ABS16 259 /* Direct 16-bit. */ +#define R_AARCH64_PREL64 260 /* PC-relative 64-bit. */ +#define R_AARCH64_PREL32 261 /* PC-relative 32-bit. */ +#define R_AARCH64_PREL16 262 /* PC-relative 16-bit. */ +#define R_AARCH64_MOVW_UABS_G0 263 /* Dir. MOVZ imm. from bits 15:0. */ +#define R_AARCH64_MOVW_UABS_G0_NC 264 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_UABS_G1 265 /* Dir. MOVZ imm. from bits 31:16. */ +#define R_AARCH64_MOVW_UABS_G1_NC 266 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_UABS_G2 267 /* Dir. MOVZ imm. from bits 47:32. */ +#define R_AARCH64_MOVW_UABS_G2_NC 268 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_UABS_G3 269 /* Dir. MOV{K,Z} imm. from 63:48. */ +#define R_AARCH64_MOVW_SABS_G0 270 /* Dir. MOV{N,Z} imm. from 15:0. */ +#define R_AARCH64_MOVW_SABS_G1 271 /* Dir. MOV{N,Z} imm. from 31:16. */ +#define R_AARCH64_MOVW_SABS_G2 272 /* Dir. MOV{N,Z} imm. from 47:32. */ +#define R_AARCH64_LD_PREL_LO19 273 /* PC-rel. LD imm. from bits 20:2. */ +#define R_AARCH64_ADR_PREL_LO21 274 /* PC-rel. ADR imm. from bits 20:0. */ +#define R_AARCH64_ADR_PREL_PG_HI21 275 /* Page-rel. ADRP imm. from 32:12. */ +#define R_AARCH64_ADR_PREL_PG_HI21_NC 276 /* Likewise; no overflow check. */ +#define R_AARCH64_ADD_ABS_LO12_NC 277 /* Dir. ADD imm. from bits 11:0. */ +#define R_AARCH64_LDST8_ABS_LO12_NC 278 /* Likewise for LD/ST; no check. */ +#define R_AARCH64_TSTBR14 279 /* PC-rel. TBZ/TBNZ imm. from 15:2. */ +#define R_AARCH64_CONDBR19 280 /* PC-rel. cond. br. imm. from 20:2. */ +#define R_AARCH64_JUMP26 282 /* PC-rel. B imm. from bits 27:2. */ +#define R_AARCH64_CALL26 283 /* Likewise for CALL. */ +#define R_AARCH64_LDST16_ABS_LO12_NC 284 /* Dir. ADD imm. from bits 11:1. */ +#define R_AARCH64_LDST32_ABS_LO12_NC 285 /* Likewise for bits 11:2. */ +#define R_AARCH64_LDST64_ABS_LO12_NC 286 /* Likewise for bits 11:3. */ +#define R_AARCH64_MOVW_PREL_G0 287 /* PC-rel. MOV{N,Z} imm. from 15:0. */ +#define R_AARCH64_MOVW_PREL_G0_NC 288 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_PREL_G1 289 /* PC-rel. MOV{N,Z} imm. from 31:16. */ +#define R_AARCH64_MOVW_PREL_G1_NC 290 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_PREL_G2 291 /* PC-rel. MOV{N,Z} imm. from 47:32. */ +#define R_AARCH64_MOVW_PREL_G2_NC 292 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_PREL_G3 293 /* PC-rel. MOV{N,Z} imm. from 63:48. */ +#define R_AARCH64_LDST128_ABS_LO12_NC 299 /* Dir. ADD imm. from bits 11:4. */ +#define R_AARCH64_MOVW_GOTOFF_G0 300 /* GOT-rel. off. MOV{N,Z} imm. 15:0. */ +#define R_AARCH64_MOVW_GOTOFF_G0_NC 301 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_GOTOFF_G1 302 /* GOT-rel. o. MOV{N,Z} imm. 31:16. */ +#define R_AARCH64_MOVW_GOTOFF_G1_NC 303 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_GOTOFF_G2 304 /* GOT-rel. o. MOV{N,Z} imm. 47:32. */ +#define R_AARCH64_MOVW_GOTOFF_G2_NC 305 /* Likewise for MOVK; no check. */ +#define R_AARCH64_MOVW_GOTOFF_G3 306 /* GOT-rel. o. MOV{N,Z} imm. 63:48. */ +#define R_AARCH64_GOTREL64 307 /* GOT-relative 64-bit. */ +#define R_AARCH64_GOTREL32 308 /* GOT-relative 32-bit. */ +#define R_AARCH64_GOT_LD_PREL19 309 /* PC-rel. GOT off. load imm. 20:2. */ +#define R_AARCH64_LD64_GOTOFF_LO15 310 /* GOT-rel. off. LD/ST imm. 14:3. */ +#define R_AARCH64_ADR_GOT_PAGE 311 /* P-page-rel. GOT off. ADRP 32:12. */ +#define R_AARCH64_LD64_GOT_LO12_NC 312 /* Dir. GOT off. LD/ST imm. 11:3. */ +#define R_AARCH64_LD64_GOTPAGE_LO15 313 /* GOT-page-rel. GOT off. LD/ST 14:3 */ +#define R_AARCH64_TLSGD_ADR_PREL21 512 /* PC-relative ADR imm. 20:0. */ +#define R_AARCH64_TLSGD_ADR_PAGE21 513 /* page-rel. ADRP imm. 32:12. */ +#define R_AARCH64_TLSGD_ADD_LO12_NC 514 /* direct ADD imm. from 11:0. */ +#define R_AARCH64_TLSGD_MOVW_G1 515 /* GOT-rel. MOV{N,Z} 31:16. */ +#define R_AARCH64_TLSGD_MOVW_G0_NC 516 /* GOT-rel. MOVK imm. 15:0. */ +#define R_AARCH64_TLSLD_ADR_PREL21 517 /* Like 512; local dynamic model. */ +#define R_AARCH64_TLSLD_ADR_PAGE21 518 /* Like 513; local dynamic model. */ +#define R_AARCH64_TLSLD_ADD_LO12_NC 519 /* Like 514; local dynamic model. */ +#define R_AARCH64_TLSLD_MOVW_G1 520 /* Like 515; local dynamic model. */ +#define R_AARCH64_TLSLD_MOVW_G0_NC 521 /* Like 516; local dynamic model. */ +#define R_AARCH64_TLSLD_LD_PREL19 522 /* TLS PC-rel. load imm. 20:2. */ +#define R_AARCH64_TLSLD_MOVW_DTPREL_G2 523 /* TLS DTP-rel. MOV{N,Z} 47:32. */ +#define R_AARCH64_TLSLD_MOVW_DTPREL_G1 524 /* TLS DTP-rel. MOV{N,Z} 31:16. */ +#define R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC 525 /* Likewise; MOVK; no check. */ +#define R_AARCH64_TLSLD_MOVW_DTPREL_G0 526 /* TLS DTP-rel. MOV{N,Z} 15:0. */ +#define R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC 527 /* Likewise; MOVK; no check. */ +#define R_AARCH64_TLSLD_ADD_DTPREL_HI12 528 /* DTP-rel. ADD imm. from 23:12. */ +#define R_AARCH64_TLSLD_ADD_DTPREL_LO12 529 /* DTP-rel. ADD imm. from 11:0. */ +#define R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC 530 /* Likewise; no ovfl. check. */ +#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12 531 /* DTP-rel. LD/ST imm. 11:0. */ +#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC 532 /* Likewise; no check. */ +#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12 533 /* DTP-rel. LD/ST imm. 11:1. */ +#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC 534 /* Likewise; no check. */ +#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12 535 /* DTP-rel. LD/ST imm. 11:2. */ +#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC 536 /* Likewise; no check. */ +#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12 537 /* DTP-rel. LD/ST imm. 11:3. */ +#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC 538 /* Likewise; no check. */ +#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 539 /* GOT-rel. MOV{N,Z} 31:16. */ +#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC 540 /* GOT-rel. MOVK 15:0. */ +#define R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 541 /* Page-rel. ADRP 32:12. */ +#define R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC 542 /* Direct LD off. 11:3. */ +#define R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 543 /* PC-rel. load imm. 20:2. */ +#define R_AARCH64_TLSLE_MOVW_TPREL_G2 544 /* TLS TP-rel. MOV{N,Z} 47:32. */ +#define R_AARCH64_TLSLE_MOVW_TPREL_G1 545 /* TLS TP-rel. MOV{N,Z} 31:16. */ +#define R_AARCH64_TLSLE_MOVW_TPREL_G1_NC 546 /* Likewise; MOVK; no check. */ +#define R_AARCH64_TLSLE_MOVW_TPREL_G0 547 /* TLS TP-rel. MOV{N,Z} 15:0. */ +#define R_AARCH64_TLSLE_MOVW_TPREL_G0_NC 548 /* Likewise; MOVK; no check. */ +#define R_AARCH64_TLSLE_ADD_TPREL_HI12 549 /* TP-rel. ADD imm. 23:12. */ +#define R_AARCH64_TLSLE_ADD_TPREL_LO12 550 /* TP-rel. ADD imm. 11:0. */ +#define R_AARCH64_TLSLE_ADD_TPREL_LO12_NC 551 /* Likewise; no ovfl. check. */ +#define R_AARCH64_TLSLE_LDST8_TPREL_LO12 552 /* TP-rel. LD/ST off. 11:0. */ +#define R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC 553 /* Likewise; no ovfl. check. */ +#define R_AARCH64_TLSLE_LDST16_TPREL_LO12 554 /* TP-rel. LD/ST off. 11:1. */ +#define R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC 555 /* Likewise; no check. */ +#define R_AARCH64_TLSLE_LDST32_TPREL_LO12 556 /* TP-rel. LD/ST off. 11:2. */ +#define R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC 557 /* Likewise; no check. */ +#define R_AARCH64_TLSLE_LDST64_TPREL_LO12 558 /* TP-rel. LD/ST off. 11:3. */ +#define R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC 559 /* Likewise; no check. */ +#define R_AARCH64_TLSDESC_LD_PREL19 560 /* PC-rel. load immediate 20:2. */ +#define R_AARCH64_TLSDESC_ADR_PREL21 561 /* PC-rel. ADR immediate 20:0. */ +#define R_AARCH64_TLSDESC_ADR_PAGE21 562 /* Page-rel. ADRP imm. 32:12. */ +#define R_AARCH64_TLSDESC_LD64_LO12 563 /* Direct LD off. from 11:3. */ +#define R_AARCH64_TLSDESC_ADD_LO12 564 /* Direct ADD imm. from 11:0. */ +#define R_AARCH64_TLSDESC_OFF_G1 565 /* GOT-rel. MOV{N,Z} imm. 31:16. */ +#define R_AARCH64_TLSDESC_OFF_G0_NC 566 /* GOT-rel. MOVK imm. 15:0; no ck. */ +#define R_AARCH64_TLSDESC_LDR 567 /* Relax LDR. */ +#define R_AARCH64_TLSDESC_ADD 568 /* Relax ADD. */ +#define R_AARCH64_TLSDESC_CALL 569 /* Relax BLR. */ +#define R_AARCH64_TLSLE_LDST128_TPREL_LO12 570 /* TP-rel. LD/ST off. 11:4. */ +#define R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC 571 /* Likewise; no check. */ +#define R_AARCH64_TLSLD_LDST128_DTPREL_LO12 572 /* DTP-rel. LD/ST imm. 11:4. */ +#define R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC 573 /* Likewise; no check. */ #define R_AARCH64_COPY 1024 /* Copy symbol at runtime. */ #define R_AARCH64_GLOB_DAT 1025 /* Create GOT entry. */ #define R_AARCH64_JUMP_SLOT 1026 /* Create PLT entry. */ @@ -2344,6 +2455,7 @@ typedef Elf32_Addr Elf32_Conflict; #define R_AARCH64_TLS_DTPREL64 1029 /* Module-relative offset, 64 bit. */ #define R_AARCH64_TLS_TPREL64 1030 /* TP-relative offset, 64 bit. */ #define R_AARCH64_TLSDESC 1031 /* TLS Descriptor. */ +#define R_AARCH64_IRELATIVE 1032 /* STT_GNU_IFUNC relocation. */ /* ARM relocs. */ diff --git a/include/stdarg.h b/include/stdarg.h index 5aa9d57b..06d592b9 100644 --- a/include/stdarg.h +++ b/include/stdarg.h @@ -46,6 +46,19 @@ typedef char *va_list; #define va_copy(dest, src) (dest) = (src) #define va_end(ap) +#elif defined(__aarch64__) +typedef struct { + void *__stack; + void *__gr_top; + void *__vr_top; + int __gr_offs; + int __vr_offs; +} va_list; +#define va_start(ap, last) __va_start(ap, last) +#define va_arg(ap, type) __va_arg(ap, type) +#define va_end(ap) +#define va_copy(dest, src) ((dest) = (src)) + #else /* __i386__ */ typedef char *va_list; /* only correct for i386 */ diff --git a/lib/Makefile b/lib/Makefile index 37d4711d..6e192b93 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,6 +28,11 @@ ifndef TARGET # native library ifeq ($(ARCH),arm) TARGET = arm XCC = $(CC) + else + ifeq ($(ARCH),arm64) + TARGET = arm64 + else + endif endif endif endif @@ -49,6 +54,7 @@ X86_64_O = libtcc1.o alloca86_64.o ARM_O = libtcc1.o armeabi.o alloca-arm.o WIN32_O = $(I386_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o WIN64_O = $(X86_64_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o +ARM64_O = lib-arm64.o # build TCC runtime library to contain PIC code, so it can be linked # into shared libraries @@ -86,6 +92,11 @@ ifeq "$(TARGET)" "arm" OBJ = $(addprefix $(DIR)/,$(ARM_O)) TGT = -DTCC_TARGET_ARM XCC ?= $(TCC) -B$(TOP) +else +ifeq "$(TARGET)" "arm64" + OBJ = $(addprefix $(DIR)/,$(ARM64_O)) + TGT = -DTCC_TARGET_ARM64 + XCC ?= $(TCC) -B$(TOP) else $(error libtcc1.a not supported on target '$(TARGET)') endif @@ -93,6 +104,7 @@ endif endif endif endif +endif XFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAGS) $(TGT) diff --git a/lib/lib-arm64.c b/lib/lib-arm64.c new file mode 100644 index 00000000..fd73506e --- /dev/null +++ b/lib/lib-arm64.c @@ -0,0 +1,652 @@ +/* + * TCC runtime library for arm64. + * + * Copyright (c) 2015 Edmund Grimley Evans + * + * Copying and distribution of this file, with or without modification, + * are permitted in any medium without royalty provided the copyright + * notice and this notice are preserved. This file is offered as-is, + * without any warranty. + */ + +#include +#include + +void __clear_cache(char *beg, char *end) +{ +#warning __clear_cache not yet implemented +} + +typedef struct { + uint64_t x0, x1; +} u128_t; + +static long double f3_zero(int sgn) +{ + long double f; + u128_t x = { 0, (uint64_t)sgn << 63 }; + memcpy(&f, &x, 16); + return f; +} + +static long double f3_infinity(int sgn) +{ + long double f; + u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 }; + memcpy(&f, &x, 16); + return f; +} + +static long double f3_NaN(void) +{ + long double f; +#if 0 + // ARM's default NaN usually has just the top fraction bit set: + u128_t x = { 0, 0x7fff800000000000 }; +#else + // GCC's library sets all fraction bits: + u128_t x = { -1, 0x7fffffffffffffff }; +#endif + memcpy(&f, &x, 16); + return f; +} + +static int fp3_convert_NaN(long double *f, int sgn, u128_t mnt) +{ + u128_t x = { mnt.x0, + mnt.x1 | 0x7fff800000000000 | (uint64_t)sgn << 63 }; + memcpy(f, &x, 16); + return 1; +} + +static int fp3_detect_NaNs(long double *f, + int a_sgn, int a_exp, u128_t a, + int b_sgn, int b_exp, u128_t b) +{ + // Detect signalling NaNs: + if (a_exp == 32767 && (a.x0 | a.x1 << 16) && !(a.x1 >> 47 & 1)) + return fp3_convert_NaN(f, a_sgn, a); + if (b_exp == 32767 && (b.x0 | b.x1 << 16) && !(b.x1 >> 47 & 1)) + return fp3_convert_NaN(f, b_sgn, b); + + // Detect quiet NaNs: + if (a_exp == 32767 && (a.x0 | a.x1 << 16)) + return fp3_convert_NaN(f, a_sgn, a); + if (b_exp == 32767 && (b.x0 | b.x1 << 16)) + return fp3_convert_NaN(f, b_sgn, b); + + return 0; +} + +static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f) +{ + u128_t x; + memcpy(&x, &f, 16); + *sgn = x.x1 >> 63; + *exp = x.x1 >> 48 & 32767; + x.x1 = x.x1 << 16 >> 16; + if (*exp) + x.x1 |= (uint64_t)1 << 48; + else + *exp = 1; + *mnt = x; +} + +static u128_t f3_normalise(int32_t *exp, u128_t mnt) +{ + int sh; + if (!(mnt.x0 | mnt.x1)) + return mnt; + if (!mnt.x1) { + mnt.x1 = mnt.x0; + mnt.x0 = 0; + *exp -= 64; + } + for (sh = 32; sh; sh >>= 1) { + if (!(mnt.x1 >> (64 - sh))) { + mnt.x1 = mnt.x1 << sh | mnt.x0 >> (64 - sh); + mnt.x0 = mnt.x0 << sh; + *exp -= sh; + } + } + return mnt; +} + +static u128_t f3_sticky_shift(int32_t sh, u128_t x) +{ + if (sh >= 128) { + x.x0 = !!(x.x0 | x.x1); + x.x1 = 0; + return x; + } + if (sh >= 64) { + x.x0 = x.x1 | !!x.x0; + x.x1 = 0; + sh -= 64; + } + if (sh > 0) { + x.x0 = x.x0 >> sh | x.x1 << (64 - sh) | !!(x.x0 << (64 - sh)); + x.x1 = x.x1 >> sh; + } + return x; +} + +static long double f3_round(int sgn, int32_t exp, u128_t x) +{ + long double f; + int error; + + if (exp > 0) { + x = f3_sticky_shift(13, x); + } + else { + x = f3_sticky_shift(14 - exp, x); + exp = 0; + } + + error = x.x0 & 3; + x.x0 = x.x0 >> 2 | x.x1 << 62; + x.x1 = x.x1 >> 2; + + if (error == 3 || ((error == 2) & (x.x0 & 1))) { + if (!++x.x0) { + ++x.x1; + if (x.x1 == (uint64_t)1 << 48) + exp = 1; + else if (x.x1 == (uint64_t)1 << 49) { + ++exp; + x.x0 = x.x0 >> 1 | x.x1 << 63; + x.x1 = x.x1 >> 1; + } + } + } + + if (exp >= 32767) + return f3_infinity(sgn); + + x.x1 = x.x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63; + memcpy(&f, &x, 16); + return f; +} + +static long double f3_add(long double fa, long double fb, int neg) +{ + u128_t a, b, x; + int32_t a_exp, b_exp, x_exp; + int a_sgn, b_sgn, x_sgn; + long double fx; + + f3_unpack(&a_sgn, &a_exp, &a, fa); + f3_unpack(&b_sgn, &b_exp, &b, fb); + + if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b)) + return fx; + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn) + return f3_NaN(); + if (a_exp == 32767) + return f3_infinity(a_sgn); + if (b_exp == 32767) + return f3_infinity(b_sgn); + if (!(a.x0 | a.x1 | b.x0 | b.x1)) + return f3_zero(a_sgn & b_sgn); + + a.x1 = a.x1 << 3 | a.x0 >> 61; + a.x0 = a.x0 << 3; + b.x1 = b.x1 << 3 | b.x0 >> 61; + b.x0 = b.x0 << 3; + + if (a_exp <= b_exp) { + a = f3_sticky_shift(b_exp - a_exp, a); + a_exp = b_exp; + } + else { + b = f3_sticky_shift(a_exp - b_exp, b); + b_exp = a_exp; + } + + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x.x0 = a.x0 + b.x0; + x.x1 = a.x1 + b.x1 + (x.x0 < a.x0); + } + else { + x.x0 = a.x0 - b.x0; + x.x1 = a.x1 - b.x1 - (x.x0 > a.x0); + if (x.x1 >> 63) { + x_sgn ^= 1; + x.x0 = -x.x0; + x.x1 = -x.x1 - !!x.x0; + } + } + + if (!(x.x0 | x.x1)) + return f3_zero(0); + + x = f3_normalise(&x_exp, x); + + return f3_round(x_sgn, x_exp + 12, x); +} + +long double __addtf3(long double a, long double b) +{ + return f3_add(a, b, 0); +} + +long double __subtf3(long double a, long double b) +{ + return f3_add(a, b, 1); +} + +long double __multf3(long double fa, long double fb) +{ + u128_t a, b, x; + int32_t a_exp, b_exp, x_exp; + int a_sgn, b_sgn, x_sgn; + long double fx; + + f3_unpack(&a_sgn, &a_exp, &a, fa); + f3_unpack(&b_sgn, &b_exp, &b, fb); + + if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b)) + return fx; + + // Handle infinities and zeroes: + if ((a_exp == 32767 && !(b.x0 | b.x1)) || + (b_exp == 32767 && !(a.x0 | a.x1))) + return f3_NaN(); + if (a_exp == 32767 || b_exp == 32767) + return f3_infinity(a_sgn ^ b_sgn); + if (!(a.x0 | a.x1) || !(b.x0 | b.x1)) + return f3_zero(a_sgn ^ b_sgn); + + a = f3_normalise(&a_exp, a); + b = f3_normalise(&b_exp, b); + + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 16352; + + { + // Convert to base (1 << 30), discarding bottom 6 bits, which are zero, + // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0): + uint64_t a0 = a.x0 << 28 >> 34; + uint64_t b0 = b.x0 << 28 >> 34; + uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34; + uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34; + uint64_t a2 = a.x1 << 32 >> 34; + uint64_t b2 = b.x1 << 32 >> 34; + uint64_t a3 = a.x1 >> 32; + uint64_t b3 = b.x1 >> 32; + // Use 16 small multiplications and additions that do not overflow: + uint64_t x0 = a0 * b0; + uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0; + uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0; + uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; + uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1; + uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2; + uint64_t x6 = (x5 >> 30) + a3 * b3; + // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...). + // Take the top 128 bits, setting bottom bit if any lower bits were set: + uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 | + !!(x3 << 38 | (x2 | x1 | x0) << 34)); + uint64_t y1 = x6; + // Top bit may be zero. Renormalise: + if (!(y1 >> 63)) { + y1 = y1 << 1 | y0 >> 63; + y0 = y0 << 1; + --x_exp; + } + x.x0 = y0; + x.x1 = y1; + } + + return f3_round(x_sgn, x_exp, x); +} + +long double __divtf3(long double fa, long double fb) +{ + u128_t a, b, x; + int32_t a_exp, b_exp, x_exp; + int a_sgn, b_sgn, x_sgn, i; + long double fx; + + f3_unpack(&a_sgn, &a_exp, &a, fa); + f3_unpack(&b_sgn, &b_exp, &b, fb); + + if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b)) + return fx; + + // Handle infinities and zeroes: + if ((a_exp == 32767 && b_exp == 32767) || + (!(a.x0 | a.x1) && !(b.x0 | b.x1))) + return f3_NaN(); + if (a_exp == 32767 || !(b.x0 | b.x1)) + return f3_infinity(a_sgn ^ b_sgn); + if (!(a.x0 | a.x1) || b_exp == 32767) + return f3_zero(a_sgn ^ b_sgn); + + a = f3_normalise(&a_exp, a); + b = f3_normalise(&b_exp, b); + + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 16395; + + a.x0 = a.x0 >> 1 | a.x1 << 63; + a.x1 = a.x1 >> 1; + b.x0 = b.x0 >> 1 | b.x1 << 63; + b.x1 = b.x1 >> 1; + x.x0 = 0; + x.x1 = 0; + for (i = 0; i < 116; i++) { + x.x1 = x.x1 << 1 | x.x0 >> 63; + x.x0 = x.x0 << 1; + if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) { + a.x1 = a.x1 - b.x1 - (a.x0 < b.x0); + a.x0 = a.x0 - b.x0; + x.x0 |= 1; + } + a.x1 = a.x1 << 1 | a.x0 >> 63; + a.x0 = a.x0 << 1; + } + x.x0 |= !!(a.x0 | a.x1); + + x = f3_normalise(&x_exp, x); + + return f3_round(x_sgn, x_exp, x); +} + +long double __extendsftf2(float f) +{ + long double fx; + u128_t x; + uint32_t a; + uint64_t aa; + memcpy(&a, &f, 4); + aa = a; + x.x0 = 0; + if (!(a << 1)) + x.x1 = aa << 32; + else if (a << 1 >> 24 == 255) + x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 | + (uint64_t)!!(a << 9) << 47); + else + x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 | + aa << 41 >> 16); + memcpy(&fx, &x, 16); + return fx; +} + +long double __extenddftf2(double f) +{ + long double fx; + u128_t x; + uint64_t a; + memcpy(&a, &f, 8); + x.x0 = a << 60; + if (!(a << 1)) + x.x1 = a; + else if (a << 1 >> 53 == 2047) + x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 | + (uint64_t)!!(a << 12) << 47); + else + x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16; + memcpy(&fx, &x, 16); + return fx; +} + +float __trunctfsf2(long double f) +{ + u128_t mnt; + int32_t exp; + int sgn; + uint32_t x; + float fx; + + f3_unpack(&sgn, &exp, &mnt, f); + + if (exp == 32767 && (mnt.x0 | mnt.x1 << 16)) + x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff); + else if (exp > 16510) + x = 0x7f800000 | (uint32_t)sgn << 31; + else if (exp < 16233) + x = (uint32_t)sgn << 31; + else { + exp -= 16257; + x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41); + if (exp < 0) { + x = x >> -exp | !!(x << (32 + exp)); + exp = 0; + } + if ((x & 3) == 3 || (x & 7) == 6) + x += 4; + x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31; + } + memcpy(&fx, &x, 4); + return fx; +} + +double __trunctfdf2(long double f) +{ + u128_t mnt; + int32_t exp; + int sgn; + uint64_t x; + double fx; + + f3_unpack(&sgn, &exp, &mnt, f); + + if (exp == 32767 && (mnt.x0 | mnt.x1 << 16)) + x = (0x7ff8000000000000 | (uint64_t)sgn << 63 | + mnt.x1 << 16 >> 12 | mnt.x0 >> 60); + else if (exp > 17406) + x = 0x7ff0000000000000 | (uint64_t)sgn << 63; + else if (exp < 15308) + x = (uint64_t)sgn << 63; + else { + exp -= 15361; + x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6); + if (exp < 0) { + x = x >> -exp | !!(x << (64 + exp)); + exp = 0; + } + if ((x & 3) == 3 || (x & 7) == 6) + x += 4; + x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63; + } + memcpy(&fx, &x, 8); + return fx; +} + +int32_t __fixtfsi(long double fa) +{ + u128_t a; + int32_t a_exp; + int a_sgn; + int32_t x; + f3_unpack(&a_sgn, &a_exp, &a, fa); + if (a_exp < 16369) + return 0; + if (a_exp > 16413) + return a_sgn ? -0x80000000 : 0x7fffffff; + x = a.x1 >> (16431 - a_exp); + return a_sgn ? -x : x; +} + +int64_t __fixtfdi(long double fa) +{ + u128_t a; + int32_t a_exp; + int a_sgn; + int64_t x; + f3_unpack(&a_sgn, &a_exp, &a, fa); + if (a_exp < 16383) + return 0; + if (a_exp > 16445) + return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff; + x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp); + return a_sgn ? -x : x; +} + +uint32_t __fixunstfsi(long double fa) +{ + u128_t a; + int32_t a_exp; + int a_sgn; + f3_unpack(&a_sgn, &a_exp, &a, fa); + if (a_sgn || a_exp < 16369) + return 0; + if (a_exp > 16414) + return -1; + return a.x1 >> (16431 - a_exp); +} + +uint64_t __fixunstfdi(long double fa) +{ + u128_t a; + int32_t a_exp; + int a_sgn; + f3_unpack(&a_sgn, &a_exp, &a, fa); + if (a_sgn || a_exp < 16383) + return 0; + if (a_exp > 16446) + return -1; + return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp); +} + +long double __floatsitf(int32_t a) +{ + int sgn = 0; + int exp = 16414; + uint32_t mnt = a; + u128_t x = { 0, 0 }; + long double f; + int i; + if (a) { + if (a < 0) { + sgn = 1; + mnt = -mnt; + } + for (i = 16; i; i >>= 1) + if (!(mnt >> (32 - i))) { + mnt <<= i; + exp -= i; + } + x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 | + (uint64_t)(mnt << 1) << 16); + } + memcpy(&f, &x, 16); + return f; +} + +long double __floatditf(int64_t a) +{ + int sgn = 0; + int exp = 16446; + uint64_t mnt = a; + u128_t x = { 0, 0 }; + long double f; + int i; + if (a) { + if (a < 0) { + sgn = 1; + mnt = -mnt; + } + for (i = 32; i; i >>= 1) + if (!(mnt >> (64 - i))) { + mnt <<= i; + exp -= i; + } + x.x0 = mnt << 49; + x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16; + } + memcpy(&f, &x, 16); + return f; +} + +long double __floatunsitf(uint32_t a) +{ + int exp = 16414; + uint32_t mnt = a; + u128_t x = { 0, 0 }; + long double f; + int i; + if (a) { + for (i = 16; i; i >>= 1) + if (!(mnt >> (32 - i))) { + mnt <<= i; + exp -= i; + } + x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16; + } + memcpy(&f, &x, 16); + return f; +} + +long double __floatunditf(uint64_t a) +{ + int exp = 16446; + uint64_t mnt = a; + u128_t x = { 0, 0 }; + long double f; + int i; + if (a) { + for (i = 32; i; i >>= 1) + if (!(mnt >> (64 - i))) { + mnt <<= i; + exp -= i; + } + x.x0 = mnt << 49; + x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16; + } + memcpy(&f, &x, 16); + return f; +} + +static int f3_cmp(long double fa, long double fb) +{ + u128_t a, b; + memcpy(&a, &fa, 16); + memcpy(&b, &fb, 16); + return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 : + ((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) || + (b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 : + a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) : + a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 : + a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) : + a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 : + b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0); +} + +int __eqtf2(long double a, long double b) +{ + return !!f3_cmp(a, b); +} + +int __netf2(long double a, long double b) +{ + return !!f3_cmp(a, b); +} + +int __lttf2(long double a, long double b) +{ + return f3_cmp(a, b); +} + +int __letf2(long double a, long double b) +{ + return f3_cmp(a, b); +} + +int __gttf2(long double a, long double b) +{ + return -f3_cmp(b, a); +} + +int __getf2(long double a, long double b) +{ + return -f3_cmp(b, a); +} diff --git a/lib/testfp.c b/lib/testfp.c new file mode 100644 index 00000000..63342b42 --- /dev/null +++ b/lib/testfp.c @@ -0,0 +1,510 @@ +/* + * Test 128-bit floating-point arithmetic on arm64: + * build with two different compilers and compare the output. + * + * Copyright (c) 2015 Edmund Grimley Evans + * + * Copying and distribution of this file, with or without modification, + * are permitted in any medium without royalty provided the copyright + * notice and this notice are preserved. This file is offered as-is, + * without any warranty. + */ + +#include +#include +#include +#include + +#define check(x) ((x) ? (void)0 : check_fail(#x, __FILE__, __LINE__)) + +void check_fail(const char *assertion, const char *file, unsigned int line) +{ + printf("%s:%d: Check (%s) failed.", file, line, assertion); + exit(1); +} + +typedef struct { + unsigned long long x0, x1; +} u128_t; + +float copy_fi(uint32_t x) +{ + float f; + memcpy(&f, &x, 4); + return f; +} + +double copy_di(uint64_t x) +{ + double f; + memcpy(&f, &x, 8); + return f; +} + +long double copy_ldi(u128_t x) +{ + long double f; + memcpy(&f, &x, 16); + return f; +} + +uint32_t copy_if(float f) +{ + uint32_t x; + memcpy(&x, &f, 4); + return x; +} + +uint64_t copy_id(double f) +{ + uint64_t x; + memcpy(&x, &f, 8); + return x; +} + +u128_t copy_ild(long double f) +{ + u128_t x; + memcpy(&x, &f, 16); + return x; +} + +long double make(int sgn, int exp, uint64_t high, uint64_t low) +{ + u128_t x = { low, + (0x0000ffffffffffff & high) | + (0x7fff000000000000 & (uint64_t)exp << 48) | + (0x8000000000000000 & (uint64_t)sgn << 63) }; + return copy_ldi(x); +} + +void cmp(long double a, long double b) +{ + u128_t ax = copy_ild(a); + u128_t bx = copy_ild(b); + int eq = (a == b); + int ne = (a != b); + int lt = (a < b); + int le = (a <= b); + int gt = (a > b); + int ge = (a >= b); + + check(eq == 0 || eq == 1); + check(lt == 0 || lt == 1); + check(gt == 0 || gt == 1); + check(ne == !eq && le == (lt | eq) && ge == (gt | eq)); + check(eq + lt + gt < 2); + + printf("cmp %016llx%016llx %016llx%016llx %d %d %d\n", + ax.x1, ax.x0, bx.x1, bx.x0, lt, eq, gt); +} + +void cmps(void) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + cmp(make(i, 0, 0, 0), make(j, 0, 0, 0)); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 64; j++) { + long double f1 = make(i, 32767, (uint64_t)1 << j, 0); + long double f2 = make(i, 32767, 0, (uint64_t)1 << j); + cmp(f1, 0); + cmp(f2, 0); + cmp(0, f1); + cmp(0, f2); + } + } + + for (i = 0; i < 6; i++) + for (j = 0; j < 6; j++) + cmp(make(i & 1, i >> 1, 0, 0), + make(j & 1, j >> 1, 0, 0)); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + int a, b; + for (a = 0; a < 2; a++) { + for (b = 0; b < 2; b++) { + cmp(make(i, j, a, b), make(i, j, 0, 0)); + cmp(make(i, j, 0, 0), make(i, j, a, b)); + } + } + } + } +} + +void xop(const char *name, long double a, long double b, long double c) +{ + u128_t ax = copy_ild(a); + u128_t bx = copy_ild(b); + u128_t cx = copy_ild(c); + printf("%s %016llx%016llx %016llx%016llx %016llx%016llx\n", + name, ax.x1, ax.x0, bx.x1, bx.x0, cx.x1, cx.x0); +} + +void fadd(long double a, long double b) +{ + xop("add", a, b, a + b); +} + +void fsub(long double a, long double b) +{ + xop("sub", a, b, a - b); +} + +void fmul(long double a, long double b) +{ + xop("mul", a, b, a * b); +} + +void fdiv(long double a, long double b) +{ + xop("div", a, b, a / b); +} + +void nanz(void) +{ + // Check NaNs: + { + long double x[7]; + int i, j, n = 0; + x[n++] = make(0, 32000, 0x95132b76effc, 0xd79035214b4f8d53); + x[n++] = make(1, 32001, 0xbe71d7a51587, 0x30601c6815d6c3ac); + x[n++] = make(0, 32767, 0, 1); + x[n++] = make(0, 32767, (uint64_t)1 << 46, 0); + x[n++] = make(1, 32767, (uint64_t)1 << 47, 0); + x[n++] = make(1, 32767, 0x7596c7099ad5, 0xe25fed2c58f73fc9); + x[n++] = make(0, 32767, 0x835d143360f9, 0x5e315efb35630666); + check(n == sizeof(x) / sizeof(*x)); + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + fadd(x[i], x[j]); + fsub(x[i], x[j]); + fmul(x[i], x[j]); + fdiv(x[i], x[j]); + } + } + } + + // Check infinities and zeroes: + { + long double x[6]; + int i, j, n = 0; + x[n++] = make(1, 32000, 0x62acda85f700, 0x47b6c9f35edc4044); + x[n++] = make(0, 32001, 0x94b7abf55af7, 0x9f425fe354428e19); + x[n++] = make(0, 32767, 0, 0); + x[n++] = make(1, 32767, 0, 0); + x[n++] = make(0, 0, 0, 0); + x[n++] = make(1, 0, 0, 0); + check(n == sizeof(x) / sizeof(*x)); + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + fadd(x[i], x[j]); + fsub(x[i], x[j]); + fmul(x[i], x[j]); + fdiv(x[i], x[j]); + } + } + } +} + +void adds(void) +{ + // Check shifting and add/sub: + { + int i; + for (i = -130; i <= 130; i++) { + int s1 = (uint32_t)i % 3 < 1; + int s2 = (uint32_t)i % 5 < 2; + fadd(make(s1, 16384 , 0x502c065e4f71a65d, 0xd2f9bdb031f4f031), + make(s2, 16384 + i, 0xae267395a9bc1033, 0xb56b5800da1ba448)); + } + } + + // Check normalisation: + { + uint64_t a0 = 0xc6bab0a6afbef5ed; + uint64_t a1 = 0x4f84136c4a2e9b52; + int ee[] = { 0, 1, 10000 }; + int e, i; + for (e = 0; e < sizeof(ee) / sizeof(*ee); e++) { + int exp = ee[e]; + fsub(make(0, exp, a1, a0), make(0, 0, 0, 0)); + for (i = 63; i >= 0; i--) + fsub(make(0, exp, a1 | (uint64_t)1 << i >> 1, a0), + make(0, exp, a1 >> i << i, 0)); + for (i = 63; i >=0; i--) + fsub(make(0, exp, a1, a0 | (uint64_t)1 << i >> 1), + make(0, exp, a1, a0 >> i << i)); + } + } + + // Carry/overflow from rounding: + { + fadd(make(0, 114, -1, -1), make(0, 1, 0, 0)); + fadd(make(0, 32766, -1, -1), make(0, 32653, 0, 0)); + fsub(make(1, 32766, -1, -1), make(0, 32653, 0, 0)); + } +} + +void muls(void) +{ + int i, j; + + { + long double max = make(0, 32766, -1, -1); + long double min = make(0, 0, 0, 1); + fmul(max, max); + fmul(max, min); + fmul(min, min); + } + + for (i = 117; i > 0; i--) + fmul(make(0, 16268, 0x643dcea76edc, 0xe0877a598403627a), + make(i & 1, i, 0, 0)); + + fmul(make(0, 16383, -1, -3), make(0, 16383, 0, 1)); + // Round to next exponent: + fmul(make(0, 16383, -1, -2), make(0, 16383, 0, 1)); + // Round from subnormal to normal: + fmul(make(0, 1, -1, -1), make(0, 16382, 0, 0)); + + for (i = 0; i < 2; i++) + for (j = 0; j < 112; j++) + fmul(make(0, 16383, (uint64_t)1 << i, 0), + make(0, 16383, + j < 64 ? 0 : (uint64_t)1 << (j - 64), + j < 64 ? (uint64_t)1 << j : 0)); +} + +void divs(void) +{ + int i; + + { + long double max = make(0, 32766, -1, -1); + long double min = make(0, 0, 0, 1); + fdiv(max, max); + fdiv(max, min); + fdiv(min, max); + fdiv(min, min); + } + + for (i = 0; i < 64; i++) + fdiv(make(0, 16383, -1, -1), make(0, 16383, -1, -(uint64_t)1 << i)); + for (i = 0; i < 48; i++) + fdiv(make(0, 16383, -1, -1), make(0, 16383, -(uint64_t)1 << i, 0)); +} + +void cvtlsw(int32_t a) +{ + long double f = a; + u128_t x = copy_ild(f); + printf("cvtlsw %08lx %016llx%016llx\n", (long)(uint32_t)a, x.x1, x.x0); +} + +void cvtlsx(int64_t a) +{ + long double f = a; + u128_t x = copy_ild(f); + printf("cvtlsx %016llx %016llx%016llx\n", + (long long)(uint64_t)a, x.x1, x.x0); +} + +void cvtluw(uint32_t a) +{ + long double f = a; + u128_t x = copy_ild(f); + printf("cvtluw %08lx %016llx%016llx\n", (long)a, x.x1, x.x0); +} + +void cvtlux(uint64_t a) +{ + long double f = a; + u128_t x = copy_ild(f); + printf("cvtlux %016llx %016llx%016llx\n", (long long)a, x.x1, x.x0); +} + +void cvtil(long double a) +{ + u128_t x = copy_ild(a); + int32_t b1 = a; + int64_t b2 = a; + uint32_t b3 = a; + uint64_t b4 = a; + printf("cvtswl %016llx%016llx %08lx\n", + x.x1, x.x0, (long)(uint32_t)b1); + printf("cvtsxl %016llx%016llx %016llx\n", + x.x1, x.x0, (long long)(uint64_t)b2); + printf("cvtuwl %016llx%016llx %08lx\n", + x.x1, x.x0, (long)b3); + printf("cvtuxl %016llx%016llx %016llx\n", + x.x1, x.x0, (long long)b4); +} + +void cvtlf(float a) +{ + uint32_t ax = copy_if(a); + long double b = a; + u128_t bx = copy_ild(b); + printf("cvtlf %08lx %016llx%016llx\n", (long)ax, bx.x1, bx.x0); +} + +void cvtld(double a) +{ + uint64_t ax = copy_id(a); + long double b = a; + u128_t bx = copy_ild(b); + printf("cvtld %016llx %016llx%016llx\n", (long long)ax, bx.x1, bx.x0); +} + +void cvtfl(long double a) +{ + u128_t ax = copy_ild(a); + float b = a; + uint32_t bx = copy_if(b); + printf("cvtfl %016llx%016llx %08lx\n", ax.x1, ax.x0, (long)bx); +} + +void cvtdl(long double a) +{ + u128_t ax = copy_ild(a); + double b = a; + uint64_t bx = copy_id(b); + printf("cvtdl %016llx%016llx %016llx\n", ax.x1, ax.x0, (long long)bx); +} + +void cvts(void) +{ + int i, j; + + { + uint32_t x = 0xad040c5b; + cvtlsw(0); + for (i = 0; i < 31; i++) + cvtlsw(x >> (31 - i)); + for (i = 0; i < 31; i++) + cvtlsw(-(x >> (31 - i))); + cvtlsw(0x80000000); + } + { + uint64_t x = 0xb630a248cad9afd2; + cvtlsx(0); + for (i = 0; i < 63; i++) + cvtlsx(x >> (63 - i)); + for (i = 0; i < 63; i++) + cvtlsx(-(x >> (63 - i))); + cvtlsx(0x8000000000000000); + } + { + uint32_t x = 0xad040c5b; + cvtluw(0); + for (i = 0; i < 32; i++) + cvtluw(x >> (31 - i)); + } + { + uint64_t x = 0xb630a248cad9afd2; + cvtlux(0); + for (i = 0; i < 64; i++) + cvtlux(x >> (63 - i)); + } + + for (i = 0; i < 2; i++) { + cvtil(make(i, 32767, 0, 1)); + cvtil(make(i, 32767, (uint64_t)1 << 47, 0)); + cvtil(make(i, 32767, 123, 456)); + cvtil(make(i, 32767, 0, 0)); + cvtil(make(i, 16382, -1, -1)); + cvtil(make(i, 16383, -1, -1)); + cvtil(make(i, 16384, 0x7fffffffffff, -1)); + cvtil(make(i, 16384, 0x800000000000, 0)); + for (j = 0; j < 68; j++) + cvtil(make(i, 16381 + j, 0xd4822c0a10ec, 0x1fe2f8b2669f5c9d)); + } + + cvtlf(copy_fi(0x00000000)); + cvtlf(copy_fi(0x456789ab)); + cvtlf(copy_fi(0x7f800000)); + cvtlf(copy_fi(0x7f923456)); + cvtlf(copy_fi(0x7fdbcdef)); + cvtlf(copy_fi(0x80000000)); + cvtlf(copy_fi(0xabcdef12)); + cvtlf(copy_fi(0xff800000)); + cvtlf(copy_fi(0xff923456)); + cvtlf(copy_fi(0xffdbcdef)); + + cvtld(copy_di(0x0000000000000000)); + cvtld(copy_di(0x456789abcdef0123)); + cvtld(copy_di(0x7ff0000000000000)); + cvtld(copy_di(0x7ff123456789abcd)); + cvtld(copy_di(0x7ffabcdef1234567)); + cvtld(copy_di(0x8000000000000000)); + cvtld(copy_di(0xcdef123456789abc)); + cvtld(copy_di(0xfff0000000000000)); + cvtld(copy_di(0xfff123456789abcd)); + cvtld(copy_di(0xfffabcdef1234567)); + + for (i = 0; i < 2; i++) { \ + cvtfl(make(i, 0, 0, 0)); + cvtfl(make(i, 16232, -1, -1)); + cvtfl(make(i, 16233, 0, 0)); + cvtfl(make(i, 16233, 0, 1)); + cvtfl(make(i, 16383, 0xab0ffd000000, 0)); + cvtfl(make(i, 16383, 0xab0ffd000001, 0)); + cvtfl(make(i, 16383, 0xab0ffeffffff, 0)); + cvtfl(make(i, 16383, 0xab0fff000000, 0)); + cvtfl(make(i, 16383, 0xab0fff000001, 0)); + cvtfl(make(i, 16510, 0xfffffeffffff, -1)); + cvtfl(make(i, 16510, 0xffffff000000, 0)); + cvtfl(make(i, 16511, 0, 0)); + cvtfl(make(i, 32767, 0, 0)); + cvtfl(make(i, 32767, 0, 1)); + cvtfl(make(i, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5)); + cvtfl(make(i, 32767, 0x800000000000, 1)); + cvtfl(make(i, 32767, 0xa11caaaf6a52, 0x696033e871eab099)); + } + + for (i = 0; i < 2; i++) { + cvtdl(make(i, 0, 0, 0)); + cvtdl(make(i, 15307, -1, -1)); + cvtdl(make(i, 15308, 0, 0)); + cvtdl(make(i, 15308, 0, 1)); + cvtdl(make(i, 16383, 0xabc123abc0ff, 0xe800000000000000)); + cvtdl(make(i, 16383, 0xabc123abc0ff, 0xe800000000000001)); + cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf7ffffffffffffff)); + cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf800000000000000)); + cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf800000000000001)); + cvtdl(make(i, 17406, 0xffffffffffff, 0xf7ffffffffffffff)); + cvtdl(make(i, 17406, 0xffffffffffff, 0xf800000000000000)); + cvtdl(make(i, 17407, 0, 0)); + cvtdl(make(i, 32767, 0, 0)); + cvtdl(make(i, 32767, 0, 1)); + cvtdl(make(i, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5)); + cvtdl(make(i, 32767, 0x800000000000, 1)); + cvtdl(make(i, 32767, 0xa11caaaf6a52, 0x696033e871eab099)); + } +} + +void tests(void) +{ + cmps(); + nanz(); + adds(); + muls(); + divs(); + cvts(); +} + +int main() +{ +#ifdef __aarch64__ + tests(); +#else + printf("This test program is intended for a little-endian architecture\n" + "with an IEEE-standard 128-bit long double.\n"); +#endif + return 0; +} diff --git a/libtcc.c b/libtcc.c index 01497b28..c68221e0 100644 --- a/libtcc.c +++ b/libtcc.c @@ -45,6 +45,9 @@ ST_DATA struct TCCState *tcc_state; #ifdef TCC_TARGET_ARM #include "arm-gen.c" #endif +#ifdef TCC_TARGET_ARM64 +#include "arm64-gen.c" +#endif #ifdef TCC_TARGET_C67 #include "c67-gen.c" #endif @@ -959,6 +962,8 @@ LIBTCCAPI TCCState *tcc_new(void) #else s->float_abi = ARM_SOFTFP_FLOAT; #endif +#elif defined(TCC_TARGET_ARM64) + tcc_define_symbol(s, "__aarch64__", NULL); #endif #ifdef TCC_TARGET_PE @@ -1560,7 +1565,7 @@ static int tcc_set_linker(TCCState *s, const char *option) } else if (link_option(option, "oformat=", &p)) { #if defined(TCC_TARGET_PE) if (strstart("pe-", &p)) { -#elif defined(TCC_TARGET_X86_64) +#elif defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) if (strstart("elf64-", &p)) { #else if (strstart("elf32-", &p)) { diff --git a/tcc.c b/tcc.c index c80bb4fd..52afca70 100644 --- a/tcc.c +++ b/tcc.c @@ -203,6 +203,8 @@ static void display_info(TCCState *s, int what) # endif #elif defined TCC_TARGET_ARM "ARM" +#elif defined TCC_TARGET_ARM64 + "AArch64" # ifdef TCC_ARM_HARDFLOAT " Hard Float" # endif diff --git a/tcc.h b/tcc.h index aaf5be0a..0a052329 100644 --- a/tcc.h +++ b/tcc.h @@ -113,7 +113,7 @@ #endif #include "elf.h" -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) # define ELFCLASSW ELFCLASS64 # define ElfW(type) Elf##64##_##type # define ELFW(type) ELF##64##_##type @@ -151,23 +151,26 @@ /* target selection */ /* #define TCC_TARGET_I386 *//* i386 code generator */ /* #define TCC_TARGET_ARM *//* ARMv4 code generator */ +/* #define TCC_TARGET_ARM64 *//* ARMv8 code generator */ /* #define TCC_TARGET_C67 *//* TMS320C67xx code generator */ /* #define TCC_TARGET_X86_64 *//* x86-64 code generator */ /* default target is I386 */ #if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_ARM) && \ - !defined(TCC_TARGET_C67) && !defined(TCC_TARGET_X86_64) + !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_C67) && \ + !defined(TCC_TARGET_X86_64) #define TCC_TARGET_I386 #endif #if !defined(TCC_UCLIBC) && !defined(TCC_TARGET_ARM) && \ - !defined(TCC_TARGET_C67) && !defined(TCC_TARGET_X86_64) && \ - !defined(CONFIG_USE_LIBGCC) + !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_C67) && \ + !defined(TCC_TARGET_X86_64) && !defined(CONFIG_USE_LIBGCC) #define CONFIG_TCC_BCHECK /* enable bound checking code */ #endif /* define it to include assembler support */ -#if !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_C67) +#if !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_ARM64) && \ + !defined(TCC_TARGET_C67) #define CONFIG_TCC_ASM #endif @@ -184,6 +187,8 @@ # define TCC_IS_NATIVE # elif defined __arm__ && defined TCC_TARGET_ARM # define TCC_IS_NATIVE +# elif defined __aarch64__ && defined TCC_TARGET_ARM64 +# define TCC_IS_NATIVE # endif #endif @@ -256,6 +261,8 @@ # define CONFIG_TCC_ELFINTERP "/usr/libexec/ld-elf.so.2" # elif defined __GNU__ # define CONFIG_TCC_ELFINTERP "/lib/ld.so" +# elif defined TCC_TARGET_ARM64 +# define CONFIG_TCC_ELFINTERP "/lib/ld-linux-aarch64.so.1" # elif defined(TCC_TARGET_X86_64) # define CONFIG_TCC_ELFINTERP "/lib64/ld-linux-x86-64.so.2" # elif defined(TCC_UCLIBC) @@ -290,6 +297,9 @@ #ifdef TCC_TARGET_ARM # include "arm-gen.c" #endif +#ifdef TCC_TARGET_ARM64 +# include "arm64-gen.c" +#endif #ifdef TCC_TARGET_C67 # include "coff.h" # include "c67-gen.c" @@ -1214,6 +1224,7 @@ ST_FUNC void vpushv(SValue *v); ST_FUNC void save_reg(int r); ST_FUNC int get_reg(int rc); ST_FUNC void save_regs(int n); +ST_FUNC void gaddrof(void); ST_FUNC int gv(int rc); ST_FUNC void gv2(int rc1, int rc2); ST_FUNC void vpop(void); @@ -1357,6 +1368,15 @@ ST_FUNC uint32_t encbranch(int pos, int addr, int fail); ST_FUNC void gen_cvt_itof1(int t); #endif +/* ------------ arm64-gen.c ------------ */ +#ifdef TCC_TARGET_ARM64 +ST_FUNC void gen_cvt_sxtw(void); +ST_FUNC void gen_opl(int op); +ST_FUNC void greturn(void); +ST_FUNC void gen_va_start(void); +ST_FUNC void gen_va_arg(CType *t); +#endif + /* ------------ c67-gen.c ------------ */ #ifdef TCC_TARGET_C67 #endif diff --git a/tccelf.c b/tccelf.c index dc0a1443..4f892245 100644 --- a/tccelf.c +++ b/tccelf.c @@ -291,7 +291,7 @@ ST_FUNC void put_elf_reloca(Section *symtab, Section *s, unsigned long offset, rel = section_ptr_add(sr, sizeof(ElfW_Rel)); rel->r_offset = offset; rel->r_info = ELFW(R_INFO)(symbol, type); -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) rel->r_addend = addend; #else if (addend) @@ -506,7 +506,7 @@ ST_FUNC void relocate_section(TCCState *s1, Section *s) sym_index = ELFW(R_SYM)(rel->r_info); sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; val = sym->st_value; -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) val += rel->r_addend; #endif type = ELFW(R_TYPE)(rel->r_info); @@ -760,6 +760,69 @@ ST_FUNC void relocate_section(TCCState *s1, Section *s) fprintf(stderr,"FIXME: handle reloc type %x at %x [%p] to %x\n", type, (unsigned)addr, ptr, (unsigned)val); break; +#elif defined(TCC_TARGET_ARM64) + case R_AARCH64_ABS64: + *(uint64_t *)ptr = val; + break; + case R_AARCH64_ABS32: + *(uint32_t *)ptr = val; + break; + case R_AARCH64_MOVW_UABS_G0_NC: + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) | + (val & 0xffff) << 5; + break; + case R_AARCH64_MOVW_UABS_G1_NC: + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) | + (val >> 16 & 0xffff) << 5; + break; + case R_AARCH64_MOVW_UABS_G2_NC: + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) | + (val >> 32 & 0xffff) << 5; + break; + case R_AARCH64_MOVW_UABS_G3: + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) | + (val >> 48 & 0xffff) << 5; + break; + case R_AARCH64_ADR_PREL_PG_HI21: { + uint64_t off = (val >> 12) - (addr >> 12); + if ((off + ((uint64_t)1 << 20)) >> 21) + tcc_error("R_AARCH64_ADR_PREL_PG_HI21 relocation failed"); + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0x9f00001f) | + (off & 0x1ffffc) << 3 | (off & 3) << 29; + break; + } + case R_AARCH64_ADD_ABS_LO12_NC: + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffc003ff) | + (val & 0xfff) << 10; + break; + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + if (((val - addr) + ((uint64_t)1 << 27)) & ~(uint64_t)0xffffffc) + tcc_error("R_AARCH64_(JUMP|CALL)26 relocation failed"); + *(uint32_t *)ptr = 0x14000000 | (type == R_AARCH64_CALL26) << 31 | + ((val - addr) >> 2 & 0x3ffffff); + break; + case R_AARCH64_ADR_GOT_PAGE: { + uint64_t off = + (((s1->got->sh_addr + + s1->sym_attrs[sym_index].got_offset) >> 12) - (addr >> 12)); + if ((off + ((uint64_t)1 << 20)) >> 21) + tcc_error("R_AARCH64_ADR_GOT_PAGE relocation failed"); + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0x9f00001f) | + (off & 0x1ffffc) << 3 | (off & 3) << 29; + break; + } + case R_AARCH64_LD64_GOT_LO12_NC: + *(uint32_t *)ptr = (*(uint32_t *)ptr & 0xfff803ff) | + ((s1->got->sh_addr + s1->sym_attrs[sym_index].got_offset) + & 0xff8) << 7; + break; + case R_AARCH64_COPY: + break; + default: + fprintf(stderr, "FIXME: handle reloc type %x at %x [%p] to %x\n", + type, (unsigned)addr, ptr, (unsigned)val); + break; #elif defined(TCC_TARGET_C67) case R_C60_32: *(int *)ptr += val; @@ -955,7 +1018,7 @@ static void put32(unsigned char *p, uint32_t val) } #if defined(TCC_TARGET_I386) || defined(TCC_TARGET_ARM) || \ - defined(TCC_TARGET_X86_64) + defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) static uint32_t get32(unsigned char *p) { return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); @@ -1014,6 +1077,8 @@ static unsigned long put_got_entry(TCCState *s1, (reloc_type == R_386_JMP_SLOT); #elif defined(TCC_TARGET_ARM) (reloc_type == R_ARM_JUMP_SLOT); +#elif defined(TCC_TARGET_ARM64) + (reloc_type == R_AARCH64_JUMP_SLOT); #else 0; #endif @@ -1135,6 +1200,24 @@ static unsigned long put_got_entry(TCCState *s1, if (sym->st_shndx == SHN_UNDEF) offset = plt->data_offset - 16; } +#elif defined(TCC_TARGET_ARM64) + if (need_plt_entry) { + Section *plt; + uint8_t *p; + + if (s1->output_type == TCC_OUTPUT_DLL) + tcc_error("DLLs unimplemented!"); + + plt = s1->plt; + if (plt->data_offset == 0) + section_ptr_add(plt, 32); + p = section_ptr_add(plt, 16); + put32(p, s1->got->data_offset); + put32(p + 4, (uint64_t)s1->got->data_offset >> 32); + + if (sym->st_shndx == SHN_UNDEF) + offset = plt->data_offset - 16; + } #elif defined(TCC_TARGET_C67) if (s1->dynsym) { tcc_error("C67 got not implemented"); @@ -1277,6 +1360,18 @@ ST_FUNC void build_got_entries(TCCState *s1) put32(p+2, 0x46c0); /* nop */ put32(p+4, 0xeafffffe); /* b $sym */ } +#elif defined(TCC_TARGET_ARM64) + //xx Other cases may be required here: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + if (!s1->got) + build_got(s1); + sym_index = ELFW(R_SYM)(rel->r_info); + sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + reloc_type = R_AARCH64_GLOB_DAT; + put_got_entry(s1, reloc_type, sym->st_size, sym->st_info, + sym_index); + break; #elif defined(TCC_TARGET_C67) case R_C60_GOT32: case R_C60_GOTOFF: @@ -1796,6 +1891,40 @@ ST_FUNC void relocate_plt(TCCState *s1) put32(p + 12, x + get32(p + 12) + s1->plt->data - p); p += 16; } +#elif defined(TCC_TARGET_ARM64) + uint64_t plt = s1->plt->sh_addr; + uint64_t got = s1->got->sh_addr; + uint64_t off = (got >> 12) - (plt >> 12); + if ((off + ((uint64_t)1 << 20)) >> 21) + tcc_error("Failed relocating PLT"); + put32(p, 0xa9bf7bf0); // stp x16,x30,[sp,#-16]! + put32(p + 4, (0x90000010 | // adrp x16,... + (off & 0x1ffffc) << 3 | (off & 3) << 29)); + put32(p + 8, (0xf9400211 | // ldr x17,[x16,#...] + (got & 0xff8) << 7)); + put32(p + 12, (0x91000210 | // add x16,x16,#... + (got & 0xfff) << 10)); + put32(p + 16, 0xd61f0220); // br x17 + put32(p + 20, 0xd503201f); // nop + put32(p + 24, 0xd503201f); // nop + put32(p + 28, 0xd503201f); // nop + p += 32; + while (p < p_end) { + uint64_t pc = plt + (p - s1->plt->data); + uint64_t addr = got + + (get32(p) | (uint64_t)get32(p + 4) << 32); + uint32_t off = (addr >> 12) - (pc >> 12); + if ((off + ((uint64_t)1 << 20)) >> 21) + tcc_error("Failed relocating PLT"); + put32(p, (0x90000010 | // adrp x16,... + (off & 0x1ffffc) << 3 | (off & 3) << 29)); + put32(p + 4, (0xf9400211 | // ldr x17,[x16,#...] + (addr & 0xff8) << 7)); + put32(p + 8, (0x91000210 | // add x16,x16,#... + (addr & 0xfff) << 10)); + put32(p + 12, 0xd61f0220); // br x17 + p += 16; + } #elif defined(TCC_TARGET_C67) /* XXX: TODO */ #else @@ -2093,7 +2222,7 @@ static void fill_dynamic(TCCState *s1, struct dyn_inf *dyninf) put_dt(dynamic, DT_SYMTAB, s1->dynsym->sh_addr); put_dt(dynamic, DT_STRSZ, dyninf->dynstr->data_offset); put_dt(dynamic, DT_SYMENT, sizeof(ElfW(Sym))); -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) put_dt(dynamic, DT_RELA, dyninf->rel_addr); put_dt(dynamic, DT_RELASZ, dyninf->rel_size); put_dt(dynamic, DT_RELAENT, sizeof(ElfW_Rel)); diff --git a/tccgen.c b/tccgen.c index ae075634..b96b2857 100644 --- a/tccgen.c +++ b/tccgen.c @@ -545,7 +545,7 @@ ST_FUNC void save_reg(int r) type = &p->type; if ((p->r & VT_LVAL) || (!is_float(type->t) && (type->t & VT_BTYPE) != VT_LLONG)) -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) type = &char_pointer_type; #else type = &int_type; @@ -562,7 +562,7 @@ ST_FUNC void save_reg(int r) o(0xd8dd); /* fstp %st(0) */ } #endif -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64) /* special long long case */ if ((type->t & VT_BTYPE) == VT_LLONG) { sv.c.ul += 4; @@ -681,7 +681,7 @@ static void move_reg(int r, int s, int t) } /* get address of vtop (vtop MUST BE an lvalue) */ -static void gaddrof(void) +ST_FUNC void gaddrof(void) { if (vtop->r & VT_REF) gv(RC_INT); @@ -803,11 +803,13 @@ ST_FUNC int gv(int rc) r = vtop->r & VT_VALMASK; rc2 = (rc & RC_FLOAT) ? RC_FLOAT : RC_INT; +#ifndef TCC_TARGET_ARM64 if (rc == RC_IRET) rc2 = RC_LRET; #ifdef TCC_TARGET_X86_64 else if (rc == RC_FRET) rc2 = RC_QRET; +#endif #endif /* need to reload if: @@ -817,7 +819,7 @@ ST_FUNC int gv(int rc) if (r >= VT_CONST || (vtop->r & VT_LVAL) || !(reg_classes[r] & rc) -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) || ((vtop->type.t & VT_BTYPE) == VT_QLONG && !(reg_classes[vtop->r2] & rc2)) || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT && !(reg_classes[vtop->r2] & rc2)) #else @@ -826,7 +828,7 @@ ST_FUNC int gv(int rc) ) { r = get_reg(rc); -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) if (((vtop->type.t & VT_BTYPE) == VT_QLONG) || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT)) { int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE; #else @@ -838,7 +840,7 @@ ST_FUNC int gv(int rc) original_type = vtop->type.t; /* two register type load : expand to two words temporarily */ -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64) if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { /* load constant */ ll = vtop->c.ull; @@ -890,7 +892,7 @@ ST_FUNC int gv(int rc) t1 = t; /* compute memory access type */ if (vtop->r & VT_REF) -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) t = VT_PTR; #else t = VT_INT; @@ -952,6 +954,7 @@ ST_FUNC void gv2(int rc1, int rc2) } } +#ifndef TCC_TARGET_ARM64 /* wrapper around RC_FRET to return a register by type */ static int rc_fret(int t) { @@ -962,6 +965,7 @@ static int rc_fret(int t) #endif return RC_FRET; } +#endif /* wrapper around REG_FRET to return a register by type */ static int reg_fret(int t) @@ -1147,7 +1151,7 @@ ST_FUNC int gvtst(int inv, int t) return gtst(inv, t); } -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64) /* generate CPU independent (unsigned) long long operations */ static void gen_opl(int op) { @@ -1358,7 +1362,7 @@ static void gen_opl(int op) #elif defined(TCC_TARGET_ARM) b = ind; o(0x1A000000 | encbranch(ind, 0, 1)); -#elif defined(TCC_TARGET_C67) +#elif defined(TCC_TARGET_C67) || defined(TCC_TARGET_ARM64) tcc_error("not implemented"); #else #error not supported @@ -1512,7 +1516,8 @@ static void gen_opic(int op) general_case: if (!nocode_wanted) { /* call low level op generator */ - if (t1 == VT_LLONG || t2 == VT_LLONG) + if (t1 == VT_LLONG || t2 == VT_LLONG || + (PTR_SIZE == 8 && (t1 == VT_PTR || t2 == VT_PTR))) gen_opl(op); else gen_opi(op); @@ -1679,7 +1684,7 @@ ST_FUNC void gen_op(int op) if (op >= TOK_ULT && op <= TOK_LOR) { check_comparison_pointer_types(vtop - 1, vtop, op); /* pointers are handled are unsigned */ -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) t = VT_LLONG | VT_UNSIGNED; #else t = VT_INT | VT_UNSIGNED; @@ -1700,7 +1705,7 @@ ST_FUNC void gen_op(int op) vrott(3); gen_opic(op); /* set to integer type */ -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) vtop->type.t = VT_LLONG; #else vtop->type.t = VT_INT; @@ -1724,7 +1729,7 @@ ST_FUNC void gen_op(int op) u = pointed_size(&vtop[-1].type); if (u < 0) tcc_error("unknown array element size"); -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) vpushll(u); #else /* XXX: cast to int ? (long long case) */ @@ -1833,6 +1838,9 @@ ST_FUNC void gen_op(int op) /* generic itof for unsigned long long case */ static void gen_cvt_itof1(int t) { +#ifdef TCC_TARGET_ARM64 + gen_cvt_itof(t); +#else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == (VT_LLONG | VT_UNSIGNED)) { @@ -1851,12 +1859,16 @@ static void gen_cvt_itof1(int t) } else { gen_cvt_itof(t); } +#endif } #endif /* generic ftoi for unsigned long long case */ static void gen_cvt_ftoi1(int t) { +#ifdef TCC_TARGET_ARM64 + gen_cvt_ftoi(t); +#else int st; if (t == (VT_LLONG | VT_UNSIGNED)) { @@ -1878,6 +1890,7 @@ static void gen_cvt_ftoi1(int t) } else { gen_cvt_ftoi(t); } +#endif } /* force char or short cast */ @@ -1968,7 +1981,7 @@ static void gen_cast(CType *type) vtop->c.ll = vtop->c.ull; else if (sbt & VT_UNSIGNED) vtop->c.ll = vtop->c.ui; -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) else if (sbt == VT_PTR) ; #endif @@ -1979,7 +1992,7 @@ static void gen_cast(CType *type) vtop->c.ull = vtop->c.ll; else if (dbt == VT_BOOL) vtop->c.i = (vtop->c.ll != 0); -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) else if (dbt == VT_PTR) ; #endif @@ -2024,7 +2037,7 @@ static void gen_cast(CType *type) gen_cast(type); } } -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64) } else if ((dbt & VT_BTYPE) == VT_LLONG) { if ((sbt & VT_BTYPE) != VT_LLONG) { /* scalar to long long */ @@ -2056,11 +2069,18 @@ static void gen_cast(CType *type) (sbt & VT_BTYPE) != VT_PTR && (sbt & VT_BTYPE) != VT_FUNC) { /* need to convert from 32bit to 64bit */ - int r = gv(RC_INT); + gv(RC_INT); if (sbt != (VT_INT | VT_UNSIGNED)) { +#if defined(TCC_TARGET_ARM64) + gen_cvt_sxtw(); +#elif defined(TCC_TARGET_X86_64) + int r = gv(RC_INT); /* x86_64 specific: movslq */ o(0x6348); o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r)); +#else +#error +#endif } } #endif @@ -2589,7 +2609,7 @@ ST_FUNC void vstore(void) if ((vtop[-1].r & VT_VALMASK) == VT_LLOCAL) { SValue sv; t = get_reg(RC_INT); -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) sv.type.t = VT_PTR; #else sv.type.t = VT_INT; @@ -2600,7 +2620,7 @@ ST_FUNC void vstore(void) vtop[-1].r = t | VT_LVAL; } /* two word case handling : store second register at word + 4 (or +8 for x86-64) */ -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) if (((ft & VT_BTYPE) == VT_QLONG) || ((ft & VT_BTYPE) == VT_QFLOAT)) { int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE; #else @@ -3098,6 +3118,13 @@ static int parse_btype(CType *type, AttributeDef *ad) goto basic_type1; } break; +#ifdef TCC_TARGET_ARM64 + case TOK_UINT128: + /* GCC's __uint128_t appears in some Linux header files. Make it a + synonym for long double to get the size and alignment right. */ + u = VT_LDOUBLE; + goto basic_type; +#endif case TOK_BOOL: u = VT_BOOL; goto basic_type; @@ -3233,7 +3260,8 @@ the_end: /* long is never used as type */ if ((t & VT_BTYPE) == VT_LONG) -#if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE +#if (!defined TCC_TARGET_X86_64 && !defined TCC_TARGET_ARM64) || \ + defined TCC_TARGET_PE t = (t & ~VT_BTYPE) | VT_INT; #else t = (t & ~VT_BTYPE) | VT_LLONG; @@ -3881,6 +3909,36 @@ ST_FUNC void unary(void) break; #endif #endif + +#ifdef TCC_TARGET_ARM64 + case TOK___va_start: { + next(); + skip('('); + expr_eq(); + skip(','); + expr_eq(); + skip(')'); + //xx check types + gen_va_start(); + vpushi(0); + vtop->type.t = VT_VOID; + break; + } + case TOK___va_arg: { + CType type; + next(); + skip('('); + expr_eq(); + skip(','); + parse_type(&type); + skip(')'); + //xx check types + gen_va_arg(&type); + vtop->type = type; + break; + } +#endif + case TOK_INC: case TOK_DEC: t = tok; @@ -4071,6 +4129,15 @@ ST_FUNC void unary(void) if (!ret_nregs) { /* get some space for the returned structure */ size = type_size(&s->type, &align); +#ifdef TCC_TARGET_ARM64 + /* On arm64, a small struct is return in registers. + It is much easier to write it to memory if we know + that we are allowed to write some extra bytes, so + round the allocated space up to a power of 2: */ + if (size < 16) + while (size & (size - 1)) + size = (size | (size - 1)) + 1; +#endif loc = (loc - size) & -align; ret.type = s->type; ret.r = VT_LOCAL | VT_LVAL; @@ -4094,12 +4161,14 @@ ST_FUNC void unary(void) ret.r2 = REG_QRET; #endif } else { +#ifndef TCC_TARGET_ARM64 #ifdef TCC_TARGET_X86_64 if ((ret.type.t & VT_BTYPE) == VT_QLONG) #else if ((ret.type.t & VT_BTYPE) == VT_LLONG) #endif ret.r2 = REG_LRET; +#endif ret.r = REG_IRET; } ret.c.i = 0; @@ -4717,6 +4786,10 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, if (tok != ';') { gexpr(); gen_assign_cast(&func_vt); +#ifdef TCC_TARGET_ARM64 + // Perhaps it would be better to use this for all backends: + greturn(); +#else if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { CType type, ret_type; int ret_align, ret_nregs; @@ -4770,6 +4843,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, } else { gv(RC_IRET); } +#endif vtop--; /* NOT vpop() because on x86 it would flush the fp stack */ } skip(';'); @@ -5160,9 +5234,9 @@ static void init_putv(CType *type, Section *sec, unsigned long c, /* XXX: generate error if incorrect relocation */ gen_assign_cast(&dtype); bt = type->t & VT_BTYPE; - /* we'll write at most 12 bytes */ - if (c + 12 > sec->data_allocated) { - section_realloc(sec, c + 12); + /* we'll write at most 16 bytes */ + if (c + 16 > sec->data_allocated) { + section_realloc(sec, c + 16); } ptr = sec->data + c; /* XXX: make code faster ? */ @@ -5184,6 +5258,9 @@ static void init_putv(CType *type, Section *sec, unsigned long c, (bt == VT_INT && bit_size != 32))) tcc_error("initializer element is not computable at load time"); switch(bt) { + /* XXX: when cross-compiling we assume that each type has the + same representation on host and target, which is likely to + be wrong in the case of long double */ case VT_BOOL: vtop->c.i = (vtop->c.i != 0); case VT_BYTE: @@ -5203,7 +5280,7 @@ static void init_putv(CType *type, Section *sec, unsigned long c, break; case VT_PTR: { addr_t val = (vtop->c.ptr_offset & bit_mask) << bit_pos; -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) if (vtop->r & VT_SYM) greloca(sec, vtop->sym, c, R_DATA_PTR, val); else @@ -5217,7 +5294,7 @@ static void init_putv(CType *type, Section *sec, unsigned long c, } default: { int val = (vtop->c.i & bit_mask) << bit_pos; -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64) if (vtop->r & VT_SYM) greloca(sec, vtop->sym, c, R_DATA_PTR, val); else diff --git a/tccrun.c b/tccrun.c index 13c20120..264322bf 100644 --- a/tccrun.c +++ b/tccrun.c @@ -604,6 +604,27 @@ static int rt_get_caller_pc(addr_t *paddr, ucontext_t *uc, int level) } } +/* ------------------------------------------------------------- */ +#elif defined(__aarch64__) + +static int rt_get_caller_pc(addr_t *paddr, ucontext_t *uc, int level) +{ + if (level < 0) + return -1; + else if (level == 0) { + *paddr = uc->uc_mcontext.pc; + return 0; + } + else { + addr_t *fp = (addr_t *)uc->uc_mcontext.regs[29]; + int i; + for (i = 1; i < level; i++) + fp = (addr_t *)fp[0]; + *paddr = fp[1]; + return 0; + } +} + /* ------------------------------------------------------------- */ #else diff --git a/tcctok.h b/tcctok.h index 53d0659c..64d1288f 100644 --- a/tcctok.h +++ b/tcctok.h @@ -59,6 +59,10 @@ DEF(TOK_ASM2, "__asm") DEF(TOK_ASM3, "__asm__") +#ifdef TCC_TARGET_ARM64 + DEF(TOK_UINT128, "__uint128_t") +#endif + /*********************************************************************/ /* the following are not keywords. They are included to ease parsing */ /* preprocessor only */ @@ -136,6 +140,11 @@ DEF(TOK_REGPARM1, "regparm") DEF(TOK_REGPARM2, "__regparm__") +#ifdef TCC_TARGET_ARM64 + DEF(TOK___va_start, "__va_start") + DEF(TOK___va_arg, "__va_arg") +#endif + /* pragma */ DEF(TOK_pack, "pack") #if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_X86_64) @@ -229,6 +238,30 @@ #if defined TCC_TARGET_PE DEF(TOK___chkstk, "__chkstk") #endif +#ifdef TCC_TARGET_ARM64 + DEF(TOK___addtf3, "__addtf3") + DEF(TOK___subtf3, "__subtf3") + DEF(TOK___multf3, "__multf3") + DEF(TOK___divtf3, "__divtf3") + DEF(TOK___extendsftf2, "__extendsftf2") + DEF(TOK___extenddftf2, "__extenddftf2") + DEF(TOK___trunctfsf2, "__trunctfsf2") + DEF(TOK___trunctfdf2, "__trunctfdf2") + DEF(TOK___fixtfsi, "__fixtfsi") + DEF(TOK___fixtfdi, "__fixtfdi") + DEF(TOK___fixunstfsi, "__fixunstfsi") + DEF(TOK___fixunstfdi, "__fixunstfdi") + DEF(TOK___floatsitf, "__floatsitf") + DEF(TOK___floatditf, "__floatditf") + DEF(TOK___floatunsitf, "__floatunsitf") + DEF(TOK___floatunditf, "__floatunditf") + DEF(TOK___eqtf2, "__eqtf2") + DEF(TOK___netf2, "__netf2") + DEF(TOK___lttf2, "__lttf2") + DEF(TOK___letf2, "__letf2") + DEF(TOK___gttf2, "__gttf2") + DEF(TOK___getf2, "__getf2") +#endif /* bound checking symbols */ #ifdef CONFIG_TCC_BCHECK