Add arm64 (AArch64) as a target architecture.

This commit is contained in:
Edmund Grimley Evans 2015-02-13 18:58:31 +00:00
parent 738606dbd5
commit b14ef0e24b
16 changed files with 3265 additions and 40 deletions

View File

@ -70,6 +70,7 @@ NATIVE_DEFINES_$(CONFIG_arm) += -DTCC_TARGET_ARM
NATIVE_DEFINES_$(CONFIG_arm_eabihf) += -DTCC_ARM_EABI -DTCC_ARM_HARDFLOAT
NATIVE_DEFINES_$(CONFIG_arm_eabi) += -DTCC_ARM_EABI
NATIVE_DEFINES_$(CONFIG_arm_vfp) += -DTCC_ARM_VFP
NATIVE_DEFINES_$(CONFIG_arm64) += -DTCC_TARGET_ARM64
NATIVE_DEFINES += $(NATIVE_DEFINES_yes)
ifeq ($(TOP),.)
@ -86,6 +87,7 @@ ARM_VFP_CROSS = arm-linux-gnu-tcc$(EXESUF)
ARM_EABI_CROSS = arm-linux-gnueabi-tcc$(EXESUF)
ARM_EABIHF_CROSS = arm-linux-gnueabihf-tcc$(EXESUF)
ARM_CROSS = $(ARM_FPA_CROSS) $(ARM_FPA_LD_CROSS) $(ARM_VFP_CROSS) $(ARM_EABI_CROSS)
ARM64_CROSS = arm64-tcc$(EXESUF)
C67_CROSS = c67-tcc$(EXESUF)
# Legacy symlinks for cross compilers
@ -107,33 +109,39 @@ WIN64_FILES = $(CORE_FILES) x86_64-gen.c i386-asm.c x86_64-asm.h tccpe.c
WINCE_FILES = $(CORE_FILES) arm-gen.c tccpe.c
X86_64_FILES = $(CORE_FILES) x86_64-gen.c i386-asm.c x86_64-asm.h
ARM_FILES = $(CORE_FILES) arm-gen.c
ARM64_FILES = $(CORE_FILES) arm64-gen.c
C67_FILES = $(CORE_FILES) c67-gen.c tcccoff.c
ifdef CONFIG_WIN64
PROGS+=tiny_impdef$(EXESUF) tiny_libmaker$(EXESUF)
NATIVE_FILES=$(WIN64_FILES)
PROGS_CROSS=$(WIN32_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS)
PROGS_CROSS=$(WIN32_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS)
LIBTCC1_CROSS=lib/i386-win32/libtcc1.a
LIBTCC1=libtcc1.a
else ifdef CONFIG_WIN32
PROGS+=tiny_impdef$(EXESUF) tiny_libmaker$(EXESUF)
NATIVE_FILES=$(WIN32_FILES)
PROGS_CROSS=$(WIN64_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS)
PROGS_CROSS=$(WIN64_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS)
LIBTCC1_CROSS=lib/x86_64-win32/libtcc1.a
LIBTCC1=libtcc1.a
else ifeq ($(ARCH),i386)
NATIVE_FILES=$(I386_FILES)
PROGS_CROSS=$(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS)
PROGS_CROSS=$(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS)
LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a
LIBTCC1=libtcc1.a
else ifeq ($(ARCH),x86-64)
NATIVE_FILES=$(X86_64_FILES)
PROGS_CROSS=$(I386_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS)
PROGS_CROSS=$(I386_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS)
LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a
LIBTCC1=libtcc1.a
else ifeq ($(ARCH),arm)
NATIVE_FILES=$(ARM_FILES)
PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(C67_CROSS) $(WINCE_CROSS)
PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM64_CROSS) $(C67_CROSS) $(WINCE_CROSS)
LIBTCC1=libtcc1.a
LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a
else ifeq ($(ARCH),arm64)
NATIVE_FILES=$(ARM64_FILES)
PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) $(WINCE_CROSS)
LIBTCC1=libtcc1.a
LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a
endif
@ -181,6 +189,7 @@ $(ARM_FPA_CROSS): DEFINES = -DTCC_TARGET_ARM
$(ARM_FPA_LD_CROSS)$(EXESUF): DEFINES = -DTCC_TARGET_ARM -DLDOUBLE_SIZE=12
$(ARM_VFP_CROSS): DEFINES = -DTCC_TARGET_ARM -DTCC_ARM_VFP -DCONFIG_MULTIARCHDIR="\"arm-linux-gnu\""
$(ARM_EABI_CROSS): DEFINES = -DTCC_TARGET_ARM -DTCC_ARM_EABI -DTCC_ARM_VFP -DCONFIG_MULTIARCHDIR="\"arm-linux-gnueabi\""
$(ARM64_CROSS): DEFINES = -DTCC_TARGET_ARM64
$(I386_CROSS): $(I386_FILES)
$(X64_CROSS): $(X86_64_FILES)
@ -189,6 +198,7 @@ $(WIN64_CROSS): $(WIN64_FILES)
$(WINCE_CROSS): $(WINCE_FILES)
$(C67_CROSS): $(C67_FILES)
$(ARM_FPA_CROSS) $(ARM_FPA_LD_CROSS) $(ARM_VFP_CROSS) $(ARM_EABI_CROSS): $(ARM_FILES)
$(ARM64_CROSS): $(ARM64_FILES)
# libtcc generation and test
ifndef ONE_SOURCE

1621
arm64-gen.c Normal file

File diff suppressed because it is too large Load Diff

6
configure vendored
View File

@ -99,6 +99,9 @@ classify_cpu ()
esac
cpu="armv4l"
;;
aarch64)
cpu="aarch64"
;;
alpha)
cpu="alpha"
;;
@ -435,6 +438,9 @@ elif test "$cpu" = "armv4l" ; then
echo "ARCH=arm" >> config.mak
echo "#define HOST_ARM 1" >> $TMPH
echo "#define TCC_ARM_VERSION $cpuver" >> $TMPH
elif test "$cpu" = "aarch64" ; then
echo "ARCH=arm64" >> config.mak
echo "#define HOST_ARM64 1" >> $TMPH
elif test "$cpu" = "powerpc" ; then
echo "ARCH=ppc" >> config.mak
echo "#define HOST_PPC 1" >> $TMPH

View File

@ -7,6 +7,8 @@
# define TRIPLET_ARCH "x86_64"
#elif defined(__arm__)
# define TRIPLET_ARCH "arm"
#elif defined(__aarch64__)
# define TRIPLET_ARCH "aarch64"
#else
# define TRIPLET_ARCH "unknown"
#endif

112
elf.h
View File

@ -2336,6 +2336,117 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_AARCH64_NONE 0 /* No relocation. */
#define R_AARCH64_ABS64 257 /* Direct 64 bit. */
#define R_AARCH64_ABS32 258 /* Direct 32 bit. */
#define R_AARCH64_ABS16 259 /* Direct 16-bit. */
#define R_AARCH64_PREL64 260 /* PC-relative 64-bit. */
#define R_AARCH64_PREL32 261 /* PC-relative 32-bit. */
#define R_AARCH64_PREL16 262 /* PC-relative 16-bit. */
#define R_AARCH64_MOVW_UABS_G0 263 /* Dir. MOVZ imm. from bits 15:0. */
#define R_AARCH64_MOVW_UABS_G0_NC 264 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_UABS_G1 265 /* Dir. MOVZ imm. from bits 31:16. */
#define R_AARCH64_MOVW_UABS_G1_NC 266 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_UABS_G2 267 /* Dir. MOVZ imm. from bits 47:32. */
#define R_AARCH64_MOVW_UABS_G2_NC 268 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_UABS_G3 269 /* Dir. MOV{K,Z} imm. from 63:48. */
#define R_AARCH64_MOVW_SABS_G0 270 /* Dir. MOV{N,Z} imm. from 15:0. */
#define R_AARCH64_MOVW_SABS_G1 271 /* Dir. MOV{N,Z} imm. from 31:16. */
#define R_AARCH64_MOVW_SABS_G2 272 /* Dir. MOV{N,Z} imm. from 47:32. */
#define R_AARCH64_LD_PREL_LO19 273 /* PC-rel. LD imm. from bits 20:2. */
#define R_AARCH64_ADR_PREL_LO21 274 /* PC-rel. ADR imm. from bits 20:0. */
#define R_AARCH64_ADR_PREL_PG_HI21 275 /* Page-rel. ADRP imm. from 32:12. */
#define R_AARCH64_ADR_PREL_PG_HI21_NC 276 /* Likewise; no overflow check. */
#define R_AARCH64_ADD_ABS_LO12_NC 277 /* Dir. ADD imm. from bits 11:0. */
#define R_AARCH64_LDST8_ABS_LO12_NC 278 /* Likewise for LD/ST; no check. */
#define R_AARCH64_TSTBR14 279 /* PC-rel. TBZ/TBNZ imm. from 15:2. */
#define R_AARCH64_CONDBR19 280 /* PC-rel. cond. br. imm. from 20:2. */
#define R_AARCH64_JUMP26 282 /* PC-rel. B imm. from bits 27:2. */
#define R_AARCH64_CALL26 283 /* Likewise for CALL. */
#define R_AARCH64_LDST16_ABS_LO12_NC 284 /* Dir. ADD imm. from bits 11:1. */
#define R_AARCH64_LDST32_ABS_LO12_NC 285 /* Likewise for bits 11:2. */
#define R_AARCH64_LDST64_ABS_LO12_NC 286 /* Likewise for bits 11:3. */
#define R_AARCH64_MOVW_PREL_G0 287 /* PC-rel. MOV{N,Z} imm. from 15:0. */
#define R_AARCH64_MOVW_PREL_G0_NC 288 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_PREL_G1 289 /* PC-rel. MOV{N,Z} imm. from 31:16. */
#define R_AARCH64_MOVW_PREL_G1_NC 290 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_PREL_G2 291 /* PC-rel. MOV{N,Z} imm. from 47:32. */
#define R_AARCH64_MOVW_PREL_G2_NC 292 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_PREL_G3 293 /* PC-rel. MOV{N,Z} imm. from 63:48. */
#define R_AARCH64_LDST128_ABS_LO12_NC 299 /* Dir. ADD imm. from bits 11:4. */
#define R_AARCH64_MOVW_GOTOFF_G0 300 /* GOT-rel. off. MOV{N,Z} imm. 15:0. */
#define R_AARCH64_MOVW_GOTOFF_G0_NC 301 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_GOTOFF_G1 302 /* GOT-rel. o. MOV{N,Z} imm. 31:16. */
#define R_AARCH64_MOVW_GOTOFF_G1_NC 303 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_GOTOFF_G2 304 /* GOT-rel. o. MOV{N,Z} imm. 47:32. */
#define R_AARCH64_MOVW_GOTOFF_G2_NC 305 /* Likewise for MOVK; no check. */
#define R_AARCH64_MOVW_GOTOFF_G3 306 /* GOT-rel. o. MOV{N,Z} imm. 63:48. */
#define R_AARCH64_GOTREL64 307 /* GOT-relative 64-bit. */
#define R_AARCH64_GOTREL32 308 /* GOT-relative 32-bit. */
#define R_AARCH64_GOT_LD_PREL19 309 /* PC-rel. GOT off. load imm. 20:2. */
#define R_AARCH64_LD64_GOTOFF_LO15 310 /* GOT-rel. off. LD/ST imm. 14:3. */
#define R_AARCH64_ADR_GOT_PAGE 311 /* P-page-rel. GOT off. ADRP 32:12. */
#define R_AARCH64_LD64_GOT_LO12_NC 312 /* Dir. GOT off. LD/ST imm. 11:3. */
#define R_AARCH64_LD64_GOTPAGE_LO15 313 /* GOT-page-rel. GOT off. LD/ST 14:3 */
#define R_AARCH64_TLSGD_ADR_PREL21 512 /* PC-relative ADR imm. 20:0. */
#define R_AARCH64_TLSGD_ADR_PAGE21 513 /* page-rel. ADRP imm. 32:12. */
#define R_AARCH64_TLSGD_ADD_LO12_NC 514 /* direct ADD imm. from 11:0. */
#define R_AARCH64_TLSGD_MOVW_G1 515 /* GOT-rel. MOV{N,Z} 31:16. */
#define R_AARCH64_TLSGD_MOVW_G0_NC 516 /* GOT-rel. MOVK imm. 15:0. */
#define R_AARCH64_TLSLD_ADR_PREL21 517 /* Like 512; local dynamic model. */
#define R_AARCH64_TLSLD_ADR_PAGE21 518 /* Like 513; local dynamic model. */
#define R_AARCH64_TLSLD_ADD_LO12_NC 519 /* Like 514; local dynamic model. */
#define R_AARCH64_TLSLD_MOVW_G1 520 /* Like 515; local dynamic model. */
#define R_AARCH64_TLSLD_MOVW_G0_NC 521 /* Like 516; local dynamic model. */
#define R_AARCH64_TLSLD_LD_PREL19 522 /* TLS PC-rel. load imm. 20:2. */
#define R_AARCH64_TLSLD_MOVW_DTPREL_G2 523 /* TLS DTP-rel. MOV{N,Z} 47:32. */
#define R_AARCH64_TLSLD_MOVW_DTPREL_G1 524 /* TLS DTP-rel. MOV{N,Z} 31:16. */
#define R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC 525 /* Likewise; MOVK; no check. */
#define R_AARCH64_TLSLD_MOVW_DTPREL_G0 526 /* TLS DTP-rel. MOV{N,Z} 15:0. */
#define R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC 527 /* Likewise; MOVK; no check. */
#define R_AARCH64_TLSLD_ADD_DTPREL_HI12 528 /* DTP-rel. ADD imm. from 23:12. */
#define R_AARCH64_TLSLD_ADD_DTPREL_LO12 529 /* DTP-rel. ADD imm. from 11:0. */
#define R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC 530 /* Likewise; no ovfl. check. */
#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12 531 /* DTP-rel. LD/ST imm. 11:0. */
#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC 532 /* Likewise; no check. */
#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12 533 /* DTP-rel. LD/ST imm. 11:1. */
#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC 534 /* Likewise; no check. */
#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12 535 /* DTP-rel. LD/ST imm. 11:2. */
#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC 536 /* Likewise; no check. */
#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12 537 /* DTP-rel. LD/ST imm. 11:3. */
#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC 538 /* Likewise; no check. */
#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 539 /* GOT-rel. MOV{N,Z} 31:16. */
#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC 540 /* GOT-rel. MOVK 15:0. */
#define R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 541 /* Page-rel. ADRP 32:12. */
#define R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC 542 /* Direct LD off. 11:3. */
#define R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 543 /* PC-rel. load imm. 20:2. */
#define R_AARCH64_TLSLE_MOVW_TPREL_G2 544 /* TLS TP-rel. MOV{N,Z} 47:32. */
#define R_AARCH64_TLSLE_MOVW_TPREL_G1 545 /* TLS TP-rel. MOV{N,Z} 31:16. */
#define R_AARCH64_TLSLE_MOVW_TPREL_G1_NC 546 /* Likewise; MOVK; no check. */
#define R_AARCH64_TLSLE_MOVW_TPREL_G0 547 /* TLS TP-rel. MOV{N,Z} 15:0. */
#define R_AARCH64_TLSLE_MOVW_TPREL_G0_NC 548 /* Likewise; MOVK; no check. */
#define R_AARCH64_TLSLE_ADD_TPREL_HI12 549 /* TP-rel. ADD imm. 23:12. */
#define R_AARCH64_TLSLE_ADD_TPREL_LO12 550 /* TP-rel. ADD imm. 11:0. */
#define R_AARCH64_TLSLE_ADD_TPREL_LO12_NC 551 /* Likewise; no ovfl. check. */
#define R_AARCH64_TLSLE_LDST8_TPREL_LO12 552 /* TP-rel. LD/ST off. 11:0. */
#define R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC 553 /* Likewise; no ovfl. check. */
#define R_AARCH64_TLSLE_LDST16_TPREL_LO12 554 /* TP-rel. LD/ST off. 11:1. */
#define R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC 555 /* Likewise; no check. */
#define R_AARCH64_TLSLE_LDST32_TPREL_LO12 556 /* TP-rel. LD/ST off. 11:2. */
#define R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC 557 /* Likewise; no check. */
#define R_AARCH64_TLSLE_LDST64_TPREL_LO12 558 /* TP-rel. LD/ST off. 11:3. */
#define R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC 559 /* Likewise; no check. */
#define R_AARCH64_TLSDESC_LD_PREL19 560 /* PC-rel. load immediate 20:2. */
#define R_AARCH64_TLSDESC_ADR_PREL21 561 /* PC-rel. ADR immediate 20:0. */
#define R_AARCH64_TLSDESC_ADR_PAGE21 562 /* Page-rel. ADRP imm. 32:12. */
#define R_AARCH64_TLSDESC_LD64_LO12 563 /* Direct LD off. from 11:3. */
#define R_AARCH64_TLSDESC_ADD_LO12 564 /* Direct ADD imm. from 11:0. */
#define R_AARCH64_TLSDESC_OFF_G1 565 /* GOT-rel. MOV{N,Z} imm. 31:16. */
#define R_AARCH64_TLSDESC_OFF_G0_NC 566 /* GOT-rel. MOVK imm. 15:0; no ck. */
#define R_AARCH64_TLSDESC_LDR 567 /* Relax LDR. */
#define R_AARCH64_TLSDESC_ADD 568 /* Relax ADD. */
#define R_AARCH64_TLSDESC_CALL 569 /* Relax BLR. */
#define R_AARCH64_TLSLE_LDST128_TPREL_LO12 570 /* TP-rel. LD/ST off. 11:4. */
#define R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC 571 /* Likewise; no check. */
#define R_AARCH64_TLSLD_LDST128_DTPREL_LO12 572 /* DTP-rel. LD/ST imm. 11:4. */
#define R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC 573 /* Likewise; no check. */
#define R_AARCH64_COPY 1024 /* Copy symbol at runtime. */
#define R_AARCH64_GLOB_DAT 1025 /* Create GOT entry. */
#define R_AARCH64_JUMP_SLOT 1026 /* Create PLT entry. */
@ -2344,6 +2455,7 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_AARCH64_TLS_DTPREL64 1029 /* Module-relative offset, 64 bit. */
#define R_AARCH64_TLS_TPREL64 1030 /* TP-relative offset, 64 bit. */
#define R_AARCH64_TLSDESC 1031 /* TLS Descriptor. */
#define R_AARCH64_IRELATIVE 1032 /* STT_GNU_IFUNC relocation. */
/* ARM relocs. */

View File

@ -46,6 +46,19 @@ typedef char *va_list;
#define va_copy(dest, src) (dest) = (src)
#define va_end(ap)
#elif defined(__aarch64__)
typedef struct {
void *__stack;
void *__gr_top;
void *__vr_top;
int __gr_offs;
int __vr_offs;
} va_list;
#define va_start(ap, last) __va_start(ap, last)
#define va_arg(ap, type) __va_arg(ap, type)
#define va_end(ap)
#define va_copy(dest, src) ((dest) = (src))
#else /* __i386__ */
typedef char *va_list;
/* only correct for i386 */

View File

@ -28,6 +28,11 @@ ifndef TARGET # native library
ifeq ($(ARCH),arm)
TARGET = arm
XCC = $(CC)
else
ifeq ($(ARCH),arm64)
TARGET = arm64
else
endif
endif
endif
endif
@ -49,6 +54,7 @@ X86_64_O = libtcc1.o alloca86_64.o
ARM_O = libtcc1.o armeabi.o alloca-arm.o
WIN32_O = $(I386_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o
WIN64_O = $(X86_64_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o
ARM64_O = lib-arm64.o
# build TCC runtime library to contain PIC code, so it can be linked
# into shared libraries
@ -86,6 +92,11 @@ ifeq "$(TARGET)" "arm"
OBJ = $(addprefix $(DIR)/,$(ARM_O))
TGT = -DTCC_TARGET_ARM
XCC ?= $(TCC) -B$(TOP)
else
ifeq "$(TARGET)" "arm64"
OBJ = $(addprefix $(DIR)/,$(ARM64_O))
TGT = -DTCC_TARGET_ARM64
XCC ?= $(TCC) -B$(TOP)
else
$(error libtcc1.a not supported on target '$(TARGET)')
endif
@ -93,6 +104,7 @@ endif
endif
endif
endif
endif
XFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAGS) $(TGT)

652
lib/lib-arm64.c Normal file
View File

@ -0,0 +1,652 @@
/*
* TCC runtime library for arm64.
*
* Copyright (c) 2015 Edmund Grimley Evans
*
* Copying and distribution of this file, with or without modification,
* are permitted in any medium without royalty provided the copyright
* notice and this notice are preserved. This file is offered as-is,
* without any warranty.
*/
#include <stdint.h>
#include <string.h>
void __clear_cache(char *beg, char *end)
{
#warning __clear_cache not yet implemented
}
typedef struct {
uint64_t x0, x1;
} u128_t;
static long double f3_zero(int sgn)
{
long double f;
u128_t x = { 0, (uint64_t)sgn << 63 };
memcpy(&f, &x, 16);
return f;
}
static long double f3_infinity(int sgn)
{
long double f;
u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 };
memcpy(&f, &x, 16);
return f;
}
static long double f3_NaN(void)
{
long double f;
#if 0
// ARM's default NaN usually has just the top fraction bit set:
u128_t x = { 0, 0x7fff800000000000 };
#else
// GCC's library sets all fraction bits:
u128_t x = { -1, 0x7fffffffffffffff };
#endif
memcpy(&f, &x, 16);
return f;
}
static int fp3_convert_NaN(long double *f, int sgn, u128_t mnt)
{
u128_t x = { mnt.x0,
mnt.x1 | 0x7fff800000000000 | (uint64_t)sgn << 63 };
memcpy(f, &x, 16);
return 1;
}
static int fp3_detect_NaNs(long double *f,
int a_sgn, int a_exp, u128_t a,
int b_sgn, int b_exp, u128_t b)
{
// Detect signalling NaNs:
if (a_exp == 32767 && (a.x0 | a.x1 << 16) && !(a.x1 >> 47 & 1))
return fp3_convert_NaN(f, a_sgn, a);
if (b_exp == 32767 && (b.x0 | b.x1 << 16) && !(b.x1 >> 47 & 1))
return fp3_convert_NaN(f, b_sgn, b);
// Detect quiet NaNs:
if (a_exp == 32767 && (a.x0 | a.x1 << 16))
return fp3_convert_NaN(f, a_sgn, a);
if (b_exp == 32767 && (b.x0 | b.x1 << 16))
return fp3_convert_NaN(f, b_sgn, b);
return 0;
}
static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f)
{
u128_t x;
memcpy(&x, &f, 16);
*sgn = x.x1 >> 63;
*exp = x.x1 >> 48 & 32767;
x.x1 = x.x1 << 16 >> 16;
if (*exp)
x.x1 |= (uint64_t)1 << 48;
else
*exp = 1;
*mnt = x;
}
static u128_t f3_normalise(int32_t *exp, u128_t mnt)
{
int sh;
if (!(mnt.x0 | mnt.x1))
return mnt;
if (!mnt.x1) {
mnt.x1 = mnt.x0;
mnt.x0 = 0;
*exp -= 64;
}
for (sh = 32; sh; sh >>= 1) {
if (!(mnt.x1 >> (64 - sh))) {
mnt.x1 = mnt.x1 << sh | mnt.x0 >> (64 - sh);
mnt.x0 = mnt.x0 << sh;
*exp -= sh;
}
}
return mnt;
}
static u128_t f3_sticky_shift(int32_t sh, u128_t x)
{
if (sh >= 128) {
x.x0 = !!(x.x0 | x.x1);
x.x1 = 0;
return x;
}
if (sh >= 64) {
x.x0 = x.x1 | !!x.x0;
x.x1 = 0;
sh -= 64;
}
if (sh > 0) {
x.x0 = x.x0 >> sh | x.x1 << (64 - sh) | !!(x.x0 << (64 - sh));
x.x1 = x.x1 >> sh;
}
return x;
}
static long double f3_round(int sgn, int32_t exp, u128_t x)
{
long double f;
int error;
if (exp > 0) {
x = f3_sticky_shift(13, x);
}
else {
x = f3_sticky_shift(14 - exp, x);
exp = 0;
}
error = x.x0 & 3;
x.x0 = x.x0 >> 2 | x.x1 << 62;
x.x1 = x.x1 >> 2;
if (error == 3 || ((error == 2) & (x.x0 & 1))) {
if (!++x.x0) {
++x.x1;
if (x.x1 == (uint64_t)1 << 48)
exp = 1;
else if (x.x1 == (uint64_t)1 << 49) {
++exp;
x.x0 = x.x0 >> 1 | x.x1 << 63;
x.x1 = x.x1 >> 1;
}
}
}
if (exp >= 32767)
return f3_infinity(sgn);
x.x1 = x.x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63;
memcpy(&f, &x, 16);
return f;
}
static long double f3_add(long double fa, long double fb, int neg)
{
u128_t a, b, x;
int32_t a_exp, b_exp, x_exp;
int a_sgn, b_sgn, x_sgn;
long double fx;
f3_unpack(&a_sgn, &a_exp, &a, fa);
f3_unpack(&b_sgn, &b_exp, &b, fb);
if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
return fx;
b_sgn ^= neg;
// Handle infinities and zeroes:
if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn)
return f3_NaN();
if (a_exp == 32767)
return f3_infinity(a_sgn);
if (b_exp == 32767)
return f3_infinity(b_sgn);
if (!(a.x0 | a.x1 | b.x0 | b.x1))
return f3_zero(a_sgn & b_sgn);
a.x1 = a.x1 << 3 | a.x0 >> 61;
a.x0 = a.x0 << 3;
b.x1 = b.x1 << 3 | b.x0 >> 61;
b.x0 = b.x0 << 3;
if (a_exp <= b_exp) {
a = f3_sticky_shift(b_exp - a_exp, a);
a_exp = b_exp;
}
else {
b = f3_sticky_shift(a_exp - b_exp, b);
b_exp = a_exp;
}
x_sgn = a_sgn;
x_exp = a_exp;
if (a_sgn == b_sgn) {
x.x0 = a.x0 + b.x0;
x.x1 = a.x1 + b.x1 + (x.x0 < a.x0);
}
else {
x.x0 = a.x0 - b.x0;
x.x1 = a.x1 - b.x1 - (x.x0 > a.x0);
if (x.x1 >> 63) {
x_sgn ^= 1;
x.x0 = -x.x0;
x.x1 = -x.x1 - !!x.x0;
}
}
if (!(x.x0 | x.x1))
return f3_zero(0);
x = f3_normalise(&x_exp, x);
return f3_round(x_sgn, x_exp + 12, x);
}
long double __addtf3(long double a, long double b)
{
return f3_add(a, b, 0);
}
long double __subtf3(long double a, long double b)
{
return f3_add(a, b, 1);
}
long double __multf3(long double fa, long double fb)
{
u128_t a, b, x;
int32_t a_exp, b_exp, x_exp;
int a_sgn, b_sgn, x_sgn;
long double fx;
f3_unpack(&a_sgn, &a_exp, &a, fa);
f3_unpack(&b_sgn, &b_exp, &b, fb);
if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
return fx;
// Handle infinities and zeroes:
if ((a_exp == 32767 && !(b.x0 | b.x1)) ||
(b_exp == 32767 && !(a.x0 | a.x1)))
return f3_NaN();
if (a_exp == 32767 || b_exp == 32767)
return f3_infinity(a_sgn ^ b_sgn);
if (!(a.x0 | a.x1) || !(b.x0 | b.x1))
return f3_zero(a_sgn ^ b_sgn);
a = f3_normalise(&a_exp, a);
b = f3_normalise(&b_exp, b);
x_sgn = a_sgn ^ b_sgn;
x_exp = a_exp + b_exp - 16352;
{
// Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
// so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
uint64_t a0 = a.x0 << 28 >> 34;
uint64_t b0 = b.x0 << 28 >> 34;
uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34;
uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34;
uint64_t a2 = a.x1 << 32 >> 34;
uint64_t b2 = b.x1 << 32 >> 34;
uint64_t a3 = a.x1 >> 32;
uint64_t b3 = b.x1 >> 32;
// Use 16 small multiplications and additions that do not overflow:
uint64_t x0 = a0 * b0;
uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0;
uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0;
uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1;
uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2;
uint64_t x6 = (x5 >> 30) + a3 * b3;
// We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
// Take the top 128 bits, setting bottom bit if any lower bits were set:
uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 |
!!(x3 << 38 | (x2 | x1 | x0) << 34));
uint64_t y1 = x6;
// Top bit may be zero. Renormalise:
if (!(y1 >> 63)) {
y1 = y1 << 1 | y0 >> 63;
y0 = y0 << 1;
--x_exp;
}
x.x0 = y0;
x.x1 = y1;
}
return f3_round(x_sgn, x_exp, x);
}
long double __divtf3(long double fa, long double fb)
{
u128_t a, b, x;
int32_t a_exp, b_exp, x_exp;
int a_sgn, b_sgn, x_sgn, i;
long double fx;
f3_unpack(&a_sgn, &a_exp, &a, fa);
f3_unpack(&b_sgn, &b_exp, &b, fb);
if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
return fx;
// Handle infinities and zeroes:
if ((a_exp == 32767 && b_exp == 32767) ||
(!(a.x0 | a.x1) && !(b.x0 | b.x1)))
return f3_NaN();
if (a_exp == 32767 || !(b.x0 | b.x1))
return f3_infinity(a_sgn ^ b_sgn);
if (!(a.x0 | a.x1) || b_exp == 32767)
return f3_zero(a_sgn ^ b_sgn);
a = f3_normalise(&a_exp, a);
b = f3_normalise(&b_exp, b);
x_sgn = a_sgn ^ b_sgn;
x_exp = a_exp - b_exp + 16395;
a.x0 = a.x0 >> 1 | a.x1 << 63;
a.x1 = a.x1 >> 1;
b.x0 = b.x0 >> 1 | b.x1 << 63;
b.x1 = b.x1 >> 1;
x.x0 = 0;
x.x1 = 0;
for (i = 0; i < 116; i++) {
x.x1 = x.x1 << 1 | x.x0 >> 63;
x.x0 = x.x0 << 1;
if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) {
a.x1 = a.x1 - b.x1 - (a.x0 < b.x0);
a.x0 = a.x0 - b.x0;
x.x0 |= 1;
}
a.x1 = a.x1 << 1 | a.x0 >> 63;
a.x0 = a.x0 << 1;
}
x.x0 |= !!(a.x0 | a.x1);
x = f3_normalise(&x_exp, x);
return f3_round(x_sgn, x_exp, x);
}
long double __extendsftf2(float f)
{
long double fx;
u128_t x;
uint32_t a;
uint64_t aa;
memcpy(&a, &f, 4);
aa = a;
x.x0 = 0;
if (!(a << 1))
x.x1 = aa << 32;
else if (a << 1 >> 24 == 255)
x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 |
(uint64_t)!!(a << 9) << 47);
else
x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 |
aa << 41 >> 16);
memcpy(&fx, &x, 16);
return fx;
}
long double __extenddftf2(double f)
{
long double fx;
u128_t x;
uint64_t a;
memcpy(&a, &f, 8);
x.x0 = a << 60;
if (!(a << 1))
x.x1 = a;
else if (a << 1 >> 53 == 2047)
x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 |
(uint64_t)!!(a << 12) << 47);
else
x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16;
memcpy(&fx, &x, 16);
return fx;
}
float __trunctfsf2(long double f)
{
u128_t mnt;
int32_t exp;
int sgn;
uint32_t x;
float fx;
f3_unpack(&sgn, &exp, &mnt, f);
if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff);
else if (exp > 16510)
x = 0x7f800000 | (uint32_t)sgn << 31;
else if (exp < 16233)
x = (uint32_t)sgn << 31;
else {
exp -= 16257;
x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41);
if (exp < 0) {
x = x >> -exp | !!(x << (32 + exp));
exp = 0;
}
if ((x & 3) == 3 || (x & 7) == 6)
x += 4;
x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31;
}
memcpy(&fx, &x, 4);
return fx;
}
double __trunctfdf2(long double f)
{
u128_t mnt;
int32_t exp;
int sgn;
uint64_t x;
double fx;
f3_unpack(&sgn, &exp, &mnt, f);
if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
x = (0x7ff8000000000000 | (uint64_t)sgn << 63 |
mnt.x1 << 16 >> 12 | mnt.x0 >> 60);
else if (exp > 17406)
x = 0x7ff0000000000000 | (uint64_t)sgn << 63;
else if (exp < 15308)
x = (uint64_t)sgn << 63;
else {
exp -= 15361;
x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6);
if (exp < 0) {
x = x >> -exp | !!(x << (64 + exp));
exp = 0;
}
if ((x & 3) == 3 || (x & 7) == 6)
x += 4;
x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63;
}
memcpy(&fx, &x, 8);
return fx;
}
int32_t __fixtfsi(long double fa)
{
u128_t a;
int32_t a_exp;
int a_sgn;
int32_t x;
f3_unpack(&a_sgn, &a_exp, &a, fa);
if (a_exp < 16369)
return 0;
if (a_exp > 16413)
return a_sgn ? -0x80000000 : 0x7fffffff;
x = a.x1 >> (16431 - a_exp);
return a_sgn ? -x : x;
}
int64_t __fixtfdi(long double fa)
{
u128_t a;
int32_t a_exp;
int a_sgn;
int64_t x;
f3_unpack(&a_sgn, &a_exp, &a, fa);
if (a_exp < 16383)
return 0;
if (a_exp > 16445)
return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff;
x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
return a_sgn ? -x : x;
}
uint32_t __fixunstfsi(long double fa)
{
u128_t a;
int32_t a_exp;
int a_sgn;
f3_unpack(&a_sgn, &a_exp, &a, fa);
if (a_sgn || a_exp < 16369)
return 0;
if (a_exp > 16414)
return -1;
return a.x1 >> (16431 - a_exp);
}
uint64_t __fixunstfdi(long double fa)
{
u128_t a;
int32_t a_exp;
int a_sgn;
f3_unpack(&a_sgn, &a_exp, &a, fa);
if (a_sgn || a_exp < 16383)
return 0;
if (a_exp > 16446)
return -1;
return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
}
long double __floatsitf(int32_t a)
{
int sgn = 0;
int exp = 16414;
uint32_t mnt = a;
u128_t x = { 0, 0 };
long double f;
int i;
if (a) {
if (a < 0) {
sgn = 1;
mnt = -mnt;
}
for (i = 16; i; i >>= 1)
if (!(mnt >> (32 - i))) {
mnt <<= i;
exp -= i;
}
x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 |
(uint64_t)(mnt << 1) << 16);
}
memcpy(&f, &x, 16);
return f;
}
long double __floatditf(int64_t a)
{
int sgn = 0;
int exp = 16446;
uint64_t mnt = a;
u128_t x = { 0, 0 };
long double f;
int i;
if (a) {
if (a < 0) {
sgn = 1;
mnt = -mnt;
}
for (i = 32; i; i >>= 1)
if (!(mnt >> (64 - i))) {
mnt <<= i;
exp -= i;
}
x.x0 = mnt << 49;
x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16;
}
memcpy(&f, &x, 16);
return f;
}
long double __floatunsitf(uint32_t a)
{
int exp = 16414;
uint32_t mnt = a;
u128_t x = { 0, 0 };
long double f;
int i;
if (a) {
for (i = 16; i; i >>= 1)
if (!(mnt >> (32 - i))) {
mnt <<= i;
exp -= i;
}
x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16;
}
memcpy(&f, &x, 16);
return f;
}
long double __floatunditf(uint64_t a)
{
int exp = 16446;
uint64_t mnt = a;
u128_t x = { 0, 0 };
long double f;
int i;
if (a) {
for (i = 32; i; i >>= 1)
if (!(mnt >> (64 - i))) {
mnt <<= i;
exp -= i;
}
x.x0 = mnt << 49;
x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16;
}
memcpy(&f, &x, 16);
return f;
}
static int f3_cmp(long double fa, long double fb)
{
u128_t a, b;
memcpy(&a, &fa, 16);
memcpy(&b, &fb, 16);
return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 :
((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) ||
(b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 :
a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) :
a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 :
a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) :
a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 :
b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0);
}
int __eqtf2(long double a, long double b)
{
return !!f3_cmp(a, b);
}
int __netf2(long double a, long double b)
{
return !!f3_cmp(a, b);
}
int __lttf2(long double a, long double b)
{
return f3_cmp(a, b);
}
int __letf2(long double a, long double b)
{
return f3_cmp(a, b);
}
int __gttf2(long double a, long double b)
{
return -f3_cmp(b, a);
}
int __getf2(long double a, long double b)
{
return -f3_cmp(b, a);
}

510
lib/testfp.c Normal file
View File

@ -0,0 +1,510 @@
/*
* Test 128-bit floating-point arithmetic on arm64:
* build with two different compilers and compare the output.
*
* Copyright (c) 2015 Edmund Grimley Evans
*
* Copying and distribution of this file, with or without modification,
* are permitted in any medium without royalty provided the copyright
* notice and this notice are preserved. This file is offered as-is,
* without any warranty.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define check(x) ((x) ? (void)0 : check_fail(#x, __FILE__, __LINE__))
void check_fail(const char *assertion, const char *file, unsigned int line)
{
printf("%s:%d: Check (%s) failed.", file, line, assertion);
exit(1);
}
typedef struct {
unsigned long long x0, x1;
} u128_t;
float copy_fi(uint32_t x)
{
float f;
memcpy(&f, &x, 4);
return f;
}
double copy_di(uint64_t x)
{
double f;
memcpy(&f, &x, 8);
return f;
}
long double copy_ldi(u128_t x)
{
long double f;
memcpy(&f, &x, 16);
return f;
}
uint32_t copy_if(float f)
{
uint32_t x;
memcpy(&x, &f, 4);
return x;
}
uint64_t copy_id(double f)
{
uint64_t x;
memcpy(&x, &f, 8);
return x;
}
u128_t copy_ild(long double f)
{
u128_t x;
memcpy(&x, &f, 16);
return x;
}
long double make(int sgn, int exp, uint64_t high, uint64_t low)
{
u128_t x = { low,
(0x0000ffffffffffff & high) |
(0x7fff000000000000 & (uint64_t)exp << 48) |
(0x8000000000000000 & (uint64_t)sgn << 63) };
return copy_ldi(x);
}
void cmp(long double a, long double b)
{
u128_t ax = copy_ild(a);
u128_t bx = copy_ild(b);
int eq = (a == b);
int ne = (a != b);
int lt = (a < b);
int le = (a <= b);
int gt = (a > b);
int ge = (a >= b);
check(eq == 0 || eq == 1);
check(lt == 0 || lt == 1);
check(gt == 0 || gt == 1);
check(ne == !eq && le == (lt | eq) && ge == (gt | eq));
check(eq + lt + gt < 2);
printf("cmp %016llx%016llx %016llx%016llx %d %d %d\n",
ax.x1, ax.x0, bx.x1, bx.x0, lt, eq, gt);
}
void cmps(void)
{
int i, j;
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
cmp(make(i, 0, 0, 0), make(j, 0, 0, 0));
for (i = 0; i < 2; i++) {
for (j = 0; j < 64; j++) {
long double f1 = make(i, 32767, (uint64_t)1 << j, 0);
long double f2 = make(i, 32767, 0, (uint64_t)1 << j);
cmp(f1, 0);
cmp(f2, 0);
cmp(0, f1);
cmp(0, f2);
}
}
for (i = 0; i < 6; i++)
for (j = 0; j < 6; j++)
cmp(make(i & 1, i >> 1, 0, 0),
make(j & 1, j >> 1, 0, 0));
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
int a, b;
for (a = 0; a < 2; a++) {
for (b = 0; b < 2; b++) {
cmp(make(i, j, a, b), make(i, j, 0, 0));
cmp(make(i, j, 0, 0), make(i, j, a, b));
}
}
}
}
}
void xop(const char *name, long double a, long double b, long double c)
{
u128_t ax = copy_ild(a);
u128_t bx = copy_ild(b);
u128_t cx = copy_ild(c);
printf("%s %016llx%016llx %016llx%016llx %016llx%016llx\n",
name, ax.x1, ax.x0, bx.x1, bx.x0, cx.x1, cx.x0);
}
void fadd(long double a, long double b)
{
xop("add", a, b, a + b);
}
void fsub(long double a, long double b)
{
xop("sub", a, b, a - b);
}
void fmul(long double a, long double b)
{
xop("mul", a, b, a * b);
}
void fdiv(long double a, long double b)
{
xop("div", a, b, a / b);
}
void nanz(void)
{
// Check NaNs:
{
long double x[7];
int i, j, n = 0;
x[n++] = make(0, 32000, 0x95132b76effc, 0xd79035214b4f8d53);
x[n++] = make(1, 32001, 0xbe71d7a51587, 0x30601c6815d6c3ac);
x[n++] = make(0, 32767, 0, 1);
x[n++] = make(0, 32767, (uint64_t)1 << 46, 0);
x[n++] = make(1, 32767, (uint64_t)1 << 47, 0);
x[n++] = make(1, 32767, 0x7596c7099ad5, 0xe25fed2c58f73fc9);
x[n++] = make(0, 32767, 0x835d143360f9, 0x5e315efb35630666);
check(n == sizeof(x) / sizeof(*x));
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
fadd(x[i], x[j]);
fsub(x[i], x[j]);
fmul(x[i], x[j]);
fdiv(x[i], x[j]);
}
}
}
// Check infinities and zeroes:
{
long double x[6];
int i, j, n = 0;
x[n++] = make(1, 32000, 0x62acda85f700, 0x47b6c9f35edc4044);
x[n++] = make(0, 32001, 0x94b7abf55af7, 0x9f425fe354428e19);
x[n++] = make(0, 32767, 0, 0);
x[n++] = make(1, 32767, 0, 0);
x[n++] = make(0, 0, 0, 0);
x[n++] = make(1, 0, 0, 0);
check(n == sizeof(x) / sizeof(*x));
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
fadd(x[i], x[j]);
fsub(x[i], x[j]);
fmul(x[i], x[j]);
fdiv(x[i], x[j]);
}
}
}
}
void adds(void)
{
// Check shifting and add/sub:
{
int i;
for (i = -130; i <= 130; i++) {
int s1 = (uint32_t)i % 3 < 1;
int s2 = (uint32_t)i % 5 < 2;
fadd(make(s1, 16384 , 0x502c065e4f71a65d, 0xd2f9bdb031f4f031),
make(s2, 16384 + i, 0xae267395a9bc1033, 0xb56b5800da1ba448));
}
}
// Check normalisation:
{
uint64_t a0 = 0xc6bab0a6afbef5ed;
uint64_t a1 = 0x4f84136c4a2e9b52;
int ee[] = { 0, 1, 10000 };
int e, i;
for (e = 0; e < sizeof(ee) / sizeof(*ee); e++) {
int exp = ee[e];
fsub(make(0, exp, a1, a0), make(0, 0, 0, 0));
for (i = 63; i >= 0; i--)
fsub(make(0, exp, a1 | (uint64_t)1 << i >> 1, a0),
make(0, exp, a1 >> i << i, 0));
for (i = 63; i >=0; i--)
fsub(make(0, exp, a1, a0 | (uint64_t)1 << i >> 1),
make(0, exp, a1, a0 >> i << i));
}
}
// Carry/overflow from rounding:
{
fadd(make(0, 114, -1, -1), make(0, 1, 0, 0));
fadd(make(0, 32766, -1, -1), make(0, 32653, 0, 0));
fsub(make(1, 32766, -1, -1), make(0, 32653, 0, 0));
}
}
void muls(void)
{
int i, j;
{
long double max = make(0, 32766, -1, -1);
long double min = make(0, 0, 0, 1);
fmul(max, max);
fmul(max, min);
fmul(min, min);
}
for (i = 117; i > 0; i--)
fmul(make(0, 16268, 0x643dcea76edc, 0xe0877a598403627a),
make(i & 1, i, 0, 0));
fmul(make(0, 16383, -1, -3), make(0, 16383, 0, 1));
// Round to next exponent:
fmul(make(0, 16383, -1, -2), make(0, 16383, 0, 1));
// Round from subnormal to normal:
fmul(make(0, 1, -1, -1), make(0, 16382, 0, 0));
for (i = 0; i < 2; i++)
for (j = 0; j < 112; j++)
fmul(make(0, 16383, (uint64_t)1 << i, 0),
make(0, 16383,
j < 64 ? 0 : (uint64_t)1 << (j - 64),
j < 64 ? (uint64_t)1 << j : 0));
}
void divs(void)
{
int i;
{
long double max = make(0, 32766, -1, -1);
long double min = make(0, 0, 0, 1);
fdiv(max, max);
fdiv(max, min);
fdiv(min, max);
fdiv(min, min);
}
for (i = 0; i < 64; i++)
fdiv(make(0, 16383, -1, -1), make(0, 16383, -1, -(uint64_t)1 << i));
for (i = 0; i < 48; i++)
fdiv(make(0, 16383, -1, -1), make(0, 16383, -(uint64_t)1 << i, 0));
}
void cvtlsw(int32_t a)
{
long double f = a;
u128_t x = copy_ild(f);
printf("cvtlsw %08lx %016llx%016llx\n", (long)(uint32_t)a, x.x1, x.x0);
}
void cvtlsx(int64_t a)
{
long double f = a;
u128_t x = copy_ild(f);
printf("cvtlsx %016llx %016llx%016llx\n",
(long long)(uint64_t)a, x.x1, x.x0);
}
void cvtluw(uint32_t a)
{
long double f = a;
u128_t x = copy_ild(f);
printf("cvtluw %08lx %016llx%016llx\n", (long)a, x.x1, x.x0);
}
void cvtlux(uint64_t a)
{
long double f = a;
u128_t x = copy_ild(f);
printf("cvtlux %016llx %016llx%016llx\n", (long long)a, x.x1, x.x0);
}
void cvtil(long double a)
{
u128_t x = copy_ild(a);
int32_t b1 = a;
int64_t b2 = a;
uint32_t b3 = a;
uint64_t b4 = a;
printf("cvtswl %016llx%016llx %08lx\n",
x.x1, x.x0, (long)(uint32_t)b1);
printf("cvtsxl %016llx%016llx %016llx\n",
x.x1, x.x0, (long long)(uint64_t)b2);
printf("cvtuwl %016llx%016llx %08lx\n",
x.x1, x.x0, (long)b3);
printf("cvtuxl %016llx%016llx %016llx\n",
x.x1, x.x0, (long long)b4);
}
void cvtlf(float a)
{
uint32_t ax = copy_if(a);
long double b = a;
u128_t bx = copy_ild(b);
printf("cvtlf %08lx %016llx%016llx\n", (long)ax, bx.x1, bx.x0);
}
void cvtld(double a)
{
uint64_t ax = copy_id(a);
long double b = a;
u128_t bx = copy_ild(b);
printf("cvtld %016llx %016llx%016llx\n", (long long)ax, bx.x1, bx.x0);
}
void cvtfl(long double a)
{
u128_t ax = copy_ild(a);
float b = a;
uint32_t bx = copy_if(b);
printf("cvtfl %016llx%016llx %08lx\n", ax.x1, ax.x0, (long)bx);
}
void cvtdl(long double a)
{
u128_t ax = copy_ild(a);
double b = a;
uint64_t bx = copy_id(b);
printf("cvtdl %016llx%016llx %016llx\n", ax.x1, ax.x0, (long long)bx);
}
void cvts(void)
{
int i, j;
{
uint32_t x = 0xad040c5b;
cvtlsw(0);
for (i = 0; i < 31; i++)
cvtlsw(x >> (31 - i));
for (i = 0; i < 31; i++)
cvtlsw(-(x >> (31 - i)));
cvtlsw(0x80000000);
}
{
uint64_t x = 0xb630a248cad9afd2;
cvtlsx(0);
for (i = 0; i < 63; i++)
cvtlsx(x >> (63 - i));
for (i = 0; i < 63; i++)
cvtlsx(-(x >> (63 - i)));
cvtlsx(0x8000000000000000);
}
{
uint32_t x = 0xad040c5b;
cvtluw(0);
for (i = 0; i < 32; i++)
cvtluw(x >> (31 - i));
}
{
uint64_t x = 0xb630a248cad9afd2;
cvtlux(0);
for (i = 0; i < 64; i++)
cvtlux(x >> (63 - i));
}
for (i = 0; i < 2; i++) {
cvtil(make(i, 32767, 0, 1));
cvtil(make(i, 32767, (uint64_t)1 << 47, 0));
cvtil(make(i, 32767, 123, 456));
cvtil(make(i, 32767, 0, 0));
cvtil(make(i, 16382, -1, -1));
cvtil(make(i, 16383, -1, -1));
cvtil(make(i, 16384, 0x7fffffffffff, -1));
cvtil(make(i, 16384, 0x800000000000, 0));
for (j = 0; j < 68; j++)
cvtil(make(i, 16381 + j, 0xd4822c0a10ec, 0x1fe2f8b2669f5c9d));
}
cvtlf(copy_fi(0x00000000));
cvtlf(copy_fi(0x456789ab));
cvtlf(copy_fi(0x7f800000));
cvtlf(copy_fi(0x7f923456));
cvtlf(copy_fi(0x7fdbcdef));
cvtlf(copy_fi(0x80000000));
cvtlf(copy_fi(0xabcdef12));
cvtlf(copy_fi(0xff800000));
cvtlf(copy_fi(0xff923456));
cvtlf(copy_fi(0xffdbcdef));
cvtld(copy_di(0x0000000000000000));
cvtld(copy_di(0x456789abcdef0123));
cvtld(copy_di(0x7ff0000000000000));
cvtld(copy_di(0x7ff123456789abcd));
cvtld(copy_di(0x7ffabcdef1234567));
cvtld(copy_di(0x8000000000000000));
cvtld(copy_di(0xcdef123456789abc));
cvtld(copy_di(0xfff0000000000000));
cvtld(copy_di(0xfff123456789abcd));
cvtld(copy_di(0xfffabcdef1234567));
for (i = 0; i < 2; i++) { \
cvtfl(make(i, 0, 0, 0));
cvtfl(make(i, 16232, -1, -1));
cvtfl(make(i, 16233, 0, 0));
cvtfl(make(i, 16233, 0, 1));
cvtfl(make(i, 16383, 0xab0ffd000000, 0));
cvtfl(make(i, 16383, 0xab0ffd000001, 0));
cvtfl(make(i, 16383, 0xab0ffeffffff, 0));
cvtfl(make(i, 16383, 0xab0fff000000, 0));
cvtfl(make(i, 16383, 0xab0fff000001, 0));
cvtfl(make(i, 16510, 0xfffffeffffff, -1));
cvtfl(make(i, 16510, 0xffffff000000, 0));
cvtfl(make(i, 16511, 0, 0));
cvtfl(make(i, 32767, 0, 0));
cvtfl(make(i, 32767, 0, 1));
cvtfl(make(i, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5));
cvtfl(make(i, 32767, 0x800000000000, 1));
cvtfl(make(i, 32767, 0xa11caaaf6a52, 0x696033e871eab099));
}
for (i = 0; i < 2; i++) {
cvtdl(make(i, 0, 0, 0));
cvtdl(make(i, 15307, -1, -1));
cvtdl(make(i, 15308, 0, 0));
cvtdl(make(i, 15308, 0, 1));
cvtdl(make(i, 16383, 0xabc123abc0ff, 0xe800000000000000));
cvtdl(make(i, 16383, 0xabc123abc0ff, 0xe800000000000001));
cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf7ffffffffffffff));
cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf800000000000000));
cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf800000000000001));
cvtdl(make(i, 17406, 0xffffffffffff, 0xf7ffffffffffffff));
cvtdl(make(i, 17406, 0xffffffffffff, 0xf800000000000000));
cvtdl(make(i, 17407, 0, 0));
cvtdl(make(i, 32767, 0, 0));
cvtdl(make(i, 32767, 0, 1));
cvtdl(make(i, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5));
cvtdl(make(i, 32767, 0x800000000000, 1));
cvtdl(make(i, 32767, 0xa11caaaf6a52, 0x696033e871eab099));
}
}
void tests(void)
{
cmps();
nanz();
adds();
muls();
divs();
cvts();
}
int main()
{
#ifdef __aarch64__
tests();
#else
printf("This test program is intended for a little-endian architecture\n"
"with an IEEE-standard 128-bit long double.\n");
#endif
return 0;
}

View File

@ -45,6 +45,9 @@ ST_DATA struct TCCState *tcc_state;
#ifdef TCC_TARGET_ARM
#include "arm-gen.c"
#endif
#ifdef TCC_TARGET_ARM64
#include "arm64-gen.c"
#endif
#ifdef TCC_TARGET_C67
#include "c67-gen.c"
#endif
@ -959,6 +962,8 @@ LIBTCCAPI TCCState *tcc_new(void)
#else
s->float_abi = ARM_SOFTFP_FLOAT;
#endif
#elif defined(TCC_TARGET_ARM64)
tcc_define_symbol(s, "__aarch64__", NULL);
#endif
#ifdef TCC_TARGET_PE
@ -1560,7 +1565,7 @@ static int tcc_set_linker(TCCState *s, const char *option)
} else if (link_option(option, "oformat=", &p)) {
#if defined(TCC_TARGET_PE)
if (strstart("pe-", &p)) {
#elif defined(TCC_TARGET_X86_64)
#elif defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
if (strstart("elf64-", &p)) {
#else
if (strstart("elf32-", &p)) {

2
tcc.c
View File

@ -203,6 +203,8 @@ static void display_info(TCCState *s, int what)
# endif
#elif defined TCC_TARGET_ARM
"ARM"
#elif defined TCC_TARGET_ARM64
"AArch64"
# ifdef TCC_ARM_HARDFLOAT
" Hard Float"
# endif

30
tcc.h
View File

@ -113,7 +113,7 @@
#endif
#include "elf.h"
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
# define ELFCLASSW ELFCLASS64
# define ElfW(type) Elf##64##_##type
# define ELFW(type) ELF##64##_##type
@ -151,23 +151,26 @@
/* target selection */
/* #define TCC_TARGET_I386 *//* i386 code generator */
/* #define TCC_TARGET_ARM *//* ARMv4 code generator */
/* #define TCC_TARGET_ARM64 *//* ARMv8 code generator */
/* #define TCC_TARGET_C67 *//* TMS320C67xx code generator */
/* #define TCC_TARGET_X86_64 *//* x86-64 code generator */
/* default target is I386 */
#if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_ARM) && \
!defined(TCC_TARGET_C67) && !defined(TCC_TARGET_X86_64)
!defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_C67) && \
!defined(TCC_TARGET_X86_64)
#define TCC_TARGET_I386
#endif
#if !defined(TCC_UCLIBC) && !defined(TCC_TARGET_ARM) && \
!defined(TCC_TARGET_C67) && !defined(TCC_TARGET_X86_64) && \
!defined(CONFIG_USE_LIBGCC)
!defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_C67) && \
!defined(TCC_TARGET_X86_64) && !defined(CONFIG_USE_LIBGCC)
#define CONFIG_TCC_BCHECK /* enable bound checking code */
#endif
/* define it to include assembler support */
#if !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_C67)
#if !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_ARM64) && \
!defined(TCC_TARGET_C67)
#define CONFIG_TCC_ASM
#endif
@ -184,6 +187,8 @@
# define TCC_IS_NATIVE
# elif defined __arm__ && defined TCC_TARGET_ARM
# define TCC_IS_NATIVE
# elif defined __aarch64__ && defined TCC_TARGET_ARM64
# define TCC_IS_NATIVE
# endif
#endif
@ -256,6 +261,8 @@
# define CONFIG_TCC_ELFINTERP "/usr/libexec/ld-elf.so.2"
# elif defined __GNU__
# define CONFIG_TCC_ELFINTERP "/lib/ld.so"
# elif defined TCC_TARGET_ARM64
# define CONFIG_TCC_ELFINTERP "/lib/ld-linux-aarch64.so.1"
# elif defined(TCC_TARGET_X86_64)
# define CONFIG_TCC_ELFINTERP "/lib64/ld-linux-x86-64.so.2"
# elif defined(TCC_UCLIBC)
@ -290,6 +297,9 @@
#ifdef TCC_TARGET_ARM
# include "arm-gen.c"
#endif
#ifdef TCC_TARGET_ARM64
# include "arm64-gen.c"
#endif
#ifdef TCC_TARGET_C67
# include "coff.h"
# include "c67-gen.c"
@ -1214,6 +1224,7 @@ ST_FUNC void vpushv(SValue *v);
ST_FUNC void save_reg(int r);
ST_FUNC int get_reg(int rc);
ST_FUNC void save_regs(int n);
ST_FUNC void gaddrof(void);
ST_FUNC int gv(int rc);
ST_FUNC void gv2(int rc1, int rc2);
ST_FUNC void vpop(void);
@ -1357,6 +1368,15 @@ ST_FUNC uint32_t encbranch(int pos, int addr, int fail);
ST_FUNC void gen_cvt_itof1(int t);
#endif
/* ------------ arm64-gen.c ------------ */
#ifdef TCC_TARGET_ARM64
ST_FUNC void gen_cvt_sxtw(void);
ST_FUNC void gen_opl(int op);
ST_FUNC void greturn(void);
ST_FUNC void gen_va_start(void);
ST_FUNC void gen_va_arg(CType *t);
#endif
/* ------------ c67-gen.c ------------ */
#ifdef TCC_TARGET_C67
#endif

137
tccelf.c
View File

@ -291,7 +291,7 @@ ST_FUNC void put_elf_reloca(Section *symtab, Section *s, unsigned long offset,
rel = section_ptr_add(sr, sizeof(ElfW_Rel));
rel->r_offset = offset;
rel->r_info = ELFW(R_INFO)(symbol, type);
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
rel->r_addend = addend;
#else
if (addend)
@ -506,7 +506,7 @@ ST_FUNC void relocate_section(TCCState *s1, Section *s)
sym_index = ELFW(R_SYM)(rel->r_info);
sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
val = sym->st_value;
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
val += rel->r_addend;
#endif
type = ELFW(R_TYPE)(rel->r_info);
@ -760,6 +760,69 @@ ST_FUNC void relocate_section(TCCState *s1, Section *s)
fprintf(stderr,"FIXME: handle reloc type %x at %x [%p] to %x\n",
type, (unsigned)addr, ptr, (unsigned)val);
break;
#elif defined(TCC_TARGET_ARM64)
case R_AARCH64_ABS64:
*(uint64_t *)ptr = val;
break;
case R_AARCH64_ABS32:
*(uint32_t *)ptr = val;
break;
case R_AARCH64_MOVW_UABS_G0_NC:
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) |
(val & 0xffff) << 5;
break;
case R_AARCH64_MOVW_UABS_G1_NC:
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) |
(val >> 16 & 0xffff) << 5;
break;
case R_AARCH64_MOVW_UABS_G2_NC:
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) |
(val >> 32 & 0xffff) << 5;
break;
case R_AARCH64_MOVW_UABS_G3:
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffe0001f) |
(val >> 48 & 0xffff) << 5;
break;
case R_AARCH64_ADR_PREL_PG_HI21: {
uint64_t off = (val >> 12) - (addr >> 12);
if ((off + ((uint64_t)1 << 20)) >> 21)
tcc_error("R_AARCH64_ADR_PREL_PG_HI21 relocation failed");
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0x9f00001f) |
(off & 0x1ffffc) << 3 | (off & 3) << 29;
break;
}
case R_AARCH64_ADD_ABS_LO12_NC:
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0xffc003ff) |
(val & 0xfff) << 10;
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
if (((val - addr) + ((uint64_t)1 << 27)) & ~(uint64_t)0xffffffc)
tcc_error("R_AARCH64_(JUMP|CALL)26 relocation failed");
*(uint32_t *)ptr = 0x14000000 | (type == R_AARCH64_CALL26) << 31 |
((val - addr) >> 2 & 0x3ffffff);
break;
case R_AARCH64_ADR_GOT_PAGE: {
uint64_t off =
(((s1->got->sh_addr +
s1->sym_attrs[sym_index].got_offset) >> 12) - (addr >> 12));
if ((off + ((uint64_t)1 << 20)) >> 21)
tcc_error("R_AARCH64_ADR_GOT_PAGE relocation failed");
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0x9f00001f) |
(off & 0x1ffffc) << 3 | (off & 3) << 29;
break;
}
case R_AARCH64_LD64_GOT_LO12_NC:
*(uint32_t *)ptr = (*(uint32_t *)ptr & 0xfff803ff) |
((s1->got->sh_addr + s1->sym_attrs[sym_index].got_offset)
& 0xff8) << 7;
break;
case R_AARCH64_COPY:
break;
default:
fprintf(stderr, "FIXME: handle reloc type %x at %x [%p] to %x\n",
type, (unsigned)addr, ptr, (unsigned)val);
break;
#elif defined(TCC_TARGET_C67)
case R_C60_32:
*(int *)ptr += val;
@ -955,7 +1018,7 @@ static void put32(unsigned char *p, uint32_t val)
}
#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_ARM) || \
defined(TCC_TARGET_X86_64)
defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
static uint32_t get32(unsigned char *p)
{
return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
@ -1014,6 +1077,8 @@ static unsigned long put_got_entry(TCCState *s1,
(reloc_type == R_386_JMP_SLOT);
#elif defined(TCC_TARGET_ARM)
(reloc_type == R_ARM_JUMP_SLOT);
#elif defined(TCC_TARGET_ARM64)
(reloc_type == R_AARCH64_JUMP_SLOT);
#else
0;
#endif
@ -1135,6 +1200,24 @@ static unsigned long put_got_entry(TCCState *s1,
if (sym->st_shndx == SHN_UNDEF)
offset = plt->data_offset - 16;
}
#elif defined(TCC_TARGET_ARM64)
if (need_plt_entry) {
Section *plt;
uint8_t *p;
if (s1->output_type == TCC_OUTPUT_DLL)
tcc_error("DLLs unimplemented!");
plt = s1->plt;
if (plt->data_offset == 0)
section_ptr_add(plt, 32);
p = section_ptr_add(plt, 16);
put32(p, s1->got->data_offset);
put32(p + 4, (uint64_t)s1->got->data_offset >> 32);
if (sym->st_shndx == SHN_UNDEF)
offset = plt->data_offset - 16;
}
#elif defined(TCC_TARGET_C67)
if (s1->dynsym) {
tcc_error("C67 got not implemented");
@ -1277,6 +1360,18 @@ ST_FUNC void build_got_entries(TCCState *s1)
put32(p+2, 0x46c0); /* nop */
put32(p+4, 0xeafffffe); /* b $sym */
}
#elif defined(TCC_TARGET_ARM64)
//xx Other cases may be required here:
case R_AARCH64_ADR_GOT_PAGE:
case R_AARCH64_LD64_GOT_LO12_NC:
if (!s1->got)
build_got(s1);
sym_index = ELFW(R_SYM)(rel->r_info);
sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
reloc_type = R_AARCH64_GLOB_DAT;
put_got_entry(s1, reloc_type, sym->st_size, sym->st_info,
sym_index);
break;
#elif defined(TCC_TARGET_C67)
case R_C60_GOT32:
case R_C60_GOTOFF:
@ -1796,6 +1891,40 @@ ST_FUNC void relocate_plt(TCCState *s1)
put32(p + 12, x + get32(p + 12) + s1->plt->data - p);
p += 16;
}
#elif defined(TCC_TARGET_ARM64)
uint64_t plt = s1->plt->sh_addr;
uint64_t got = s1->got->sh_addr;
uint64_t off = (got >> 12) - (plt >> 12);
if ((off + ((uint64_t)1 << 20)) >> 21)
tcc_error("Failed relocating PLT");
put32(p, 0xa9bf7bf0); // stp x16,x30,[sp,#-16]!
put32(p + 4, (0x90000010 | // adrp x16,...
(off & 0x1ffffc) << 3 | (off & 3) << 29));
put32(p + 8, (0xf9400211 | // ldr x17,[x16,#...]
(got & 0xff8) << 7));
put32(p + 12, (0x91000210 | // add x16,x16,#...
(got & 0xfff) << 10));
put32(p + 16, 0xd61f0220); // br x17
put32(p + 20, 0xd503201f); // nop
put32(p + 24, 0xd503201f); // nop
put32(p + 28, 0xd503201f); // nop
p += 32;
while (p < p_end) {
uint64_t pc = plt + (p - s1->plt->data);
uint64_t addr = got +
(get32(p) | (uint64_t)get32(p + 4) << 32);
uint32_t off = (addr >> 12) - (pc >> 12);
if ((off + ((uint64_t)1 << 20)) >> 21)
tcc_error("Failed relocating PLT");
put32(p, (0x90000010 | // adrp x16,...
(off & 0x1ffffc) << 3 | (off & 3) << 29));
put32(p + 4, (0xf9400211 | // ldr x17,[x16,#...]
(addr & 0xff8) << 7));
put32(p + 8, (0x91000210 | // add x16,x16,#...
(addr & 0xfff) << 10));
put32(p + 12, 0xd61f0220); // br x17
p += 16;
}
#elif defined(TCC_TARGET_C67)
/* XXX: TODO */
#else
@ -2093,7 +2222,7 @@ static void fill_dynamic(TCCState *s1, struct dyn_inf *dyninf)
put_dt(dynamic, DT_SYMTAB, s1->dynsym->sh_addr);
put_dt(dynamic, DT_STRSZ, dyninf->dynstr->data_offset);
put_dt(dynamic, DT_SYMENT, sizeof(ElfW(Sym)));
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
put_dt(dynamic, DT_RELA, dyninf->rel_addr);
put_dt(dynamic, DT_RELASZ, dyninf->rel_size);
put_dt(dynamic, DT_RELAENT, sizeof(ElfW_Rel));

127
tccgen.c
View File

@ -545,7 +545,7 @@ ST_FUNC void save_reg(int r)
type = &p->type;
if ((p->r & VT_LVAL) ||
(!is_float(type->t) && (type->t & VT_BTYPE) != VT_LLONG))
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
type = &char_pointer_type;
#else
type = &int_type;
@ -562,7 +562,7 @@ ST_FUNC void save_reg(int r)
o(0xd8dd); /* fstp %st(0) */
}
#endif
#ifndef TCC_TARGET_X86_64
#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64)
/* special long long case */
if ((type->t & VT_BTYPE) == VT_LLONG) {
sv.c.ul += 4;
@ -681,7 +681,7 @@ static void move_reg(int r, int s, int t)
}
/* get address of vtop (vtop MUST BE an lvalue) */
static void gaddrof(void)
ST_FUNC void gaddrof(void)
{
if (vtop->r & VT_REF)
gv(RC_INT);
@ -803,11 +803,13 @@ ST_FUNC int gv(int rc)
r = vtop->r & VT_VALMASK;
rc2 = (rc & RC_FLOAT) ? RC_FLOAT : RC_INT;
#ifndef TCC_TARGET_ARM64
if (rc == RC_IRET)
rc2 = RC_LRET;
#ifdef TCC_TARGET_X86_64
else if (rc == RC_FRET)
rc2 = RC_QRET;
#endif
#endif
/* need to reload if:
@ -817,7 +819,7 @@ ST_FUNC int gv(int rc)
if (r >= VT_CONST
|| (vtop->r & VT_LVAL)
|| !(reg_classes[r] & rc)
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
|| ((vtop->type.t & VT_BTYPE) == VT_QLONG && !(reg_classes[vtop->r2] & rc2))
|| ((vtop->type.t & VT_BTYPE) == VT_QFLOAT && !(reg_classes[vtop->r2] & rc2))
#else
@ -826,7 +828,7 @@ ST_FUNC int gv(int rc)
)
{
r = get_reg(rc);
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
if (((vtop->type.t & VT_BTYPE) == VT_QLONG) || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT)) {
int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE;
#else
@ -838,7 +840,7 @@ ST_FUNC int gv(int rc)
original_type = vtop->type.t;
/* two register type load : expand to two words
temporarily */
#ifndef TCC_TARGET_X86_64
#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64)
if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
/* load constant */
ll = vtop->c.ull;
@ -890,7 +892,7 @@ ST_FUNC int gv(int rc)
t1 = t;
/* compute memory access type */
if (vtop->r & VT_REF)
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
t = VT_PTR;
#else
t = VT_INT;
@ -952,6 +954,7 @@ ST_FUNC void gv2(int rc1, int rc2)
}
}
#ifndef TCC_TARGET_ARM64
/* wrapper around RC_FRET to return a register by type */
static int rc_fret(int t)
{
@ -962,6 +965,7 @@ static int rc_fret(int t)
#endif
return RC_FRET;
}
#endif
/* wrapper around REG_FRET to return a register by type */
static int reg_fret(int t)
@ -1147,7 +1151,7 @@ ST_FUNC int gvtst(int inv, int t)
return gtst(inv, t);
}
#ifndef TCC_TARGET_X86_64
#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64)
/* generate CPU independent (unsigned) long long operations */
static void gen_opl(int op)
{
@ -1358,7 +1362,7 @@ static void gen_opl(int op)
#elif defined(TCC_TARGET_ARM)
b = ind;
o(0x1A000000 | encbranch(ind, 0, 1));
#elif defined(TCC_TARGET_C67)
#elif defined(TCC_TARGET_C67) || defined(TCC_TARGET_ARM64)
tcc_error("not implemented");
#else
#error not supported
@ -1512,7 +1516,8 @@ static void gen_opic(int op)
general_case:
if (!nocode_wanted) {
/* call low level op generator */
if (t1 == VT_LLONG || t2 == VT_LLONG)
if (t1 == VT_LLONG || t2 == VT_LLONG ||
(PTR_SIZE == 8 && (t1 == VT_PTR || t2 == VT_PTR)))
gen_opl(op);
else
gen_opi(op);
@ -1679,7 +1684,7 @@ ST_FUNC void gen_op(int op)
if (op >= TOK_ULT && op <= TOK_LOR) {
check_comparison_pointer_types(vtop - 1, vtop, op);
/* pointers are handled are unsigned */
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
t = VT_LLONG | VT_UNSIGNED;
#else
t = VT_INT | VT_UNSIGNED;
@ -1700,7 +1705,7 @@ ST_FUNC void gen_op(int op)
vrott(3);
gen_opic(op);
/* set to integer type */
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
vtop->type.t = VT_LLONG;
#else
vtop->type.t = VT_INT;
@ -1724,7 +1729,7 @@ ST_FUNC void gen_op(int op)
u = pointed_size(&vtop[-1].type);
if (u < 0)
tcc_error("unknown array element size");
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
vpushll(u);
#else
/* XXX: cast to int ? (long long case) */
@ -1833,6 +1838,9 @@ ST_FUNC void gen_op(int op)
/* generic itof for unsigned long long case */
static void gen_cvt_itof1(int t)
{
#ifdef TCC_TARGET_ARM64
gen_cvt_itof(t);
#else
if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
(VT_LLONG | VT_UNSIGNED)) {
@ -1851,12 +1859,16 @@ static void gen_cvt_itof1(int t)
} else {
gen_cvt_itof(t);
}
#endif
}
#endif
/* generic ftoi for unsigned long long case */
static void gen_cvt_ftoi1(int t)
{
#ifdef TCC_TARGET_ARM64
gen_cvt_ftoi(t);
#else
int st;
if (t == (VT_LLONG | VT_UNSIGNED)) {
@ -1878,6 +1890,7 @@ static void gen_cvt_ftoi1(int t)
} else {
gen_cvt_ftoi(t);
}
#endif
}
/* force char or short cast */
@ -1968,7 +1981,7 @@ static void gen_cast(CType *type)
vtop->c.ll = vtop->c.ull;
else if (sbt & VT_UNSIGNED)
vtop->c.ll = vtop->c.ui;
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
else if (sbt == VT_PTR)
;
#endif
@ -1979,7 +1992,7 @@ static void gen_cast(CType *type)
vtop->c.ull = vtop->c.ll;
else if (dbt == VT_BOOL)
vtop->c.i = (vtop->c.ll != 0);
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
else if (dbt == VT_PTR)
;
#endif
@ -2024,7 +2037,7 @@ static void gen_cast(CType *type)
gen_cast(type);
}
}
#ifndef TCC_TARGET_X86_64
#if !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_X86_64)
} else if ((dbt & VT_BTYPE) == VT_LLONG) {
if ((sbt & VT_BTYPE) != VT_LLONG) {
/* scalar to long long */
@ -2056,11 +2069,18 @@ static void gen_cast(CType *type)
(sbt & VT_BTYPE) != VT_PTR &&
(sbt & VT_BTYPE) != VT_FUNC) {
/* need to convert from 32bit to 64bit */
int r = gv(RC_INT);
gv(RC_INT);
if (sbt != (VT_INT | VT_UNSIGNED)) {
#if defined(TCC_TARGET_ARM64)
gen_cvt_sxtw();
#elif defined(TCC_TARGET_X86_64)
int r = gv(RC_INT);
/* x86_64 specific: movslq */
o(0x6348);
o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r));
#else
#error
#endif
}
}
#endif
@ -2589,7 +2609,7 @@ ST_FUNC void vstore(void)
if ((vtop[-1].r & VT_VALMASK) == VT_LLOCAL) {
SValue sv;
t = get_reg(RC_INT);
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
sv.type.t = VT_PTR;
#else
sv.type.t = VT_INT;
@ -2600,7 +2620,7 @@ ST_FUNC void vstore(void)
vtop[-1].r = t | VT_LVAL;
}
/* two word case handling : store second register at word + 4 (or +8 for x86-64) */
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
if (((ft & VT_BTYPE) == VT_QLONG) || ((ft & VT_BTYPE) == VT_QFLOAT)) {
int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE;
#else
@ -3098,6 +3118,13 @@ static int parse_btype(CType *type, AttributeDef *ad)
goto basic_type1;
}
break;
#ifdef TCC_TARGET_ARM64
case TOK_UINT128:
/* GCC's __uint128_t appears in some Linux header files. Make it a
synonym for long double to get the size and alignment right. */
u = VT_LDOUBLE;
goto basic_type;
#endif
case TOK_BOOL:
u = VT_BOOL;
goto basic_type;
@ -3233,7 +3260,8 @@ the_end:
/* long is never used as type */
if ((t & VT_BTYPE) == VT_LONG)
#if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
#if (!defined TCC_TARGET_X86_64 && !defined TCC_TARGET_ARM64) || \
defined TCC_TARGET_PE
t = (t & ~VT_BTYPE) | VT_INT;
#else
t = (t & ~VT_BTYPE) | VT_LLONG;
@ -3881,6 +3909,36 @@ ST_FUNC void unary(void)
break;
#endif
#endif
#ifdef TCC_TARGET_ARM64
case TOK___va_start: {
next();
skip('(');
expr_eq();
skip(',');
expr_eq();
skip(')');
//xx check types
gen_va_start();
vpushi(0);
vtop->type.t = VT_VOID;
break;
}
case TOK___va_arg: {
CType type;
next();
skip('(');
expr_eq();
skip(',');
parse_type(&type);
skip(')');
//xx check types
gen_va_arg(&type);
vtop->type = type;
break;
}
#endif
case TOK_INC:
case TOK_DEC:
t = tok;
@ -4071,6 +4129,15 @@ ST_FUNC void unary(void)
if (!ret_nregs) {
/* get some space for the returned structure */
size = type_size(&s->type, &align);
#ifdef TCC_TARGET_ARM64
/* On arm64, a small struct is return in registers.
It is much easier to write it to memory if we know
that we are allowed to write some extra bytes, so
round the allocated space up to a power of 2: */
if (size < 16)
while (size & (size - 1))
size = (size | (size - 1)) + 1;
#endif
loc = (loc - size) & -align;
ret.type = s->type;
ret.r = VT_LOCAL | VT_LVAL;
@ -4094,12 +4161,14 @@ ST_FUNC void unary(void)
ret.r2 = REG_QRET;
#endif
} else {
#ifndef TCC_TARGET_ARM64
#ifdef TCC_TARGET_X86_64
if ((ret.type.t & VT_BTYPE) == VT_QLONG)
#else
if ((ret.type.t & VT_BTYPE) == VT_LLONG)
#endif
ret.r2 = REG_LRET;
#endif
ret.r = REG_IRET;
}
ret.c.i = 0;
@ -4717,6 +4786,10 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
if (tok != ';') {
gexpr();
gen_assign_cast(&func_vt);
#ifdef TCC_TARGET_ARM64
// Perhaps it would be better to use this for all backends:
greturn();
#else
if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
CType type, ret_type;
int ret_align, ret_nregs;
@ -4770,6 +4843,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym,
} else {
gv(RC_IRET);
}
#endif
vtop--; /* NOT vpop() because on x86 it would flush the fp stack */
}
skip(';');
@ -5160,9 +5234,9 @@ static void init_putv(CType *type, Section *sec, unsigned long c,
/* XXX: generate error if incorrect relocation */
gen_assign_cast(&dtype);
bt = type->t & VT_BTYPE;
/* we'll write at most 12 bytes */
if (c + 12 > sec->data_allocated) {
section_realloc(sec, c + 12);
/* we'll write at most 16 bytes */
if (c + 16 > sec->data_allocated) {
section_realloc(sec, c + 16);
}
ptr = sec->data + c;
/* XXX: make code faster ? */
@ -5184,6 +5258,9 @@ static void init_putv(CType *type, Section *sec, unsigned long c,
(bt == VT_INT && bit_size != 32)))
tcc_error("initializer element is not computable at load time");
switch(bt) {
/* XXX: when cross-compiling we assume that each type has the
same representation on host and target, which is likely to
be wrong in the case of long double */
case VT_BOOL:
vtop->c.i = (vtop->c.i != 0);
case VT_BYTE:
@ -5203,7 +5280,7 @@ static void init_putv(CType *type, Section *sec, unsigned long c,
break;
case VT_PTR: {
addr_t val = (vtop->c.ptr_offset & bit_mask) << bit_pos;
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
if (vtop->r & VT_SYM)
greloca(sec, vtop->sym, c, R_DATA_PTR, val);
else
@ -5217,7 +5294,7 @@ static void init_putv(CType *type, Section *sec, unsigned long c,
}
default: {
int val = (vtop->c.i & bit_mask) << bit_pos;
#ifdef TCC_TARGET_X86_64
#if defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
if (vtop->r & VT_SYM)
greloca(sec, vtop->sym, c, R_DATA_PTR, val);
else

View File

@ -604,6 +604,27 @@ static int rt_get_caller_pc(addr_t *paddr, ucontext_t *uc, int level)
}
}
/* ------------------------------------------------------------- */
#elif defined(__aarch64__)
static int rt_get_caller_pc(addr_t *paddr, ucontext_t *uc, int level)
{
if (level < 0)
return -1;
else if (level == 0) {
*paddr = uc->uc_mcontext.pc;
return 0;
}
else {
addr_t *fp = (addr_t *)uc->uc_mcontext.regs[29];
int i;
for (i = 1; i < level; i++)
fp = (addr_t *)fp[0];
*paddr = fp[1];
return 0;
}
}
/* ------------------------------------------------------------- */
#else

View File

@ -59,6 +59,10 @@
DEF(TOK_ASM2, "__asm")
DEF(TOK_ASM3, "__asm__")
#ifdef TCC_TARGET_ARM64
DEF(TOK_UINT128, "__uint128_t")
#endif
/*********************************************************************/
/* the following are not keywords. They are included to ease parsing */
/* preprocessor only */
@ -136,6 +140,11 @@
DEF(TOK_REGPARM1, "regparm")
DEF(TOK_REGPARM2, "__regparm__")
#ifdef TCC_TARGET_ARM64
DEF(TOK___va_start, "__va_start")
DEF(TOK___va_arg, "__va_arg")
#endif
/* pragma */
DEF(TOK_pack, "pack")
#if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_X86_64)
@ -229,6 +238,30 @@
#if defined TCC_TARGET_PE
DEF(TOK___chkstk, "__chkstk")
#endif
#ifdef TCC_TARGET_ARM64
DEF(TOK___addtf3, "__addtf3")
DEF(TOK___subtf3, "__subtf3")
DEF(TOK___multf3, "__multf3")
DEF(TOK___divtf3, "__divtf3")
DEF(TOK___extendsftf2, "__extendsftf2")
DEF(TOK___extenddftf2, "__extenddftf2")
DEF(TOK___trunctfsf2, "__trunctfsf2")
DEF(TOK___trunctfdf2, "__trunctfdf2")
DEF(TOK___fixtfsi, "__fixtfsi")
DEF(TOK___fixtfdi, "__fixtfdi")
DEF(TOK___fixunstfsi, "__fixunstfsi")
DEF(TOK___fixunstfdi, "__fixunstfdi")
DEF(TOK___floatsitf, "__floatsitf")
DEF(TOK___floatditf, "__floatditf")
DEF(TOK___floatunsitf, "__floatunsitf")
DEF(TOK___floatunditf, "__floatunditf")
DEF(TOK___eqtf2, "__eqtf2")
DEF(TOK___netf2, "__netf2")
DEF(TOK___lttf2, "__lttf2")
DEF(TOK___letf2, "__letf2")
DEF(TOK___gttf2, "__gttf2")
DEF(TOK___getf2, "__getf2")
#endif
/* bound checking symbols */
#ifdef CONFIG_TCC_BCHECK