From dd69143ae65da78964e61c8f67de98de2183d720 Mon Sep 17 00:00:00 2001 From: herman ten brugge Date: Sun, 11 Dec 2022 08:23:22 +0100 Subject: [PATCH] Add builtins ffs, clz, ctz, clrsb, popcount, parity Add new file lib/builtin.c Modify include/tccdefs.h, lib/Makefile to compile it. Update tests/tcctest.c to test it. --- include/tccdefs.h | 15 ++++++ lib/Makefile | 10 ++-- lib/builtin.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++ tests/tcctest.c | 49 ++++++++++++++++++ 4 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 lib/builtin.c diff --git a/include/tccdefs.h b/include/tccdefs.h index 01d57ef5..814edf37 100644 --- a/include/tccdefs.h +++ b/include/tccdefs.h @@ -309,4 +309,19 @@ #undef __MAYBE_REDIR #undef __RENAME +#if !defined _WIN32 + #define __BUILTIN_EXTERN(name,u) \ + int __builtin_##name(u int); \ + int __builtin_##name##l(u long); \ + int __builtin_##name##ll(u long long); + + __BUILTIN_EXTERN(ffs,) + __BUILTIN_EXTERN(clz, unsigned) + __BUILTIN_EXTERN(ctz, unsigned) + __BUILTIN_EXTERN(clrsb,) + __BUILTIN_EXTERN(popcount, unsigned) + __BUILTIN_EXTERN(parity, unsigned) + #undef __BUILTIN_EXTERN +#endif + #endif /* ndef __TCC_PP__ */ diff --git a/lib/Makefile b/lib/Makefile index 56966157..81e390bb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,11 +42,11 @@ $(X)BT_O += tcov.o DSO_O = dsohandle.o -I386_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o atomic.o $(BT_O) -X86_64_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o atomic.o $(BT_O) -ARM_O = libtcc1.o armeabi.o alloca.o armflush.o stdatomic.o atomic.o $(BT_O) -ARM64_O = lib-arm64.o stdatomic.o atomic.o $(BT_O) -RISCV64_O = lib-arm64.o stdatomic.o atomic.o $(BT_O) +I386_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o atomic.o builtin.o $(BT_O) +X86_64_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o atomic.o builtin.o $(BT_O) +ARM_O = libtcc1.o armeabi.o alloca.o armflush.o stdatomic.o atomic.o builtin.o $(BT_O) +ARM64_O = lib-arm64.o stdatomic.o atomic.o builtin.o $(BT_O) 
+RISCV64_O = lib-arm64.o stdatomic.o atomic.o builtin.o $(BT_O) WIN_O = crt1.o crt1w.o wincrt1.o wincrt1w.o dllcrt1.o dllmain.o OBJ-i386 = $(I386_O) $(BCHECK_O) $(DSO_O) diff --git a/lib/builtin.c b/lib/builtin.c new file mode 100644 index 00000000..e822ab07 --- /dev/null +++ b/lib/builtin.c @@ -0,0 +1,124 @@ +/* ---------------------------------------------- */ +/* This file implements, in plain C: + * __builtin_ffs + * __builtin_clz + * __builtin_ctz + * __builtin_clrsb + * __builtin_popcount + * __builtin_parity + * for int, long (suffix l) and long long (suffix ll); the long variants pick the 32- or 64-bit macro via __SIZEOF_LONG__. table_1_* are the lookup tables for ffs/ctz, table_2_* for clz/clrsb. + */ + +static const unsigned char table_1_32[] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 +}; +static const unsigned char table_2_32[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 +}; +static const unsigned char table_1_64[] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 +}; +static const unsigned char table_2_64[] = { + 63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 60, 2, + 9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, + 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, + 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0 +}; + +#define FFSI(x) \ + return table_1_32[((x & -x) * 0x077cb531u) >> 27] + (x != 0); +#define FFSL(x) \ + return table_1_64[((x & -x) * 0x022fdd63cc95386dull) >> 58] + (x != 0); +#define CTZI(x) \ + return table_1_32[((x & -x) * 0x077cb531u) >> 27]; +#define CTZL(x) \ + return table_1_64[((x & -x) * 0x022fdd63cc95386dull) >> 58]; +#define CLZI(x) \ + x |= x >> 1; \ + x |= x >> 2; \ + x |= x >> 4; \ + x |= x >> 8; \ + x |= x >> 16; \ + return table_2_32[(x * 0x07c4acddu) >> 27]; +#define CLZL(x) \ + x 
|= x >> 1; \ + x |= x >> 2; \ + x |= x >> 4; \ + x |= x >> 8; \ + x |= x >> 16; \ + x |= x >> 32; \ + return table_2_64[x * 0x03f79d71b4cb0a89ull >> 58]; +#define POPCOUNTI(x, m) \ + x = x - ((x >> 1) & 0x55555555); \ + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); \ + x = (x + (x >> 4)) & 0xf0f0f0f; \ + return ((x * 0x01010101) >> 24) & m; +#define POPCOUNTL(x, m) \ + x = x - ((x >> 1) & 0x5555555555555555ull); \ + x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull); \ + x = (x + (x >> 4)) & 0xf0f0f0f0f0f0f0full; \ + return ((x * 0x0101010101010101ull) >> 56) & m; + +/* Returns one plus the index of the least significant 1-bit of x, + or zero if x is zero. NOTE(review): x is signed here, so -x in FFSI/FFSL overflows for the most negative value; presumably this relies on the compiler wrapping signed arithmetic - confirm. */ +int __builtin_ffs (int x) { FFSI(x) } +#if __SIZEOF_LONG__ == 4 +int __builtin_ffsl (long x) { FFSI(x) } +#else +int __builtin_ffsl (long x) { FFSL(x) } +#endif +int __builtin_ffsll (long long x) { FFSL(x) } + +/* Returns the number of leading 0-bits in x, starting at the most significant + bit position. If x is 0, the result is undefined. The CLZ macros OR x with its own right shifts so every bit below the highest set bit is set, then index table_2_32/table_2_64. */ +int __builtin_clz (unsigned int x) { CLZI(x) } +#if __SIZEOF_LONG__ == 4 +int __builtin_clzl (unsigned long x) { CLZI(x) } +#else +int __builtin_clzl (unsigned long x) { CLZL(x) } +#endif +int __builtin_clzll (unsigned long long x) { CLZL(x) } + +/* Returns the number of trailing 0-bits in x, starting at the least + significant bit position. If x is 0, the result is undefined. The CTZ macros isolate the lowest set bit with x & -x before the table lookup. */ +int __builtin_ctz (unsigned int x) { CTZI(x) } +#if __SIZEOF_LONG__ == 4 +int __builtin_ctzl (unsigned long x) { CTZI(x) } +#else +int __builtin_ctzl (unsigned long x) { CTZL(x) } +#endif +int __builtin_ctzll (unsigned long long x) { CTZL(x) } + +/* Returns the number of leading redundant sign bits in x, i.e. the number + of bits following the most significant bit that are identical to it. + There are no special cases for 0 or other values. Negative x is complemented first, then x is shifted left by one and passed to the CLZ macro. NOTE(review): that shift is done on the signed type and can move a 1 into the sign bit - confirm this is acceptable for the compilers building this file. 
*/ +int __builtin_clrsb (int x) { if (x < 0) x = ~x; x <<= 1; CLZI(x) } +#if __SIZEOF_LONG__ == 4 +int __builtin_clrsbl (long x) { if (x < 0) x = ~x; x <<= 1; CLZI(x) } +#else +int __builtin_clrsbl (long x) { if (x < 0) x = ~x; x <<= 1; CLZL(x) } +#endif +int __builtin_clrsbll (long long x) { if (x < 0) x = ~x; x <<= 1; CLZL(x) } + +/* Returns the number of 1-bits in x. The second macro argument is the mask applied to the final count (0x3f or 0x7f here). */ +int __builtin_popcount (unsigned int x) { POPCOUNTI(x, 0x3f) } +#if __SIZEOF_LONG__ == 4 +int __builtin_popcountl (unsigned long x) { POPCOUNTI(x, 0x3f) } +#else +int __builtin_popcountl (unsigned long x) { POPCOUNTL(x, 0x7f) } +#endif +int __builtin_popcountll (unsigned long long x) { POPCOUNTL(x, 0x7f) } + +/* Returns the parity of x, i.e. the number of 1-bits in x modulo 2. Uses the popcount macros with mask 0x01, keeping only the low bit of the count. */ +int __builtin_parity (unsigned int x) { POPCOUNTI(x, 0x01) } +#if __SIZEOF_LONG__ == 4 +int __builtin_parityl (unsigned long x) { POPCOUNTI(x, 0x01) } +#else +int __builtin_parityl (unsigned long x) { POPCOUNTL(x, 0x01) } +#endif +int __builtin_parityll (unsigned long long x) { POPCOUNTL(x, 0x01) } diff --git a/tests/tcctest.c b/tests/tcctest.c index bd14cd09..5f5783de 100644 --- a/tests/tcctest.c +++ b/tests/tcctest.c @@ -3844,6 +3844,36 @@ int constant_p_var; int func(void); +#if !defined _WIN32 +/* Adds the result of each builtin variant for x to its own slot in cnt[] (18 slots). __builtin_clz and __builtin_ctz are undefined for 0, so each is only called when x converted to its argument type is non-zero. */ +static void builtin_test_bits(unsigned long long x, int cnt[]) +{ + cnt[0] += __builtin_ffs(x); + cnt[1] += __builtin_ffsl(x); + cnt[2] += __builtin_ffsll(x); + + if ((unsigned int) x) cnt[3] += __builtin_clz(x); + if ((unsigned long) x) cnt[4] += __builtin_clzl(x); + if ((unsigned long long) x) cnt[5] += __builtin_clzll(x); + + if ((unsigned int) x) cnt[6] += __builtin_ctz(x); + if ((unsigned long) x) cnt[7] += __builtin_ctzl(x); + if ((unsigned long long) x) cnt[8] += __builtin_ctzll(x); + + cnt[9] += __builtin_clrsb(x); + cnt[10] += __builtin_clrsbl(x); + cnt[11] += __builtin_clrsbll(x); + + cnt[12] += __builtin_popcount(x); + cnt[13] += 
__builtin_popcountl(x); + cnt[14] += __builtin_popcountll(x); + + cnt[15] += __builtin_parity(x); + cnt[16] += __builtin_parityl(x); + cnt[17] += __builtin_parityll(x); +} +#endif + void builtin_test(void) { short s; @@ -3897,6 +3927,25 @@ void builtin_test(void) printf("bce: %d\n", i); //printf("bera: %p\n", __builtin_extract_return_addr((void*)43)); + +#if !defined _WIN32 + { + int cnt[18]; /* one accumulator per builtin variant exercised by builtin_test_bits */ + unsigned long long r = 0; + + memset(cnt, 0, sizeof(cnt)); + builtin_test_bits(0, cnt); + builtin_test_bits(0xffffffffffffffffull, cnt); + for (i = 0; i < 64; i++) + builtin_test_bits(1ull << i, cnt); + for (i = 0; i < 1000; i++) { + r = 0x5851f42d4c957f2dull * r + 0x14057b7ef767814full; /* deterministic sequence of 64-bit test inputs */ + builtin_test_bits(r, cnt); + } + for (i = 0; i < 18; i++) + printf ("%d %d\n", i, cnt[i]); + } +#endif } #if defined _WIN32