mirror of
https://github.com/mirror/tinycc.git
synced 2025-01-17 05:20:08 +08:00
49d3118621
On 2016-08-11 09:24 +0100, Balazs Kezes wrote:
> I think it's just that that copy_params() never restores the spilled
> registers. Maybe it needs some extra code at the end to see if any
> parameters have been spilled to stack and then restore them?
I've spent some time on this and I've found an alternative solution.
Although I'm not entirely sure about it but I've attached a patch
nevertheless.
And while poking at that I've found another problem affecting the
unsigned long long division on arm and I've attached a patch for that
too.
More details in the patches themselves. Please review and consider them
for merging! Thank you!
--
Balazs
[PATCH 1/2] Fix slow unsigned long long division on ARM
The macro AEABI_UXDIVMOD expands to this bit:
#define AEABI_UXDIVMOD(name,type, rettype, typemacro) \
...
while (num >= den) { \
...
while ((q << 1) * den <= num && q * den <= typemacro ## _MAX / 2) \
q <<= 1; \
...
With the current ULONG_MAX version the inner loop goes only until 4
billion so the outer loop will progress very slowly if num is large.
With ULLONG_MAX the inner loop works as expected. The current version is
probably a result of a typo.
The following bash snippet demonstrates the bug:
$ uname -a
Linux eper 4.4.16-2-ARCH #1 Wed Aug 10 20:03:13 MDT 2016 armv6l GNU/Linux
$ cat div.c
int printf(const char *, ...);
int main(void) {
unsigned long long num, denom;
num = 12345678901234567ULL;
denom = 7;
printf("%lld\n", num / denom);
return 0;
}
$ time tcc -run div.c
1763668414462081
real 0m16.291s
user 0m15.860s
sys 0m0.020s
[PATCH 2/2] Fix long long dereference during argument passing on ARMv6
For some reason the code spills the register to the stack. copy_params
in arm-gen.c doesn't expect this so bad code is generated. It's not
entirely clear why the saving part is necessary. It was added in commit
59c35638
with the comment "fixed long long code gen bug" with no further
clarification. Given that tcctest.c passes without this, maybe it's no
longer needed? Let's remove it.
Also add a new testcase just for this. After I've managed to make the
tests compile on a raspberry pi, I get the following diff without this
patch:
--- test.ref 2016-08-22 22:12:43.380000000 +0100
+++ test.out3 2016-08-22 22:12:49.990000000 +0100
@@ -499,7 +499,7 @@
2
1 0 1 0
4886718345
-shift: 9 9 9312
+shift: 291 291 291
shiftc: 36 36 2328
shiftc: 0 0 9998683865088
manyarg_test:
More discussion on this thread:
https://lists.nongnu.org/archive/html/tinycc-devel/2016-08/msg00004.html
490 lines
27 KiB
C
490 lines
27 KiB
C
/* TCC ARM runtime EABI
|
|
Copyright (C) 2013 Thomas Preud'homme
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.*/
|
|
|
|
#include <limits.h>
|
|
|
|
/* We rely on the little endianness and EABI calling convention for this to
|
|
work */
|
|
|
|
typedef struct double_unsigned_struct {
|
|
unsigned low;
|
|
unsigned high;
|
|
} double_unsigned_struct;
|
|
|
|
typedef struct unsigned_int_struct {
|
|
unsigned low;
|
|
int high;
|
|
} unsigned_int_struct;
|
|
|
|
#define REGS_RETURN(name, type) \
|
|
void name ## _return(type ret) {}
|
|
|
|
|
|
/* Float helper functions */
|
|
|
|
#define FLOAT_EXP_BITS 8
|
|
#define FLOAT_FRAC_BITS 23
|
|
|
|
#define DOUBLE_EXP_BITS 11
|
|
#define DOUBLE_FRAC_BITS 52
|
|
|
|
#define ONE_EXP(type) ((1 << (type ## _EXP_BITS - 1)) - 1)
|
|
|
|
REGS_RETURN(unsigned_int_struct, unsigned_int_struct)
|
|
REGS_RETURN(double_unsigned_struct, double_unsigned_struct)
|
|
|
|
/* float -> integer: (sign) 1.fraction x 2^(exponent - exp_for_one) */
|
|
|
|
|
|
/* float to [unsigned] long long conversion */
|
|
#define DEFINE__AEABI_F2XLZ(name, with_sign) \
|
|
void __aeabi_ ## name(unsigned val) \
|
|
{ \
|
|
int exp, high_shift, sign; \
|
|
double_unsigned_struct ret; \
|
|
\
|
|
/* compute sign */ \
|
|
sign = val >> 31; \
|
|
\
|
|
/* compute real exponent */ \
|
|
exp = val >> FLOAT_FRAC_BITS; \
|
|
exp &= (1 << FLOAT_EXP_BITS) - 1; \
|
|
exp -= ONE_EXP(FLOAT); \
|
|
\
|
|
/* undefined behavior if truncated value cannot be represented */ \
|
|
if (with_sign) { \
|
|
if (exp > 62) /* |val| too big, double cannot represent LLONG_MAX */ \
|
|
return; \
|
|
} else { \
|
|
if ((sign && exp >= 0) || exp > 63) /* if val < 0 || val too big */ \
|
|
return; \
|
|
} \
|
|
\
|
|
val &= (1 << FLOAT_FRAC_BITS) - 1; \
|
|
if (exp >= 32) { \
|
|
ret.high = 1 << (exp - 32); \
|
|
if (exp - 32 >= FLOAT_FRAC_BITS) { \
|
|
ret.high |= val << (exp - 32 - FLOAT_FRAC_BITS); \
|
|
ret.low = 0; \
|
|
} else { \
|
|
high_shift = FLOAT_FRAC_BITS - (exp - 32); \
|
|
ret.high |= val >> high_shift; \
|
|
ret.low = val << (32 - high_shift); \
|
|
} \
|
|
} else { \
|
|
ret.high = 0; \
|
|
ret.low = 1 << exp; \
|
|
if (exp > FLOAT_FRAC_BITS) \
|
|
ret.low |= val << (exp - FLOAT_FRAC_BITS); \
|
|
else \
|
|
ret.low |= val >> (FLOAT_FRAC_BITS - exp); \
|
|
} \
|
|
\
|
|
/* encode negative integer using 2's complement */ \
|
|
if (with_sign && sign) { \
|
|
ret.low = ~ret.low; \
|
|
ret.high = ~ret.high; \
|
|
if (ret.low == UINT_MAX) { \
|
|
ret.low = 0; \
|
|
ret.high++; \
|
|
} else \
|
|
ret.low++; \
|
|
} \
|
|
\
|
|
double_unsigned_struct_return(ret); \
|
|
}
|
|
|
|
/* float to unsigned long long conversion */
|
|
DEFINE__AEABI_F2XLZ(f2ulz, 0)
|
|
|
|
/* float to long long conversion */
|
|
DEFINE__AEABI_F2XLZ(f2lz, 1)
|
|
|
|
/* double to [unsigned] long long conversion */
|
|
#define DEFINE__AEABI_D2XLZ(name, with_sign) \
|
|
void __aeabi_ ## name(double_unsigned_struct val) \
|
|
{ \
|
|
int exp, high_shift, sign; \
|
|
double_unsigned_struct ret; \
|
|
\
|
|
/* compute sign */ \
|
|
sign = val.high >> 31; \
|
|
\
|
|
/* compute real exponent */ \
|
|
exp = (val.high >> (DOUBLE_FRAC_BITS - 32)); \
|
|
exp &= (1 << DOUBLE_EXP_BITS) - 1; \
|
|
exp -= ONE_EXP(DOUBLE); \
|
|
\
|
|
/* undefined behavior if truncated value cannot be represented */ \
|
|
if (with_sign) { \
|
|
if (exp > 62) /* |val| too big, double cannot represent LLONG_MAX */ \
|
|
return; \
|
|
} else { \
|
|
if ((sign && exp >= 0) || exp > 63) /* if val < 0 || val too big */ \
|
|
return; \
|
|
} \
|
|
\
|
|
val.high &= (1 << (DOUBLE_FRAC_BITS - 32)) - 1; \
|
|
if (exp >= 32) { \
|
|
ret.high = 1 << (exp - 32); \
|
|
if (exp >= DOUBLE_FRAC_BITS) { \
|
|
high_shift = exp - DOUBLE_FRAC_BITS; \
|
|
ret.high |= val.high << high_shift; \
|
|
ret.high |= val.low >> (32 - high_shift); \
|
|
ret.low = val.low << high_shift; \
|
|
} else { \
|
|
high_shift = DOUBLE_FRAC_BITS - exp; \
|
|
ret.high |= val.high >> high_shift; \
|
|
ret.low = val.high << (32 - high_shift); \
|
|
ret.low |= val.low >> high_shift; \
|
|
} \
|
|
} else { \
|
|
ret.high = 0; \
|
|
ret.low = 1 << exp; \
|
|
if (exp > DOUBLE_FRAC_BITS - 32) { \
|
|
high_shift = exp - DOUBLE_FRAC_BITS - 32; \
|
|
ret.low |= val.high << high_shift; \
|
|
ret.low |= val.low >> (32 - high_shift); \
|
|
} else \
|
|
ret.low |= val.high >> (DOUBLE_FRAC_BITS - 32 - exp); \
|
|
} \
|
|
\
|
|
/* encode negative integer using 2's complement */ \
|
|
if (with_sign && sign) { \
|
|
ret.low = ~ret.low; \
|
|
ret.high = ~ret.high; \
|
|
if (ret.low == UINT_MAX) { \
|
|
ret.low = 0; \
|
|
ret.high++; \
|
|
} else \
|
|
ret.low++; \
|
|
} \
|
|
\
|
|
double_unsigned_struct_return(ret); \
|
|
}
|
|
|
|
/* double to unsigned long long conversion */
|
|
DEFINE__AEABI_D2XLZ(d2ulz, 0)
|
|
|
|
/* double to long long conversion */
|
|
DEFINE__AEABI_D2XLZ(d2lz, 1)
|
|
|
|
/* long long to float conversion */
|
|
#define DEFINE__AEABI_XL2F(name, with_sign) \
|
|
unsigned __aeabi_ ## name(unsigned long long v) \
|
|
{ \
|
|
int s /* shift */, flb /* first lost bit */, sign = 0; \
|
|
unsigned p = 0 /* power */, ret; \
|
|
double_unsigned_struct val; \
|
|
\
|
|
/* fraction in negative float is encoded in 1's complement */ \
|
|
if (with_sign && (v & (1ULL << 63))) { \
|
|
sign = 1; \
|
|
v = ~v + 1; \
|
|
} \
|
|
val.low = v; \
|
|
val.high = v >> 32; \
|
|
/* fill fraction bits */ \
|
|
for (s = 31, p = 1 << 31; p && !(val.high & p); s--, p >>= 1); \
|
|
if (p) { \
|
|
ret = val.high & (p - 1); \
|
|
if (s < FLOAT_FRAC_BITS) { \
|
|
ret <<= FLOAT_FRAC_BITS - s; \
|
|
ret |= val.low >> (32 - (FLOAT_FRAC_BITS - s)); \
|
|
flb = (val.low >> (32 - (FLOAT_FRAC_BITS - s - 1))) & 1; \
|
|
} else { \
|
|
flb = (ret >> (s - FLOAT_FRAC_BITS - 1)) & 1; \
|
|
ret >>= s - FLOAT_FRAC_BITS; \
|
|
} \
|
|
s += 32; \
|
|
} else { \
|
|
for (s = 31, p = 1 << 31; p && !(val.low & p); s--, p >>= 1); \
|
|
if (p) { \
|
|
ret = val.low & (p - 1); \
|
|
if (s <= FLOAT_FRAC_BITS) { \
|
|
ret <<= FLOAT_FRAC_BITS - s; \
|
|
flb = 0; \
|
|
} else { \
|
|
flb = (ret >> (s - FLOAT_FRAC_BITS - 1)) & 1; \
|
|
ret >>= s - FLOAT_FRAC_BITS; \
|
|
} \
|
|
} else \
|
|
return 0; \
|
|
} \
|
|
if (flb) \
|
|
ret++; \
|
|
\
|
|
/* fill exponent bits */ \
|
|
ret |= (s + ONE_EXP(FLOAT)) << FLOAT_FRAC_BITS; \
|
|
\
|
|
/* fill sign bit */ \
|
|
ret |= sign << 31; \
|
|
\
|
|
return ret; \
|
|
}
|
|
|
|
/* unsigned long long to float conversion */
|
|
DEFINE__AEABI_XL2F(ul2f, 0)
|
|
|
|
/* long long to float conversion */
|
|
DEFINE__AEABI_XL2F(l2f, 1)
|
|
|
|
/* long long to double conversion */
|
|
#define __AEABI_XL2D(name, with_sign) \
|
|
void __aeabi_ ## name(unsigned long long v) \
|
|
{ \
|
|
int s /* shift */, high_shift, sign = 0; \
|
|
unsigned tmp, p = 0; \
|
|
double_unsigned_struct val, ret; \
|
|
\
|
|
/* fraction in negative float is encoded in 1's complement */ \
|
|
if (with_sign && (v & (1ULL << 63))) { \
|
|
sign = 1; \
|
|
v = ~v + 1; \
|
|
} \
|
|
val.low = v; \
|
|
val.high = v >> 32; \
|
|
\
|
|
/* fill fraction bits */ \
|
|
for (s = 31, p = 1 << 31; p && !(val.high & p); s--, p >>= 1); \
|
|
if (p) { \
|
|
tmp = val.high & (p - 1); \
|
|
if (s < DOUBLE_FRAC_BITS - 32) { \
|
|
high_shift = DOUBLE_FRAC_BITS - 32 - s; \
|
|
ret.high = tmp << high_shift; \
|
|
ret.high |= val.low >> (32 - high_shift); \
|
|
ret.low = val.low << high_shift; \
|
|
} else { \
|
|
high_shift = s - (DOUBLE_FRAC_BITS - 32); \
|
|
ret.high = tmp >> high_shift; \
|
|
ret.low = tmp << (32 - high_shift); \
|
|
ret.low |= val.low >> high_shift; \
|
|
if ((val.low >> (high_shift - 1)) & 1) { \
|
|
if (ret.low == UINT_MAX) { \
|
|
ret.high++; \
|
|
ret.low = 0; \
|
|
} else \
|
|
ret.low++; \
|
|
} \
|
|
} \
|
|
s += 32; \
|
|
} else { \
|
|
for (s = 31, p = 1 << 31; p && !(val.low & p); s--, p >>= 1); \
|
|
if (p) { \
|
|
tmp = val.low & (p - 1); \
|
|
if (s <= DOUBLE_FRAC_BITS - 32) { \
|
|
high_shift = DOUBLE_FRAC_BITS - 32 - s; \
|
|
ret.high = tmp << high_shift; \
|
|
ret.low = 0; \
|
|
} else { \
|
|
high_shift = s - (DOUBLE_FRAC_BITS - 32); \
|
|
ret.high = tmp >> high_shift; \
|
|
ret.low = tmp << (32 - high_shift); \
|
|
} \
|
|
} else { \
|
|
ret.high = ret.low = 0; \
|
|
double_unsigned_struct_return(ret); \
|
|
} \
|
|
} \
|
|
\
|
|
/* fill exponent bits */ \
|
|
ret.high |= (s + ONE_EXP(DOUBLE)) << (DOUBLE_FRAC_BITS - 32); \
|
|
\
|
|
/* fill sign bit */ \
|
|
ret.high |= sign << 31; \
|
|
\
|
|
double_unsigned_struct_return(ret); \
|
|
}
|
|
|
|
/* unsigned long long to double conversion */
|
|
__AEABI_XL2D(ul2d, 0)
|
|
|
|
/* long long to double conversion */
|
|
__AEABI_XL2D(l2d, 1)
|
|
|
|
|
|
/* Long long helper functions */
|
|
|
|
/* TODO: add error in case of den == 0 (see §4.3.1 and §4.3.2) */
|
|
|
|
#define define_aeabi_xdivmod_signed_type(basetype, type) \
|
|
typedef struct type { \
|
|
basetype quot; \
|
|
unsigned basetype rem; \
|
|
} type
|
|
|
|
#define define_aeabi_xdivmod_unsigned_type(basetype, type) \
|
|
typedef struct type { \
|
|
basetype quot; \
|
|
basetype rem; \
|
|
} type
|
|
|
|
#define AEABI_UXDIVMOD(name,type, rettype, typemacro) \
|
|
static inline rettype aeabi_ ## name (type num, type den) \
|
|
{ \
|
|
rettype ret; \
|
|
type quot = 0; \
|
|
\
|
|
/* Increase quotient while it is less than numerator */ \
|
|
while (num >= den) { \
|
|
type q = 1; \
|
|
\
|
|
/* Find closest power of two */ \
|
|
while ((q << 1) * den <= num && q * den <= typemacro ## _MAX / 2) \
|
|
q <<= 1; \
|
|
\
|
|
/* Compute difference between current quotient and numerator */ \
|
|
num -= q * den; \
|
|
quot += q; \
|
|
} \
|
|
ret.quot = quot; \
|
|
ret.rem = num; \
|
|
return ret; \
|
|
}
|
|
|
|
#define __AEABI_XDIVMOD(name, type, uiname, rettype, urettype, typemacro) \
|
|
void __aeabi_ ## name(type numerator, type denominator) \
|
|
{ \
|
|
unsigned type num, den; \
|
|
urettype uxdiv_ret; \
|
|
rettype ret; \
|
|
\
|
|
if (numerator >= 0) \
|
|
num = numerator; \
|
|
else \
|
|
num = 0 - numerator; \
|
|
if (denominator >= 0) \
|
|
den = denominator; \
|
|
else \
|
|
den = 0 - denominator; \
|
|
uxdiv_ret = aeabi_ ## uiname(num, den); \
|
|
/* signs differ */ \
|
|
if ((numerator & typemacro ## _MIN) != (denominator & typemacro ## _MIN)) \
|
|
ret.quot = 0 - uxdiv_ret.quot; \
|
|
else \
|
|
ret.quot = uxdiv_ret.quot; \
|
|
if (numerator < 0) \
|
|
ret.rem = 0 - uxdiv_ret.rem; \
|
|
else \
|
|
ret.rem = uxdiv_ret.rem; \
|
|
\
|
|
rettype ## _return(ret); \
|
|
}
|
|
|
|
define_aeabi_xdivmod_signed_type(long long, lldiv_t);
|
|
define_aeabi_xdivmod_unsigned_type(unsigned long long, ulldiv_t);
|
|
define_aeabi_xdivmod_signed_type(int, idiv_t);
|
|
define_aeabi_xdivmod_unsigned_type(unsigned, uidiv_t);
|
|
|
|
REGS_RETURN(lldiv_t, lldiv_t)
|
|
REGS_RETURN(ulldiv_t, ulldiv_t)
|
|
REGS_RETURN(idiv_t, idiv_t)
|
|
REGS_RETURN(uidiv_t, uidiv_t)
|
|
|
|
AEABI_UXDIVMOD(uldivmod, unsigned long long, ulldiv_t, ULLONG)
|
|
|
|
__AEABI_XDIVMOD(ldivmod, long long, uldivmod, lldiv_t, ulldiv_t, LLONG)
|
|
|
|
void __aeabi_uldivmod(unsigned long long num, unsigned long long den)
|
|
{
|
|
ulldiv_t_return(aeabi_uldivmod(num, den));
|
|
}
|
|
|
|
void __aeabi_llsl(double_unsigned_struct val, int shift)
|
|
{
|
|
double_unsigned_struct ret;
|
|
|
|
if (shift >= 32) {
|
|
val.high = val.low;
|
|
val.low = 0;
|
|
shift -= 32;
|
|
}
|
|
if (shift > 0) {
|
|
ret.low = val.low << shift;
|
|
ret.high = (val.high << shift) | (val.low >> (32 - shift));
|
|
double_unsigned_struct_return(ret);
|
|
return;
|
|
}
|
|
double_unsigned_struct_return(val);
|
|
}
|
|
|
|
#define aeabi_lsr(val, shift, fill, type) \
|
|
type ## _struct ret; \
|
|
\
|
|
if (shift >= 32) { \
|
|
val.low = val.high; \
|
|
val.high = fill; \
|
|
shift -= 32; \
|
|
} \
|
|
if (shift > 0) { \
|
|
ret.high = val.high >> shift; \
|
|
ret.low = (val.high << (32 - shift)) | (val.low >> shift); \
|
|
type ## _struct_return(ret); \
|
|
return; \
|
|
} \
|
|
type ## _struct_return(val);
|
|
|
|
void __aeabi_llsr(double_unsigned_struct val, int shift)
|
|
{
|
|
aeabi_lsr(val, shift, 0, double_unsigned);
|
|
}
|
|
|
|
void __aeabi_lasr(unsigned_int_struct val, int shift)
|
|
{
|
|
aeabi_lsr(val, shift, val.high >> 31, unsigned_int);
|
|
}
|
|
|
|
|
|
/* Integer division functions */
|
|
|
|
AEABI_UXDIVMOD(uidivmod, unsigned, uidiv_t, UINT)
|
|
|
|
int __aeabi_idiv(int numerator, int denominator)
|
|
{
|
|
unsigned num, den;
|
|
uidiv_t ret;
|
|
|
|
if (numerator >= 0)
|
|
num = numerator;
|
|
else
|
|
num = 0 - numerator;
|
|
if (denominator >= 0)
|
|
den = denominator;
|
|
else
|
|
den = 0 - denominator;
|
|
ret = aeabi_uidivmod(num, den);
|
|
if ((numerator & INT_MIN) != (denominator & INT_MIN)) /* signs differ */
|
|
ret.quot *= -1;
|
|
return ret.quot;
|
|
}
|
|
|
|
unsigned __aeabi_uidiv(unsigned num, unsigned den)
|
|
{
|
|
return aeabi_uidivmod(num, den).quot;
|
|
}
|
|
|
|
__AEABI_XDIVMOD(idivmod, int, uidivmod, idiv_t, uidiv_t, INT)
|
|
|
|
void __aeabi_uidivmod(unsigned num, unsigned den)
|
|
{
|
|
uidiv_t_return(aeabi_uidivmod(num, den));
|
|
}
|