From 2e798523e4b98ec856a69e637a4332c4973a4294 Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Wed, 15 Jul 2020 22:02:02 +0200 Subject: [PATCH] Fix conversions of subnormals to long double those need to be normalized when extending from float/double to binary128. --- lib/lib-arm64.c | 15 +++++++++++++-- tests/tcctest.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/lib/lib-arm64.c b/lib/lib-arm64.c index a8799808..33df5092 100644 --- a/lib/lib-arm64.c +++ b/lib/lib-arm64.c @@ -387,7 +387,12 @@ long double __extendsftf2(float f) else if (a << 1 >> 24 == 255) x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 | (uint64_t)!!(a << 9) << 47); - else + else if (a << 1 >> 24 == 0) { + uint64_t adj = 0; + while (!(a << 1 >> 1 >> (23 - adj))) + adj++; + x.x1 = aa >> 31 << 63 | (16256 - adj + 1) << 48 | aa << adj << 41 >> 16; + } else x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 | aa << 41 >> 16); memcpy(&fx, &x, 16); @@ -406,7 +411,13 @@ long double __extenddftf2(double f) else if (a << 1 >> 53 == 2047) x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 | (uint64_t)!!(a << 12) << 47); - else + else if (a << 1 >> 53 == 0) { + uint64_t adj = 0; + while (!(a << 1 >> 1 >> (52 - adj))) + adj++; + x.x0 <<= adj; + x.x1 = a >> 63 << 63 | (15360 - adj + 1) << 48 | a << adj << 12 >> 16; + } else x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16; memcpy(&fx, &x, 16); return fx; diff --git a/tests/tcctest.c b/tests/tcctest.c index 027a8856..4ac2cc00 100644 --- a/tests/tcctest.c +++ b/tests/tcctest.c @@ -2215,13 +2215,14 @@ double ftab1[3] = { 1.2, 3.4, -5.6 }; void float_test(void) { #if !defined(__arm__) || defined(__ARM_PCS_VFP) - float fa, fb; - double da, db; + volatile float fa, fb; + volatile double da, db; int a; unsigned int b; static double nan2 = 0.0/0.0; static double inf1 = 1.0/0.0; static double inf2 = 1e5000; + volatile LONG_DOUBLE la; printf("sizeof(float) = %d\n", sizeof(float)); printf("sizeof(double) = %d\n", sizeof(double)); @@ -2243,6 +2244,30 @@ void float_test(void) db = b; printf("db = %f\n", db); printf("nan != nan = %d, inf1 = %f, inf2 = %f\n", nan2 != nan2, inf1, inf2); + da = 0x0.88p-1022; /* a subnormal */ + la = da; + printf ("da subnormal = %a\n", da); + printf ("da subnormal = %.40g\n", da); + printf ("la subnormal = %La\n", la); + printf ("la subnormal = %.40Lg\n", la); + da /= 2; + la = da; + printf ("da/2 subnormal = %a\n", da); + printf ("da/2 subnormal = %.40g\n", da); + printf ("la/2 subnormal = %La\n", la); + printf ("la/2 subnormal = %.40Lg\n", la); + fa = 0x0.88p-126f; /* a subnormal */ + la = fa; + printf ("fa subnormal = %a\n", fa); + printf ("fa subnormal = %.40g\n", fa); + printf ("la subnormal = %La\n", la); + printf ("la subnormal = %.40Lg\n", la); + fa /= 2; + la = fa; + printf ("fa/2 subnormal = %a\n", fa); + printf ("fa/2 subnormal = %.40g\n", fa); + printf ("la/2 subnormal = %La\n", la); + printf ("la/2 subnormal = %.40Lg\n", la); #endif }