| /* Copyright (C) 2007-2021 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #define BID_128RES |
| |
| #include "bid_internal.h" |
| |
| /***************************************************************************** |
| * BID128_round_integral_exact |
| * |
| * Rounds the 128-bit BID decimal value x to an integral value according to |
| * rnd_mode (nearest-even, ties-away, down, up, or toward zero). |
| * - NaN input: a quieted NaN is returned; a signaling NaN also sets |
| * INVALID_EXCEPTION in *pfpsf. Infinities are returned with their sign. |
| * - Zero input (including non-canonical encodings, which are treated as |
| * zero): a zero with the same sign and exponent MAX(Q(x), 0) is returned. |
| * - Inputs with a non-negative unbiased exponent are returned unchanged. |
| * - Otherwise the result is built with biased exponent 0x3040... (unbiased |
| * 0) and INEXACT_EXCEPTION is set in *pfpsf when a nonzero fraction is |
| * discarded. |
| * rnd_mode and pfpsf are not declared in this body; presumably they are |
| * supplied by the BID128_FUNCTION_ARG1 macro - confirm in bid_internal.h. |
| ****************************************************************************/ |
| |
| BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x) |
| |
| UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} |
| }; |
| UINT64 x_sign; |
| UINT64 x_exp; |
| int exp; // unbiased exponent |
| // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) |
| UINT64 tmp64; |
| BID_UI64DOUBLE tmp1; |
| unsigned int x_nr_bits; |
| int q, ind, shift; |
| UINT128 C1; |
| UINT256 fstar; |
| UINT256 P256; |
| |
| // check for NaN or Infinity |
| if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
| // x is special |
| if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
| // if x = NaN, then res = Q (x) |
| // check first for non-canonical NaN payload (payload > 10^33 - 1); if so, the payload is zeroed |
| if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
| (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
| (x.w[0] > 0x38c15b09ffffffffull))) { |
| x.w[1] = x.w[1] & 0xffffc00000000000ull; |
| x.w[0] = 0x0ull; |
| } |
| if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| // return quiet (x) |
| res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] |
| res.w[0] = x.w[0]; |
| } else { // x is QNaN |
| // return x |
| res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] |
| res.w[0] = x.w[0]; |
| } |
| BID_RETURN (res) |
| } else { // x is not a NaN, so it must be infinity |
| if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf |
| // return +inf |
| res.w[1] = 0x7800000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } else { // x is -inf |
| // return -inf |
| res.w[1] = 0xf800000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } |
| BID_RETURN (res); |
| } |
| } |
| // unpack x |
| x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
| C1.w[1] = x.w[1] & MASK_COEFF; |
| C1.w[0] = x.w[0]; |
| |
| // check for non-canonical values (treated as zero) |
| if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
| // non-canonical |
| x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
| C1.w[1] = 0; // significand high |
| C1.w[0] = 0; // significand low |
| } else { // G0_G1 != 11 |
| x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
| if (C1.w[1] > 0x0001ed09bead87c0ull || |
| (C1.w[1] == 0x0001ed09bead87c0ull |
| && C1.w[0] > 0x378d8e63ffffffffull)) { |
| // x is non-canonical if coefficient is larger than 10^34 -1 |
| C1.w[1] = 0; |
| C1.w[0] = 0; |
| } else { // canonical |
| ; |
| } |
| } |
| |
| // test for input equal to zero |
| if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
| // x is 0 |
| // return 0 preserving the sign bit and the preferred exponent |
| // of MAX(Q(x), 0) |
| if (x_exp <= (0x1820ull << 49)) { |
| res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; |
| } else { |
| res.w[1] = x_sign | x_exp; |
| } |
| res.w[0] = 0x0000000000000000ull; |
| BID_RETURN (res); |
| } |
| // x is not special and is not zero |
| |
| switch (rnd_mode) { |
| case ROUNDING_TO_NEAREST: |
| case ROUNDING_TIES_AWAY: |
| // if (exp <= -(p+1)) return 0.0 |
| if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_DOWN: |
| // if (exp <= -p) return -1.0 or +0.0 |
| if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 |
| if (x_sign) { |
| // if negative, return negative 1, because we know coefficient |
| // is non-zero (would have been caught above) |
| res.w[1] = 0xb040000000000000ull; |
| res.w[0] = 0x0000000000000001ull; |
| } else { |
| // if positive, return positive 0, because we know coefficient is |
| // non-zero (would have been caught above) |
| res.w[1] = 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_UP: |
| // if (exp <= -p) return -0.0 or +1.0 |
| if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 |
| if (x_sign) { |
| // if negative, return negative 0, because we know the coefficient |
| // is non-zero (would have been caught above) |
| res.w[1] = 0xb040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } else { |
| // if positive, return positive 1, because we know coefficient is |
| // non-zero (would have been caught above) |
| res.w[1] = 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000001ull; |
| } |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_TO_ZERO: |
| // if (exp <= -p) return -0.0 or +0.0 |
| if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| } |
| |
| // q = nr. of decimal digits in x |
| // determine first the nr. of bits in x |
| if (C1.w[1] == 0) { |
| if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
| // split the 64-bit value in two 32-bit halves to avoid rounding errors |
| if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 |
| tmp1.d = (double) (C1.w[0] >> 32); // exact conversion |
| x_nr_bits = |
| 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } else { // x < 2^32 (cannot occur here: this branch is nested inside x >= 2^53) |
| tmp1.d = (double) (C1.w[0]); // exact conversion |
| x_nr_bits = |
| 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // if x < 2^53 |
| tmp1.d = (double) C1.w[0]; // exact conversion |
| x_nr_bits = |
| 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
| tmp1.d = (double) C1.w[1]; // exact conversion |
| x_nr_bits = |
| 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| |
| q = nr_digits[x_nr_bits - 1].digits; |
| if (q == 0) { |
| q = nr_digits[x_nr_bits - 1].digits1; |
| if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || |
| (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
| C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
| q++; |
| } |
| exp = (x_exp >> 49) - 6176; |
| if (exp >= 0) { // -exp <= 0 |
| // the argument is an integer already |
| res.w[1] = x.w[1]; |
| res.w[0] = x.w[0]; |
| BID_RETURN (res); |
| } |
| // exp < 0 |
| switch (rnd_mode) { |
| case ROUNDING_TO_NEAREST: |
| if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q |
| // need to shift right -exp digits from the coefficient; exp will be 0 |
| ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
| // chop off ind digits from the lower part of C1 |
| // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits |
| tmp64 = C1.w[0]; |
| if (ind <= 19) { |
| C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
| } else { |
| C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
| C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
| } |
| if (C1.w[0] < tmp64) |
| C1.w[1]++; |
| // calculate C* and f* |
| // C* is actually floor(C*) in this case |
| // C* and f* need shifting and masking, as shown by |
| // shiftright128[] and maskhigh128[] |
| // 1 <= x <= 34 |
| // kx = 10^(-x) = ten2mk128[ind - 1] |
| // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
| // the approximation of 10^(-x) was rounded up to 118 bits |
| __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
| // determine the value of res and fstar |
| |
| // determine inexactness of the rounding of C* |
| // if (0 < f* - 1/2 < 10^(-x)) then |
| // the result is exact |
| // else // if (f* - 1/2 > T*) then |
| // the result is inexact |
| // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] |
| |
| if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
| // redundant shift = shiftright128[ind - 1]; // shift = 0 |
| res.w[1] = P256.w[3]; |
| res.w[0] = P256.w[2]; |
| // redundant fstar.w[3] = 0; |
| // redundant fstar.w[2] = 0; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* < 10^(-x) <=> midpoint |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) |
| if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
| ((fstar.w[1] < (ten2mk128[ind - 1].w[1])) |
| || ((fstar.w[1] == ten2mk128[ind - 1].w[1]) |
| && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) { |
| // subtract 1 to make even |
| if (res.w[0]-- == 0) { |
| res.w[1]--; |
| } |
| } |
| if (fstar.w[1] > 0x8000000000000000ull || |
| (fstar.w[1] == 0x8000000000000000ull |
| && fstar.w[0] > 0x0ull)) { |
| // f* > 1/2 and the result may be exact |
| tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 |
| if (tmp64 > ten2mk128[ind - 1].w[1] || |
| (tmp64 == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // else the result is exact |
| } else { // the result is inexact; f2* <= 1/2 |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
| shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
| res.w[1] = (P256.w[3] >> shift); |
| res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
| // redundant fstar.w[3] = 0; |
| fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* < 10^(-x) <=> midpoint |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
| fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] < ten2mk128[ind - 1].w[0]))) { |
| // subtract 1 to make even |
| if (res.w[0]-- == 0) { |
| res.w[1]--; |
| } |
| } |
| if (fstar.w[2] > onehalf128[ind - 1] || |
| (fstar.w[2] == onehalf128[ind - 1] |
| && (fstar.w[1] || fstar.w[0]))) { |
| // f2* > 1/2 and the result may be exact |
| // Calculate f2* - 1/2 |
| tmp64 = fstar.w[2] - onehalf128[ind - 1]; |
| if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // else the result is exact |
| } else { // the result is inexact; f2* <= 1/2 |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } else { // 22 <= ind - 1 <= 33 |
| shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
| res.w[1] = 0; |
| res.w[0] = P256.w[3] >> shift; |
| fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
| fstar.w[2] = P256.w[2]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* < 10^(-x) <=> midpoint |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
| fstar.w[3] == 0 && fstar.w[2] == 0 && |
| (fstar.w[1] < ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] < ten2mk128[ind - 1].w[0]))) { |
| // subtract 1 to make even |
| if (res.w[0]-- == 0) { |
| res.w[1]--; |
| } |
| } |
| if (fstar.w[3] > onehalf128[ind - 1] || |
| (fstar.w[3] == onehalf128[ind - 1] && |
| (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { |
| // f2* > 1/2 and the result may be exact |
| // Calculate f2* - 1/2 |
| tmp64 = fstar.w[3] - onehalf128[ind - 1]; |
| if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] |
| || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
| && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // else the result is exact |
| } else { // the result is inexact; f2* <= 1/2 |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } |
| res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
| BID_RETURN (res); |
| } else { // if ((q + exp) < 0) <=> q < -exp |
| // the result is +0 or -0 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_TIES_AWAY: |
| if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q |
| // need to shift right -exp digits from the coefficient; exp will be 0 |
| ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
| // chop off ind digits from the lower part of C1 |
| // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits |
| tmp64 = C1.w[0]; |
| if (ind <= 19) { |
| C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
| } else { |
| C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
| C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
| } |
| if (C1.w[0] < tmp64) |
| C1.w[1]++; |
| // calculate C* and f* |
| // C* is actually floor(C*) in this case |
| // C* and f* need shifting and masking, as shown by |
| // shiftright128[] and maskhigh128[] |
| // 1 <= x <= 34 |
| // kx = 10^(-x) = ten2mk128[ind - 1] |
| // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
| // the approximation of 10^(-x) was rounded up to 118 bits |
| __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
| // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. |
| // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 |
| // NOTE(review): the even/odd midpoint rule described next is not applied in this ties-away branch: if (0 < f* < 10^(-x)) then the result is a midpoint |
| // if floor(C*) is even then C* = floor(C*) - logical right |
| // shift; C* has p decimal digits, correct by Prop. 1) |
| // else if floor(C*) is odd C* = floor(C*)-1 (logical right |
| // shift; C* has p decimal digits, correct by Pr. 1) |
| // else |
| // C* = floor(C*) (logical right shift; C has p decimal digits, |
| // correct by Property 1) |
| // n = C* * 10^(e+x) |
| |
| // determine also the inexactness of the rounding of C* |
| // if (0 < f* - 1/2 < 10^(-x)) then |
| // the result is exact |
| // else // if (f* - 1/2 > T*) then |
| // the result is inexact |
| // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] |
| // shift right C* by Ex-128 = shiftright128[ind] |
| if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
| // redundant shift = shiftright128[ind - 1]; // shift = 0 |
| res.w[1] = P256.w[3]; |
| res.w[0] = P256.w[2]; |
| // redundant fstar.w[3] = 0; |
| // redundant fstar.w[2] = 0; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| if (fstar.w[1] > 0x8000000000000000ull || |
| (fstar.w[1] == 0x8000000000000000ull |
| && fstar.w[0] > 0x0ull)) { |
| // f* > 1/2 and the result may be exact |
| tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 |
| if ((tmp64 > ten2mk128[ind - 1].w[1] || |
| (tmp64 == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0]))) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // else the result is exact |
| } else { // the result is inexact; f2* <= 1/2 |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
| shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
| res.w[1] = (P256.w[3] >> shift); |
| res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
| // redundant fstar.w[3] = 0; |
| fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| if (fstar.w[2] > onehalf128[ind - 1] || |
| (fstar.w[2] == onehalf128[ind - 1] |
| && (fstar.w[1] || fstar.w[0]))) { |
| // f2* > 1/2 and the result may be exact |
| // Calculate f2* - 1/2 |
| tmp64 = fstar.w[2] - onehalf128[ind - 1]; |
| if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // else the result is exact |
| } else { // the result is inexact; f2* <= 1/2 |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } else { // 22 <= ind - 1 <= 33 |
| shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
| res.w[1] = 0; |
| res.w[0] = P256.w[3] >> shift; |
| fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
| fstar.w[2] = P256.w[2]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| if (fstar.w[3] > onehalf128[ind - 1] || |
| (fstar.w[3] == onehalf128[ind - 1] && |
| (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { |
| // f2* > 1/2 and the result may be exact |
| // Calculate f2* - 1/2 |
| tmp64 = fstar.w[3] - onehalf128[ind - 1]; |
| if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] |
| || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
| && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // else the result is exact |
| } else { // the result is inexact; f2* <= 1/2 |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } |
| // if the result was a midpoint, it was already rounded away from zero |
| res.w[1] |= x_sign | 0x3040000000000000ull; |
| BID_RETURN (res); |
| } else { // if ((q + exp) < 0) <=> q < -exp |
| // the result is +0 or -0 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_DOWN: |
| if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q |
| // need to shift right -exp digits from the coefficient; exp will be 0 |
| ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
| // (number of digits to be chopped off) |
| // chop off ind digits from the lower part of C1 |
| // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate |
| // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP |
| // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE |
| // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE |
| // tmp64 = C1.w[0]; |
| // if (ind <= 19) { |
| // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
| // } else { |
| // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
| // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
| // } |
| // if (C1.w[0] < tmp64) C1.w[1]++; |
| // if carry-out from C1.w[0], increment C1.w[1] |
| // calculate C* and f* |
| // C* is actually floor(C*) in this case |
| // C* and f* need shifting and masking, as shown by |
| // shiftright128[] and maskhigh128[] |
| // 1 <= x <= 34 |
| // kx = 10^(-x) = ten2mk128[ind - 1] |
| // C* = C1 * 10^(-x) (no midpoint is added in this rounding mode) |
| // the approximation of 10^(-x) was rounded up to 118 bits |
| __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
| if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
| res.w[1] = P256.w[3]; |
| res.w[0] = P256.w[2]; |
| // redundant fstar.w[3] = 0; |
| // redundant fstar.w[2] = 0; |
| // redundant fstar.w[1] = P256.w[1]; |
| // redundant fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if ((P256.w[1] > ten2mk128[ind - 1].w[1]) |
| || (P256.w[1] == ten2mk128[ind - 1].w[1] |
| && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| // if positive, the truncated value is already the correct result |
| if (x_sign) { // if negative |
| if (++res.w[0] == 0) { |
| res.w[1]++; |
| } |
| } |
| } |
| } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
| shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
| res.w[1] = (P256.w[3] >> shift); |
| res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
| // redundant fstar.w[3] = 0; |
| fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| // if positive, the truncated value is already the correct result |
| if (x_sign) { // if negative |
| if (++res.w[0] == 0) { |
| res.w[1]++; |
| } |
| } |
| } |
| } else { // 22 <= ind - 1 <= 33 |
| shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
| res.w[1] = 0; |
| res.w[0] = P256.w[3] >> shift; |
| fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
| fstar.w[2] = P256.w[2]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if (fstar.w[3] || fstar.w[2] |
| || fstar.w[1] > ten2mk128[ind - 1].w[1] |
| || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
| && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| // if positive, the truncated value is already the correct result |
| if (x_sign) { // if negative |
| if (++res.w[0] == 0) { |
| res.w[1]++; |
| } |
| } |
| } |
| } |
| res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
| BID_RETURN (res); |
| } else { // if exp < 0 and q + exp <= 0 |
| if (x_sign) { // negative rounds down to -1.0 |
| res.w[1] = 0xb040000000000000ull; |
| res.w[0] = 0x0000000000000001ull; |
| } else { // positive rounds down to +0.0 |
| res.w[1] = 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_UP: |
| if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q |
| // need to shift right -exp digits from the coefficient; exp will be 0 |
| ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
| // (number of digits to be chopped off) |
| // chop off ind digits from the lower part of C1 |
| // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate |
| // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP |
| // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE |
| // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE |
| // tmp64 = C1.w[0]; |
| // if (ind <= 19) { |
| // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
| // } else { |
| // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
| // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
| // } |
| // if (C1.w[0] < tmp64) C1.w[1]++; |
| // if carry-out from C1.w[0], increment C1.w[1] |
| // calculate C* and f* |
| // C* is actually floor(C*) in this case |
| // C* and f* need shifting and masking, as shown by |
| // shiftright128[] and maskhigh128[] |
| // 1 <= x <= 34 |
| // kx = 10^(-x) = ten2mk128[ind - 1] |
| // C* = C1 * 10^(-x) |
| // the approximation of 10^(-x) was rounded up to 118 bits |
| __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
| if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
| res.w[1] = P256.w[3]; |
| res.w[0] = P256.w[2]; |
| // redundant fstar.w[3] = 0; |
| // redundant fstar.w[2] = 0; |
| // redundant fstar.w[1] = P256.w[1]; |
| // redundant fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if ((P256.w[1] > ten2mk128[ind - 1].w[1]) |
| || (P256.w[1] == ten2mk128[ind - 1].w[1] |
| && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| // if negative, the truncated value is already the correct result |
| if (!x_sign) { // if positive |
| if (++res.w[0] == 0) { |
| res.w[1]++; |
| } |
| } |
| } |
| } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
| shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
| res.w[1] = (P256.w[3] >> shift); |
| res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
| // redundant fstar.w[3] = 0; |
| fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| // if negative, the truncated value is already the correct result |
| if (!x_sign) { // if positive |
| if (++res.w[0] == 0) { |
| res.w[1]++; |
| } |
| } |
| } |
| } else { // 22 <= ind - 1 <= 33 |
| shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
| res.w[1] = 0; |
| res.w[0] = P256.w[3] >> shift; |
| fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
| fstar.w[2] = P256.w[2]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if (fstar.w[3] || fstar.w[2] |
| || fstar.w[1] > ten2mk128[ind - 1].w[1] |
| || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
| && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| // if negative, the truncated value is already the correct result |
| if (!x_sign) { // if positive |
| if (++res.w[0] == 0) { |
| res.w[1]++; |
| } |
| } |
| } |
| } |
| res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
| BID_RETURN (res); |
| } else { // if exp < 0 and q + exp <= 0 |
| if (x_sign) { // negative rounds up to -0.0 |
| res.w[1] = 0xb040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } else { // positive rounds up to +1.0 |
| res.w[1] = 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000001ull; |
| } |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| case ROUNDING_TO_ZERO: |
| if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q |
| // need to shift right -exp digits from the coefficient; exp will be 0 |
| ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
| // (number of digits to be chopped off) |
| // chop off ind digits from the lower part of C1 |
| // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate |
| // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP |
| // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE |
| // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE |
| //tmp64 = C1.w[0]; |
| // if (ind <= 19) { |
| // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
| // } else { |
| // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
| // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
| // } |
| // if (C1.w[0] < tmp64) C1.w[1]++; |
| // if carry-out from C1.w[0], increment C1.w[1] |
| // calculate C* and f* |
| // C* is actually floor(C*) in this case |
| // C* and f* need shifting and masking, as shown by |
| // shiftright128[] and maskhigh128[] |
| // 1 <= x <= 34 |
| // kx = 10^(-x) = ten2mk128[ind - 1] |
| // C* = C1 * 10^(-x) (no midpoint is added in this rounding mode) |
| // the approximation of 10^(-x) was rounded up to 118 bits |
| __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
| if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
| res.w[1] = P256.w[3]; |
| res.w[0] = P256.w[2]; |
| // redundant fstar.w[3] = 0; |
| // redundant fstar.w[2] = 0; |
| // redundant fstar.w[1] = P256.w[1]; |
| // redundant fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if ((P256.w[1] > ten2mk128[ind - 1].w[1]) |
| || (P256.w[1] == ten2mk128[ind - 1].w[1] |
| && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
| shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
| res.w[1] = (P256.w[3] >> shift); |
| res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
| // redundant fstar.w[3] = 0; |
| fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
| (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
| fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } else { // 22 <= ind - 1 <= 33 |
| shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
| res.w[1] = 0; |
| res.w[0] = P256.w[3] >> shift; |
| fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
| fstar.w[2] = P256.w[2]; |
| fstar.w[1] = P256.w[1]; |
| fstar.w[0] = P256.w[0]; |
| // fraction f* > 10^(-x) <=> inexact |
| // f* is in the right position to be compared with |
| // 10^(-x) from ten2mk128[] |
| if (fstar.w[3] || fstar.w[2] |
| || fstar.w[1] > ten2mk128[ind - 1].w[1] |
| || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
| && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| } |
| res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
| BID_RETURN (res); |
| } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= INEXACT_EXCEPTION; |
| BID_RETURN (res); |
| } |
| break; |
| } |
| |
| BID_RETURN (res); |
| } |
| |
| /***************************************************************************** |
| * BID128_round_integral_nearest_even |
| ****************************************************************************/ |
| |
| // Round the 128-bit BID value x to an integral value, rounding to nearest |
| // with ties going to the even integer. |
| // |
| // Cases handled, in order: |
| //   - NaN: the quieted NaN is returned (a non-canonical payload is zeroed); |
| //     INVALID_EXCEPTION is set in *pfpsf when x is a signaling NaN |
| //   - infinity: returned with its sign |
| //   - zero (non-canonical encodings are treated as zero): a zero with the |
| //     sign of x and exponent MAX (Q (x), 0) is returned |
| //   - exponent >= 0: x is already integral and is returned unchanged |
| //   - otherwise the fractional digits are rounded away and the result has |
| //     exponent 0 (0x3040000000000000ull is the biased exponent 6176 << 49) |
| // No flag other than INVALID_EXCEPTION is ever set here; in particular the |
| // routine does not report inexact results. |
| BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x) |
| |
|   UINT128 res; |
|   UINT64 x_sign; |
|   UINT64 x_exp; |
|   int exp; // unbiased exponent |
|   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) |
|   UINT64 tmp64; |
|   BID_UI64DOUBLE tmp1; |
|   unsigned int x_nr_bits; |
|   int q, ind, shift; |
|   UINT128 C1; |
|   // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits |
|   UINT256 fstar; |
|   UINT256 P256; |
| |
|   // check for NaN or Infinity |
|   if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
|     // x is special |
|     if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
|       // if x = NaN, then res = Q (x) |
|       // check first for non-canonical NaN payload |
|       if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
|           (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
|            (x.w[0] > 0x38c15b09ffffffffull))) { |
|         x.w[1] = x.w[1] & 0xffffc00000000000ull; |
|         x.w[0] = 0x0ull; |
|       } |
|       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
|         // set invalid flag |
|         *pfpsf |= INVALID_EXCEPTION; |
|       } |
|       // SNaN and QNaN produce the same result: quiet (x), with G[6]-G[16] |
|       // cleared out |
|       res.w[1] = x.w[1] & 0xfc003fffffffffffull; |
|       res.w[0] = x.w[0]; |
|       BID_RETURN (res); |
|     } else { // x is not a NaN, so it must be infinity |
|       if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf |
|         // return +inf |
|         res.w[1] = 0x7800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } else { // x is -inf |
|         // return -inf |
|         res.w[1] = 0xf800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } |
|       BID_RETURN (res); |
|     } |
|   } |
|   // unpack x |
|   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
|   C1.w[1] = x.w[1] & MASK_COEFF; |
|   C1.w[0] = x.w[0]; |
| |
|   // check for non-canonical values (treated as zero) |
|   if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
|     // non-canonical |
|     x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
|     C1.w[1] = 0; // significand high |
|     C1.w[0] = 0; // significand low |
|   } else { // G0_G1 != 11 |
|     x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
|     if (C1.w[1] > 0x0001ed09bead87c0ull || |
|         (C1.w[1] == 0x0001ed09bead87c0ull |
|          && C1.w[0] > 0x378d8e63ffffffffull)) { |
|       // x is non-canonical if coefficient is larger than 10^34 -1 |
|       C1.w[1] = 0; |
|       C1.w[0] = 0; |
|     } |
|   } |
| |
|   // test for input equal to zero |
|   if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
|     // x is 0 |
|     // return 0 preserving the sign bit and the preferred exponent |
|     // of MAX(Q(x), 0) |
|     if (x_exp <= (0x1820ull << 49)) { |
|       res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; |
|     } else { |
|       res.w[1] = x_sign | x_exp; |
|     } |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
|   // x is not special and is not zero |
| |
|   // if (exp <= -(p+1)) return 0 |
|   if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 |
|     res.w[1] = x_sign | 0x3040000000000000ull; |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
|   // q = nr. of decimal digits in x |
|   // determine first the nr. of bits in x |
|   if (C1.w[1] == 0) { |
|     if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
|       // convert only the upper 32-bit half to avoid rounding errors; it is |
|       // necessarily non-zero here because x >= 2^53 implies x >= 2^32 |
|       tmp1.d = (double) (C1.w[0] >> 32); // exact conversion |
|       x_nr_bits = |
|         33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } else { // if x < 2^53 |
|       tmp1.d = (double) C1.w[0]; // exact conversion |
|       x_nr_bits = |
|         1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } |
|   } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
|     tmp1.d = (double) C1.w[1]; // exact conversion |
|     x_nr_bits = |
|       65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|   } |
| |
|   // a zero .digits entry means the bit count alone does not determine q; |
|   // the coefficient is then compared against the entry's threshold |
|   q = nr_digits[x_nr_bits - 1].digits; |
|   if (q == 0) { |
|     q = nr_digits[x_nr_bits - 1].digits1; |
|     if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi |
|         || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
|             C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
|       q++; |
|   } |
|   exp = (x_exp >> 49) - 6176; |
|   if (exp >= 0) { // -exp <= 0 |
|     // the argument is an integer already |
|     res.w[1] = x.w[1]; |
|     res.w[0] = x.w[0]; |
|     BID_RETURN (res); |
|   } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q |
|     // need to shift right -exp digits from the coefficient; the exp will be 0 |
|     ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
|     // chop off ind digits from the lower part of C1 |
|     // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits |
|     tmp64 = C1.w[0]; |
|     if (ind <= 19) { |
|       C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
|     } else { |
|       C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
|       C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
|     } |
|     // if carry-out from C1.w[0], increment C1.w[1] |
|     if (C1.w[0] < tmp64) |
|       C1.w[1]++; |
|     // calculate C* and f* |
|     // C* is actually floor(C*) in this case |
|     // C* and f* need shifting and masking, as shown by |
|     // shiftright128[] and maskhigh128[] |
|     // 1 <= x <= 34 |
|     // kx = 10^(-x) = ten2mk128[ind - 1] |
|     // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
|     // the approximation of 10^(-x) was rounded up to 118 bits |
|     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
|     // determine the value of res and fstar |
|     if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
|       // redundant shift = shiftright128[ind - 1]; // shift = 0 |
|       res.w[1] = P256.w[3]; |
|       res.w[0] = P256.w[2]; |
|       // f* is simply the two low words of P256 here |
|       // fraction f* < 10^(-x) <=> midpoint |
|       // f* is in the right position to be compared with |
|       // 10^(-x) from ten2mk128[] |
|       // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) |
|       if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
|           ((P256.w[1] < (ten2mk128[ind - 1].w[1])) |
|            || ((P256.w[1] == ten2mk128[ind - 1].w[1]) |
|                && (P256.w[0] < ten2mk128[ind - 1].w[0])))) { |
|         // subtract 1 to make even; res.w[0] is odd, hence non-zero, so the |
|         // decrement can never borrow from res.w[1] |
|         res.w[0]--; |
|       } |
|     } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
|       shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
|       res.w[1] = (P256.w[3] >> shift); |
|       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
|       // redundant fstar.w[3] = 0; |
|       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
|       fstar.w[1] = P256.w[1]; |
|       fstar.w[0] = P256.w[0]; |
|       // fraction f* < 10^(-x) <=> midpoint |
|       // f* is in the right position to be compared with |
|       // 10^(-x) from ten2mk128[] |
|       if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
|           fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || |
|                               (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
|                                fstar.w[0] < ten2mk128[ind - 1].w[0]))) { |
|         // subtract 1 to make even; res.w[0] is odd, hence non-zero, so the |
|         // decrement can never borrow from res.w[1] |
|         res.w[0]--; |
|       } |
|     } else { // 22 <= ind - 1 <= 33 |
|       shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
|       res.w[1] = 0; |
|       res.w[0] = P256.w[3] >> shift; |
|       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
|       fstar.w[2] = P256.w[2]; |
|       fstar.w[1] = P256.w[1]; |
|       fstar.w[0] = P256.w[0]; |
|       // fraction f* < 10^(-x) <=> midpoint |
|       // f* is in the right position to be compared with |
|       // 10^(-x) from ten2mk128[] |
|       if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
|           fstar.w[3] == 0 && fstar.w[2] == 0 |
|           && (fstar.w[1] < ten2mk128[ind - 1].w[1] |
|               || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
|                   && fstar.w[0] < ten2mk128[ind - 1].w[0]))) { |
|         // subtract 1 to make even; res.w[0] is odd, hence non-zero, so the |
|         // decrement can never borrow from res.w[1] |
|         res.w[0]--; |
|       } |
|     } |
|     // attach the sign and the biased exponent for 10^0 |
|     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
|     BID_RETURN (res); |
|   } else { // if ((q + exp) < 0) <=> q < -exp |
|     // the result is +0 or -0 |
|     res.w[1] = x_sign | 0x3040000000000000ull; |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
| } |
| |
| /***************************************************************************** |
| * BID128_round_integral_negative |
| ****************************************************************************/ |
| |
| // Round the 128-bit BID value x to an integral value toward negative |
| // infinity. |
| // |
| // Cases handled, in order: |
| //   - NaN: the quieted NaN is returned (a non-canonical payload is zeroed); |
| //     INVALID_EXCEPTION is set in *pfpsf when x is a signaling NaN |
| //   - infinity: returned with its sign |
| //   - zero (non-canonical encodings are treated as zero): a zero with the |
| //     sign of x and exponent MAX (Q (x), 0) is returned |
| //   - non-zero x with no integer digits: -1 if x is negative, +0 otherwise |
| //   - exponent >= 0: x is already integral and is returned unchanged |
| //   - otherwise the coefficient is truncated to its integer digits and, for |
| //     negative x with a non-zero discarded fraction, its magnitude is |
| //     incremented by one; the result has exponent 0 |
| // No flag other than INVALID_EXCEPTION is ever set here; in particular the |
| // routine does not report inexact results. |
| BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x) |
| |
|   UINT128 res; |
|   UINT64 x_sign; |
|   UINT64 x_exp; |
|   int exp; // unbiased exponent |
|   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo |
|   // (all are UINT64) |
|   BID_UI64DOUBLE tmp1; |
|   unsigned int x_nr_bits; |
|   int q, ind, shift; |
|   UINT128 C1; |
|   // UINT128 res is C* at first - represents up to 34 decimal digits ~ |
|   // 113 bits |
|   UINT256 fstar; |
|   UINT256 P256; |
| |
|   // check for NaN or Infinity |
|   if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
|     // x is special |
|     if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
|       // if x = NaN, then res = Q (x) |
|       // check first for non-canonical NaN payload |
|       if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
|           (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
|            (x.w[0] > 0x38c15b09ffffffffull))) { |
|         x.w[1] = x.w[1] & 0xffffc00000000000ull; |
|         x.w[0] = 0x0ull; |
|       } |
|       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
|         // set invalid flag |
|         *pfpsf |= INVALID_EXCEPTION; |
|       } |
|       // SNaN and QNaN produce the same result: quiet (x), with G[6]-G[16] |
|       // cleared out |
|       res.w[1] = x.w[1] & 0xfc003fffffffffffull; |
|       res.w[0] = x.w[0]; |
|       BID_RETURN (res); |
|     } else { // x is not a NaN, so it must be infinity |
|       if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf |
|         // return +inf |
|         res.w[1] = 0x7800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } else { // x is -inf |
|         // return -inf |
|         res.w[1] = 0xf800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } |
|       BID_RETURN (res); |
|     } |
|   } |
|   // unpack x |
|   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
|   C1.w[1] = x.w[1] & MASK_COEFF; |
|   C1.w[0] = x.w[0]; |
| |
|   // check for non-canonical values (treated as zero) |
|   if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
|     // non-canonical |
|     x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
|     C1.w[1] = 0; // significand high |
|     C1.w[0] = 0; // significand low |
|   } else { // G0_G1 != 11 |
|     x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
|     if (C1.w[1] > 0x0001ed09bead87c0ull || |
|         (C1.w[1] == 0x0001ed09bead87c0ull |
|          && C1.w[0] > 0x378d8e63ffffffffull)) { |
|       // x is non-canonical if coefficient is larger than 10^34 -1 |
|       C1.w[1] = 0; |
|       C1.w[0] = 0; |
|     } |
|   } |
| |
|   // test for input equal to zero |
|   if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
|     // x is 0 |
|     // return 0 preserving the sign bit and the preferred exponent |
|     // of MAX(Q(x), 0) |
|     if (x_exp <= (0x1820ull << 49)) { |
|       res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; |
|     } else { |
|       res.w[1] = x_sign | x_exp; |
|     } |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
|   // x is not special and is not zero |
| |
|   // if (exp <= -p) return -1.0 or +0.0 |
|   if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 |
|     if (x_sign) { |
|       // if negative, return negative 1, because we know the coefficient |
|       // is non-zero (would have been caught above) |
|       res.w[1] = 0xb040000000000000ull; |
|       res.w[0] = 0x0000000000000001ull; |
|     } else { |
|       // if positive, return positive 0, because we know coefficient is |
|       // non-zero (would have been caught above) |
|       res.w[1] = 0x3040000000000000ull; |
|       res.w[0] = 0x0000000000000000ull; |
|     } |
|     BID_RETURN (res); |
|   } |
|   // q = nr. of decimal digits in x |
|   // determine first the nr. of bits in x |
|   if (C1.w[1] == 0) { |
|     if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
|       // convert only the upper 32-bit half to avoid rounding errors; it is |
|       // necessarily non-zero here because x >= 2^53 implies x >= 2^32 |
|       tmp1.d = (double) (C1.w[0] >> 32); // exact conversion |
|       x_nr_bits = |
|         33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } else { // if x < 2^53 |
|       tmp1.d = (double) C1.w[0]; // exact conversion |
|       x_nr_bits = |
|         1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } |
|   } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
|     tmp1.d = (double) C1.w[1]; // exact conversion |
|     x_nr_bits = |
|       65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|   } |
| |
|   // a zero .digits entry means the bit count alone does not determine q; |
|   // the coefficient is then compared against the entry's threshold |
|   q = nr_digits[x_nr_bits - 1].digits; |
|   if (q == 0) { |
|     q = nr_digits[x_nr_bits - 1].digits1; |
|     if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || |
|         (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
|          C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
|       q++; |
|   } |
|   exp = (x_exp >> 49) - 6176; |
|   if (exp >= 0) { // -exp <= 0 |
|     // the argument is an integer already |
|     res.w[1] = x.w[1]; |
|     res.w[0] = x.w[0]; |
|     BID_RETURN (res); |
|   } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q |
|     // need to shift right -exp digits from the coefficient; the exp will be 0 |
|     // exp <= -34 was handled above, so 1 <= ind <= 33 |
|     ind = -exp; // ind is a synonym for 'x' |
|     // (number of digits to be chopped off) |
|     // chop off ind digits from the lower part of C1 |
|     // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE |
|     // (no 1/2 ulp is added to C1 before the multiplication) |
|     // calculate C* and f* |
|     // C* is actually floor(C*) in this case |
|     // C* and f* need shifting and masking, as shown by |
|     // shiftright128[] and maskhigh128[] |
|     // kx = 10^(-x) = ten2mk128[ind - 1] |
|     // C* = C1 * 10^(-x) |
|     // the approximation of 10^(-x) was rounded up to 118 bits |
|     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
|     if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
|       res.w[1] = P256.w[3]; |
|       res.w[0] = P256.w[2]; |
|       // if positive, the truncated value is already the correct result |
|       if (x_sign) { // if negative |
|         // f* is simply the two low words of P256 here |
|         // fraction f* > 10^(-x) <=> inexact |
|         // f* is in the right position to be compared with |
|         // 10^(-x) from ten2mk128[] |
|         if ((P256.w[1] > ten2mk128[ind - 1].w[1]) |
|             || (P256.w[1] == ten2mk128[ind - 1].w[1] |
|                 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { |
|           // add 1 to the magnitude, propagating the carry |
|           if (++res.w[0] == 0) { |
|             res.w[1]++; |
|           } |
|         } |
|       } |
|     } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
|       shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
|       res.w[1] = (P256.w[3] >> shift); |
|       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
|       // if positive, the truncated value is already the correct result |
|       if (x_sign) { // if negative |
|         // redundant fstar.w[3] = 0; |
|         fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
|         fstar.w[1] = P256.w[1]; |
|         fstar.w[0] = P256.w[0]; |
|         // fraction f* > 10^(-x) <=> inexact |
|         // f* is in the right position to be compared with |
|         // 10^(-x) from ten2mk128[] |
|         if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
|             (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
|              fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
|           // add 1 to the magnitude, propagating the carry |
|           if (++res.w[0] == 0) { |
|             res.w[1]++; |
|           } |
|         } |
|       } |
|     } else { // 22 <= ind - 1 <= 32 |
|       shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
|       res.w[1] = 0; |
|       res.w[0] = P256.w[3] >> shift; |
|       // if positive, the truncated value is already the correct result |
|       if (x_sign) { // if negative |
|         fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
|         fstar.w[2] = P256.w[2]; |
|         fstar.w[1] = P256.w[1]; |
|         fstar.w[0] = P256.w[0]; |
|         // fraction f* > 10^(-x) <=> inexact |
|         // f* is in the right position to be compared with |
|         // 10^(-x) from ten2mk128[] |
|         if (fstar.w[3] || fstar.w[2] |
|             || fstar.w[1] > ten2mk128[ind - 1].w[1] |
|             || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
|                 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
|           // add 1 to the magnitude, propagating the carry |
|           if (++res.w[0] == 0) { |
|             res.w[1]++; |
|           } |
|         } |
|       } |
|     } |
|     // attach the sign and the biased exponent for 10^0 |
|     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
|     BID_RETURN (res); |
|   } else { // if exp < 0 and q + exp <= 0 |
|     if (x_sign) { // negative rounds down to -1.0 |
|       res.w[1] = 0xb040000000000000ull; |
|       res.w[0] = 0x0000000000000001ull; |
|     } else { // positive rounds down to +0.0 |
|       res.w[1] = 0x3040000000000000ull; |
|       res.w[0] = 0x0000000000000000ull; |
|     } |
|     BID_RETURN (res); |
|   } |
| } |
| |
| /***************************************************************************** |
| * BID128_round_integral_positive |
| ****************************************************************************/ |
| |
| // Round the 128-bit BID value x to an integral value toward positive |
| // infinity. |
| // |
| // Cases handled, in order: |
| //   - NaN: the quieted NaN is returned (a non-canonical payload is zeroed); |
| //     INVALID_EXCEPTION is set in *pfpsf when x is a signaling NaN |
| //   - infinity: returned with its sign |
| //   - zero (non-canonical encodings are treated as zero): a zero with the |
| //     sign of x and exponent MAX (Q (x), 0) is returned |
| //   - non-zero x with no integer digits: -0 if x is negative, +1 otherwise |
| //   - exponent >= 0: x is already integral and is returned unchanged |
| //   - otherwise the coefficient is truncated to its integer digits and, for |
| //     positive x with a non-zero discarded fraction, incremented by one; |
| //     the result has exponent 0 |
| // No flag other than INVALID_EXCEPTION is ever set here; in particular the |
| // routine does not report inexact results. |
| BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x) |
| |
|   UINT128 res; |
|   UINT64 x_sign; |
|   UINT64 x_exp; |
|   int exp; // unbiased exponent |
|   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo |
|   // (all are UINT64) |
|   BID_UI64DOUBLE tmp1; |
|   unsigned int x_nr_bits; |
|   int q, ind, shift; |
|   UINT128 C1; |
|   // UINT128 res is C* at first - represents up to 34 decimal digits ~ |
|   // 113 bits |
|   UINT256 fstar; |
|   UINT256 P256; |
| |
|   // check for NaN or Infinity |
|   if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
|     // x is special |
|     if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
|       // if x = NaN, then res = Q (x) |
|       // check first for non-canonical NaN payload |
|       if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
|           (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
|            (x.w[0] > 0x38c15b09ffffffffull))) { |
|         x.w[1] = x.w[1] & 0xffffc00000000000ull; |
|         x.w[0] = 0x0ull; |
|       } |
|       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
|         // set invalid flag |
|         *pfpsf |= INVALID_EXCEPTION; |
|       } |
|       // SNaN and QNaN produce the same result: quiet (x), with G[6]-G[16] |
|       // cleared out |
|       res.w[1] = x.w[1] & 0xfc003fffffffffffull; |
|       res.w[0] = x.w[0]; |
|       BID_RETURN (res); |
|     } else { // x is not a NaN, so it must be infinity |
|       if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf |
|         // return +inf |
|         res.w[1] = 0x7800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } else { // x is -inf |
|         // return -inf |
|         res.w[1] = 0xf800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } |
|       BID_RETURN (res); |
|     } |
|   } |
|   // unpack x |
|   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
|   C1.w[1] = x.w[1] & MASK_COEFF; |
|   C1.w[0] = x.w[0]; |
| |
|   // check for non-canonical values (treated as zero) |
|   if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
|     // non-canonical |
|     x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
|     C1.w[1] = 0; // significand high |
|     C1.w[0] = 0; // significand low |
|   } else { // G0_G1 != 11 |
|     x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
|     if (C1.w[1] > 0x0001ed09bead87c0ull || |
|         (C1.w[1] == 0x0001ed09bead87c0ull |
|          && C1.w[0] > 0x378d8e63ffffffffull)) { |
|       // x is non-canonical if coefficient is larger than 10^34 -1 |
|       C1.w[1] = 0; |
|       C1.w[0] = 0; |
|     } |
|   } |
| |
|   // test for input equal to zero |
|   if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
|     // x is 0 |
|     // return 0 preserving the sign bit and the preferred exponent |
|     // of MAX(Q(x), 0) |
|     if (x_exp <= (0x1820ull << 49)) { |
|       res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; |
|     } else { |
|       res.w[1] = x_sign | x_exp; |
|     } |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
|   // x is not special and is not zero |
| |
|   // if (exp <= -p) return -0.0 or +1.0 |
|   if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 |
|     if (x_sign) { |
|       // if negative, return negative 0, because we know the coefficient |
|       // is non-zero (would have been caught above) |
|       res.w[1] = 0xb040000000000000ull; |
|       res.w[0] = 0x0000000000000000ull; |
|     } else { |
|       // if positive, return positive 1, because we know coefficient is |
|       // non-zero (would have been caught above) |
|       res.w[1] = 0x3040000000000000ull; |
|       res.w[0] = 0x0000000000000001ull; |
|     } |
|     BID_RETURN (res); |
|   } |
|   // q = nr. of decimal digits in x |
|   // determine first the nr. of bits in x |
|   if (C1.w[1] == 0) { |
|     if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
|       // convert only the upper 32-bit half to avoid rounding errors; it is |
|       // necessarily non-zero here because x >= 2^53 implies x >= 2^32 |
|       tmp1.d = (double) (C1.w[0] >> 32); // exact conversion |
|       x_nr_bits = |
|         33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } else { // if x < 2^53 |
|       tmp1.d = (double) C1.w[0]; // exact conversion |
|       x_nr_bits = |
|         1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } |
|   } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
|     tmp1.d = (double) C1.w[1]; // exact conversion |
|     x_nr_bits = |
|       65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|   } |
| |
|   // a zero .digits entry means the bit count alone does not determine q; |
|   // the coefficient is then compared against the entry's threshold |
|   q = nr_digits[x_nr_bits - 1].digits; |
|   if (q == 0) { |
|     q = nr_digits[x_nr_bits - 1].digits1; |
|     if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || |
|         (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
|          C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
|       q++; |
|   } |
|   exp = (x_exp >> 49) - 6176; |
|   if (exp >= 0) { // -exp <= 0 |
|     // the argument is an integer already |
|     res.w[1] = x.w[1]; |
|     res.w[0] = x.w[0]; |
|     BID_RETURN (res); |
|   } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q |
|     // need to shift right -exp digits from the coefficient; exp will be 0 |
|     // exp <= -34 was handled above, so 1 <= ind <= 33 |
|     ind = -exp; // ind is a synonym for 'x' |
|     // (number of digits to be chopped off) |
|     // chop off ind digits from the lower part of C1 |
|     // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE |
|     // (no 1/2 ulp is added to C1 before the multiplication) |
|     // calculate C* and f* |
|     // C* is actually floor(C*) in this case |
|     // C* and f* need shifting and masking, as shown by |
|     // shiftright128[] and maskhigh128[] |
|     // kx = 10^(-x) = ten2mk128[ind - 1] |
|     // C* = C1 * 10^(-x) |
|     // the approximation of 10^(-x) was rounded up to 118 bits |
|     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
|     if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
|       res.w[1] = P256.w[3]; |
|       res.w[0] = P256.w[2]; |
|       // if negative, the truncated value is already the correct result |
|       if (!x_sign) { // if positive |
|         // f* is simply the two low words of P256 here |
|         // fraction f* > 10^(-x) <=> inexact |
|         // f* is in the right position to be compared with |
|         // 10^(-x) from ten2mk128[] |
|         if ((P256.w[1] > ten2mk128[ind - 1].w[1]) |
|             || (P256.w[1] == ten2mk128[ind - 1].w[1] |
|                 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { |
|           // add 1, propagating the carry |
|           if (++res.w[0] == 0) { |
|             res.w[1]++; |
|           } |
|         } |
|       } |
|     } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
|       shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
|       res.w[1] = (P256.w[3] >> shift); |
|       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
|       // if negative, the truncated value is already the correct result |
|       if (!x_sign) { // if positive |
|         // redundant fstar.w[3] = 0; |
|         fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
|         fstar.w[1] = P256.w[1]; |
|         fstar.w[0] = P256.w[0]; |
|         // fraction f* > 10^(-x) <=> inexact |
|         // f* is in the right position to be compared with |
|         // 10^(-x) from ten2mk128[] |
|         if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || |
|             (fstar.w[1] == ten2mk128[ind - 1].w[1] && |
|              fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
|           // add 1, propagating the carry |
|           if (++res.w[0] == 0) { |
|             res.w[1]++; |
|           } |
|         } |
|       } |
|     } else { // 22 <= ind - 1 <= 32 |
|       shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
|       res.w[1] = 0; |
|       res.w[0] = P256.w[3] >> shift; |
|       // if negative, the truncated value is already the correct result |
|       if (!x_sign) { // if positive |
|         fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
|         fstar.w[2] = P256.w[2]; |
|         fstar.w[1] = P256.w[1]; |
|         fstar.w[0] = P256.w[0]; |
|         // fraction f* > 10^(-x) <=> inexact |
|         // f* is in the right position to be compared with |
|         // 10^(-x) from ten2mk128[] |
|         if (fstar.w[3] || fstar.w[2] |
|             || fstar.w[1] > ten2mk128[ind - 1].w[1] |
|             || (fstar.w[1] == ten2mk128[ind - 1].w[1] |
|                 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { |
|           // add 1, propagating the carry |
|           if (++res.w[0] == 0) { |
|             res.w[1]++; |
|           } |
|         } |
|       } |
|     } |
|     // attach the sign and the biased exponent for 10^0 |
|     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
|     BID_RETURN (res); |
|   } else { // if exp < 0 and q + exp <= 0 |
|     if (x_sign) { // negative rounds up to -0.0 |
|       res.w[1] = 0xb040000000000000ull; |
|       res.w[0] = 0x0000000000000000ull; |
|     } else { // positive rounds up to +1.0 |
|       res.w[1] = 0x3040000000000000ull; |
|       res.w[0] = 0x0000000000000001ull; |
|     } |
|     BID_RETURN (res); |
|   } |
| } |
| |
| /***************************************************************************** |
| * BID128_round_integral_zero |
| ****************************************************************************/ |
| |
| // Round the 128-bit BID value x to an integral value toward zero |
| // (truncation). |
| // |
| // Cases handled, in order: |
| //   - NaN: the quieted NaN is returned (a non-canonical payload is zeroed); |
| //     INVALID_EXCEPTION is set in *pfpsf when x is a signaling NaN |
| //   - infinity: returned with its sign |
| //   - zero (non-canonical encodings are treated as zero): a zero with the |
| //     sign of x and exponent MAX (Q (x), 0) is returned |
| //   - non-zero x with no integer digits: a zero with the sign of x |
| //   - exponent >= 0: x is already integral and is returned unchanged |
| //   - otherwise the coefficient is truncated to its integer digits; the |
| //     result has exponent 0 |
| // No flag other than INVALID_EXCEPTION is ever set here; in particular the |
| // routine does not report inexact results. |
| BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x) |
| |
|   UINT128 res; |
|   UINT64 x_sign; |
|   UINT64 x_exp; |
|   int exp; // unbiased exponent |
|   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo |
|   // (all are UINT64) |
|   BID_UI64DOUBLE tmp1; |
|   unsigned int x_nr_bits; |
|   int q, ind, shift; |
|   UINT128 C1; |
|   // UINT128 res is C* at first - represents up to 34 decimal digits ~ |
|   // 113 bits |
|   UINT256 P256; |
| |
|   // check for NaN or Infinity |
|   if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
|     // x is special |
|     if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
|       // if x = NaN, then res = Q (x) |
|       // check first for non-canonical NaN payload |
|       if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
|           (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
|            (x.w[0] > 0x38c15b09ffffffffull))) { |
|         x.w[1] = x.w[1] & 0xffffc00000000000ull; |
|         x.w[0] = 0x0ull; |
|       } |
|       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
|         // set invalid flag |
|         *pfpsf |= INVALID_EXCEPTION; |
|       } |
|       // SNaN and QNaN produce the same result: quiet (x), with G[6]-G[16] |
|       // cleared out |
|       res.w[1] = x.w[1] & 0xfc003fffffffffffull; |
|       res.w[0] = x.w[0]; |
|       BID_RETURN (res); |
|     } else { // x is not a NaN, so it must be infinity |
|       if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf |
|         // return +inf |
|         res.w[1] = 0x7800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } else { // x is -inf |
|         // return -inf |
|         res.w[1] = 0xf800000000000000ull; |
|         res.w[0] = 0x0000000000000000ull; |
|       } |
|       BID_RETURN (res); |
|     } |
|   } |
|   // unpack x |
|   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
|   C1.w[1] = x.w[1] & MASK_COEFF; |
|   C1.w[0] = x.w[0]; |
| |
|   // check for non-canonical values (treated as zero) |
|   if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
|     // non-canonical |
|     x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
|     C1.w[1] = 0; // significand high |
|     C1.w[0] = 0; // significand low |
|   } else { // G0_G1 != 11 |
|     x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
|     if (C1.w[1] > 0x0001ed09bead87c0ull || |
|         (C1.w[1] == 0x0001ed09bead87c0ull |
|          && C1.w[0] > 0x378d8e63ffffffffull)) { |
|       // x is non-canonical if coefficient is larger than 10^34 -1 |
|       C1.w[1] = 0; |
|       C1.w[0] = 0; |
|     } |
|   } |
| |
|   // test for input equal to zero |
|   if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
|     // x is 0 |
|     // return 0 preserving the sign bit and the preferred exponent |
|     // of MAX(Q(x), 0) |
|     if (x_exp <= (0x1820ull << 49)) { |
|       res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; |
|     } else { |
|       res.w[1] = x_sign | x_exp; |
|     } |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
|   // x is not special and is not zero |
| |
|   // if (exp <= -p) return -0.0 or +0.0 |
|   if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 |
|     res.w[1] = x_sign | 0x3040000000000000ull; |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
|   // q = nr. of decimal digits in x |
|   // determine first the nr. of bits in x |
|   if (C1.w[1] == 0) { |
|     if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
|       // convert only the upper 32-bit half to avoid rounding errors; it is |
|       // necessarily non-zero here because x >= 2^53 implies x >= 2^32 |
|       tmp1.d = (double) (C1.w[0] >> 32); // exact conversion |
|       x_nr_bits = |
|         33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } else { // if x < 2^53 |
|       tmp1.d = (double) C1.w[0]; // exact conversion |
|       x_nr_bits = |
|         1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|     } |
|   } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
|     tmp1.d = (double) C1.w[1]; // exact conversion |
|     x_nr_bits = |
|       65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
|   } |
| |
|   // a zero .digits entry means the bit count alone does not determine q; |
|   // the coefficient is then compared against the entry's threshold |
|   q = nr_digits[x_nr_bits - 1].digits; |
|   if (q == 0) { |
|     q = nr_digits[x_nr_bits - 1].digits1; |
|     if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || |
|         (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
|          C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
|       q++; |
|   } |
|   exp = (x_exp >> 49) - 6176; |
|   if (exp >= 0) { // -exp <= 0 |
|     // the argument is an integer already |
|     res.w[1] = x.w[1]; |
|     res.w[0] = x.w[0]; |
|     BID_RETURN (res); |
|   } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q |
|     // need to shift right -exp digits from the coefficient; the exp will be 0 |
|     // exp <= -34 was handled above, so 1 <= ind <= 33 |
|     ind = -exp; // ind is a synonym for 'x' |
|     // (number of digits to be chopped off) |
|     // chop off ind digits from the lower part of C1 |
|     // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP |
|     // calculate C* |
|     // C* is actually floor(C*) in this case |
|     // C* needs shifting, as shown by shiftright128[] |
|     // kx = 10^(-x) = ten2mk128[ind - 1] |
|     // C* = C1 * 10^(-x) |
|     // the approximation of 10^(-x) was rounded up to 118 bits |
|     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
|     if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
|       res.w[1] = P256.w[3]; |
|       res.w[0] = P256.w[2]; |
|     } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
|       shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
|       res.w[1] = (P256.w[3] >> shift); |
|       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
|     } else { // 22 <= ind - 1 <= 32 |
|       shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
|       res.w[1] = 0; |
|       res.w[0] = P256.w[3] >> shift; |
|     } |
|     // attach the sign and the biased exponent for 10^0 |
|     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; |
|     BID_RETURN (res); |
|   } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 |
|     res.w[1] = x_sign | 0x3040000000000000ull; |
|     res.w[0] = 0x0000000000000000ull; |
|     BID_RETURN (res); |
|   } |
| } |
| |
| /***************************************************************************** |
| * BID128_round_integral_nearest_away |
| * |
| * Rounds the 128-bit BID-encoded decimal x to an integral value, rounding to |
| * nearest with ties (exact halves) going away from zero. |
| * |
| * The function header and the return statements come from the macros |
| * BID128_FUNCTION_ARG1_NORND and BID_RETURN, which are defined outside this |
| * part of the file; pfpsf (pointer to the status flags) is not declared in |
| * the visible code and is presumably supplied by that macro layer - confirm |
| * there. |
| * |
| * Cases, in the order in which they are tested: |
| * - NaN: a payload above the canonical bound checked in the code is replaced |
| * by zero; the result is the quiet form of x with G[6]-G[16] cleared |
| * (high word masked with 0xfc003fffffffffff). A signaling NaN also sets |
| * INVALID_EXCEPTION in *pfpsf, the only flag set by the visible code. |
| * - Infinity: the high word is 0x7800000000000000 or 0xf800000000000000 |
| * according to the sign of x, and the low word is zero. |
| * - Zero, including non-canonical encodings (which are treated as zero): |
| * a zero with the sign of x and exponent max(Q(x), 0). |
| * - Biased exponent field at or below the encoding of exp = -35: a zero |
| * with the sign of x and exponent field 0x3040000000000000 (exp = 0). |
| * - exp >= 0: x is already integral and is returned unchanged. |
| * - exp < 0 and -exp <= q, where q is the number of decimal digits of the |
| * coefficient: 1/2 * 10^(-exp) is added to the coefficient, the sum is |
| * multiplied by the reciprocal ten2mk128[-exp - 1] and shifted right, and |
| * the truncated quotient is returned with the sign of x and exponent |
| * field 0x3040000000000000. |
| * - exp < 0 and -exp > q: a zero with the sign of x. |
| ****************************************************************************/ |
| |
| BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x) |
| |
| UINT128 res; |
| UINT64 x_sign; |
| UINT64 x_exp; |
| int exp; // unbiased exponent |
| // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo |
| // (all are UINT64) |
| UINT64 tmp64; |
| BID_UI64DOUBLE tmp1; |
| unsigned int x_nr_bits; |
| int q, ind, shift; |
| UINT128 C1; |
| // UINT128 res is C* at first - represents up to 34 decimal digits ~ |
| // 113 bits |
| // (no f* variable is declared: this function never examines the fraction) |
| UINT256 P256; |
| |
| // check for NaN or Infinity |
| if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
| // x is special |
| if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
| // if x = NaN, then res = Q (x) |
| // check first for non-canonical NaN payload |
| if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
| (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
| (x.w[0] > 0x38c15b09ffffffffull))) { |
| x.w[1] = x.w[1] & 0xffffc00000000000ull; |
| x.w[0] = 0x0ull; |
| } |
| if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| // return quiet (x) |
| res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] |
| res.w[0] = x.w[0]; |
| } else { // x is QNaN |
| // return x |
| res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] |
| res.w[0] = x.w[0]; |
| } |
| BID_RETURN (res) |
| } else { // x is not a NaN, so it must be infinity |
| if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf |
| // return +inf |
| res.w[1] = 0x7800000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } else { // x is -inf |
| // return -inf |
| res.w[1] = 0xf800000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } |
| BID_RETURN (res); |
| } |
| } |
| // unpack x |
| x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
| C1.w[1] = x.w[1] & MASK_COEFF; |
| C1.w[0] = x.w[0]; |
| |
| // check for non-canonical values (treated as zero) |
| if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
| // non-canonical |
| x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
| C1.w[1] = 0; // significand high |
| C1.w[0] = 0; // significand low |
| } else { // G0_G1 != 11 |
| x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
| if (C1.w[1] > 0x0001ed09bead87c0ull || |
| (C1.w[1] == 0x0001ed09bead87c0ull |
| && C1.w[0] > 0x378d8e63ffffffffull)) { |
| // x is non-canonical if coefficient is larger than 10^34 -1 |
| C1.w[1] = 0; |
| C1.w[0] = 0; |
| } else { // canonical |
| ; |
| } |
| } |
| |
| // test for input equal to zero |
| if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
| // x is 0 (0x1820 == 6176 is the exponent bias, so the test is exp <= 0) |
| // return 0 preserving the sign bit and the preferred exponent |
| // of MAX(Q(x), 0) |
| if (x_exp <= (0x1820ull << 49)) { |
| res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; |
| } else { |
| res.w[1] = x_sign | x_exp; |
| } |
| res.w[0] = 0x0000000000000000ull; |
| BID_RETURN (res); |
| } |
| // x is not special and is not zero |
| |
| // if (exp <= -(p+1)), p = 34: |x| < 10^34 * 10^(-35), so return signed 0 |
| if (x_exp <= 0x2ffa000000000000ull) { // biased field encoding exp = -35 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| BID_RETURN (res); |
| } |
| // q = nr. of decimal digits in x |
| // determine first the nr. of bits in x (read from the double's exponent) |
| if (C1.w[1] == 0) { |
| if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
| // split the 64-bit value in two 32-bit halves to avoid rounding errors |
| if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 (always true here) |
| tmp1.d = (double) (C1.w[0] >> 32); // exact conversion |
| x_nr_bits = |
| 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } else { // x < 2^32 (not reachable, since x >= 2^53 in this branch) |
| tmp1.d = (double) (C1.w[0]); // exact conversion |
| x_nr_bits = |
| 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // if x < 2^53 |
| tmp1.d = (double) C1.w[0]; // exact conversion |
| x_nr_bits = |
| 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
| tmp1.d = (double) C1.w[1]; // exact conversion |
| x_nr_bits = |
| 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| |
| q = nr_digits[x_nr_bits - 1].digits; |
| if (q == 0) { |
| q = nr_digits[x_nr_bits - 1].digits1; |
| if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || |
| (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
| C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
| q++; |
| } |
| exp = (x_exp >> 49) - 6176; |
| if (exp >= 0) { // -exp <= 0 |
| // the argument is an integer already |
| res.w[1] = x.w[1]; |
| res.w[0] = x.w[0]; |
| BID_RETURN (res); |
| } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q |
| // need to shift right -exp digits from the coefficient; the exp will be 0 |
| ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
| // chop off ind digits from the lower part of C1 |
| // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits |
| tmp64 = C1.w[0]; |
| if (ind <= 19) { |
| C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
| } else { |
| C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
| C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; |
| } |
| if (C1.w[0] < tmp64) |
| C1.w[1]++; |
| // calculate C* and f* |
| // C* is actually floor(C*) in this case |
| // C* needs shifting, as shown by shiftright128[] |
| // (maskhigh128[] is not used here because f* is not examined) |
| // 1 <= x <= 34 |
| // kx = 10^(-x) = ten2mk128[ind - 1] |
| // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
| // the approximation of 10^(-x) was rounded up to 118 bits |
| __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
| // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. |
| // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 |
| // NOTE: no midpoint test on the fraction f* is made in this function |
| // (P256.w[1] and P256.w[0] are never read below): since 1/2 * 10^x |
| // was added to C1 before the multiplication, truncating C* already |
| // sends exact halves to the next larger magnitude, i.e. away from |
| // zero once the sign is attached. Hence, in all cases: |
| // |
| // C* = floor(C*) (logical right shift; C has p decimal digits, |
| // correct by Property 1) |
| // n = C* * 10^(e+x) |
| |
| // shift right C* by Ex-128 = shiftright128[ind - 1] |
| if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
| res.w[1] = P256.w[3]; |
| res.w[0] = P256.w[2]; |
| } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
| shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
| res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); |
| res.w[1] = (P256.w[3] >> shift); |
| } else { // 22 <= ind - 1 <= 33 |
| shift = shiftright128[ind - 1]; // 66 <= shift <= 102; 64 subtracted below |
| res.w[1] = 0; |
| res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 |
| } |
| // if the result was a midpoint, it was already rounded away from zero |
| res.w[1] |= x_sign | 0x3040000000000000ull; |
| BID_RETURN (res); |
| } else { // if ((q + exp) < 0) <=> q < -exp |
| // the result is +0 or -0 |
| res.w[1] = x_sign | 0x3040000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| BID_RETURN (res); |
| } |
| } |