| /* Copyright (C) 2007-2017 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /***************************************************************************** |
| * |
| * BID128 fma x * y + z |
| * |
| ****************************************************************************/ |
| |
| #include "bid_internal.h" |
| |
| static void |
| rounding_correction (unsigned int rnd_mode, |
| unsigned int is_inexact_lt_midpoint, |
| unsigned int is_inexact_gt_midpoint, |
| unsigned int is_midpoint_lt_even, |
| unsigned int is_midpoint_gt_even, |
| int unbexp, |
| UINT128 * ptrres, _IDEC_flags * ptrfpsf) { |
| // unbiased true exponent unbexp may be larger than emax |
| |
| UINT128 res = *ptrres; // expected to have the correct sign and coefficient |
| // (the exponent field is ignored, as unbexp is used instead) |
| UINT64 sign, exp; |
| UINT64 C_hi, C_lo; |
| |
| // general correction from RN to RA, RM, RP, RZ |
| // Note: if the result is negative, then is_inexact_lt_midpoint, |
| // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even |
| // have to be considered as if determined for the absolute value of the |
| // result (so they seem to be reversed) |
| |
| if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || |
| is_midpoint_lt_even || is_midpoint_gt_even) { |
| *ptrfpsf |= INEXACT_EXCEPTION; |
| } |
| // apply correction to result calculated with unbounded exponent |
| sign = res.w[1] & MASK_SIGN; |
| exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax |
| C_hi = res.w[1] & MASK_COEFF; |
| C_lo = res.w[0]; |
| if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || |
| ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && |
| is_midpoint_gt_even))) || |
| (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || |
| ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && |
| is_midpoint_gt_even)))) { |
| // C = C + 1 |
| C_lo = C_lo + 1; |
| if (C_lo == 0) |
| C_hi = C_hi + 1; |
| if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) { |
| // C = 10^34 => rounding overflow |
| C_hi = 0x0000314dc6448d93ull; |
| C_lo = 0x38c15b0a00000000ull; // 10^33 |
| // exp = exp + EXP_P1; |
| unbexp = unbexp + 1; |
| exp = (UINT64) (unbexp + 6176) << 49; |
| } |
| } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && |
| ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || |
| (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { |
| // C = C - 1 |
| C_lo = C_lo - 1; |
| if (C_lo == 0xffffffffffffffffull) |
| C_hi--; |
| // check if we crossed into the lower decade |
| if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { |
| // C = 10^33 - 1 |
| if (exp > 0) { |
| C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 |
| C_lo = 0x378d8e63ffffffffull; |
| // exp = exp - EXP_P1; |
| unbexp = unbexp - 1; |
| exp = (UINT64) (unbexp + 6176) << 49; |
| } else { // if exp = 0 the result is tiny & inexact |
| *ptrfpsf |= UNDERFLOW_EXCEPTION; |
| } |
| } |
| } else { |
| ; // the result is already correct |
| } |
| if (unbexp > expmax) { // 6111 |
| *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| exp = 0; |
| if (!sign) { // result is positive |
| if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf |
| C_hi = 0x7800000000000000ull; |
| C_lo = 0x0000000000000000ull; |
| } else { // res = +MAXFP = (10^34-1) * 10^emax |
| C_hi = 0x5fffed09bead87c0ull; |
| C_lo = 0x378d8e63ffffffffull; |
| } |
| } else { // result is negative |
| if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf |
| C_hi = 0xf800000000000000ull; |
| C_lo = 0x0000000000000000ull; |
| } else { // res = -MAXFP = -(10^34-1) * 10^emax |
| C_hi = 0xdfffed09bead87c0ull; |
| C_lo = 0x378d8e63ffffffffull; |
| } |
| } |
| } |
| // assemble the result |
| res.w[1] = sign | exp | C_hi; |
| res.w[0] = C_lo; |
| *ptrres = res; |
| } |
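| 
| // Illustrative example (not part of the library): rounding the exact
| // value 12.7 to two digits under RN gives 13 with
| // is_inexact_gt_midpoint = 1; a call with rnd_mode = ROUNDING_TO_ZERO
| // then takes the C = C - 1 branch above and yields 12. If instead the
| // increment branch pushes C from 10^34 - 1 to 10^34, the coefficient is
| // replaced by 10^33 and unbexp is incremented (9.99...9 rounds up to
| // 1.00...0 * 10^1), keeping the result canonical.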
| |
| static void |
| add256 (UINT256 x, UINT256 y, UINT256 * pz) { |
| // *z = x + y; assume the sum fits in 256 bits
| UINT256 z; |
| z.w[0] = x.w[0] + y.w[0]; |
| if (z.w[0] < x.w[0]) { |
| x.w[1]++; |
| if (x.w[1] == 0x0000000000000000ull) { |
| x.w[2]++; |
| if (x.w[2] == 0x0000000000000000ull) { |
| x.w[3]++; |
| } |
| } |
| } |
| z.w[1] = x.w[1] + y.w[1]; |
| if (z.w[1] < x.w[1]) { |
| x.w[2]++; |
| if (x.w[2] == 0x0000000000000000ull) { |
| x.w[3]++; |
| } |
| } |
| z.w[2] = x.w[2] + y.w[2]; |
| if (z.w[2] < x.w[2]) { |
| x.w[3]++; |
| } |
| z.w[3] = x.w[3] + y.w[3]; // no carry out possible, as the sum is assumed to fit in 256 bits
| *pz = z; |
| } |
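| 
| // Illustrative example (not part of the library): with
| // x = {0, 0, 0, 0xffffffffffffffffull} and y = {0, 0, 0, 1} (words listed
| // w[3]..w[0]), z.w[0] wraps to 0 and the carry is propagated into x.w[1]
| // before the higher words are summed, so the result is z = {0, 0, 1, 0}.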
| |
| static void |
| sub256 (UINT256 x, UINT256 y, UINT256 * pz) { |
| // *z = x - y; assume x >= y |
| UINT256 z; |
| z.w[0] = x.w[0] - y.w[0]; |
| if (z.w[0] > x.w[0]) { |
| x.w[1]--; |
| if (x.w[1] == 0xffffffffffffffffull) { |
| x.w[2]--; |
| if (x.w[2] == 0xffffffffffffffffull) { |
| x.w[3]--; |
| } |
| } |
| } |
| z.w[1] = x.w[1] - y.w[1]; |
| if (z.w[1] > x.w[1]) { |
| x.w[2]--; |
| if (x.w[2] == 0xffffffffffffffffull) { |
| x.w[3]--; |
| } |
| } |
| z.w[2] = x.w[2] - y.w[2]; |
| if (z.w[2] > x.w[2]) { |
| x.w[3]--; |
| } |
| z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y |
| *pz = z; |
| } |
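| 
| // Illustrative example (not part of the library): with x = {0, 0, 1, 0}
| // and y = {0, 0, 0, 1} (words w[3]..w[0], i.e. x = 2^64 and y = 1),
| // z.w[0] wraps to 0xffffffffffffffffull, the borrow decrements x.w[1]
| // to 0, and the result is z = {0, 0, 0, 0xffffffffffffffffull} = 2^64 - 1.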
| |
| |
| static int |
| nr_digits256 (UINT256 R256) { |
| int ind; |
| // determine the number of decimal digits in R256 |
| if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { |
| // between 1 and 19 digits |
| for (ind = 1; ind <= 19; ind++) { |
| if (R256.w[0] < ten2k64[ind]) { |
| break; |
| } |
| } |
| // ind digits |
| } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && |
| (R256.w[1] < ten2k128[0].w[1] || |
| (R256.w[1] == ten2k128[0].w[1] |
| && R256.w[0] < ten2k128[0].w[0]))) { |
| // 20 digits |
| ind = 20; |
| } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { |
| // between 21 and 38 digits |
| for (ind = 1; ind <= 18; ind++) { |
| if (R256.w[1] < ten2k128[ind].w[1] || |
| (R256.w[1] == ten2k128[ind].w[1] && |
| R256.w[0] < ten2k128[ind].w[0])) { |
| break; |
| } |
| } |
| // ind + 20 digits |
| ind = ind + 20; |
| } else if (R256.w[3] == 0x0 && |
| (R256.w[2] < ten2k256[0].w[2] || |
| (R256.w[2] == ten2k256[0].w[2] && |
| R256.w[1] < ten2k256[0].w[1]) || |
| (R256.w[2] == ten2k256[0].w[2] && |
| R256.w[1] == ten2k256[0].w[1] && |
| R256.w[0] < ten2k256[0].w[0]))) { |
| // 39 digits |
| ind = 39; |
| } else { |
| // between 40 and 68 digits |
| for (ind = 1; ind <= 29; ind++) { |
| if (R256.w[3] < ten2k256[ind].w[3] || |
| (R256.w[3] == ten2k256[ind].w[3] && |
| R256.w[2] < ten2k256[ind].w[2]) || |
| (R256.w[3] == ten2k256[ind].w[3] && |
| R256.w[2] == ten2k256[ind].w[2] && |
| R256.w[1] < ten2k256[ind].w[1]) || |
| (R256.w[3] == ten2k256[ind].w[3] && |
| R256.w[2] == ten2k256[ind].w[2] && |
| R256.w[1] == ten2k256[ind].w[1] && |
| R256.w[0] < ten2k256[ind].w[0])) { |
| break; |
| } |
| } |
| // ind + 39 digits |
| ind = ind + 39; |
| } |
| return (ind); |
| } |
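| 
| // Illustrative example (not part of the library): for
| // R256 = {0, 0, 0, 999} the first loop breaks at ind = 3 because
| // 999 < ten2k64[3] = 10^3, so the function returns 3; for
| // R256 = 10^34 - 1 = {0, 0, 0x0001ed09bead87c0ull, 0x378d8e63ffffffffull}
| // the third branch breaks at ind = 14 (10^34 - 1 < ten2k128[14] = 10^34)
| // and returns 14 + 20 = 34.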
| |
| // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so |
| // use the rounding information from ptr_is_* to avoid a double rounding error |
| static void |
| add_and_round (int q3, |
| int q4, |
| int e4, |
| int delta, |
| int p34, |
| UINT64 z_sign, |
| UINT64 p_sign, |
| UINT128 C3, |
| UINT256 C4, |
| int rnd_mode, |
| int *ptr_is_midpoint_lt_even, |
| int *ptr_is_midpoint_gt_even, |
| int *ptr_is_inexact_lt_midpoint, |
| int *ptr_is_inexact_gt_midpoint, |
| _IDEC_flags * ptrfpsf, UINT128 * ptrres) { |
| |
| int scale; |
| int x0; |
| int ind; |
| UINT64 R64; |
| UINT128 P128, R128; |
| UINT192 P192, R192; |
| UINT256 R256; |
| int is_midpoint_lt_even = 0; |
| int is_midpoint_gt_even = 0; |
| int is_inexact_lt_midpoint = 0; |
| int is_inexact_gt_midpoint = 0; |
| int is_midpoint_lt_even0 = 0; |
| int is_midpoint_gt_even0 = 0; |
| int is_inexact_lt_midpoint0 = 0; |
| int is_inexact_gt_midpoint0 = 0; |
| int incr_exp = 0; |
| int is_tiny = 0; |
| int lt_half_ulp = 0; |
| int eq_half_ulp = 0; |
| // int gt_half_ulp = 0; |
| UINT128 res = *ptrres; |
| |
| // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66 |
| scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this |
| // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1 |
| |
| // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for |
| // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6)) |
| if (scale == 0) { |
| R256.w[3] = 0x0ull; |
| R256.w[2] = 0x0ull; |
| R256.w[1] = C3.w[1]; |
| R256.w[0] = C3.w[0]; |
| } else if (scale <= 19) { // 10^scale fits in 64 bits |
| P128.w[1] = 0; |
| P128.w[0] = ten2k64[scale]; |
| __mul_128x128_to_256 (R256, P128, C3); |
| } else if (scale <= 38) { // 10^scale fits in 128 bits |
| __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3); |
| } else if (scale <= 57) { // 39 <= scale <= 57 |
| // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits |
| // (10^67 has 223 bits; 10^69 has 230 bits); |
| // must split the computation: |
| // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
| // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty |
| // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits |
| __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3); |
| // now multiply R128 by 10^38 |
| __mul_128x128_to_256 (R256, R128, ten2k128[18]); |
| } else { // 58 <= scale <= 66 |
| // 10^scale takes between 193 and 220 bits, |
| // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) |
| // must split the computation: |
| // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
| // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty |
| // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits |
| // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because |
| // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 |
| __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]); |
| // now calculate 10^38 * 10^(scale-38) * C3
| __mul_128x128_to_256 (R256, R128, ten2k128[18]); |
| } |
| // C3 * 10^scale is now in R256 |
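| // Illustrative example: for scale = 60 the last branch above computes
| // R128 = C3.w[0] * ten2k128[2] = C3 * 10^22 (C3 fits in 64 bits in that
| // branch), then R256 = R128 * ten2k128[18] = C3 * 10^22 * 10^38, i.e.
| // C3 * 10^60 obtained in two exact multiplications.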
| |
| // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least |
| // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is |
| // possible |
| // add/subtract C4 and C3 * 10^scale; the exponent is e4 |
| if (p_sign == z_sign) { // R256 = C4 + R256 |
| // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact, |
| // but may require rounding |
| add256 (C4, R256, &R256); |
| } else { // if (p_sign != z_sign) { // R256 = C4 - R256 |
| // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or |
| // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact, |
| // but may require rounding |
| |
| // compare first R256 = C3 * 10^scale and C4 |
| if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || |
| (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || |
| (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && |
| R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 |
| // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact, |
| // but may require rounding |
| sub256 (R256, C4, &R256); |
| // flip p_sign too, because the result has the sign of z |
| p_sign = z_sign; |
| } else { // if C4 > C3 * 10^scale |
| // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact, |
| // but may require rounding |
| sub256 (C4, R256, &R256); |
| } |
| // if the result is pure zero, the sign depends on the rounding mode |
| // (x*y and z had opposite signs) |
| if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull && |
| R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) { |
| if (rnd_mode != ROUNDING_DOWN) |
| p_sign = 0x0000000000000000ull; |
| else |
| p_sign = 0x8000000000000000ull; |
| // the exponent is max (e4, expmin) |
| if (e4 < -6176) |
| e4 = expmin; |
| // assemble result |
| res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49); |
| res.w[0] = 0x0; |
| *ptrres = res; |
| return; |
| } |
| } |
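| // Illustrative example: if x * y = +5 * 10^-3 and z = -5 * 10^-3, the
| // subtraction above yields R256 = 0 exactly, and the code returns
| // -0 * 10^-3 for ROUNDING_DOWN and +0 * 10^-3 for every other rounding
| // mode, matching the IEEE 754-2008 rule for an exact zero sum of
| // operands with opposite signs.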
| |
| // determine the number of decimal digits in R256 |
| ind = nr_digits256 (R256); |
| |
| // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; |
| // round to the destination precision, with unbounded exponent |
| |
| if (ind <= p34) { |
| // result rounded to the destination precision with unbounded exponent |
| // is exact |
| if (ind + e4 < p34 + expmin) { |
| is_tiny = 1; // applies to all rounding modes |
| } |
| res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1]; |
| res.w[0] = R256.w[0]; |
| // Note: res is correct only if expmin <= e4 <= expmax |
| } else { // if (ind > p34) |
| // if more than P digits, round to nearest to P digits |
| // round R256 to p34 digits |
| x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 |
| if (ind <= 38) { |
| P128.w[1] = R256.w[1]; |
| P128.w[0] = R256.w[0]; |
| round128_19_38 (ind, x0, P128, &R128, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); |
| } else if (ind <= 57) { |
| P192.w[2] = R256.w[2]; |
| P192.w[1] = R256.w[1]; |
| P192.w[0] = R256.w[0]; |
| round192_39_57 (ind, x0, P192, &R192, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); |
| R128.w[1] = R192.w[1]; |
| R128.w[0] = R192.w[0]; |
| } else { // if (ind <= 68) |
| round256_58_76 (ind, x0, R256, &R256, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); |
| R128.w[1] = R256.w[1]; |
| R128.w[0] = R256.w[0]; |
| } |
| // the rounded result has p34 = 34 digits |
| e4 = e4 + x0 + incr_exp; |
| if (rnd_mode == ROUNDING_TO_NEAREST) { |
| if (e4 < expmin) { |
| is_tiny = 1; // for other rounding modes apply correction |
| } |
| } else { |
| // for RM, RP, RZ, RA apply correction in order to determine tininess |
| // but do not save the result; apply the correction to |
| // (-1)^p_sign * significand * 10^0 |
| P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1]; |
| P128.w[0] = R128.w[0]; |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, is_midpoint_lt_even, |
| is_midpoint_gt_even, 0, &P128, ptrfpsf); |
| scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 |
| // the number of digits in the significand is p34 = 34 |
| if (e4 + scale < expmin) { |
| is_tiny = 1; |
| } |
| } |
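| // Illustrative example: if the RN significand is 10^33 and RZ requires a
| // decrement, the correct predecessor is not (10^33 - 1) * 10^e but
| // (10^34 - 1) * 10^(e-1), so the trial correction above reports
| // scale = -1 and tininess is detected when e4 - 1 < expmin, even if e4
| // itself equals expmin.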
| ind = p34; // the number of decimal digits in the significand of res
| res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN |
| res.w[0] = R128.w[0]; |
| // Note: res is correct only if expmin <= e4 <= expmax |
| // set the inexact flag after rounding with bounded exponent, if any |
| } |
| // at this point we have the result rounded with unbounded exponent in |
| // res and we know its tininess: |
| // res = (-1)^p_sign * significand * 10^e4, |
| // where q (significand) = ind <= p34 |
| // Note: res is correct only if expmin <= e4 <= expmax |
| |
| // check for overflow if RN |
| if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { |
| res.w[1] = p_sign | 0x7800000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| *ptrres = res; |
| *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| return; // BID_RETURN (res) |
| } // else not overflow or not RN, so continue |
| |
| // if (e4 >= expmin) we have the result rounded with bounded exponent |
| if (e4 < expmin) { |
| x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res |
| // where the result rounded [at most] once is |
| // (-1)^p_sign * significand_res * 10^e4 |
| |
| // avoid double rounding error |
| is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; |
| is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; |
| is_midpoint_lt_even0 = is_midpoint_lt_even; |
| is_midpoint_gt_even0 = is_midpoint_gt_even; |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 0; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| |
| if (x0 > ind) { |
| // nothing is left of res when moving the decimal point left x0 digits |
| is_inexact_lt_midpoint = 1; |
| res.w[1] = p_sign | 0x0000000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| e4 = expmin; |
| } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 |
| // this is <, =, or > 1/2 ulp |
| // compare the ind-digit value in the significand of res with |
| // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is |
| // less than, equal to, or greater than 1/2 ulp (significand of res) |
| R128.w[1] = res.w[1] & MASK_COEFF; |
| R128.w[0] = res.w[0]; |
| if (ind <= 19) { |
| if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| is_inexact_lt_midpoint = 1; |
| } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| is_midpoint_gt_even = 1; |
| } else { // > 1/2 ulp |
| // gt_half_ulp = 1; |
| is_inexact_gt_midpoint = 1; |
| } |
| } else { // if (ind <= 38) { |
| if (R128.w[1] < midpoint128[ind - 20].w[1] || |
| (R128.w[1] == midpoint128[ind - 20].w[1] && |
| R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| is_inexact_lt_midpoint = 1; |
| } else if (R128.w[1] == midpoint128[ind - 20].w[1] && |
| R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| is_midpoint_gt_even = 1; |
| } else { // > 1/2 ulp |
| // gt_half_ulp = 1; |
| is_inexact_gt_midpoint = 1; |
| } |
| } |
| if (lt_half_ulp || eq_half_ulp) { |
| // res = +0.0 * 10^expmin |
| res.w[1] = 0x0000000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } else { // if (gt_half_ulp) |
| // res = +1 * 10^expmin |
| res.w[1] = 0x0000000000000000ull; |
| res.w[0] = 0x0000000000000001ull; |
| } |
| res.w[1] = p_sign | res.w[1]; |
| e4 = expmin; |
| } else { // if (1 <= x0 <= ind - 1 <= 33) |
| // round the ind-digit result to ind - x0 digits |
| |
| if (ind <= 18) { // 2 <= ind <= 18 |
| round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); |
| res.w[1] = 0x0; |
| res.w[0] = R64; |
| } else if (ind <= 38) { |
| P128.w[1] = res.w[1] & MASK_COEFF; |
| P128.w[0] = res.w[0]; |
| round128_19_38 (ind, x0, P128, &res, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| } |
| e4 = e4 + x0; // expmin |
| // we want the exponent to be expmin, so if incr_exp = 1 then |
| // multiply the rounded result by 10 - it will still fit in 113 bits |
| if (incr_exp) { |
| // 64 x 128 -> 128 |
| P128.w[1] = res.w[1] & MASK_COEFF; |
| P128.w[0] = res.w[0]; |
| __mul_64x128_to_128 (res, ten2k64[1], P128); |
| } |
| res.w[1] = |
| p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); |
| // avoid a double rounding error |
| if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && |
| is_midpoint_lt_even) { // double rounding error upward |
| // res = res - 1 |
| res.w[0]--; |
| if (res.w[0] == 0xffffffffffffffffull) |
| res.w[1]--; |
| // Note: a double rounding error upward is not possible; for this |
| // the result after the first rounding would have to be 99...95 |
| // (35 digits in all), possibly followed by a number of zeros; this |
| // is not possible in Cases (2)-(6) or (15)-(17) which may get here |
| is_midpoint_lt_even = 0; |
| is_inexact_lt_midpoint = 1; |
| } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && |
| is_midpoint_gt_even) { // double rounding error downward |
| // res = res + 1 |
| res.w[0]++; |
| if (res.w[0] == 0) |
| res.w[1]++; |
| is_midpoint_gt_even = 0; |
| is_inexact_gt_midpoint = 1; |
| } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && |
| !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
| // if this second rounding was exact the result may still be |
| // inexact because of the first rounding |
| if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { |
| is_inexact_gt_midpoint = 1; |
| } |
| if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { |
| is_inexact_lt_midpoint = 1; |
| } |
| } else if (is_midpoint_gt_even && |
| (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { |
| // pulled up to a midpoint |
| is_inexact_lt_midpoint = 1; |
| is_inexact_gt_midpoint = 0; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| } else if (is_midpoint_lt_even && |
| (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { |
| // pulled down to a midpoint |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 1; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| } else { |
| ; |
| } |
| } |
| } |
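| // Illustrative example of the corrections above: let the exact value be
| // 1.2500001, rounded first to 5 digits (1.2500, with
| // is_inexact_lt_midpoint0 = 1) and then to 2 digits; the second rounding
| // sees an exact midpoint and alone would return 1.2 with
| // is_midpoint_gt_even = 1, but the res = res + 1 branch restores 1.3,
| // which is what a single rounding of 1.2500001 to 2 digits produces.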
| // res contains the correct result |
| // apply correction if not rounding to nearest |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e4, &res, ptrfpsf); |
| } |
| if (is_midpoint_lt_even || is_midpoint_gt_even || |
| is_inexact_lt_midpoint || is_inexact_gt_midpoint) { |
| // set the inexact flag |
| *ptrfpsf |= INEXACT_EXCEPTION; |
| if (is_tiny) |
| *ptrfpsf |= UNDERFLOW_EXCEPTION; |
| } |
| |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| *ptrres = res; |
| return; |
| } |
| |
| |
| #if DECIMAL_CALL_BY_REFERENCE |
| static void |
| bid128_ext_fma (int *ptr_is_midpoint_lt_even, |
| int *ptr_is_midpoint_gt_even, |
| int *ptr_is_inexact_lt_midpoint, |
| int *ptr_is_inexact_gt_midpoint, UINT128 * pres, |
| UINT128 * px, UINT128 * py, |
| UINT128 * |
| pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM |
| _EXC_INFO_PARAM) { |
| UINT128 x = *px, y = *py, z = *pz; |
| #if !DECIMAL_GLOBAL_ROUNDING |
| unsigned int rnd_mode = *prnd_mode; |
| #endif |
| #else |
| static UINT128 |
| bid128_ext_fma (int *ptr_is_midpoint_lt_even, |
| int *ptr_is_midpoint_gt_even, |
| int *ptr_is_inexact_lt_midpoint, |
| int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, |
| UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM |
| _EXC_MASKS_PARAM _EXC_INFO_PARAM) { |
| #endif |
| |
| UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; |
| UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; |
| UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; |
| int true_p_exp; |
| UINT128 C1, C2, C3; |
| UINT256 C4; |
| int q1 = 0, q2 = 0, q3 = 0, q4; |
| int e1, e2, e3, e4; |
| int scale, ind, delta, x0; |
| int p34 = P34; // used to modify the limit on the number of digits |
| BID_UI64DOUBLE tmp; |
| int x_nr_bits, y_nr_bits, z_nr_bits; |
| unsigned int save_fpsf; |
| int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; |
| int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; |
| int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; |
| int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; |
| int incr_exp = 0; |
| int lsb; |
| int lt_half_ulp = 0; |
| int eq_half_ulp = 0; |
| int gt_half_ulp = 0; |
| int is_tiny = 0; |
| UINT64 R64, tmp64; |
| UINT128 P128, R128; |
| UINT192 P192, R192; |
| UINT256 R256; |
| |
| // the following are based on the table of special cases for fma; the NaN |
| // behavior is similar to that of the IA-64 Architecture fma |
| |
| // identify cases where at least one operand is NaN |
| |
| BID_SWAP128 (x); |
| BID_SWAP128 (y); |
| BID_SWAP128 (z); |
| if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN |
| // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) |
| // check first for non-canonical NaN payload |
| if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
| (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
| (y.w[0] > 0x38c15b09ffffffffull))) { |
| y.w[1] = y.w[1] & 0xffffc00000000000ull; |
| y.w[0] = 0x0ull; |
| } |
| if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| // return quiet (y) |
| res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] |
| res.w[0] = y.w[0]; |
| } else { // y is QNaN |
| // return y |
| res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] |
| res.w[0] = y.w[0]; |
| // if z = SNaN or x = SNaN signal invalid exception |
| if ((z.w[1] & MASK_SNAN) == MASK_SNAN || |
| (x.w[1] & MASK_SNAN) == MASK_SNAN) { |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN |
| // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) |
| // check first for non-canonical NaN payload |
| if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
| (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
| (z.w[0] > 0x38c15b09ffffffffull))) { |
| z.w[1] = z.w[1] & 0xffffc00000000000ull; |
| z.w[0] = 0x0ull; |
| } |
| if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| // return quiet (z) |
| res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] |
| res.w[0] = z.w[0]; |
| } else { // z is QNaN |
| // return z |
| res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] |
| res.w[0] = z.w[0]; |
| // if x = SNaN signal invalid exception |
| if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
| // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) |
| // check first for non-canonical NaN payload |
| if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || |
| (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && |
| (x.w[0] > 0x38c15b09ffffffffull))) { |
| x.w[1] = x.w[1] & 0xffffc00000000000ull; |
| x.w[0] = 0x0ull; |
| } |
| if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| // return quiet (x) |
| res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] |
| res.w[0] = x.w[0]; |
| } else { // x is QNaN |
| // return x |
| res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] |
| res.w[0] = x.w[0]; |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
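| // Illustrative example: a QNaN operand whose payload field encodes 10^33
| // exceeds the canonical maximum of 10^33 - 1, so the checks above zero
| // out its payload before it is returned; a canonical payload (at most
| // 10^33 - 1) is passed through unchanged, with only G[6]-G[16] cleared.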
| // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check |
| // for non-canonical values |
| |
| x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
| C1.w[1] = x.w[1] & MASK_COEFF; |
| C1.w[0] = x.w[0]; |
| if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf |
| // if x is not infinity check for non-canonical values - treated as zero |
| if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
| // non-canonical |
| x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
| C1.w[1] = 0; // significand high |
| C1.w[0] = 0; // significand low |
| } else { // G0_G1 != 11 |
| x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits |
| if (C1.w[1] > 0x0001ed09bead87c0ull || |
| (C1.w[1] == 0x0001ed09bead87c0ull && |
| C1.w[0] > 0x378d8e63ffffffffull)) { |
| // x is non-canonical if coefficient is larger than 10^34 -1 |
| C1.w[1] = 0; |
| C1.w[0] = 0; |
| } else { // canonical |
| ; |
| } |
| } |
| } |
| y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
| C2.w[1] = y.w[1] & MASK_COEFF; |
| C2.w[0] = y.w[0]; |
| if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf |
| // if y is not infinity check for non-canonical values - treated as zero |
| if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
| // non-canonical |
| y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
| C2.w[1] = 0; // significand high |
| C2.w[0] = 0; // significand low |
| } else { // G0_G1 != 11 |
| y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits |
| if (C2.w[1] > 0x0001ed09bead87c0ull || |
| (C2.w[1] == 0x0001ed09bead87c0ull && |
| C2.w[0] > 0x378d8e63ffffffffull)) { |
| // y is non-canonical if coefficient is larger than 10^34 -1 |
| C2.w[1] = 0; |
| C2.w[0] = 0; |
| } else { // canonical |
| ; |
| } |
| } |
| } |
| z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
| C3.w[1] = z.w[1] & MASK_COEFF; |
| C3.w[0] = z.w[0]; |
| if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf |
| // if z is not infinity check for non-canonical values - treated as zero |
| if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 |
| // non-canonical |
| z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits |
| C3.w[1] = 0; // significand high |
| C3.w[0] = 0; // significand low |
| } else { // G0_G1 != 11 |
| z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits |
| if (C3.w[1] > 0x0001ed09bead87c0ull || |
| (C3.w[1] == 0x0001ed09bead87c0ull && |
| C3.w[0] > 0x378d8e63ffffffffull)) { |
| // z is non-canonical if coefficient is larger than 10^34 -1 |
| C3.w[1] = 0; |
| C3.w[0] = 0; |
| } else { // canonical |
| ; |
| } |
| } |
| } |
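| // Illustrative example: an operand encoded with G0 G1 = 11, or with
| // G0 G1 != 11 and a coefficient above 10^34 - 1, is non-canonical; in
| // both cases the code above keeps its sign and (suitably extracted)
| // exponent but replaces the coefficient with 0, so such an operand
| // behaves exactly like a zero of the same sign and exponent.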
| |
| p_sign = x_sign ^ y_sign; // sign of the product |
| |
| // identify cases where at least one operand is infinity |
| |
| if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf |
| if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf |
| if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf |
| if (p_sign == z_sign) { |
| res.w[1] = z_sign | MASK_INF; |
| res.w[0] = 0x0; |
| } else { |
| // return QNaN Indefinite |
| res.w[1] = 0x7c00000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } |
| } else { // z = 0 or z = f |
| res.w[1] = p_sign | MASK_INF; |
| res.w[0] = 0x0; |
| } |
| } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f |
| if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf |
| if (p_sign == z_sign) { |
| res.w[1] = z_sign | MASK_INF; |
| res.w[0] = 0x0; |
| } else { |
| // return QNaN Indefinite |
| res.w[1] = 0x7c00000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } |
| } else { // z = 0 or z = f |
| res.w[1] = p_sign | MASK_INF; |
| res.w[0] = 0x0; |
| } |
| } else { // y = 0 |
| // return QNaN Indefinite |
| res.w[1] = 0x7c00000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf |
| if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf |
| // x = f, necessarily |
| if ((p_sign != z_sign) |
| || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { |
| // return QNaN Indefinite |
| res.w[1] = 0x7c00000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } else { |
| res.w[1] = z_sign | MASK_INF; |
| res.w[0] = 0x0; |
| } |
| } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 |
| // z = 0, f, inf |
| // return QNaN Indefinite |
| res.w[1] = 0x7c00000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| // set invalid flag |
| *pfpsf |= INVALID_EXCEPTION; |
| } else { |
| // x = f and z = 0, f, necessarily |
| res.w[1] = p_sign | MASK_INF; |
| res.w[0] = 0x0; |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf |
| // x = 0, f and y = 0, f, necessarily |
| res.w[1] = z_sign | MASK_INF; |
| res.w[0] = 0x0; |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
| |
| true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; |
| if (true_p_exp < -6176) |
| p_exp = 0; // cannot be less than EXP_MIN |
| else |
| p_exp = (UINT64) (true_p_exp + 6176) << 49; |
| |
| if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
| (C2.w[1] == 0x0 && C2.w[0] == 0x0)) &&
| C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
| // the result is 0 |
| if (p_exp < z_exp) |
| res.w[1] = p_exp; // preferred exponent |
| else |
| res.w[1] = z_exp; // preferred exponent |
| if (p_sign == z_sign) { |
| res.w[1] |= z_sign; |
| res.w[0] = 0x0; |
| } else { // x * y and z have opposite signs |
| if (rnd_mode == ROUNDING_DOWN) { |
| // res = -0.0 |
| res.w[1] |= MASK_SIGN; |
| res.w[0] = 0x0; |
| } else { |
| // res = +0.0 |
| // res.w[1] |= 0x0; |
| res.w[0] = 0x0; |
| } |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
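| // Illustrative example: for x = +0, y = +5, z = -0 the product sign is +
| // and z is negative, so the branch above returns -0 under ROUNDING_DOWN
| // and +0 under all other rounding modes, using the smaller of the two
| // candidate (preferred) exponents.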
| // from this point on, we may need to know the number of decimal digits |
| // in the significands of x, y, z when x, y, z != 0 |
| |
| if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) |
| // q1 = nr. of decimal digits in x |
| // determine first the nr. of bits in x |
| if (C1.w[1] == 0) { |
| if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 |
| // split the 64-bit value in two 32-bit halves to avoid rounding errors |
| if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 |
| tmp.d = (double) (C1.w[0] >> 32); // exact conversion |
| x_nr_bits = |
| 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } else { // x < 2^32 |
| tmp.d = (double) (C1.w[0]); // exact conversion |
| x_nr_bits = |
| 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // if x < 2^53 |
| tmp.d = (double) C1.w[0]; // exact conversion |
| x_nr_bits = |
| 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
| tmp.d = (double) C1.w[1]; // exact conversion |
| x_nr_bits = |
| 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| q1 = nr_digits[x_nr_bits - 1].digits; |
| if (q1 == 0) { |
| q1 = nr_digits[x_nr_bits - 1].digits1; |
| if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || |
| (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && |
| C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) |
| q1++; |
| } |
| } |
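| // Illustrative example (assuming nr_digits[] maps a bit count to a digit
| // count, with digits = 0 flagging bit counts that straddle a power of
| // ten, resolved by the threshold_hi/threshold_lo boundary): for
| // C1 = 999 the exact double conversion gives x_nr_bits = 10; a 10-bit
| // value has 3 or 4 decimal digits, so digits = 0, digits1 = 3, and since
| // 999 < 10^3 = threshold, q1 = 3.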
| |
| if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) |
| if (C2.w[1] == 0) { |
| if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 |
| // split the 64-bit value in two 32-bit halves to avoid rounding errors |
| if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 |
| tmp.d = (double) (C2.w[0] >> 32); // exact conversion |
| y_nr_bits = |
| 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } else { // y < 2^32 |
| tmp.d = (double) C2.w[0]; // exact conversion |
| y_nr_bits = |
| ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // if y < 2^53 |
| tmp.d = (double) C2.w[0]; // exact conversion |
| y_nr_bits = |
| ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) |
| tmp.d = (double) C2.w[1]; // exact conversion |
| y_nr_bits = |
| 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| |
| q2 = nr_digits[y_nr_bits].digits; |
| if (q2 == 0) { |
| q2 = nr_digits[y_nr_bits].digits1; |
| if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || |
| (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && |
| C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) |
| q2++; |
| } |
| } |
| |
| if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) |
| if (C3.w[1] == 0) { |
| if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 |
| // split the 64-bit value in two 32-bit halves to avoid rounding errors |
| if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 |
| tmp.d = (double) (C3.w[0] >> 32); // exact conversion |
| z_nr_bits = |
| 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } else { // z < 2^32 |
| tmp.d = (double) C3.w[0]; // exact conversion |
| z_nr_bits = |
| ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // if z < 2^53 |
| tmp.d = (double) C3.w[0]; // exact conversion |
| z_nr_bits = |
| ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1]) |
| tmp.d = (double) C3.w[1]; // exact conversion |
| z_nr_bits = |
| 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); |
| } |
| |
| q3 = nr_digits[z_nr_bits].digits; |
| if (q3 == 0) { |
| q3 = nr_digits[z_nr_bits].digits1; |
| if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || |
| (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && |
| C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) |
| q3++; |
| } |
| } |
| |
| if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || |
| (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { |
| // x = 0 or y = 0 |
| // z = f, necessarily; for 0 + z return z, with the preferred exponent |
| // the result is z, but need to get the preferred exponent |
| if (z_exp <= p_exp) { // the preferred exponent is z_exp |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; |
| res.w[0] = C3.w[0]; |
| } else { // if (p_exp < z_exp) the preferred exponent is p_exp |
| // return (C3 * 10^scale) * 10^(z_exp - scale) |
| // where scale = min (p34-q3, (z_exp-p_exp) >> 49) |
| scale = p34 - q3; |
| ind = (z_exp - p_exp) >> 49; |
| if (ind < scale) |
| scale = ind; |
| if (scale == 0) { |
| res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant |
| res.w[0] = z.w[0]; |
| } else if (q3 <= 19) { // z fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C3.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); |
| } |
| } else { // z fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C3 |
| __mul_128x64_to_128 (res, ten2k64[scale], C3); |
| } |
| // subtract scale from the exponent |
| z_exp = z_exp - ((UINT64) scale << 49); |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } else { |
| ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f |
| } |
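| // Illustrative example: for x * y = 0 with p_exp corresponding to -2 and
| // z = 5 * 10^6 (q3 = 1), scale = min (34 - 1, 6 - (-2)) = 8, so the code
| // above returns 500000000 * 10^-2: numerically equal to z, but carrying
| // the preferred (smaller) exponent.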
| |
| e1 = (x_exp >> 49) - 6176; // unbiased exponent of x |
| e2 = (y_exp >> 49) - 6176; // unbiased exponent of y |
| e3 = (z_exp >> 49) - 6176; // unbiased exponent of z |
| e4 = e1 + e2; // unbiased exponent of the exact x * y |
| |
| // calculate C1 * C2 and its number of decimal digits, q4 |
| |
| // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits |
| // where 2 <= q1 + q2 <= 68 |
| // calculate C4 = C1 * C2 and determine q |
| C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; |
| if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits |
| C4.w[0] = C1.w[0] * C2.w[0]; |
| // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 |
| if (C4.w[0] < ten2k64[q1 + q2 - 1]) |
| q4 = q1 + q2 - 1; // q4 in [1, 18] |
| else |
| q4 = q1 + q2; // q4 in [2, 19] |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; |
| } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits |
| // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits |
| __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); |
| // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 |
| if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; |
| q4 = 19; // 19 = q1 + q2 - 1 |
| } else { |
| // if (C4.w[1] == 0) |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; |
| // else |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; |
| q4 = 20; // 20 = q1 + q2 |
| } |
| } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 |
| // C4 = C1 * C2 fits in 64 or 128 bits |
| // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) |
| // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits |
| if (q1 <= 19) { |
| __mul_128x64_to_128 (C4, C1.w[0], C2); |
| } else { // q2 <= 19 |
| __mul_128x64_to_128 (C4, C2.w[0], C1); |
| } |
| // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 |
| if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || |
| (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && |
| C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { |
| // if (C4.w[1] == 0) // q4 = 20, necessarily |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; |
| // else |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; |
| q4 = q1 + q2 - 1; // q4 in [20, 37] |
| } else { |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; |
| q4 = q1 + q2; // q4 in [21, 38] |
| } |
| } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits |
| // both C1 and C2 fit in 128 bits (actually in 113 bits) |
| // may replace this by 128x128_to192 |
| __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 |
| // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 |
| if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || |
| (C4.w[1] == ten2k128[18].w[1] |
| && C4.w[0] < ten2k128[18].w[0]))) { |
| // 18 = 38 - 20 = q1+q2-1 - 20 |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; |
| q4 = 38; // 38 = q1 + q2 - 1 |
| } else { |
| // if (C4.w[2] == 0) |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; |
| // else |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
| q4 = 39; // 39 = q1 + q2 |
| } |
| } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 |
| // C4 = C1 * C2 fits in 128 or 192 bits |
| // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) |
| // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one |
| // may fit in 64 bits |
| if (C1.w[1] == 0) { // C1 fits in 64 bits |
| // __mul_64x128_full (REShi64, RESlo128, A64, B128) |
| __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); |
| } else if (C2.w[1] == 0) { // C2 fits in 64 bits |
| // __mul_64x128_full (REShi64, RESlo128, A64, B128) |
| __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); |
| } else { // both C1 and C2 require 128 bits |
| // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); |
| __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 |
| } |
| // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 |
| if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || |
| (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && |
| (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || |
| (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && |
| C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { |
| // if (C4.w[2] == 0) // q4 = 39, necessarily |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; |
| // else |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
| q4 = q1 + q2 - 1; // q4 in [39, 56] |
| } else { |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
| q4 = q1 + q2; // q4 in [40, 57] |
| } |
| } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits |
| // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one |
| // may fit in 64 bits |
| if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits |
| __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 |
| } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits |
| __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 |
| } else { // C1 * C2 will fit in 192 bits or in 256 bits |
| __mul_128x128_to_256 (C4, C1, C2); |
| } |
| // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 |
| if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || |
| (C4.w[2] == ten2k256[18].w[2] |
| && (C4.w[1] < ten2k256[18].w[1] |
| || (C4.w[1] == ten2k256[18].w[1] |
| && C4.w[0] < ten2k256[18].w[0]))))) { |
| // 18 = 57 - 39 = q1+q2-1 - 39 |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
| q4 = 57; // 57 = q1 + q2 - 1 |
| } else { |
| // if (C4.w[3] == 0) |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
| // else |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; |
| q4 = 58; // 58 = q1 + q2 |
| } |
| } else { // if 59 <= q1 + q2 <= 68 |
| // C4 = C1 * C2 fits in 192 or 256 bits |
| // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) |
| // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in |
| // 64 bits |
| // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); |
| __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 |
| // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 |
| if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || |
| (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && |
| (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || |
| (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && |
| (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || |
| (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && |
| C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { |
| // if (C4.w[3] == 0) // q4 = 58, necessarily |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
| // else |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; |
| q4 = q1 + q2 - 1; // q4 in [58, 67] |
| } else { |
| // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; |
| q4 = q1 + q2; // q4 in [59, 68] |
| } |
| } |
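| // Illustrative example: for C1 = C2 = 10^18 (q1 = q2 = 19) the
| // 21 <= q1 + q2 <= 38 branch applies; C4 = 10^36 < ten2k128[17] = 10^37,
| // so q4 = q1 + q2 - 1 = 37 -- the common case where the product of two
| // q-digit coefficients has only 2q - 1 digits.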
| |
| if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 |
| save_fpsf = *pfpsf; // sticky bits - caller value must be preserved |
| *pfpsf = 0; |
| |
| if (q4 > p34) { |
| |
| // truncate C4 to p34 digits into res |
| // x0 = q4 - p34, 1 <= x0 <= 34 because 35 <= q4 <= 68
| x0 = q4 - p34; |
| if (q4 <= 38) { |
| P128.w[1] = C4.w[1]; |
| P128.w[0] = C4.w[0]; |
| round128_19_38 (q4, x0, P128, &res, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| } else if (q4 <= 57) { // 39 <= q4 <= 57
| P192.w[2] = C4.w[2]; |
| P192.w[1] = C4.w[1]; |
| P192.w[0] = C4.w[0]; |
| round192_39_57 (q4, x0, P192, &R192, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| res.w[0] = R192.w[0]; |
| res.w[1] = R192.w[1]; |
| } else { // if (q4 <= 68) |
| round256_58_76 (q4, x0, C4, &R256, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| res.w[0] = R256.w[0]; |
| res.w[1] = R256.w[1]; |
| } |
| e4 = e4 + x0; |
| if (incr_exp) { |
| e4 = e4 + 1; |
| } |
| q4 = p34; |
| // res is now the coefficient of the result rounded to the destination |
| // precision, with unbounded exponent; the exponent is e4; q4=digits(res) |
| } else { // if (q4 <= p34) |
| // C4 * 10^e4 is the result rounded to the destination precision, with |
| // unbounded exponent (which is exact) |
| |
| if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { |
| // e4 is too large, but can be brought within range by scaling up C4 |
| scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 |
| // res = (C4 * 10^scale) * 10^expmax |
| if (q4 <= 19) { // C4 fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C4.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C4.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); |
| } |
| } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C4
| __mul_128x64_to_128 (res, ten2k64[scale], C4); |
| } |
| e4 = e4 - scale; // expmax |
| q4 = q4 + scale; |
| } else { |
| res.w[1] = C4.w[1]; |
| res.w[0] = C4.w[0]; |
| } |
| // res is the coefficient of the result rounded to the destination |
| // precision, with unbounded exponent (it has q4 digits); the exponent |
| // is e4 (exact result) |
| } |
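| // Illustrative example: if C4 = 5 (q4 = 1) and e4 = expmax + 3, the
| // scaling above computes res = 5 * ten2k64[3] = 5000 with e4 = expmax
| // and q4 = 4: the same value, brought back into the representable
| // exponent range without any rounding.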
| |
| // check for overflow |
| if (q4 + e4 > p34 + expmax) { |
| if (rnd_mode == ROUNDING_TO_NEAREST) { |
| res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| } else { |
| res.w[1] = p_sign | res.w[1]; |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e4, &res, pfpsf); |
| } |
| *pfpsf |= save_fpsf; |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
| // check for underflow |
| if (q4 + e4 < expmin + P34) { |
| is_tiny = 1; // the result is tiny |
| if (e4 < expmin) { |
| // if e4 < expmin, we must truncate more of res |
| x0 = expmin - e4; // x0 >= 1 |
| is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; |
| is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; |
| is_midpoint_lt_even0 = is_midpoint_lt_even; |
| is_midpoint_gt_even0 = is_midpoint_gt_even; |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 0; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| // the number of decimal digits in res is q4 |
| if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits |
| if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 |
| round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| if (incr_exp) { |
| // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 |
| R64 = ten2k64[q4 - x0]; |
| } |
| // res.w[1] = 0; (from above) |
| res.w[0] = R64; |
| } else { // if (q4 <= 34) |
| // 19 <= q4 <= 34
| P128.w[1] = res.w[1]; |
| P128.w[0] = res.w[0]; |
| round128_19_38 (q4, x0, P128, &res, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| if (incr_exp) { |
| // increase coefficient by a factor of 10; this will be <= 10^33 |
| // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 |
| if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 |
| // res.w[1] = 0; |
| res.w[0] = ten2k64[q4 - x0]; |
| } else { // 20 <= q4 - x0 <= 37 |
| res.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
| res.w[1] = ten2k128[q4 - x0 - 20].w[1]; |
| } |
| } |
| } |
| e4 = e4 + x0; // expmin |
| } else if (x0 == q4) { |
| // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin |
| // determine relationship with 1/2 ulp |
| if (q4 <= 19) { |
| if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| is_inexact_lt_midpoint = 1; |
| } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| is_midpoint_gt_even = 1; |
| } else { // > 1/2 ulp |
| // gt_half_ulp = 1; |
| is_inexact_gt_midpoint = 1; |
| } |
| } else { // if (q4 <= 34) |
| if (res.w[1] < midpoint128[q4 - 20].w[1] || |
| (res.w[1] == midpoint128[q4 - 20].w[1] && |
| res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| is_inexact_lt_midpoint = 1; |
| } else if (res.w[1] == midpoint128[q4 - 20].w[1] && |
| res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| is_midpoint_gt_even = 1; |
| } else { // > 1/2 ulp |
| // gt_half_ulp = 1; |
| is_inexact_gt_midpoint = 1; |
| } |
| } |
| if (lt_half_ulp || eq_half_ulp) { |
| // res = +0.0 * 10^expmin |
| res.w[1] = 0x0000000000000000ull; |
| res.w[0] = 0x0000000000000000ull; |
| } else { // if (gt_half_ulp) |
| // res = +1 * 10^expmin |
| res.w[1] = 0x0000000000000000ull; |
| res.w[0] = 0x0000000000000001ull; |
| } |
| e4 = expmin; |
| } else { // if (x0 > q4) |
| // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin |
| res.w[1] = 0; |
| res.w[0] = 0; |
| e4 = expmin; |
| is_inexact_lt_midpoint = 1; |
| } |
| // avoid a double rounding error |
| if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && |
| is_midpoint_lt_even) { // double rounding error upward |
| // res = res - 1 |
| res.w[0]--; |
| if (res.w[0] == 0xffffffffffffffffull) |
| res.w[1]--; |
| // Note: a double rounding error upward is not possible; for this |
| // the result after the first rounding would have to be 99...95 |
| // (35 digits in all), possibly followed by a number of zeros; this |
| // is not possible for f * f + 0
| is_midpoint_lt_even = 0; |
| is_inexact_lt_midpoint = 1; |
| } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && |
| is_midpoint_gt_even) { // double rounding error downward |
| // res = res + 1 |
| res.w[0]++; |
| if (res.w[0] == 0) |
| res.w[1]++; |
| is_midpoint_gt_even = 0; |
| is_inexact_gt_midpoint = 1; |
| } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && |
| !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
| // if this second rounding was exact the result may still be |
| // inexact because of the first rounding |
| if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { |
| is_inexact_gt_midpoint = 1; |
| } |
| if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { |
| is_inexact_lt_midpoint = 1; |
| } |
| } else if (is_midpoint_gt_even && |
| (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { |
| // pulled up to a midpoint |
| is_inexact_lt_midpoint = 1; |
| is_inexact_gt_midpoint = 0; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| } else if (is_midpoint_lt_even && |
| (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { |
| // pulled down to a midpoint |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 1; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| } else { |
| ; |
| } |
| } else { // if e4 >= emin then q4 < P and the result is tiny and exact |
| if (e3 < e4) { |
| // if (e3 < e4) the preferred exponent is e3 |
| // return (C4 * 10^scale) * 10^(e4 - scale) |
| // where scale = min (p34-q4, (e4 - e3)) |
| scale = p34 - q4; |
| ind = e4 - e3; |
| if (ind < scale) |
| scale = ind; |
| if (scale == 0) { |
| ; // res and e4 are unchanged |
| } else if (q4 <= 19) { // C4 fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 res.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 res.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); |
| } |
| } else { // res fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * res
| __mul_128x64_to_128 (res, ten2k64[scale], res); |
| } |
| // subtract scale from the exponent |
| e4 = e4 - scale; |
| } |
| } |
| |
| // check for inexact result |
| if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || |
| is_midpoint_lt_even || is_midpoint_gt_even) { |
| // set the inexact flag and the underflow flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| *pfpsf |= UNDERFLOW_EXCEPTION; |
| } |
| res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e4, &res, pfpsf); |
| } |
| *pfpsf |= save_fpsf; |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
| // no overflow, and no underflow for rounding to nearest |
| res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; |
| |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e4, &res, pfpsf); |
| // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) |
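| // (as 128-bit integers, 10^33 = 0x0000314dc6448d93_38c15b0a00000000 |
| // and 10^34 = 0x0001ed09bead87c0_378d8e6400000000; coefficient < |
| // 10^33 means the significand has fewer than 34 digits) |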
| if (e4 == expmin) { |
| if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || |
| ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && |
| res.w[0] < 0x38c15b0a00000000ull)) { |
| is_tiny = 1; |
| } |
| } |
| } |
| |
| if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || |
| is_midpoint_lt_even || is_midpoint_gt_even) { |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| if (is_tiny) |
| *pfpsf |= UNDERFLOW_EXCEPTION; |
| } |
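| // for exact results IEEE 754-2008 specifies a preferred exponent; |
| // for fma it is min (Q(x) + Q(y), Q(z)), so when z_exp < p_exp the |
| // coefficient is scaled up to bring the exponent down toward z_exp |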
| |
| if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact |
| // need to ensure that the result has the preferred exponent |
| p_exp = res.w[1] & MASK_EXP; |
| if (z_exp < p_exp) { // the preferred exponent is z_exp |
| // significand of res in C3 |
| C3.w[1] = res.w[1] & MASK_COEFF; |
| C3.w[0] = res.w[0]; |
| // the number of decimal digits of x * y is q4 <= 34 |
| // Note: the coefficient fits in 128 bits |
| |
| // return (C3 * 10^scale) * 10^(p_exp - scale) |
| // where scale = min (p34-q4, (p_exp-z_exp) >> 49) |
| scale = p34 - q4; |
| ind = (p_exp - z_exp) >> 49; |
| if (ind < scale) |
| scale = ind; |
| // subtract scale from the exponent |
| p_exp = p_exp - ((UINT64) scale << 49); |
| if (scale == 0) { |
| ; // leave res unchanged |
| } else if (q4 <= 19) { // x * y fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C3.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); |
| } |
| res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; |
| } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C3 |
| __mul_128x64_to_128 (res, ten2k64[scale], C3); |
| res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; |
| } |
| } // else leave the result as it is, because p_exp <= z_exp |
| } |
| *pfpsf |= save_fpsf; |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } // else we have f * f + f |
| |
| // continue with x = f, y = f, z = f |
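| // delta = (q3 + e3) - (q4 + e4) compares the magnitudes of |
| // z = C3 * 10^e3 and x * y = C4 * 10^e4: a q-digit coefficient |
| // C * 10^e lies in [10^(q+e-1), 10^(q+e)); for example (illustrative |
| // only) z = 123 * 10^5 and x * y = 4567 * 10^(-2) give |
| // delta = (3 + 5) - (4 - 2) = 6, i.e. the most significant digit of |
| // x * y is six decimal positions below that of z |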
| |
| delta = q3 + e3 - q4 - e4; |
| delta_ge_zero: |
| if (delta >= 0) { |
| |
| if (p34 <= delta - 1 || // Case (1') |
| (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) |
| // check for overflow, which can occur only in Case (1') |
| if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { |
| // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary |
| // condition for (q3 + e3) > (p34 + expmax) |
| if (rnd_mode == ROUNDING_TO_NEAREST) { |
| res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| } else { |
| if (p_sign == z_sign) { |
| is_inexact_lt_midpoint = 1; |
| } else { |
| is_inexact_gt_midpoint = 1; |
| } |
| // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) |
| scale = p34 - q3; |
| if (scale == 0) { |
| res.w[1] = z_sign | C3.w[1]; |
| res.w[0] = C3.w[0]; |
| } else { |
| if (q3 <= 19) { // C3 fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C3.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C3.w[0], |
| ten2k128[scale - 20]); |
| } |
| } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C3 |
| __mul_128x64_to_128 (res, ten2k64[scale], C3); |
| } |
| // the coefficient in res has q3 + scale = p34 digits |
| } |
| e3 = e3 - scale; |
| res.w[1] = z_sign | res.w[1]; |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e3, &res, pfpsf); |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
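| // res = z in Cases (1') and (1''A), except possibly for a borrow |
| // across a decade in the special case treated below (C3 * 10^scale = |
| // 10^(q3+scale-1) with p_sign != z_sign); x * y only determines the |
| // rounding direction and the inexact indicators |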
| // res = z |
| if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) |
| // return (C3 * 10^scale) * 10^(z_exp - scale) |
| // where scale = min (p34-q3, e3-expmin) |
| scale = p34 - q3; |
| ind = e3 + 6176; |
| if (ind < scale) |
| scale = ind; |
| if (scale == 0) { |
| res.w[1] = C3.w[1]; |
| res.w[0] = C3.w[0]; |
| } else if (q3 <= 19) { // z fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C3.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); |
| } |
| } else { // z fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C3 |
| __mul_128x64_to_128 (res, ten2k64[scale], C3); |
| } |
| // the coefficient in res has q3 + scale digits |
| // subtract scale from the exponent |
| z_exp = z_exp - ((UINT64) scale << 49); |
| e3 = e3 - scale; |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| if (scale + q3 < p34) |
| *pfpsf |= UNDERFLOW_EXCEPTION; |
| } else { |
| scale = 0; |
| res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; |
| res.w[0] = C3.w[0]; |
| } |
| |
| // use the following to avoid double rounding errors when operating on |
| // mixed formats in rounding to nearest, and for correcting the result |
| // if not rounding to nearest |
| if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { |
| // there is a gap of exactly one digit between the scaled C3 and C4 |
| // C3 * 10^scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is a special case |
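| // Illustrative sketch with p34 = 4 (assumed only for this note): for |
| // C3 * 10^scale = 2000 and x * y = -6 * 10^(e3-2) the exact sum |
| // 1999.94 * 10^e3 still rounds to 2000 * 10^e3 (only the indicators |
| // are set), but for C3 * 10^scale = 1000 the sum 999.94 * 10^e3 |
| // rounds to 9999 * 10^(e3-1), crossing into the decade below; the |
| // power-of-ten test follows |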
| if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || |
| (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || |
| (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || |
| C3.w[0] != ten2k128[q3 - 21].w[0]))) { |
| // C3 * 10^scale != 10^(q3+scale-1), i.e. C3 != 10^(q3-1) |
| // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || |
| // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 |
| is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value |
| } else { // if C3 * 10^scale = 10^(q3+scale-1) |
| // e3 = (z_exp >> 49) - 6176 was already calculated above |
| // the result is always inexact |
| if (q4 == 1) { |
| R64 = C4.w[0]; |
| } else { |
| // if q4 > 1 then truncate C4 from q4 digits to 1 digit; |
| // x = q4-1, 1 <= x <= 67 and check if this operation is exact |
| if (q4 <= 18) { // 2 <= q4 <= 18 |
| round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| } else if (q4 <= 38) { |
| P128.w[1] = C4.w[1]; |
| P128.w[0] = C4.w[0]; |
| round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| R64 = R128.w[0]; // one decimal digit |
| } else if (q4 <= 57) { |
| P192.w[2] = C4.w[2]; |
| P192.w[1] = C4.w[1]; |
| P192.w[0] = C4.w[0]; |
| round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| R64 = R192.w[0]; // one decimal digit |
| } else { // if (q4 <= 68) |
| round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| R64 = R256.w[0]; // one decimal digit |
| } |
| if (incr_exp) { |
| R64 = 10; |
| } |
| } |
| if (q4 == 1 && C4.w[0] == 5) { |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 0; |
| is_midpoint_lt_even = 1; |
| is_midpoint_gt_even = 0; |
| } else if ((e3 == expmin) || |
| R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { |
| // result does not change |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 1; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| } else { |
| is_inexact_lt_midpoint = 1; |
| is_inexact_gt_midpoint = 0; |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 0; |
| // result decremented is 10^(q3+scale) - 1 |
| if ((q3 + scale) <= 19) { |
| res.w[1] = 0; |
| res.w[0] = ten2k64[q3 + scale]; |
| } else { // if ((q3 + scale + 1) <= 35) |
| res.w[1] = ten2k128[q3 + scale - 20].w[1]; |
| res.w[0] = ten2k128[q3 + scale - 20].w[0]; |
| } |
| res.w[0] = res.w[0] - 1; // borrow never occurs |
| z_exp = z_exp - EXP_P1; |
| e3 = e3 - 1; |
| res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; |
| } |
| if (e3 == expmin) { |
| if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { |
| ; // result not tiny (in round-to-nearest mode) |
| } else { |
| *pfpsf |= UNDERFLOW_EXCEPTION; |
| } |
| } |
| } // end 10^(q3+scale-1) |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| } else { |
| if (p_sign == z_sign) { |
| // if (z_sign), set as if for absolute value |
| is_inexact_lt_midpoint = 1; |
| } else { // if (p_sign != z_sign) |
| // if (z_sign), set as if for absolute value |
| is_inexact_gt_midpoint = 1; |
| } |
| *pfpsf |= INEXACT_EXCEPTION; |
| } |
| // the result is always inexact (the inexact flag was set above) |
| // Determine tininess: |
| // if (exp > expmin) |
| // the result is not tiny |
| // else // if exp = emin |
| // if (q3 + scale < p34) |
| // the result is tiny |
| // else // if (q3 + scale = p34) |
| // if (C3 * 10^scale > 10^33) |
| // the result is not tiny |
| // else // if C3 * 10^scale = 10^33 |
| // if (xy * z > 0) |
| // the result is not tiny |
| // else // if (xy * z < 0) |
| // if (z > 0) |
| // if rnd_mode != RP |
| // the result is tiny |
| // else // if RP |
| // the result is not tiny |
| // else // if (z < 0) |
| // if rnd_mode != RM |
| // the result is tiny |
| // else // if RM |
| // the result is not tiny |
| // endif |
| // endif |
| // endif |
| // endif |
| // endif |
| // endif |
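| // the test below implements this pseudocode: underflow is signaled |
| // when e3 = expmin and either q3 + scale < p34, or the result is |
| // exactly 10^33 with x * y of opposite sign and a rounding direction |
| // that does not keep the result at 10^33 |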
| if ((e3 == expmin && (q3 + scale) < p34) || |
| (e3 == expmin && (q3 + scale) == p34 && |
| (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high |
| res.w[0] == 0x38c15b0a00000000ull && // 10^33_low |
| z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || |
| (z_sign && rnd_mode != ROUNDING_DOWN)))) { |
| *pfpsf |= UNDERFLOW_EXCEPTION; |
| } |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e3, &res, pfpsf); |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| |
| } else if (p34 == delta) { // Case (1''B) |
| |
| // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 |
| // and C3 can be scaled up to p34 digits if needed |
| |
| // scale C3 to p34 digits if needed |
| scale = p34 - q3; // 0 <= scale <= p34 - 1 |
| if (scale == 0) { |
| res.w[1] = C3.w[1]; |
| res.w[0] = C3.w[0]; |
| } else if (q3 <= 19) { // z fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C3.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); |
| } |
| } else { // z fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C3 |
| __mul_128x64_to_128 (res, ten2k64[scale], C3); |
| } |
| // subtract scale from the exponent |
| z_exp = z_exp - ((UINT64) scale << 49); |
| e3 = e3 - scale; |
| // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits |
| |
| // determine whether x * y is less than, equal to, or greater than |
| // 1/2 ulp (z) |
| if (q4 <= 19) { |
| if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| } else { // > 1/2 ulp |
| gt_half_ulp = 1; |
| } |
| } else if (q4 <= 38) { |
| if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || |
| (C4.w[1] == midpoint128[q4 - 20].w[1] && |
| C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && |
| C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| } else { // > 1/2 ulp |
| gt_half_ulp = 1; |
| } |
| } else if (q4 <= 58) { |
| if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || |
| (C4.w[2] == midpoint192[q4 - 39].w[2] && |
| C4.w[1] < midpoint192[q4 - 39].w[1]) || |
| (C4.w[2] == midpoint192[q4 - 39].w[2] && |
| C4.w[1] == midpoint192[q4 - 39].w[1] && |
| C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && |
| C4.w[1] == midpoint192[q4 - 39].w[1] && |
| C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| } else { // > 1/2 ulp |
| gt_half_ulp = 1; |
| } |
| } else { |
| if (C4.w[3] < midpoint256[q4 - 59].w[3] || |
| (C4.w[3] == midpoint256[q4 - 59].w[3] && |
| C4.w[2] < midpoint256[q4 - 59].w[2]) || |
| (C4.w[3] == midpoint256[q4 - 59].w[3] && |
| C4.w[2] == midpoint256[q4 - 59].w[2] && |
| C4.w[1] < midpoint256[q4 - 59].w[1]) || |
| (C4.w[3] == midpoint256[q4 - 59].w[3] && |
| C4.w[2] == midpoint256[q4 - 59].w[2] && |
| C4.w[1] == midpoint256[q4 - 59].w[1] && |
| C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp |
| lt_half_ulp = 1; |
| } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && |
| C4.w[2] == midpoint256[q4 - 59].w[2] && |
| C4.w[1] == midpoint256[q4 - 59].w[1] && |
| C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp |
| eq_half_ulp = 1; |
| } else { // > 1/2 ulp |
| gt_half_ulp = 1; |
| } |
| } |
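| // the midpoint tables hold half of one unit in the digit position |
| // above their index: midpoint64[i] = 5 * 10^i (e.g. midpoint64[0] = |
| // 5), extended to wider coefficients by midpoint128/192/256; because |
| // delta = p34 implies ulp (scaled z) = 10^(q4+e4), comparing C4 with |
| // 5 * 10^(q4-1) classifies x * y as below, at, or above 1/2 ulp (z) |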
| |
| if (p_sign == z_sign) { |
| if (lt_half_ulp) { |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| // use the following to avoid double rounding errors when operating on |
| // mixed formats in rounding to nearest |
| is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value |
| } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { |
| // add 1 ulp to the significand |
| res.w[0]++; |
| if (res.w[0] == 0x0ull) |
| res.w[1]++; |
| // check for rounding overflow, when coeff == 10^34 |
| if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && |
| res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 |
| e3 = e3 + 1; |
| // coeff = 10^33 |
| z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; |
| res.w[1] = 0x0000314dc6448d93ull; |
| res.w[0] = 0x38c15b0a00000000ull; |
| } |
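| // (incrementing a coefficient of 34 nines yields 10^34, which would |
| // need 35 digits; the same value is kept representable as 10^33 with |
| // the exponent incremented by 1) |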
| // end add 1 ulp |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| if (eq_half_ulp) { |
| is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value |
| } else { |
| is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value |
| } |
| } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) |
| // leave unchanged |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value |
| } |
| // the result is always inexact, and never tiny |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| // check for overflow |
| if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { |
| res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, is_midpoint_gt_even, |
| e3, &res, pfpsf); |
| z_exp = res.w[1] & MASK_EXP; |
| } |
| } else { // if (p_sign != z_sign) |
| // consider two cases, because C3 * 10^scale = 10^33 is a special case |
| if (res.w[1] != 0x0000314dc6448d93ull || |
| res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 |
| if (lt_half_ulp) { |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| // use the following to avoid double rounding errors when operating |
| // on mixed formats in rounding to nearest |
| is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value |
| } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { |
| // subtract 1 ulp from the significand |
| res.w[0]--; |
| if (res.w[0] == 0xffffffffffffffffull) |
| res.w[1]--; |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| if (eq_half_ulp) { |
| is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value |
| } else { |
| is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value |
| } |
| } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) |
| // leave unchanged |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value |
| } |
| // the result is always inexact, and never tiny |
| // check for overflow for RN |
| if (e3 > expmax) { |
| if (rnd_mode == ROUNDING_TO_NEAREST) { |
| res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| } else { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, |
| is_midpoint_gt_even, e3, &res, |
| pfpsf); |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, |
| is_midpoint_gt_even, e3, &res, pfpsf); |
| } |
| z_exp = res.w[1] & MASK_EXP; |
| } else { // if C3 * 10^scale = 10^33 |
| e3 = (z_exp >> 49) - 6176; |
| if (e3 > expmin) { |
| // the result is exact if exp > expmin and C4 = d*10^(q4-1), |
| // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact |
| if (q4 == 1) { |
| // if q4 = 1 the result is exact |
| // result coefficient = 10^34 - C4 |
| res.w[1] = 0x0001ed09bead87c0ull; |
| res.w[0] = 0x378d8e6400000000ull - C4.w[0]; |
| z_exp = z_exp - EXP_P1; |
| e3 = e3 - 1; |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| } else { |
| // if q4 > 1 then truncate C4 from q4 digits to 1 digit; |
| // x = q4-1, 1 <= x <= 67 and check if this operation is exact |
| if (q4 <= 18) { // 2 <= q4 <= 18 |
| round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| } else if (q4 <= 38) { |
| P128.w[1] = C4.w[1]; |
| P128.w[0] = C4.w[0]; |
| round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| R64 = R128.w[0]; // one decimal digit |
| } else if (q4 <= 57) { |
| P192.w[2] = C4.w[2]; |
| P192.w[1] = C4.w[1]; |
| P192.w[0] = C4.w[0]; |
| round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| R64 = R192.w[0]; // one decimal digit |
| } else { // if (q4 <= 68) |
| round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, |
| &is_midpoint_lt_even, |
| &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| R64 = R256.w[0]; // one decimal digit |
| } |
| if (!is_midpoint_lt_even && !is_midpoint_gt_even && |
| !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
| // the result is exact: 10^34 - R64 |
| // incr_exp = 0 with certainty |
| z_exp = z_exp - EXP_P1; |
| e3 = e3 - 1; |
| res.w[1] = |
| z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; |
| res.w[0] = 0x378d8e6400000000ull - R64; |
| } else { |
| // We want R64 to be the top digit of C4, but we actually |
| // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, |
| // because the top digit is (C4 * 10^(-q4+1))RZ |
| // however, if incr_exp = 1 then R64 = 10 with certainty |
| if (incr_exp) { |
| R64 = 10; |
| } |
| // the result is inexact as C4 has more than 1 significant digit |
| // and C3 * 10^scale = 10^33 |
| // example of case that is treated here: |
| // 100...0 * 10^e3 - 0.41 * 10^e3 = |
| // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) |
| // note that (e3 > expmin) |
| // in order to round, subtract R64 from 10^34 and then compare |
| // C4 - R64 * 10^(q4-1) with 1/2 ulp |
| // calculate 10^34 - R64 |
| res.w[1] = 0x0001ed09bead87c0ull; |
| res.w[0] = 0x378d8e6400000000ull - R64; |
| z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand |
| // calculate C4 - R64 * 10^(q4-1); this is a rare case and |
| // R64 is small, 1 <= R64 <= 9 |
| e3 = e3 - 1; |
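| // the indicators determined when truncating C4 refer to |C4|; since |
| // the result is the complement 10^34 - R64, their direction flips: |
| // C4 slightly above a midpoint leaves the result slightly below the |
| // corresponding midpoint, and vice versa, hence the swaps below |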
| if (is_inexact_lt_midpoint) { |
| is_inexact_lt_midpoint = 0; |
| is_inexact_gt_midpoint = 1; |
| } else if (is_inexact_gt_midpoint) { |
| is_inexact_gt_midpoint = 0; |
| is_inexact_lt_midpoint = 1; |
| } else if (is_midpoint_lt_even) { |
| is_midpoint_lt_even = 0; |
| is_midpoint_gt_even = 1; |
| } else if (is_midpoint_gt_even) { |
| is_midpoint_gt_even = 0; |
| is_midpoint_lt_even = 1; |
| } else { |
| ; |
| } |
| // the result is always inexact, and never tiny |
| // check for overflow for RN |
| if (e3 > expmax) { |
| if (rnd_mode == ROUNDING_TO_NEAREST) { |
| res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf |
| res.w[0] = 0x0000000000000000ull; |
| *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); |
| } else { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, |
| is_midpoint_gt_even, e3, &res, |
| pfpsf); |
| } |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| } |
| // set the inexact flag |
| *pfpsf |= INEXACT_EXCEPTION; |
| res.w[1] = |
| z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, |
| is_midpoint_gt_even, e3, &res, |
| pfpsf); |
| } |
| z_exp = res.w[1] & MASK_EXP; |
| } // end result is inexact |
| } // end q4 > 1 |
| } else { // if (e3 = emin) |
| // if e3 = expmin the result is also tiny (the condition for |
| // tininess is C4 > 050...0 [q4 digits] which is met because |
| // the msd of C4 is not zero) |
| // the result is tiny and inexact in all rounding modes; |
| // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, |
| // gt_half_ulp to calculate) |
| // if (lt_half_ulp || eq_half_ulp) res = 10^33 (stays unchanged) |
| |
| // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp |
| if (gt_half_ulp) { // res = 10^33 - 1 |
| res.w[1] = 0x0000314dc6448d93ull; |
| res.w[0] = 0x38c15b09ffffffffull; |
| } else { |
| res.w[1] = 0x0000314dc6448d93ull; |
| res.w[0] = 0x38c15b0a00000000ull; |
| } |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later |
| |
| if (eq_half_ulp) { |
| is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value |
| } else if (lt_half_ulp) { |
| is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value |
| } else { // if (gt_half_ulp) |
| is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value |
| } |
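| // (the half-ulp classification was made for |x * y|; because x * y |
| // is subtracted from z here, an x * y below 1/2 ulp leaves the |
| // result above its midpoint and one above 1/2 ulp leaves it below, |
| // which is why lt and gt are interchanged relative to the |
| // p_sign == z_sign case) |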
| |
| if (rnd_mode != ROUNDING_TO_NEAREST) { |
| rounding_correction (rnd_mode, |
| is_inexact_lt_midpoint, |
| is_inexact_gt_midpoint, |
| is_midpoint_lt_even, |
| is_midpoint_gt_even, e3, &res, |
| pfpsf); |
| z_exp = res.w[1] & MASK_EXP; |
| } |
| } // end e3 = emin |
| // set the inexact flag (if the result was not exact) |
| if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || |
| is_midpoint_lt_even || is_midpoint_gt_even) |
| *pfpsf |= INEXACT_EXCEPTION; |
| } // end 10^33 |
| } // end if (p_sign != z_sign) |
| res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
| *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
| *ptr_is_midpoint_gt_even = is_midpoint_gt_even; |
| *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; |
| *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; |
| BID_SWAP128 (res); |
| BID_RETURN (res) |
| |
| } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) |
| (q3 <= delta && delta + q4 <= p34) || // Case (3) |
| (delta < q3 && p34 < delta + q4) || // Case (4) |
| (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) |
| (delta + q4 < q3)) && // Case (6) |
| !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) |
| |
| // the result has the sign of z |
| |
| if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) |
| (delta < q3 && p34 < delta + q4)) { // Case (4) |
| // round first the sum x * y + z with unbounded exponent |
| // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, |
| // 1 <= scale <= 33 |
| // calculate res = C3 * 10^scale |
| scale = p34 - q3; |
| x0 = delta + q4 - p34; |
| } else if (delta + q4 < q3) { // Case (6) |
| // make Case (6) look like Case (3) or Case (5) with scale = 0 |
| // by scaling up C4 by 10^(q3 - delta - q4) |
| scale = q3 - delta - q4; // 1 <= scale <= 33 |
| if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C4.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C4.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); |
| } |
| } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C4 |
| __mul_128x64_to_128 (P128, ten2k64[scale], C4); |
| } |
| C4.w[0] = P128.w[0]; |
| C4.w[1] = P128.w[1]; |
| // e4 does not need adjustment, as it is not used from this point on |
| scale = 0; |
| x0 = 0; |
| // now Case (6) looks like Case (3) or Case (5) with scale = 0 |
| } else { // if Case (3) or Case (5) |
| // Note: Case (3) is similar to Case (2), but scale differs and the |
| // result is exact, unless it is tiny (so x0 = 0 when calculating the |
| // result with unbounded exponent) |
| |
| // calculate first the sum x * y + z with unbounded exponent (exact) |
| // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, |
| // 1 <= scale <= 33 |
| // calculate res = C3 * 10^scale |
| scale = delta + q4 - q3; |
| x0 = 0; |
| // Note: the comments which follow refer [mainly] to Case (2) |
| } |
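| // at this point scale aligns C3 with C4 (res = C3 * 10^scale is |
| // computed below) and x0 is the number of low-order digits to round |
| // off C4 so that the sum of the aligned coefficients fits in 128 |
| // bits and can then be rounded to p34 = 34 digits |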
| |
| case2_repeat: |
| if (scale == 0) { // this could happen e.g. if we return to case2_repeat |
| // or in Case (4) |
| res.w[1] = C3.w[1]; |
| res.w[0] = C3.w[0]; |
| } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits |
| if (scale <= 19) { // 10^scale fits in 64 bits |
| // 64 x 64 C3.w[0] * ten2k64[scale] |
| __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); |
| } else { // 10^scale fits in 128 bits |
| // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
| __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); |
| } |
| } else { // z fits in 128 bits, but 10^scale must fit in 64 bits |
| // 64 x 128 ten2k64[scale] * C3 |
| __mul_128x64_to_128 (res, ten2k64[scale], C3); |
| } |
| // e3 is already calculated |
| e3 = e3 - scale; |
| // now res = C3 * 10^scale and e3 = e3 - scale |
| // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat |
| // because the result was too small |
| |
| // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, |
| // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) |
| // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of |
| // the rounding fits in 128 bits!) |
| // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) |
| // because q3 + q4 - x0 <= p34 => x0 >= q3 + q4 - p34 |
| if (x0 == 0) { // this could happen only if we return to case2_repeat, or |
| // for Case (3), Case (5), or Case (6) |
| R128.w[1] = C4.w[1]; |
| R128.w[0] = C4.w[0]; |
| } else if (q4 <= 18) { |
| // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 |
| round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); |
| if (incr_exp) { |
| // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 |
| R64 = ten2k64[q4 - x0]; |
| } |
| R128.w[1] = 0; |
| R128.w[0] = R64; |
| } else if (q4 <= 38) { |
| // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 |
| P128.w[1] = C4.w[1]; |
| P128.w[0] = C4.w[0]; |
| round128_19_38 (q4, x0, P128, &R128, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| if (incr_exp) { |
| // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 |
| if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 |
| R128.w[0] = ten2k64[q4 - x0]; |
| // R128.w[1] stays 0 |
| } else { // 20 <= q4 - x0 <= 37 |
| R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
| R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; |
| } |
| } |
| } else if (q4 <= 57) { |
| // 39 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 6 <= x0 <= 56 |
| P192.w[2] = C4.w[2]; |
| P192.w[1] = C4.w[1]; |
| P192.w[0] = C4.w[0]; |
| round192_39_57 (q4, x0, P192, &R192, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| // R192.w[2] is always 0 |
| if (incr_exp) { |
| // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 6, 1 <= q4 - x0 <= 51 |
| if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 |
| R192.w[0] = ten2k64[q4 - x0]; |
| // R192.w[1] stays 0 |
| // R192.w[2] stays 0 |
| } else { // 20 <= q4 - x0 <= 33 |
| R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
| R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; |
| // R192.w[2] stays 0 |
| } |
| } |
| R128.w[1] = R192.w[1]; |
| R128.w[0] = R192.w[0]; |
| } else { |
| // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 |
| round256_58_76 (q4, x0, C4, &R256, &incr_exp, |
| &is_midpoint_lt_even, &is_midpoint_gt_even, |
| &is_inexact_lt_midpoint, |
| &is_inexact_gt_midpoint); |
| // R256.w[3] and R256.w[2] are always 0 |
| if (incr_exp) { |
| // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 |
| if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 |
| R256.w[0] = ten2k64[q4 - x0]; |
| // R256.w[1] stays 0 |
| // R256.w[2] stays 0 |
| // R256.w[3] stays 0 |
| } else { // 20 <= q4 - x0 <= 33 |
| R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
| R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; |
| // R256.w[2] stays 0 |
| // R256.w[3] stays 0 |
| } |
|