libgcc/config/libbid/bid128_fma.c - gcc - Git at Google

 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.

 This file is part of GCC.

 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 3, or (at your option) any later
 version.

 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 Under Section 7 of GPL version 3, you are granted additional
 permissions described in the GCC Runtime Library Exception, version
 3.1, as published by the Free Software Foundation.

 You should have received a copy of the GNU General Public License and
 a copy of the GCC Runtime Library Exception along with this program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */

 /*****************************************************************************
  *
  *  BID128 fma   x * y + z
  *
  ****************************************************************************/

 #include "bid_internal.h"

 static void
 rounding_correction (unsigned int rnd_mode,
         	     unsigned int is_inexact_lt_midpoint,
         	     unsigned int is_inexact_gt_midpoint,
         	     unsigned int is_midpoint_lt_even,
         	     unsigned int is_midpoint_gt_even,
         	     int unbexp,
         	     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
   // unbiased true exponent unbexp may be larger than emax

   UINT128 res = *ptrres; // expected to have the correct sign and coefficient
   // (the exponent field is ignored, as unbexp is used instead)
   UINT64 sign, exp;
   UINT64 C_hi, C_lo;

   // general correction from RN to RA, RM, RP, RZ
   // Note: if the result is negative, then is_inexact_lt_midpoint,
   // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
   // have to be considered as if determined for the absolute value of the
   // result (so they seem to be reversed)

   if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
       is_midpoint_lt_even || is_midpoint_gt_even) {
     *ptrfpsf |= INEXACT_EXCEPTION;
   }
   // apply correction to result calculated with unbounded exponent
   sign = res.w[1] & MASK_SIGN;
   exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
   C_hi = res.w[1] & MASK_COEFF;
   C_lo = res.w[0];
   if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
       is_midpoint_gt_even))) ||
       (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
       is_midpoint_gt_even)))) {
     // C = C + 1
     C_lo = C_lo + 1;
     if (C_lo == 0)
       C_hi = C_hi + 1;
     if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
       // C = 10^34 => rounding overflow
       C_hi = 0x0000314dc6448d93ull;
       C_lo = 0x38c15b0a00000000ull; // 10^33
       // exp = exp + EXP_P1;
       unbexp = unbexp + 1;
       exp = (UINT64) (unbexp + 6176) << 49;
     }
   } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
       ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
       (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
     // C = C - 1
     C_lo = C_lo - 1;
     if (C_lo == 0xffffffffffffffffull)
       C_hi--;
     // check if we crossed into the lower decade
     if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
       // C = 10^33 - 1
       if (exp > 0) {
         C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
         C_lo = 0x378d8e63ffffffffull;
         // exp = exp - EXP_P1;
         unbexp = unbexp - 1;
         exp = (UINT64) (unbexp + 6176) << 49;
       } else { // if exp = 0 the result is tiny & inexact
         *ptrfpsf |= UNDERFLOW_EXCEPTION;
       }
     }
   } else {
     ; // the result is already correct
   }
   if (unbexp > expmax) { // 6111
     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
     exp = 0;
     if (!sign) { // result is positive
       if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
         C_hi = 0x7800000000000000ull;
         C_lo = 0x0000000000000000ull;
       } else { // res = +MAXFP = (10^34-1) * 10^emax
         C_hi = 0x5fffed09bead87c0ull;
         C_lo = 0x378d8e63ffffffffull;
       }
     } else { // result is negative
       if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
         C_hi = 0xf800000000000000ull;
         C_lo = 0x0000000000000000ull;
       } else { // res = -MAXFP = -(10^34-1) * 10^emax
         C_hi = 0xdfffed09bead87c0ull;
         C_lo = 0x378d8e63ffffffffull;
       }
     }
   }
   // assemble the result
   res.w[1] = sign | exp | C_hi;
   res.w[0] = C_lo;
   *ptrres = res;
 }

 static void
 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
   // *z = x + yl assume the sum fits in 256 bits
   UINT256 z;
   z.w[0] = x.w[0] + y.w[0];
   if (z.w[0] < x.w[0]) {
     x.w[1]++;
     if (x.w[1] == 0x0000000000000000ull) {
       x.w[2]++;
       if (x.w[2] == 0x0000000000000000ull) {
         x.w[3]++;
       }
     }
   }
   z.w[1] = x.w[1] + y.w[1];
   if (z.w[1] < x.w[1]) {
     x.w[2]++;
     if (x.w[2] == 0x0000000000000000ull) {
       x.w[3]++;
     }
   }
   z.w[2] = x.w[2] + y.w[2];
   if (z.w[2] < x.w[2]) {
     x.w[3]++;
   }
   z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
   *pz = z;
 }

 static void
 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
   // *z = x - y; assume x >= y
   UINT256 z;
   z.w[0] = x.w[0] - y.w[0];
   if (z.w[0] > x.w[0]) {
     x.w[1]--;
     if (x.w[1] == 0xffffffffffffffffull) {
       x.w[2]--;
       if (x.w[2] == 0xffffffffffffffffull) {
         x.w[3]--;
       }
     }
   }
   z.w[1] = x.w[1] - y.w[1];
   if (z.w[1] > x.w[1]) {
     x.w[2]--;
     if (x.w[2] == 0xffffffffffffffffull) {
       x.w[3]--;
     }
   }
   z.w[2] = x.w[2] - y.w[2];
   if (z.w[2] > x.w[2]) {
     x.w[3]--;
   }
   z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
   *pz = z;
 }


 static int
 nr_digits256 (UINT256 R256) {
   int ind;
   // determine the number of decimal digits in R256
   if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
     // between 1 and 19 digits
     for (ind = 1; ind <= 19; ind++) {
       if (R256.w[0] < ten2k64[ind]) {
         break;
       }
     }
     // ind digits
   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
              (R256.w[1] < ten2k128[0].w[1] ||
               (R256.w[1] == ten2k128[0].w[1]
                && R256.w[0] < ten2k128[0].w[0]))) {
     // 20 digits
     ind = 20;
   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
     // between 21 and 38 digits
     for (ind = 1; ind <= 18; ind++) {
       if (R256.w[1] < ten2k128[ind].w[1] ||
           (R256.w[1] == ten2k128[ind].w[1] &&
            R256.w[0] < ten2k128[ind].w[0])) {
         break;
       }
     }
     // ind + 20 digits
     ind = ind + 20;
   } else if (R256.w[3] == 0x0 &&
              (R256.w[2] < ten2k256[0].w[2] ||
               (R256.w[2] == ten2k256[0].w[2] &&
                R256.w[1] < ten2k256[0].w[1]) ||
               (R256.w[2] == ten2k256[0].w[2] &&
                R256.w[1] == ten2k256[0].w[1] &&
                R256.w[0] < ten2k256[0].w[0]))) {
     // 39 digits
     ind = 39;
   } else {
     // between 40 and 68 digits
     for (ind = 1; ind <= 29; ind++) {
       if (R256.w[3] < ten2k256[ind].w[3] ||
           (R256.w[3] == ten2k256[ind].w[3] &&
            R256.w[2] < ten2k256[ind].w[2]) ||
           (R256.w[3] == ten2k256[ind].w[3] &&
            R256.w[2] == ten2k256[ind].w[2] &&
            R256.w[1] < ten2k256[ind].w[1]) ||
           (R256.w[3] == ten2k256[ind].w[3] &&
            R256.w[2] == ten2k256[ind].w[2] &&
            R256.w[1] == ten2k256[ind].w[1] &&
            R256.w[0] < ten2k256[ind].w[0])) {
         break;
       }
     }
     // ind + 39 digits
     ind = ind + 39;
   }
   return (ind);
 }

 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
 // use the rounding information from ptr_is_* to avoid a double rounding error
 static void
 add_and_round (int q3,
                int q4,
                int e4,
                int delta,
                int p34,
                UINT64 z_sign,
                UINT64 p_sign,
                UINT128 C3,
                UINT256 C4,
                int rnd_mode,
                int *ptr_is_midpoint_lt_even,
                int *ptr_is_midpoint_gt_even,
                int *ptr_is_inexact_lt_midpoint,
                int *ptr_is_inexact_gt_midpoint,
                _IDEC_flags * ptrfpsf, UINT128 * ptrres) {

   int scale;
   int x0;
   int ind;
   UINT64 R64;
   UINT128 P128, R128;
   UINT192 P192, R192;
   UINT256 R256;
   int is_midpoint_lt_even = 0;
   int is_midpoint_gt_even = 0;
   int is_inexact_lt_midpoint = 0;
   int is_inexact_gt_midpoint = 0;
   int is_midpoint_lt_even0 = 0;
   int is_midpoint_gt_even0 = 0;
   int is_inexact_lt_midpoint0 = 0;
   int is_inexact_gt_midpoint0 = 0;
   int incr_exp = 0;
   int is_tiny = 0;
   int lt_half_ulp = 0;
   int eq_half_ulp = 0;
   // int gt_half_ulp = 0;
   UINT128 res = *ptrres;

   // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
   scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
   // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1

   // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
   // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
   if (scale == 0) {
     R256.w[3] = 0x0ull;
     R256.w[2] = 0x0ull;
     R256.w[1] = C3.w[1];
     R256.w[0] = C3.w[0];
   } else if (scale <= 19) { // 10^scale fits in 64 bits
     P128.w[1] = 0;
     P128.w[0] = ten2k64[scale];
     __mul_128x128_to_256 (R256, P128, C3);
   } else if (scale <= 38) { // 10^scale fits in 128 bits
     __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
   } else if (scale <= 57) { // 39 <= scale <= 57
     // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
     // (10^67 has 223 bits; 10^69 has 230 bits);
     // must split the computation:
     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
     // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
     __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
     // now multiply R128 by 10^38
     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
   } else { // 58 <= scale <= 66
     // 10^scale takes between 193 and 220 bits,
     // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
     // must split the computation:
     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
     // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
     // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
     // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
     __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
     // now calculate 10*38 * 10^(scale-38) * C3
     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
   }
   // C3 * 10^scale is now in R256

   // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
   // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
   // possible
   // add/subtract C4 and C3 * 10^scale; the exponent is e4
   if (p_sign == z_sign) { // R256 = C4 + R256
     // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
     // but may require rounding
     add256 (C4, R256, &R256);
   } else { // if (p_sign != z_sign) { // R256 = C4 - R256
     // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
     // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
     // but may require rounding

     // compare first R256 = C3 * 10^scale and C4
     if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
         R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
       // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
       // but may require rounding
       sub256 (R256, C4, &R256);
       // flip p_sign too, because the result has the sign of z
       p_sign = z_sign;
     } else { // if C4 > C3 * 10^scale
       // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
       // but may require rounding
       sub256 (C4, R256, &R256);
     }
     // if the result is pure zero, the sign depends on the rounding mode
     // (x*y and z had opposite signs)
     if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
         R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
       if (rnd_mode != ROUNDING_DOWN)
         p_sign = 0x0000000000000000ull;
       else
         p_sign = 0x8000000000000000ull;
       // the exponent is max (e4, expmin)
       if (e4 < -6176)
         e4 = expmin;
       // assemble result
       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
       res.w[0] = 0x0;
       *ptrres = res;
       return;
     }
   }

   // determine the number of decimal digits in R256
   ind = nr_digits256 (R256);

   // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
   // round to the destination precision, with unbounded exponent

   if (ind <= p34) {
     // result rounded to the destination precision with unbounded exponent
     // is exact
     if (ind + e4 < p34 + expmin) {
       is_tiny = 1; // applies to all rounding modes
     }
     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
     res.w[0] = R256.w[0];
     // Note: res is correct only if expmin <= e4 <= expmax
   } else { // if (ind > p34)
     // if more than P digits, round to nearest to P digits
     // round R256 to p34 digits
     x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
     if (ind <= 38) {
       P128.w[1] = R256.w[1];
       P128.w[0] = R256.w[0];
       round128_19_38 (ind, x0, P128, &R128, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
     } else if (ind <= 57) {
       P192.w[2] = R256.w[2];
       P192.w[1] = R256.w[1];
       P192.w[0] = R256.w[0];
       round192_39_57 (ind, x0, P192, &R192, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
       R128.w[1] = R192.w[1];
       R128.w[0] = R192.w[0];
     } else { // if (ind <= 68)
       round256_58_76 (ind, x0, R256, &R256, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
       R128.w[1] = R256.w[1];
       R128.w[0] = R256.w[0];
     }
     // the rounded result has p34 = 34 digits
     e4 = e4 + x0 + incr_exp;
     if (rnd_mode == ROUNDING_TO_NEAREST) {
       if (e4 < expmin) {
         is_tiny = 1; // for other rounding modes apply correction
       }
     } else {
       // for RM, RP, RZ, RA apply correction in order to determine tininess
       // but do not save the result; apply the correction to
       // (-1)^p_sign * significand * 10^0
       P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
       P128.w[0] = R128.w[0];
       rounding_correction (rnd_mode,
         		   is_inexact_lt_midpoint,
         		   is_inexact_gt_midpoint, is_midpoint_lt_even,
         		   is_midpoint_gt_even, 0, &P128, ptrfpsf);
       scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
       // the number of digits in the significand is p34 = 34
       if (e4 + scale < expmin) {
         is_tiny = 1;
       }
     }
     ind = p34; // the number of decimal digits in the signifcand of res
     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
     res.w[0] = R128.w[0];
     // Note: res is correct only if expmin <= e4 <= expmax
     // set the inexact flag after rounding with bounded exponent, if any
   }
   // at this point we have the result rounded with unbounded exponent in
   // res and we know its tininess:
   // res = (-1)^p_sign * significand * 10^e4,
   // where q (significand) = ind <= p34
   // Note: res is correct only if expmin <= e4 <= expmax

   // check for overflow if RN
   if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
     res.w[1] = p_sign | 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *ptrres = res;
     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
     return; // BID_RETURN (res)
   } // else not overflow or not RN, so continue

   // if (e4 >= expmin) we have the result rounded with bounded exponent
   if (e4 < expmin) {
     x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
     // where the result rounded [at most] once is
     //   (-1)^p_sign * significand_res * 10^e4

     // avoid double rounding error
     is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
     is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
     is_midpoint_lt_even0 = is_midpoint_lt_even;
     is_midpoint_gt_even0 = is_midpoint_gt_even;
     is_inexact_lt_midpoint = 0;
     is_inexact_gt_midpoint = 0;
     is_midpoint_lt_even = 0;
     is_midpoint_gt_even = 0;

     if (x0 > ind) {
       // nothing is left of res when moving the decimal point left x0 digits
       is_inexact_lt_midpoint = 1;
       res.w[1] = p_sign | 0x0000000000000000ull;
       res.w[0] = 0x0000000000000000ull;
       e4 = expmin;
     } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
       // this is <, =, or > 1/2 ulp
       // compare the ind-digit value in the significand of res with
       // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
       // less than, equal to, or greater than 1/2 ulp (significand of res)
       R128.w[1] = res.w[1] & MASK_COEFF;
       R128.w[0] = res.w[0];
       if (ind <= 19) {
         if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
           lt_half_ulp = 1;
           is_inexact_lt_midpoint = 1;
         } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
           eq_half_ulp = 1;
           is_midpoint_gt_even = 1;
         } else { // > 1/2 ulp
           // gt_half_ulp = 1;
           is_inexact_gt_midpoint = 1;
         }
       } else { // if (ind <= 38) {
         if (R128.w[1] < midpoint128[ind - 20].w[1] ||
             (R128.w[1] == midpoint128[ind - 20].w[1] &&
             R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
           lt_half_ulp = 1;
           is_inexact_lt_midpoint = 1;
         } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
             R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
           is_midpoint_gt_even = 1;
         } else { // > 1/2 ulp
           // gt_half_ulp = 1;
           is_inexact_gt_midpoint = 1;
         }
       }
       if (lt_half_ulp || eq_half_ulp) {
         // res = +0.0 * 10^expmin
         res.w[1] = 0x0000000000000000ull;
         res.w[0] = 0x0000000000000000ull;
       } else { // if (gt_half_ulp)
         // res = +1 * 10^expmin
         res.w[1] = 0x0000000000000000ull;
         res.w[0] = 0x0000000000000001ull;
       }
       res.w[1] = p_sign | res.w[1];
       e4 = expmin;
     } else { // if (1 <= x0 <= ind - 1 <= 33)
       // round the ind-digit result to ind - x0 digits

       if (ind <= 18) { // 2 <= ind <= 18
         round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         res.w[1] = 0x0;
         res.w[0] = R64;
       } else if (ind <= 38) {
         P128.w[1] = res.w[1] & MASK_COEFF;
         P128.w[0] = res.w[0];
         round128_19_38 (ind, x0, P128, &res, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
       }
       e4 = e4 + x0; // expmin
       // we want the exponent to be expmin, so if incr_exp = 1 then
       // multiply the rounded result by 10 - it will still fit in 113 bits
       if (incr_exp) {
         // 64 x 128 -> 128
         P128.w[1] = res.w[1] & MASK_COEFF;
         P128.w[0] = res.w[0];
         __mul_64x128_to_128 (res, ten2k64[1], P128);
       }
       res.w[1] =
         p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
       // avoid a double rounding error
       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
           is_midpoint_lt_even) { // double rounding error upward
         // res = res - 1
         res.w[0]--;
         if (res.w[0] == 0xffffffffffffffffull)
           res.w[1]--;
         // Note: a double rounding error upward is not possible; for this
         // the result after the first rounding would have to be 99...95
         // (35 digits in all), possibly followed by a number of zeros; this
         // is not possible in Cases (2)-(6) or (15)-(17) which may get here
         is_midpoint_lt_even = 0;
         is_inexact_lt_midpoint = 1;
       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
           is_midpoint_gt_even) { // double rounding error downward
         // res = res + 1
         res.w[0]++;
         if (res.w[0] == 0)
           res.w[1]++;
         is_midpoint_gt_even = 0;
         is_inexact_gt_midpoint = 1;
       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         // if this second rounding was exact the result may still be
         // inexact because of the first rounding
         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
           is_inexact_gt_midpoint = 1;
         }
         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
           is_inexact_lt_midpoint = 1;
         }
       } else if (is_midpoint_gt_even &&
         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
         // pulled up to a midpoint
         is_inexact_lt_midpoint = 1;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else if (is_midpoint_lt_even &&
         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
         // pulled down to a midpoint
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 1;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else {
         ;
       }
     }
   }
   // res contains the correct result
   // apply correction if not rounding to nearest
   if (rnd_mode != ROUNDING_TO_NEAREST) {
     rounding_correction (rnd_mode,
         		 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
         		 is_midpoint_lt_even, is_midpoint_gt_even,
         		 e4, &res, ptrfpsf);
   }
   if (is_midpoint_lt_even || is_midpoint_gt_even ||
       is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
     // set the inexact flag
     *ptrfpsf |= INEXACT_EXCEPTION;
     if (is_tiny)
       *ptrfpsf |= UNDERFLOW_EXCEPTION;
   }

   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   *ptrres = res;
   return;
 }


 #if DECIMAL_CALL_BY_REFERENCE
 static void
 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
         	int *ptr_is_midpoint_gt_even,
         	int *ptr_is_inexact_lt_midpoint,
         	int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
         	UINT128 * px, UINT128 * py,
         	UINT128 *
         	pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
         	_EXC_INFO_PARAM) {
   UINT128 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 static UINT128
 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
         	int *ptr_is_midpoint_gt_even,
         	int *ptr_is_inexact_lt_midpoint,
         	int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
         	UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
         	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
 #endif

   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
   UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
   int true_p_exp;
   UINT128 C1, C2, C3;
   UINT256 C4;
   int q1 = 0, q2 = 0, q3 = 0, q4;
   int e1, e2, e3, e4;
   int scale, ind, delta, x0;
   int p34 = P34; // used to modify the limit on the number of digits
   BID_UI64DOUBLE tmp;
   int x_nr_bits, y_nr_bits, z_nr_bits;
   unsigned int save_fpsf;
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
   int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
   int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
   int incr_exp = 0;
   int lsb;
   int lt_half_ulp = 0;
   int eq_half_ulp = 0;
   int gt_half_ulp = 0;
   int is_tiny = 0;
   UINT64 R64, tmp64;
   UINT128 P128, R128;
   UINT192 P192, R192;
   UINT256 R256;

   // the following are based on the table of special cases for fma; the NaN
   // behavior is similar to that of the IA-64 Architecture fma

   // identify cases where at least one operand is NaN

   BID_SWAP128 (x);
   BID_SWAP128 (y);
   BID_SWAP128 (z);
   if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
     // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
     // check first for non-canonical NaN payload
     if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
         (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
          (y.w[0] > 0x38c15b09ffffffffull))) {
       y.w[1] = y.w[1] & 0xffffc00000000000ull;
       y.w[0] = 0x0ull;
     }
     if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (y)
       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
       res.w[0] = y.w[0];
     } else { // y is QNaN
       // return y
       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
       res.w[0] = y.w[0];
       // if z = SNaN or x = SNaN signal invalid exception
       if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
           (x.w[1] & MASK_SNAN) == MASK_SNAN) {
         // set invalid flag
         *pfpsf |= INVALID_EXCEPTION;
       }
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
     // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
     // check first for non-canonical NaN payload
     if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
         (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
          (z.w[0] > 0x38c15b09ffffffffull))) {
       z.w[1] = z.w[1] & 0xffffc00000000000ull;
       z.w[0] = 0x0ull;
     }
     if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (z)
       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
       res.w[0] = z.w[0];
     } else { // z is QNaN
       // return z
       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
       res.w[0] = z.w[0];
       // if x = SNaN signal invalid exception
       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
         // set invalid flag
         *pfpsf |= INVALID_EXCEPTION;
       }
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
     // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
     // check first for non-canonical NaN payload
     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
         (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
          (x.w[0] > 0x38c15b09ffffffffull))) {
       x.w[1] = x.w[1] & 0xffffc00000000000ull;
       x.w[0] = 0x0ull;
     }
     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (x)
       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
       res.w[0] = x.w[0];
     } else { // x is QNaN
       // return x
       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
       res.w[0] = x.w[0];
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   }
   // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
   // for non-canonical values

   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   C1.w[1] = x.w[1] & MASK_COEFF;
   C1.w[0] = x.w[0];
   if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
     // if x is not infinity check for non-canonical values - treated as zero
     if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
       // non-canonical
       x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
       C1.w[1] = 0; // significand high
       C1.w[0] = 0; // significand low
     } else { // G0_G1 != 11
       x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
       if (C1.w[1] > 0x0001ed09bead87c0ull ||
           (C1.w[1] == 0x0001ed09bead87c0ull &&
            C1.w[0] > 0x378d8e63ffffffffull)) {
         // x is non-canonical if coefficient is larger than 10^34 -1
         C1.w[1] = 0;
         C1.w[0] = 0;
       } else { // canonical
         ;
       }
     }
   }
   y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   C2.w[1] = y.w[1] & MASK_COEFF;
   C2.w[0] = y.w[0];
   if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
     // if y is not infinity check for non-canonical values - treated as zero
     if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
       // non-canonical
       y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
       C2.w[1] = 0; // significand high
       C2.w[0] = 0; // significand low
     } else { // G0_G1 != 11
       y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
       if (C2.w[1] > 0x0001ed09bead87c0ull ||
           (C2.w[1] == 0x0001ed09bead87c0ull &&
            C2.w[0] > 0x378d8e63ffffffffull)) {
         // y is non-canonical if coefficient is larger than 10^34 -1
         C2.w[1] = 0;
         C2.w[0] = 0;
       } else { // canonical
         ;
       }
     }
   }
   z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   C3.w[1] = z.w[1] & MASK_COEFF;
   C3.w[0] = z.w[0];
   if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
     // if z is not infinity check for non-canonical values - treated as zero
     if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
       // non-canonical
       z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
       C3.w[1] = 0; // significand high
       C3.w[0] = 0; // significand low
     } else { // G0_G1 != 11
       z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
       if (C3.w[1] > 0x0001ed09bead87c0ull ||
           (C3.w[1] == 0x0001ed09bead87c0ull &&
            C3.w[0] > 0x378d8e63ffffffffull)) {
         // z is non-canonical if coefficient is larger than 10^34 -1
         C3.w[1] = 0;
         C3.w[0] = 0;
       } else { // canonical
         ;
       }
     }
   }

   p_sign = x_sign ^ y_sign; // sign of the product

   // identify cases where at least one operand is infinity

   if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
     if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
         if (p_sign == z_sign) {
           res.w[1] = z_sign | MASK_INF;
           res.w[0] = 0x0;
         } else {
           // return QNaN Indefinite
           res.w[1] = 0x7c00000000000000ull;
           res.w[0] = 0x0000000000000000ull;
           // set invalid flag
           *pfpsf |= INVALID_EXCEPTION;
         }
       } else { // z = 0 or z = f
         res.w[1] = p_sign | MASK_INF;
         res.w[0] = 0x0;
       }
     } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
         if (p_sign == z_sign) {
           res.w[1] = z_sign | MASK_INF;
           res.w[0] = 0x0;
         } else {
           // return QNaN Indefinite
           res.w[1] = 0x7c00000000000000ull;
           res.w[0] = 0x0000000000000000ull;
           // set invalid flag
           *pfpsf |= INVALID_EXCEPTION;
         }
       } else { // z = 0 or z = f
         res.w[1] = p_sign | MASK_INF;
         res.w[0] = 0x0;
       }
     } else { // y = 0
       // return QNaN Indefinite
       res.w[1] = 0x7c00000000000000ull;
       res.w[0] = 0x0000000000000000ull;
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
     if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
       // x = f, necessarily
       if ((p_sign != z_sign)
           || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
         // return QNaN Indefinite
         res.w[1] = 0x7c00000000000000ull;
         res.w[0] = 0x0000000000000000ull;
         // set invalid flag
         *pfpsf |= INVALID_EXCEPTION;
       } else {
         res.w[1] = z_sign | MASK_INF;
         res.w[0] = 0x0;
       }
     } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
       // z = 0, f, inf
       // return QNaN Indefinite
       res.w[1] = 0x7c00000000000000ull;
       res.w[0] = 0x0000000000000000ull;
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
     } else {
       // x = f and z = 0, f, necessarily
       res.w[1] = p_sign | MASK_INF;
       res.w[0] = 0x0;
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
     // x = 0, f and y = 0, f, necessarily
     res.w[1] = z_sign | MASK_INF;
     res.w[0] = 0x0;
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   }

   true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
   if (true_p_exp < -6176)
     p_exp = 0; // cannot be less than EXP_MIN
   else
     p_exp = (UINT64) (true_p_exp + 6176) << 49;

   if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
     // the result is 0
     if (p_exp < z_exp)
       res.w[1] = p_exp; // preferred exponent
     else
       res.w[1] = z_exp; // preferred exponent
     if (p_sign == z_sign) {
       res.w[1] |= z_sign;
       res.w[0] = 0x0;
     } else { // x * y and z have opposite signs
       if (rnd_mode == ROUNDING_DOWN) {
         // res = -0.0
         res.w[1] |= MASK_SIGN;
         res.w[0] = 0x0;
       } else {
         // res = +0.0
         // res.w[1] |= 0x0;
         res.w[0] = 0x0;
       }
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   }
   // from this point on, we may need to know the number of decimal digits
   // in the significands of x, y, z when x, y, z != 0

   if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
     // q1 = nr. of decimal digits in x
     // determine first the nr. of bits in x
     if (C1.w[1] == 0) {
       if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
         // split the 64-bit value in two 32-bit halves to avoid rounding errors
         if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
           tmp.d = (double) (C1.w[0] >> 32); // exact conversion
           x_nr_bits =
             33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         } else { // x < 2^32
           tmp.d = (double) (C1.w[0]); // exact conversion
           x_nr_bits =
             1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         }
       } else { // if x < 2^53
         tmp.d = (double) C1.w[0]; // exact conversion
         x_nr_bits =
           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
       tmp.d = (double) C1.w[1]; // exact conversion
       x_nr_bits =
         65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
     q1 = nr_digits[x_nr_bits - 1].digits;
     if (q1 == 0) {
       q1 = nr_digits[x_nr_bits - 1].digits1;
       if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
           (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
            C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
         q1++;
     }
   }

   if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
     if (C2.w[1] == 0) {
       if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
         // split the 64-bit value in two 32-bit halves to avoid rounding errors
         if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
           tmp.d = (double) (C2.w[0] >> 32); // exact conversion
           y_nr_bits =
             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         } else { // y < 2^32
           tmp.d = (double) C2.w[0]; // exact conversion
           y_nr_bits =
             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         }
       } else { // if y < 2^53
         tmp.d = (double) C2.w[0]; // exact conversion
         y_nr_bits =
           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
       tmp.d = (double) C2.w[1]; // exact conversion
       y_nr_bits =
         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }

     q2 = nr_digits[y_nr_bits].digits;
     if (q2 == 0) {
       q2 = nr_digits[y_nr_bits].digits1;
       if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
           (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
            C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
         q2++;
     }
   }

   if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
     if (C3.w[1] == 0) {
       if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
         // split the 64-bit value in two 32-bit halves to avoid rounding errors
         if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
           tmp.d = (double) (C3.w[0] >> 32); // exact conversion
           z_nr_bits =
             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         } else { // z < 2^32
           tmp.d = (double) C3.w[0]; // exact conversion
           z_nr_bits =
             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         }
       } else { // if z < 2^53
         tmp.d = (double) C3.w[0]; // exact conversion
         z_nr_bits =
           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
       tmp.d = (double) C3.w[1]; // exact conversion
       z_nr_bits =
         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }

     q3 = nr_digits[z_nr_bits].digits;
     if (q3 == 0) {
       q3 = nr_digits[z_nr_bits].digits1;
       if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
           (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
            C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
         q3++;
     }
   }

   if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
       (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
     // x = 0 or y = 0
     // z = f, necessarily; for 0 + z return z, with the preferred exponent
     // the result is z, but need to get the preferred exponent
     if (z_exp <= p_exp) { // the preferred exponent is z_exp
       res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
       res.w[0] = C3.w[0];
     } else { // if (p_exp < z_exp) the preferred exponent is p_exp
       // return (C3 * 10^scale) * 10^(z_exp - scale)
       // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
       scale = p34 - q3;
       ind = (z_exp - p_exp) >> 49;
       if (ind < scale)
         scale = ind;
       if (scale == 0) {
         res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
         res.w[0] = z.w[0];
       } else if (q3 <= 19) { // z fits in 64 bits
         if (scale <= 19) { // 10^scale fits in 64 bits
           // 64 x 64 C3.w[0] * ten2k64[scale]
           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
         } else { // 10^scale fits in 128 bits
           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
         }
       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
         // 64 x 128 ten2k64[scale] * C3
         __mul_128x64_to_128 (res, ten2k64[scale], C3);
       }
       // subtract scale from the exponent
       z_exp = z_exp - ((UINT64) scale << 49);
       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else {
     ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
   }

   e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
   e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
   e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
   e4 = e1 + e2; // unbiased exponent of the exact x * y

   // calculate C1 * C2 and its number of decimal digits, q4

   // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
   // where 2 <= q1 + q2 <= 68
   // calculate C4 = C1 * C2 and determine q
   C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
   if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
     C4.w[0] = C1.w[0] * C2.w[0];
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[0] < ten2k64[q1 + q2 - 1])
       q4 = q1 + q2 - 1; // q4 in [1, 18]
     else
       q4 = q1 + q2; // q4 in [2, 19]
     // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
   } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
     // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
     __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
     // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
     if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
       q4 = 19; // 19 = q1 + q2 - 1
     } else {
       // if (C4.w[1] == 0)
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = 20; // 20 = q1 + q2
     }
   } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
     // C4 = C1 * C2 fits in 64 or 128 bits
     // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
     // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
     if (q1 <= 19) {
       __mul_128x64_to_128 (C4, C1.w[0], C2);
     } else { // q2 <= 19
       __mul_128x64_to_128 (C4, C2.w[0], C1);
     }
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
         (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
          C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
       // if (C4.w[1] == 0) // q4 = 20, necessarily
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = q1 + q2 - 1; // q4 in [20, 37]
     } else {
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = q1 + q2; // q4 in [21, 38]
     }
   } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
     // both C1 and C2 fit in 128 bits (actually in 113 bits)
     // may replace this by 128x128_to192
     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
     // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
     if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
         		 (C4.w[1] == ten2k128[18].w[1]
         		  && C4.w[0] < ten2k128[18].w[0]))) {
       // 18 = 38 - 20 = q1+q2-1 - 20
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = 38; // 38 = q1 + q2 - 1
     } else {
       // if (C4.w[2] == 0)
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = 39; // 39 = q1 + q2
     }
   } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
     // C4 = C1 * C2 fits in 128 or 192 bits
     // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
     // may fit in 64 bits
     if (C1.w[1] == 0) { // C1 fits in 64 bits
       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
     } else if (C2.w[1] == 0) { // C2 fits in 64 bits
       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
     } else { // both C1 and C2 require 128 bits
       // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
       __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
     }
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
         (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
          (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
           (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
            C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
       // if (C4.w[2] == 0) // q4 = 39, necessarily
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = q1 + q2 - 1; // q4 in [39, 56]
     } else {
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = q1 + q2; // q4 in [40, 57]
     }
   } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
     // may fit in 64 bits
     if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
     } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
     } else { // C1 * C2 will fit in 192 bits or in 256 bits
       __mul_128x128_to_256 (C4, C1, C2);
     }
     // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
     if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
         		 (C4.w[2] == ten2k256[18].w[2]
         		  && (C4.w[1] < ten2k256[18].w[1]
         		      || (C4.w[1] == ten2k256[18].w[1]
         			  && C4.w[0] < ten2k256[18].w[0]))))) {
       // 18 = 57 - 39 = q1+q2-1 - 39
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = 57; // 57 = q1 + q2 - 1
     } else {
       // if (C4.w[3] == 0)
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
       q4 = 58; // 58 = q1 + q2
     }
   } else { // if 59 <= q1 + q2 <= 68
     // C4 = C1 * C2 fits in 192 or 256 bits
     // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
     // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
     // 64 bits
     // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
         (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
          (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
           (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
            (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
             (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
              C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
       // if (C4.w[3] == 0) // q4 = 58, necessarily
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
       q4 = q1 + q2 - 1; // q4 in [58, 67]
     } else {
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
       q4 = q1 + q2; // q4 in [59, 68]
     }
   }

   if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
     save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
     *pfpsf = 0;

     if (q4 > p34) {

       // truncate C4 to p34 digits into res
       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
       x0 = q4 - p34;
       if (q4 <= 38) {
         P128.w[1] = C4.w[1];
         P128.w[0] = C4.w[0];
         round128_19_38 (q4, x0, P128, &res, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
       } else if (q4 <= 57) { // 35 <= q4 <= 57
         P192.w[2] = C4.w[2];
         P192.w[1] = C4.w[1];
         P192.w[0] = C4.w[0];
         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R192.w[0];
         res.w[1] = R192.w[1];
       } else { // if (q4 <= 68)
         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R256.w[0];
         res.w[1] = R256.w[1];
       }
       e4 = e4 + x0;
       if (incr_exp) {
         e4 = e4 + 1;
       }
       q4 = p34;
       // res is now the coefficient of the result rounded to the destination
       // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
     } else { // if (q4 <= p34)
       // C4 * 10^e4 is the result rounded to the destination precision, with
       // unbounded exponent (which is exact)

       if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
         // e4 is too large, but can be brought within range by scaling up C4
         scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
         // res = (C4 * 10^scale) * 10^expmax
         if (q4 <= 19) { // C4 fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C4.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
           }
         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * CC43
           __mul_128x64_to_128 (res, ten2k64[scale], C4);
         }
         e4 = e4 - scale; // expmax
         q4 = q4 + scale;
       } else {
         res.w[1] = C4.w[1];
         res.w[0] = C4.w[0];
       }
       // res is the coefficient of the result rounded to the destination
       // precision, with unbounded exponent (it has q4 digits); the exponent
       // is e4 (exact result)
     }

     // check for overflow
     if (q4 + e4 > p34 + expmax) {
       if (rnd_mode == ROUNDING_TO_NEAREST) {
         res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
       } else {
         res.w[1] = p_sign | res.w[1];
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       *pfpsf |= save_fpsf;
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)
     }
     // check for underflow
     if (q4 + e4 < expmin + P34) {
       is_tiny = 1; // the result is tiny
       if (e4 < expmin) {
         // if e4 < expmin, we must truncate more of res
         x0 = expmin - e4; // x0 >= 1
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
         // the number of decimal digits in res is q4
         if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
           if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
             round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
             if (incr_exp) {
               // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
               R64 = ten2k64[q4 - x0];
             }
             // res.w[1] = 0; (from above)
             res.w[0] = R64;
           } else { // if (q4 <= 34)
             // 19 <= q4 <= 38
             P128.w[1] = res.w[1];
             P128.w[0] = res.w[0];
             round128_19_38 (q4, x0, P128, &res, &incr_exp,
         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
         		    &is_inexact_lt_midpoint,
         		    &is_inexact_gt_midpoint);
             if (incr_exp) {
               // increase coefficient by a factor of 10; this will be <= 10^33
               // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
               if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
         	// res.w[1] = 0;
         	res.w[0] = ten2k64[q4 - x0];
               } else { // 20 <= q4 - x0 <= 37
         	res.w[0] = ten2k128[q4 - x0 - 20].w[0];
         	res.w[1] = ten2k128[q4 - x0 - 20].w[1];
               }
             }
           }
           e4 = e4 + x0; // expmin
         } else if (x0 == q4) {
           // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
           // determine relationship with 1/2 ulp
           if (q4 <= 19) {
             if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               // gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           } else { // if (q4 <= 34)
             if (res.w[1] < midpoint128[q4 - 20].w[1] ||
                 (res.w[1] == midpoint128[q4 - 20].w[1] &&
                 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
                 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               // gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           }
           if (lt_half_ulp || eq_half_ulp) {
             // res = +0.0 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000000ull;
           } else { // if (gt_half_ulp)
             // res = +1 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000001ull;
           }
           e4 = expmin;
         } else { // if (x0 > q4)
           // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
           res.w[1] = 0;
           res.w[0] = 0;
           e4 = expmin;
           is_inexact_lt_midpoint = 1;
         }
         // avoid a double rounding error
         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
             is_midpoint_lt_even) { // double rounding error upward
           // res = res - 1
           res.w[0]--;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           // Note: a double rounding error upward is not possible; for this
           // the result after the first rounding would have to be 99...95
           // (35 digits in all), possibly followed by a number of zeros; this
           // not possible for f * f + 0
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
           // res = res + 1
           res.w[0]++;
           if (res.w[0] == 0)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           // if this second rounding was exact the result may still be
           // inexact because of the first rounding
           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
             is_inexact_gt_midpoint = 1;
           }
           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
             is_inexact_lt_midpoint = 1;
           }
         } else if (is_midpoint_gt_even &&
         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
           // pulled up to a midpoint
           is_inexact_lt_midpoint = 1;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else if (is_midpoint_lt_even &&
         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
           // pulled down to a midpoint
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else {
           ;
         }
       } else { // if e4 >= emin then q4 < P and the result is tiny and exact
         if (e3 < e4) {
           // if (e3 < e4) the preferred exponent is e3
           // return (C4 * 10^scale) * 10^(e4 - scale)
           // where scale = min (p34-q4, (e4 - e3))
           scale = p34 - q4;
           ind = e4 - e3;
           if (ind < scale)
             scale = ind;
           if (scale == 0) {
             ; // res and e4 are unchanged
           } else if (q4 <= 19) { // C4 fits in 64 bits
             if (scale <= 19) { // 10^scale fits in 64 bits
               // 64 x 64 res.w[0] * ten2k64[scale]
               __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
             } else { // 10^scale fits in 128 bits
               // 64 x 128 res.w[0] * ten2k128[scale - 20]
               __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
             }
           } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
             // 64 x 128 ten2k64[scale] * C3
             __mul_128x64_to_128 (res, ten2k64[scale], res);
           }
           // subtract scale from the exponent
           e4 = e4 - scale;
         }
       }

       // check for inexact result
       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
           is_midpoint_lt_even || is_midpoint_gt_even) {
         // set the inexact flag and the underflow flag
         *pfpsf |= INEXACT_EXCEPTION;
         *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       *pfpsf |= save_fpsf;
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)
     }
     // no overflow, and no underflow for rounding to nearest
     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];

     if (rnd_mode != ROUNDING_TO_NEAREST) {
       rounding_correction (rnd_mode,
         		   is_inexact_lt_midpoint,
         		   is_inexact_gt_midpoint,
         		   is_midpoint_lt_even, is_midpoint_gt_even,
         		   e4, &res, pfpsf);
       // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
       if (e4 == expmin) {
         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
              res.w[0] < 0x38c15b0a00000000ull)) {
           is_tiny = 1;
         }
       }
     }

     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
         is_midpoint_lt_even || is_midpoint_gt_even) {
       // set the inexact flag
       *pfpsf |= INEXACT_EXCEPTION;
       if (is_tiny)
         *pfpsf |= UNDERFLOW_EXCEPTION;
     }

     if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
       // need to ensure that the result has the preferred exponent
       p_exp = res.w[1] & MASK_EXP;
       if (z_exp < p_exp) { // the preferred exponent is z_exp
         // signficand of res in C3
         C3.w[1] = res.w[1] & MASK_COEFF;
         C3.w[0] = res.w[0];
         // the number of decimal digits of x * y is q4 <= 34
         // Note: the coefficient fits in 128 bits

         // return (C3 * 10^scale) * 10^(p_exp - scale)
         // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
         scale = p34 - q4;
         ind = (p_exp - z_exp) >> 49;
         if (ind < scale)
           scale = ind;
         // subtract scale from the exponent
         p_exp = p_exp - ((UINT64) scale << 49);
         if (scale == 0) {
           ; // leave res unchanged
         } else if (q4 <= 19) { // x * y fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C3.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
           }
           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
         } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * C3
           __mul_128x64_to_128 (res, ten2k64[scale], C3);
           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
         }
       } // else leave the result as it is, because p_exp <= z_exp
     }
     *pfpsf |= save_fpsf;
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } // else we have f * f + f

   // continue with x = f, y = f, z = f

   delta = q3 + e3 - q4 - e4;
 delta_ge_zero:
   if (delta >= 0) {

     if (p34 <= delta - 1 ||	// Case (1')
         (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
       // check for overflow, which can occur only in Case (1')
       if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
         // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
         // condition for (q3 + e3) > (p34 + expmax)
         if (rnd_mode == ROUNDING_TO_NEAREST) {
           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
           res.w[0] = 0x0000000000000000ull;
           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
         } else {
           if (p_sign == z_sign) {
             is_inexact_lt_midpoint = 1;
           } else {
             is_inexact_gt_midpoint = 1;
           }
           // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
           scale = p34 - q3;
           if (scale == 0) {
             res.w[1] = z_sign | C3.w[1];
             res.w[0] = C3.w[0];
           } else {
             if (q3 <= 19) { // C3 fits in 64 bits
               if (scale <= 19) { // 10^scale fits in 64 bits
         	// 64 x 64 C3.w[0] * ten2k64[scale]
         	__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
               } else { // 10^scale fits in 128 bits
         	// 64 x 128 C3.w[0] * ten2k128[scale - 20]
         	__mul_128x64_to_128 (res, C3.w[0],
         			     ten2k128[scale - 20]);
               }
             } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
               // 64 x 128 ten2k64[scale] * C3
               __mul_128x64_to_128 (res, ten2k64[scale], C3);
             }
             // the coefficient in res has q3 + scale = p34 digits
           }
           e3 = e3 - scale;
           res.w[1] = z_sign | res.w[1];
           rounding_correction (rnd_mode,
         		       is_inexact_lt_midpoint,
         		       is_inexact_gt_midpoint,
         		       is_midpoint_lt_even, is_midpoint_gt_even,
         		       e3, &res, pfpsf);
         }
         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         BID_SWAP128 (res);
         BID_RETURN (res)
       }
       // res = z
       if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
         // return (C3 * 10^scale) * 10^(z_exp - scale)
         // where scale = min (p34-q3, z_exp-EMIN)
         scale = p34 - q3;
         ind = e3 + 6176;
         if (ind < scale)
           scale = ind;
         if (scale == 0) {
           res.w[1] = C3.w[1];
           res.w[0] = C3.w[0];
         } else if (q3 <= 19) { // z fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C3.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
           }
         } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * C3
           __mul_128x64_to_128 (res, ten2k64[scale], C3);
         }
         // the coefficient in res has q3 + scale digits
         // subtract scale from the exponent
         z_exp = z_exp - ((UINT64) scale << 49);
         e3 = e3 - scale;
         res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
         if (scale + q3 < p34)
           *pfpsf |= UNDERFLOW_EXCEPTION;
       } else {
         scale = 0;
         res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
         res.w[0] = C3.w[0];
       }

       // use the following to avoid double rounding errors when operating on
       // mixed formats in rounding to nearest, and for correcting the result
       // if not rounding to nearest
       if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
         // there is a gap of exactly one digit between the scaled C3 and C4
         // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
         if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
             (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
             (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
         		  C3.w[0] != ten2k128[q3 - 21].w[0]))) {
           // C3 * 10^ scale != 10^(q3-1)
           // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
           // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
           is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
         } else { // if C3 * 10^scale = 10^(q3+scale-1)
           // ok from above e3 = (z_exp >> 49) - 6176;
           // the result is always inexact
           if (q4 == 1) {
             R64 = C4.w[0];
           } else {
             // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
             // x = q4-1, 1 <= x <= 67 and check if this operation is exact
             if (q4 <= 18) { // 2 <= q4 <= 18
               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
         		    &is_inexact_lt_midpoint,
         		    &is_inexact_gt_midpoint);
             } else if (q4 <= 38) {
               P128.w[1] = C4.w[1];
               P128.w[0] = C4.w[0];
               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               R64 = R128.w[0]; // one decimal digit
             } else if (q4 <= 57) {
               P192.w[2] = C4.w[2];
               P192.w[1] = C4.w[1];
               P192.w[0] = C4.w[0];
               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               R64 = R192.w[0]; // one decimal digit
             } else { // if (q4 <= 68)
               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               R64 = R256.w[0]; // one decimal digit
             }
             if (incr_exp) {
               R64 = 10;
             }
           }
           if (q4 == 1 && C4.w[0] == 5) {
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 1;
             is_midpoint_gt_even = 0;
           } else if ((e3 == expmin) ||
         	     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
             // result does not change
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 1;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else {
             is_inexact_lt_midpoint = 1;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
             // result decremented is 10^(q3+scale) - 1
             if ((q3 + scale) <= 19) {
               res.w[1] = 0;
               res.w[0] = ten2k64[q3 + scale];
             } else { // if ((q3 + scale + 1) <= 35)
               res.w[1] = ten2k128[q3 + scale - 20].w[1];
               res.w[0] = ten2k128[q3 + scale - 20].w[0];
             }
             res.w[0] = res.w[0] - 1; // borrow never occurs
             z_exp = z_exp - EXP_P1;
             e3 = e3 - 1;
             res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
           }
           if (e3 == expmin) {
             if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
               ; // result not tiny (in round-to-nearest mode)
             } else {
               *pfpsf |= UNDERFLOW_EXCEPTION;
             }
           }
         } // end 10^(q3+scale-1)
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
       } else {
         if (p_sign == z_sign) {
           // if (z_sign), set as if for absolute value
           is_inexact_lt_midpoint = 1;
         } else { // if (p_sign != z_sign)
           // if (z_sign), set as if for absolute value
           is_inexact_gt_midpoint = 1;
         }
         *pfpsf |= INEXACT_EXCEPTION;
       }
       // the result is always inexact => set the inexact flag
       // Determine tininess:
       //    if (exp > expmin)
       //      the result is not tiny
       //    else // if exp = emin
       //      if (q3 + scale < p34)
       //        the result is tiny
       //      else // if (q3 + scale = p34)
       //        if (C3 * 10^scale > 10^33)
       //          the result is not tiny
       //        else // if C3 * 10^scale = 10^33
       //          if (xy * z > 0)
       //            the result is not tiny
       //          else // if (xy * z < 0)
       //            if (z > 0)
       //              if rnd_mode != RP
       //                the result is tiny
       //              else // if RP
       //                the result is not tiny
       //            else // if (z < 0)
       //              if rnd_mode != RM
       //                the result is tiny
       //              else // if RM
       //                the result is not tiny
       //              endif
       //            endif
       //          endif
       //        endif
       //      endif
       //    endif
       if ((e3 == expmin && (q3 + scale) < p34) ||
           (e3 == expmin && (q3 + scale) == p34 &&
           (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&	// 10^33_high
           res.w[0] == 0x38c15b0a00000000ull &&	// 10^33_low
           z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
           (z_sign && rnd_mode != ROUNDING_DOWN)))) {
         *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e3, &res, pfpsf);
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if (p34 == delta) { // Case (1''B)

       // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
       // and C3 can be scaled up to p34 digits if needed

       // scale C3 to p34 digits if needed
       scale = p34 - q3; // 0 <= scale <= p34 - 1
       if (scale == 0) {
         res.w[1] = C3.w[1];
         res.w[0] = C3.w[0];
       } else if (q3 <= 19) { // z fits in 64 bits
         if (scale <= 19) { // 10^scale fits in 64 bits
           // 64 x 64 C3.w[0] * ten2k64[scale]
           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
         } else { // 10^scale fits in 128 bits
           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
         }
       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
         // 64 x 128 ten2k64[scale] * C3
         __mul_128x64_to_128 (res, ten2k64[scale], C3);
       }
       // subtract scale from the exponent
       z_exp = z_exp - ((UINT64) scale << 49);
       e3 = e3 - scale;
       // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits

       // determine whether x * y is less than, equal to, or greater than
       // 1/2 ulp (z)
       if (q4 <= 19) {
         if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       } else if (q4 <= 38) {
         if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
             (C4.w[1] == midpoint128[q4 - 20].w[1] &&
             C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
             C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       } else if (q4 <= 58) {
         if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
             C4.w[1] < midpoint192[q4 - 39].w[1]) ||
             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
             C4.w[1] == midpoint192[q4 - 39].w[1] &&
             C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
             C4.w[1] == midpoint192[q4 - 39].w[1] &&
             C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       } else {
         if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] < midpoint256[q4 - 59].w[2]) ||
             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] == midpoint256[q4 - 59].w[2] &&
             C4.w[1] < midpoint256[q4 - 59].w[1]) ||
             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] == midpoint256[q4 - 59].w[2] &&
             C4.w[1] == midpoint256[q4 - 59].w[1] &&
             C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] == midpoint256[q4 - 59].w[2] &&
             C4.w[1] == midpoint256[q4 - 59].w[1] &&
             C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       }

       if (p_sign == z_sign) {
         if (lt_half_ulp) {
           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
           // use the following to avoid double rounding errors when operating on
           // mixed formats in rounding to nearest
           is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
         } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
           // add 1 ulp to the significand
           res.w[0]++;
           if (res.w[0] == 0x0ull)
             res.w[1]++;
           // check for rounding overflow, when coeff == 10^34
           if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
               res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
             e3 = e3 + 1;
             // coeff = 10^33
             z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
             res.w[1] = 0x0000314dc6448d93ull;
             res.w[0] = 0x38c15b0a00000000ull;
           }
           // end add 1 ulp
           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
           if (eq_half_ulp) {
             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
           } else {
             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
           }
         } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
           // leave unchanged
           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
           is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
         }
         // the result is always inexact, and never tiny
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
         // check for overflow
         if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
           res.w[0] = 0x0000000000000000ull;
           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
           *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
           *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
           *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
           *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
           BID_SWAP128 (res);
           BID_RETURN (res)
         }
         if (rnd_mode != ROUNDING_TO_NEAREST) {
           rounding_correction (rnd_mode,
         		       is_inexact_lt_midpoint,
         		       is_inexact_gt_midpoint,
         		       is_midpoint_lt_even, is_midpoint_gt_even,
         		       e3, &res, pfpsf);
           z_exp = res.w[1] & MASK_EXP;
         }
       } else { // if (p_sign != z_sign)
         // consider two cases, because C3 * 10^scale = 10^33 is a special case
         if (res.w[1] != 0x0000314dc6448d93ull ||
             res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
           if (lt_half_ulp) {
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             // use the following to avoid double rounding errors when operating
             // on mixed formats in rounding to nearest
             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
           } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
             // subtract 1 ulp from the significand
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             if (eq_half_ulp) {
               is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
             } else {
               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
             }
           } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
             // leave unchanged
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
           }
           // the result is always inexact, and never tiny
           // check for overflow for RN
           if (e3 > expmax) {
             if (rnd_mode == ROUNDING_TO_NEAREST) {
               res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
               res.w[0] = 0x0000000000000000ull;
               *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
             } else {
               rounding_correction (rnd_mode,
         			   is_inexact_lt_midpoint,
         			   is_inexact_gt_midpoint,
         			   is_midpoint_lt_even,
         			   is_midpoint_gt_even, e3, &res,
         			   pfpsf);
             }
             *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
             *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
             *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
             *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
             BID_SWAP128 (res);
             BID_RETURN (res)
           }
           // set the inexact flag
           *pfpsf |= INEXACT_EXCEPTION;
           if (rnd_mode != ROUNDING_TO_NEAREST) {
             rounding_correction (rnd_mode,
         			 is_inexact_lt_midpoint,
         			 is_inexact_gt_midpoint,
         			 is_midpoint_lt_even,
         			 is_midpoint_gt_even, e3, &res, pfpsf);
           }
           z_exp = res.w[1] & MASK_EXP;
         } else { // if C3 * 10^scale = 10^33
           e3 = (z_exp >> 49) - 6176;
           if (e3 > expmin) {
             // the result is exact if exp > expmin and C4 = d*10^(q4-1),
             // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
             if (q4 == 1) {
               // if q4 = 1 the result is exact
               // result coefficient = 10^34 - C4
               res.w[1] = 0x0001ed09bead87c0ull;
               res.w[0] = 0x378d8e6400000000ull - C4.w[0];
               z_exp = z_exp - EXP_P1;
               e3 = e3 - 1;
               res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             } else {
               // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
               // x = q4-1, 1 <= x <= 67 and check if this operation is exact
               if (q4 <= 18) { // 2 <= q4 <= 18
         	round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               } else if (q4 <= 38) {
         	P128.w[1] = C4.w[1];
         	P128.w[0] = C4.w[0];
         	round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
         			&is_midpoint_lt_even,
         			&is_midpoint_gt_even,
         			&is_inexact_lt_midpoint,
         			&is_inexact_gt_midpoint);
         	R64 = R128.w[0]; // one decimal digit
               } else if (q4 <= 57) {
         	P192.w[2] = C4.w[2];
         	P192.w[1] = C4.w[1];
         	P192.w[0] = C4.w[0];
         	round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
         			&is_midpoint_lt_even,
         			&is_midpoint_gt_even,
         			&is_inexact_lt_midpoint,
         			&is_inexact_gt_midpoint);
         	R64 = R192.w[0]; // one decimal digit
               } else { // if (q4 <= 68)
         	round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
         			&is_midpoint_lt_even,
         			&is_midpoint_gt_even,
         			&is_inexact_lt_midpoint,
         			&is_inexact_gt_midpoint);
         	R64 = R256.w[0]; // one decimal digit
               }
               if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         	// the result is exact: 10^34 - R64
         	// incr_exp = 0 with certainty
         	z_exp = z_exp - EXP_P1;
         	e3 = e3 - 1;
         	res.w[1] =
         	  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
         	res.w[0] = 0x378d8e6400000000ull - R64;
               } else {
         	// We want R64 to be the top digit of C4, but we actually
         	// obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
         	// because the top digit is (C4 * 10^(-q4+1))RZ
         	// however, if incr_exp = 1 then R64 = 10 with certainty
         	if (incr_exp) {
         	  R64 = 10;
         	}
         	// the result is inexact as C4 has more than 1 significant digit
         	// and C3 * 10^scale = 10^33
         	// example of case that is treated here:
         	// 100...0 * 10^e3 - 0.41 * 10^e3 =
         	// 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
         	// note that (e3 > expmin}
         	// in order to round, subtract R64 from 10^34 and then compare
         	// C4 - R64 * 10^(q4-1) with 1/2 ulp
         	// calculate 10^34 - R64
         	res.w[1] = 0x0001ed09bead87c0ull;
         	res.w[0] = 0x378d8e6400000000ull - R64;
         	z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
         	// calculate C4 - R64 * 10^(q4-1); this is a rare case and
         	// R64 is small, 1 <= R64 <= 9
         	e3 = e3 - 1;
         	if (is_inexact_lt_midpoint) {
         	  is_inexact_lt_midpoint = 0;
         	  is_inexact_gt_midpoint = 1;
         	} else if (is_inexact_gt_midpoint) {
         	  is_inexact_gt_midpoint = 0;
         	  is_inexact_lt_midpoint = 1;
         	} else if (is_midpoint_lt_even) {
         	  is_midpoint_lt_even = 0;
         	  is_midpoint_gt_even = 1;
         	} else if (is_midpoint_gt_even) {
         	  is_midpoint_gt_even = 0;
         	  is_midpoint_lt_even = 1;
         	} else {
         	  ;
         	}
         	// the result is always inexact, and never tiny
         	// check for overflow for RN
         	if (e3 > expmax) {
         	  if (rnd_mode == ROUNDING_TO_NEAREST) {
         	    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
         	    res.w[0] = 0x0000000000000000ull;
         	    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
         	  } else {
         	    rounding_correction (rnd_mode,
         				 is_inexact_lt_midpoint,
         				 is_inexact_gt_midpoint,
         				 is_midpoint_lt_even,
         				 is_midpoint_gt_even, e3, &res,
         				 pfpsf);
         	  }
         	  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         	  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         	  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         	  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         	  BID_SWAP128 (res);
         	  BID_RETURN (res)
         	}
         	// set the inexact flag
         	*pfpsf |= INEXACT_EXCEPTION;
         	res.w[1] =
         	  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
         	if (rnd_mode != ROUNDING_TO_NEAREST) {
         	  rounding_correction (rnd_mode,
         			       is_inexact_lt_midpoint,
         			       is_inexact_gt_midpoint,
         			       is_midpoint_lt_even,
         			       is_midpoint_gt_even, e3, &res,
         			       pfpsf);
         	}
         	z_exp = res.w[1] & MASK_EXP;
               } // end result is inexact
             } // end q4 > 1
           } else { // if (e3 = emin)
             // if e3 = expmin the result is also tiny (the condition for
             // tininess is C4 > 050...0 [q4 digits] which is met because
             // the msd of C4 is not zero)
             // the result is tiny and inexact in all rounding modes;
             // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
             // gt_half_ulp to calculate)
             // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged

             // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
             if (gt_half_ulp) { // res = 10^33 - 1
               res.w[1] = 0x0000314dc6448d93ull;
               res.w[0] = 0x38c15b09ffffffffull;
             } else {
               res.w[1] = 0x0000314dc6448d93ull;
               res.w[0] = 0x38c15b0a00000000ull;
             }
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later

             if (eq_half_ulp) {
               is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
             } else if (lt_half_ulp) {
               is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
             } else { // if (gt_half_ulp)
               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
             }

             if (rnd_mode != ROUNDING_TO_NEAREST) {
               rounding_correction (rnd_mode,
                   is_inexact_lt_midpoint,
                   is_inexact_gt_midpoint,
                   is_midpoint_lt_even,
                   is_midpoint_gt_even, e3, &res,
                   pfpsf);
               z_exp = res.w[1] & MASK_EXP;
             }
           } // end e3 = emin
           // set the inexact flag (if the result was not exact)
           if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
               is_midpoint_lt_even || is_midpoint_gt_even)
             *pfpsf |= INEXACT_EXCEPTION;
         } // end 10^33
       } // end if (p_sign != z_sign)
       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
         (q3 <= delta && delta + q4 <= p34) || // Case (3)
         (delta < q3 && p34 < delta + q4) || // Case (4)
         (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
         (delta + q4 < q3)) && // Case (6)
         !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)

       // the result has the sign of z

       if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
           (delta < q3 && p34 < delta + q4)) { // Case (4)
         // round first the sum x * y + z with unbounded exponent
         // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
         // 1 <= scale <= 33
         // calculate res = C3 * 10^scale
         scale = p34 - q3;
         x0 = delta + q4 - p34;
       } else if (delta + q4 < q3) { // Case (6)
         // make Case (6) look like Case (3) or Case (5) with scale = 0
         // by scaling up C4 by 10^(q3 - delta - q4)
         scale = q3 - delta - q4; // 1 <= scale <= 33
         if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C4.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
           }
         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * C4
           __mul_128x64_to_128 (P128, ten2k64[scale], C4);
         }
         C4.w[0] = P128.w[0];
         C4.w[1] = P128.w[1];
         // e4 does not need adjustment, as it is not used from this point on
         scale = 0;
         x0 = 0;
         // now Case (6) looks like Case (3) or Case (5) with scale = 0
       } else { // if Case (3) or Case (5)
         // Note: Case (3) is similar to Case (2), but scale differs and the
         // result is exact, unless it is tiny (so x0 = 0 when calculating the
         // result with unbounded exponent)

         // calculate first the sum x * y + z with unbounded exponent (exact)
         // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
         // 1 <= scale <= 33
         // calculate res = C3 * 10^scale
         scale = delta + q4 - q3;
         x0 = 0;
         // Note: the comments which follow refer [mainly] to Case (2)]
       }

     case2_repeat:
       if (scale == 0) { // this could happen e.g. if we return to case2_repeat
         // or in Case (4)
         res.w[1] = C3.w[1];
         res.w[0] = C3.w[0];
       } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
         if (scale <= 19) { // 10^scale fits in 64 bits
           // 64 x 64 C3.w[0] * ten2k64[scale]
           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
         } else { // 10^scale fits in 128 bits
           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
         }
       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
         // 64 x 128 ten2k64[scale] * C3
         __mul_128x64_to_128 (res, ten2k64[scale], C3);
       }
       // e3 is already calculated
       e3 = e3 - scale;
       // now res = C3 * 10^scale and e3 = e3 - scale
       // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
       // because the result was too small

       // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
       // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
       // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
       // the rounding fits in 128 bits!)
       // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
       // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
       if (x0 == 0) { // this could happen only if we return to case2_repeat, or
         // for Case (3) or Case (6)
         R128.w[1] = C4.w[1];
         R128.w[0] = C4.w[0];
       } else if (q4 <= 18) {
         // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
         round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         if (incr_exp) {
           // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
           R64 = ten2k64[q4 - x0];
         }
         R128.w[1] = 0;
         R128.w[0] = R64;
       } else if (q4 <= 38) {
         // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
         P128.w[1] = C4.w[1];
         P128.w[0] = C4.w[0];
         round128_19_38 (q4, x0, P128, &R128, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint,
             &is_inexact_gt_midpoint);
         if (incr_exp) {
           // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
             R128.w[0] = ten2k64[q4 - x0];
             // R128.w[1] stays 0
           } else { // 20 <= q4 - x0 <= 37
             R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
             R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
           }
         }
       } else if (q4 <= 57) {
         // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
         P192.w[2] = C4.w[2];
         P192.w[1] = C4.w[1];
         P192.w[0] = C4.w[0];
         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint,
             &is_inexact_gt_midpoint);
         // R192.w[2] is always 0
         if (incr_exp) {
           // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
             R192.w[0] = ten2k64[q4 - x0];
             // R192.w[1] stays 0
             // R192.w[2] stays 0
           } else { // 20 <= q4 - x0 <= 33
             R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
             R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
             // R192.w[2] stays 0
           }
         }
         R128.w[1] = R192.w[1];
         R128.w[0] = R192.w[0];
       } else {
         // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint,
             &is_inexact_gt_midpoint);
         // R256.w[3] and R256.w[2] are always 0
         if (incr_exp) {
           // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
             R256.w[0] = ten2k64[q4 - x0];
             // R256.w[1] stays 0
             // R256.w[2] stays 0
             // R256.w[3] stays 0
           } else { // 20 <= q4 - x0 <= 33
             R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
             R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
             // R256.w[2] stays 0
             // R256.w[3] stays 0
           }
         }
         R128.w[1] = R256.w[1];
         R128.w[0] = R256.w[0];
       }
       // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
       // rounded to nearest, which were copied into R128
       if (z_sign == p_sign) {
         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
         // the sum can result in [up to] p34 or p34 + 1 digits
         res.w[0] = res.w[0] + R128.w[0];
         res.w[1] = res.w[1] + R128.w[1];
         if (res.w[0] < R128.w[0])
           res.w[1]++; // carry
         // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
         if (res.w[1] > 0x0001ed09bead87c0ull ||
             (res.w[1] == 0x0001ed09bead87c0ull &&
              res.w[0] > 0x378d8e63ffffffffull)) {
           // avoid double rounding error
           is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
           is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
           is_midpoint_lt_even0 = is_midpoint_lt_even;
           is_midpoint_gt_even0 = is_midpoint_gt_even;
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
           P128.w[1] = res.w[1];
           P128.w[0] = res.w[0];
           round128_19_38 (35, 1, P128, &res, &incr_exp,
               &is_midpoint_lt_even, &is_midpoint_gt_even,
               &is_inexact_lt_midpoint,
               &is_inexact_gt_midpoint);
           // incr_exp is 0 with certainty in this case
           // avoid a double rounding error
           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
               is_midpoint_lt_even) { // double rounding error upward
             // res = res - 1
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             // Note: a double rounding error upward is not possible; for this
             // the result after the first rounding would have to be 99...95
             // (35 digits in all), possibly followed by a number of zeros; this
             // not possible in Cases (2)-(6) or (15)-(17) which may get here
             is_midpoint_lt_even = 0;
             is_inexact_lt_midpoint = 1;
           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
               is_midpoint_gt_even) { // double rounding error downward
             // res = res + 1
             res.w[0]++;
             if (res.w[0] == 0)
               res.w[1]++;
             is_midpoint_gt_even = 0;
             is_inexact_gt_midpoint = 1;
           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	     !is_inexact_lt_midpoint
         	     && !is_inexact_gt_midpoint) {
             // if this second rounding was exact the result may still be
             // inexact because of the first rounding
             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
               is_inexact_gt_midpoint = 1;
             }
             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
               is_inexact_lt_midpoint = 1;
             }
           } else if (is_midpoint_gt_even &&
         	     (is_inexact_gt_midpoint0
         	      || is_midpoint_lt_even0)) {
             // pulled up to a midpoint
             is_inexact_lt_midpoint = 1;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else if (is_midpoint_lt_even &&
         	     (is_inexact_lt_midpoint0
         	      || is_midpoint_gt_even0)) {
             // pulled down to a midpoint
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 1;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else {
             ;
           }
           // adjust exponent
           e3 = e3 + 1;
           if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
               !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
             if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
         	is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
               is_inexact_lt_midpoint = 1;
             }
           }
         } else {
           // this is the result rounded with unbounded exponent, unless a
           // correction is needed
           res.w[1] = res.w[1] & MASK_COEFF;
           if (lsb == 1) {
             if (is_midpoint_gt_even) {
               // res = res + 1
               is_midpoint_gt_even = 0;
               is_midpoint_lt_even = 1;
               res.w[0]++;
               if (res.w[0] == 0x0)
         	res.w[1]++;
               // check for rounding overflow
               if (res.w[1] == 0x0001ed09bead87c0ull &&
         	  res.w[0] == 0x378d8e6400000000ull) {
         	// res = 10^34 => rounding overflow
         	res.w[1] = 0x0000314dc6448d93ull;
         	res.w[0] = 0x38c15b0a00000000ull; // 10^33
         	e3++;
               }
             } else if (is_midpoint_lt_even) {
               // res = res - 1
               is_midpoint_lt_even = 0;
               is_midpoint_gt_even = 1;
               res.w[0]--;
               if (res.w[0] == 0xffffffffffffffffull)
         	res.w[1]--;
               // if the result is pure zero, the sign depends on the rounding
               // mode (x*y and z had opposite signs)
               if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
         	if (rnd_mode != ROUNDING_DOWN)
         	  z_sign = 0x0000000000000000ull;
         	else
         	  z_sign = 0x8000000000000000ull;
         	// the exponent is max (e3, expmin)
         	res.w[1] = 0x0;
         	res.w[0] = 0x0;
         	*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         	*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         	*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         	*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         	BID_SWAP128 (res);
         	BID_RETURN (res)
               }
             } else {
               ;
             }
           }
         }
       } else { // if (z_sign != p_sign)
         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
         // used to swap rounding indicators if p_sign != z_sign
         // the sum can result in [up to] p34 or p34 - 1 digits
         tmp64 = res.w[0];
         res.w[0] = res.w[0] - R128.w[0];
         res.w[1] = res.w[1] - R128.w[1];
         if (res.w[0] > tmp64)
           res.w[1]--; // borrow
         // if res < 10^33 and exp > expmin need to decrease x0 and
         // increase scale by 1
         if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
         		     (res.w[1] == 0x0000314dc6448d93ull &&
         		      res.w[0] < 0x38c15b0a00000000ull)) ||
         		    (is_inexact_lt_midpoint
         		     && res.w[1] == 0x0000314dc6448d93ull
         		     && res.w[0] == 0x38c15b0a00000000ull))
             && x0 >= 1) {
           x0 = x0 - 1;
           // first restore e3, otherwise it will be too small
           e3 = e3 + scale;
           scale = scale + 1;
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
           incr_exp = 0;
           goto case2_repeat;
         }
         // else this is the result rounded with unbounded exponent;
         // because the result has opposite sign to that of C4 which was
         // rounded, need to change the rounding indicators
         if (is_inexact_lt_midpoint) {
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
         } else if (is_inexact_gt_midpoint) {
           is_inexact_gt_midpoint = 0;
           is_inexact_lt_midpoint = 1;
         } else if (lsb == 0) {
           if (is_midpoint_lt_even) {
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 1;
           } else if (is_midpoint_gt_even) {
             is_midpoint_gt_even = 0;
             is_midpoint_lt_even = 1;
           } else {
             ;
           }
         } else if (lsb == 1) {
           if (is_midpoint_lt_even) {
             // res = res + 1
             res.w[0]++;
             if (res.w[0] == 0x0)
               res.w[1]++;
             // check for rounding overflow
             if (res.w[1] == 0x0001ed09bead87c0ull &&
         	res.w[0] == 0x378d8e6400000000ull) {
               // res = 10^34 => rounding overflow
               res.w[1] = 0x0000314dc6448d93ull;
               res.w[0] = 0x38c15b0a00000000ull; // 10^33
               e3++;
             }
           } else if (is_midpoint_gt_even) {
             // res = res - 1
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             // if the result is pure zero, the sign depends on the rounding
             // mode (x*y and z had opposite signs)
             if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
               if (rnd_mode != ROUNDING_DOWN)
         	z_sign = 0x0000000000000000ull;
               else
         	z_sign = 0x8000000000000000ull;
               // the exponent is max (e3, expmin)
               res.w[1] = 0x0;
               res.w[0] = 0x0;
               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
               BID_SWAP128 (res);
               BID_RETURN (res)
             }
           } else {
             ;
           }
         } else {
           ;
         }
       }
       // check for underflow
       if (e3 == expmin) { // and if significand < 10^33 => result is tiny
         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
              res.w[0] < 0x38c15b0a00000000ull)) {
           is_tiny = 1;
         }
       } else if (e3 < expmin) {
         // the result is tiny, so we must truncate more of res
         is_tiny = 1;
         x0 = expmin - e3;
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
         // determine the number of decimal digits in res
         if (res.w[1] == 0x0) {
           // between 1 and 19 digits
           for (ind = 1; ind <= 19; ind++) {
             if (res.w[0] < ten2k64[ind]) {
               break;
             }
           }
           // ind digits
         } else if (res.w[1] < ten2k128[0].w[1] ||
         	   (res.w[1] == ten2k128[0].w[1]
         	    && res.w[0] < ten2k128[0].w[0])) {
           // 20 digits
           ind = 20;
         } else { // between 21 and 38 digits
           for (ind = 1; ind <= 18; ind++) {
             if (res.w[1] < ten2k128[ind].w[1] ||
         	(res.w[1] == ten2k128[ind].w[1] &&
         	 res.w[0] < ten2k128[ind].w[0])) {
               break;
             }
           }
           // ind + 20 digits
           ind = ind + 20;
         }

         // at this point ind >= x0; because delta >= 2 on this path, the case
         // ind = x0 can occur only in Case (2) or case (3), when C3 has one
         // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
         // the signs of x * y and z are opposite, and through cancellation
         // the most significant decimal digit in res has the weight
         // 10^(emin-1); however, it is clear that in this case the most
         // significant digit is 9, so the result before rounding is
         // 0.9... * 10^emin
         // Otherwise, ind > x0 because there are non-zero decimal digits in the
         // result with weight of at least 10^emin, and correction for underflow
         //  can be carried out using the round*_*_2_* () routines
         if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
           res.w[1] = 0x0;
           res.w[0] = 0x1;
           is_inexact_gt_midpoint = 1;
         } else if (ind <= 18) { // check that 2 <= ind
           // 2 <= ind <= 18, 1 <= x0 <= 17
           round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
           if (incr_exp) {
             // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
             R64 = ten2k64[ind - x0];
           }
           res.w[1] = 0;
           res.w[0] = R64;
         } else if (ind <= 38) {
           // 19 <= ind <= 38
           P128.w[1] = res.w[1];
           P128.w[0] = res.w[0];
           round128_19_38 (ind, x0, P128, &res, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
           if (incr_exp) {
             // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
             if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
               res.w[0] = ten2k64[ind - x0];
               // res.w[1] stays 0
             } else { // 20 <= ind - x0 <= 37
               res.w[0] = ten2k128[ind - x0 - 20].w[0];
               res.w[1] = ten2k128[ind - x0 - 20].w[1];
             }
           }
         }
         // avoid a double rounding error
         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
             is_midpoint_lt_even) { // double rounding error upward
           // res = res - 1
           res.w[0]--;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           // Note: a double rounding error upward is not possible; for this
           // the result after the first rounding would have to be 99...95
           // (35 digits in all), possibly followed by a number of zeros; this
           // not possible in Cases (2)-(6) which may get here
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
           // res = res + 1
           res.w[0]++;
           if (res.w[0] == 0)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           // if this second rounding was exact the result may still be
           // inexact because of the first rounding
           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
             is_inexact_gt_midpoint = 1;
           }
           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
             is_inexact_lt_midpoint = 1;
           }
         } else if (is_midpoint_gt_even &&
         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
           // pulled up to a midpoint
           is_inexact_lt_midpoint = 1;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else if (is_midpoint_lt_even &&
         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
           // pulled down to a midpoint
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else {
           ;
         }
         // adjust exponent
         e3 = e3 + x0;
         if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
             is_inexact_lt_midpoint = 1;
           }
         }
       } else {
         ; // not underflow
       }
       // check for inexact result
       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
           is_midpoint_lt_even || is_midpoint_gt_even) {
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
         if (is_tiny)
           *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       // now check for significand = 10^34 (may have resulted from going
       // back to case2_repeat)
       if (res.w[1] == 0x0001ed09bead87c0ull &&
           res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
         res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
         res.w[0] = 0x38c15b0a00000000ull;
         e3 = e3 + 1;
       }
       res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
       // check for overflow
       if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
         res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
       }
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e3, &res, pfpsf);
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else {

       // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
       // the signs of x*y and z are opposite; in these cases massive
       // cancellation can occur, so it is better to scale either C3 or C4 and
       // to perform the subtraction before rounding; rounding is performed
       // next, depending on the number of decimal digits in the result and on
       // the exponent value
       // Note: overlow is not possible in this case
       // this is similar to Cases (15), (16), and (17)

       if (delta + q4 < q3) { // from Case (6)
         // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
         // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
         // and call add_and_round; delta stays positive
         // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
         P128.w[1] = C3.w[1];
         P128.w[0] = C3.w[0];
         C3.w[1] = C4.w[1];
         C3.w[0] = C4.w[0];
         C4.w[1] = P128.w[1];
         C4.w[0] = P128.w[0];
         ind = q3;
         q3 = q4;
         q4 = ind;
         ind = e3;
         e3 = e4;
         e4 = ind;
         tmp_sign = z_sign;
         z_sign = p_sign;
         p_sign = tmp_sign;
       } else { // from Cases (2), (3), (4), (5)
         // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
         // scaled up by q4 + delta - q3; this is the same as in Cases (15),
         // (16), and (17) if we just change the sign of delta
         delta = -delta;
       }
       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
         	     rnd_mode, &is_midpoint_lt_even,
         	     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
         	     &is_inexact_gt_midpoint, pfpsf, &res);
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     }

   } else { // if delta < 0

     delta = -delta;

     if (p34 < q4 && q4 <= delta) { // Case (7)

       // truncate C4 to p34 digits into res
       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
       x0 = q4 - p34;
       if (q4 <= 38) {
         P128.w[1] = C4.w[1];
         P128.w[0] = C4.w[0];
         round128_19_38 (q4, x0, P128, &res, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
       } else if (q4 <= 57) { // 35 <= q4 <= 57
         P192.w[2] = C4.w[2];
         P192.w[1] = C4.w[1];
         P192.w[0] = C4.w[0];
         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R192.w[0];
         res.w[1] = R192.w[1];
       } else { // if (q4 <= 68)
         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R256.w[0];
         res.w[1] = R256.w[1];
       }
       e4 = e4 + x0;
       if (incr_exp) {
         e4 = e4 + 1;
       }
       if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
           !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         // if C4 rounded to p34 digits is exact then the result is inexact,
         // in a way that depends on the signs of x * y and z
         if (p_sign == z_sign) {
           is_inexact_lt_midpoint = 1;
         } else { // if (p_sign != z_sign)
           if (res.w[1] != 0x0000314dc6448d93ull ||
               res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
             is_inexact_gt_midpoint = 1;
           } else { // res = 10^33 and exact is a special case
             // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
             // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
             // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
             // Note: ulp is really ulp/10 (after borrow which propagates to msd)
             if (delta > p34 + 1) { // C3 < 1/2
               // res = 10^33, unchanged
               is_inexact_gt_midpoint = 1;
             } else { // if (delta == p34 + 1)
               if (q3 <= 19) {
         	if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_inexact_gt_midpoint = 1;
         	} else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_midpoint_lt_even = 1;
         	} else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
         	  res.w[0] = 0x378d8e63ffffffffull;
         	  e4 = e4 - 1;
         	  is_inexact_lt_midpoint = 1;
         	}
               } else { // if (20 <= q3 <=34)
         	if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
                     (C3.w[1] == midpoint128[q3 - 20].w[1] &&
                     C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_inexact_gt_midpoint = 1;
         	} else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
                     C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_midpoint_lt_even = 1;
         	} else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
         	  res.w[0] = 0x378d8e63ffffffffull;
         	  e4 = e4 - 1;
         	  is_inexact_lt_midpoint = 1;
         	}
               }
             }
           }
         }
       } else if (is_midpoint_lt_even) {
         if (z_sign != p_sign) {
           // needs correction: res = res - 1
           res.w[0] = res.w[0] - 1;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           // if it is (10^33-1)*10^e4 then the corect result is
           // (10^34-1)*10(e4-1)
           if (res.w[1] == 0x0000314dc6448d93ull &&
               res.w[0] == 0x38c15b09ffffffffull) {
             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
             res.w[0] = 0x378d8e63ffffffffull;
             e4 = e4 - 1;
           }
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
         } else { // if (z_sign == p_sign)
           is_midpoint_lt_even = 0;
           is_inexact_gt_midpoint = 1;
         }
       } else if (is_midpoint_gt_even) {
         if (z_sign == p_sign) {
           // needs correction: res = res + 1 (cannot cross in the next binade)
           res.w[0] = res.w[0] + 1;
           if (res.w[0] == 0x0000000000000000ull)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else { // if (z_sign != p_sign)
           is_midpoint_gt_even = 0;
           is_inexact_lt_midpoint = 1;
         }
       } else {
         ; // the rounded result is already correct
       }
       // check for overflow
       if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
         res.w[1] = p_sign | 0x7800000000000000ull;
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
       } else { // no overflow or not RN
         p_exp = ((UINT64) (e4 + 6176) << 49);
         res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
       }
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
           is_midpoint_lt_even || is_midpoint_gt_even) {
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
         (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
         (q4 <= delta && delta + q3 <= p34) || // Case (10)
         (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
         (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
         (delta + q3 < q4 && q4 <= p34)) { // Case (18)

       // Case (8) is similar to Case (1), with C3 and C4 swapped
       // Case (9) is similar to Case (2), with C3 and C4 swapped
       // Case (10) is similar to Case (3), with C3 and C4 swapped
       // Case (13) is similar to Case (4), with C3 and C4 swapped
       // Case (14) is similar to Case (5), with C3 and C4 swapped
       // Case (18) is similar to Case (6), with C3 and C4 swapped

       // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
       // and go back to delta_ge_zero
       // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
       P128.w[1] = C3.w[1];
       P128.w[0] = C3.w[0];
       C3.w[1] = C4.w[1];
       C3.w[0] = C4.w[0];
       C4.w[1] = P128.w[1];
       C4.w[0] = P128.w[0];
       ind = q3;
       q3 = q4;
       q4 = ind;
       ind = e3;
       e3 = e4;
       e4 = ind;
       tmp_sign = z_sign;
       z_sign = p_sign;
       p_sign = tmp_sign;
       tmp.ui64 = z_exp;
       z_exp = p_exp;
       p_exp = tmp.ui64;
       goto delta_ge_zero;

     } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
                (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)

       // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
       // 1 <= x0 <= q3 - 1 <= p34 - 1
       x0 = e4 - e3; // or x0 = delta + q3 - q4
       if (q3 <= 18) { // 2 <= q3 <= 18
         round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         // C3.w[1] = 0;
         C3.w[0] = R64;
       } else if (q3 <= 38) {
         round128_19_38 (q3, x0, C3, &R128, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         C3.w[1] = R128.w[1];
         C3.w[0] = R128.w[0];
       }
       // the rounded result has q3 - x0 digits
       // we want the exponent to be e4, so if incr_exp = 1 then
       // multiply the rounded result by 10 - it will still fit in 113 bits
       if (incr_exp) {
         // 64 x 128 -> 128
         P128.w[1] = C3.w[1];
         P128.w[0] = C3.w[0];
         __mul_64x128_to_128 (C3, ten2k64[1], P128);
       }
       e3 = e3 + x0; // this is e4
       // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
       // the result will have the sign of x * y; the exponent is e4
       R256.w[3] = 0;
       R256.w[2] = 0;
       R256.w[1] = C3.w[1];
       R256.w[0] = C3.w[0];
       if (p_sign == z_sign) { // R256 = C4 + R256
         add256 (C4, R256, &R256);
       } else { // if (p_sign != z_sign) { // R256 = C4 - R256
         sub256 (C4, R256, &R256); // the result cannot be pure zero
         // because the result has opposite sign to that of R256 which was
         // rounded, need to change the rounding indicators
         lsb = C4.w[0] & 0x01;
         if (is_inexact_lt_midpoint) {
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
         } else if (is_inexact_gt_midpoint) {
           is_inexact_gt_midpoint = 0;
           is_inexact_lt_midpoint = 1;
         } else if (lsb == 0) {
           if (is_midpoint_lt_even) {
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 1;
           } else if (is_midpoint_gt_even) {
             is_midpoint_gt_even = 0;
             is_midpoint_lt_even = 1;
           } else {
             ;
           }
         } else if (lsb == 1) {
           if (is_midpoint_lt_even) {
             // res = res + 1
             R256.w[0]++;
             if (R256.w[0] == 0x0) {
               R256.w[1]++;
               if (R256.w[1] == 0x0) {
         	R256.w[2]++;
         	if (R256.w[2] == 0x0) {
         	  R256.w[3]++;
         	}
               }
             }
             // no check for rounding overflow - R256 was a difference
           } else if (is_midpoint_gt_even) {
             // res = res - 1
             R256.w[0]--;
             if (R256.w[0] == 0xffffffffffffffffull) {
               R256.w[1]--;
               if (R256.w[1] == 0xffffffffffffffffull) {
         	R256.w[2]--;
         	if (R256.w[2] == 0xffffffffffffffffull) {
         	  R256.w[3]--;
         	}
               }
             }
           } else {
             ;
           }
         } else {
           ;
         }
       }
       // determine the number of decimal digits in R256
       ind = nr_digits256 (R256); // ind >= p34
       // if R256 is sum, then ind > p34; if R256 is a difference, then
       // ind >= p34; this means that we can calculate the result rounded to
       // the destination precision, with unbounded exponent, starting from R256
       // and using the indicators from the rounding of C3 to avoid a double
       // rounding error

       if (ind < p34) {
         ;
       } else if (ind == p34) {
         // the result rounded to the destination precision with
         // unbounded exponent
         // is (-1)^p_sign * R256 * 10^e4
         res.w[1] = R256.w[1];
         res.w[0] = R256.w[0];
       } else { // if (ind > p34)
         // if more than P digits, round to nearest to P digits
         // round R256 to p34 digits
         x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
         // save C3 rounding indicators to help avoid double rounding error
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         // initialize rounding indicators
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
         // round to p34 digits; the result fits in 113 bits
         if (ind <= 38) {
           P128.w[1] = R256.w[1];
           P128.w[0] = R256.w[0];
           round128_19_38 (ind, x0, P128, &R128, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
         } else if (ind <= 57) {
           P192.w[2] = R256.w[2];
           P192.w[1] = R256.w[1];
           P192.w[0] = R256.w[0];
           round192_39_57 (ind, x0, P192, &R192, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
           R128.w[1] = R192.w[1];
           R128.w[0] = R192.w[0];
         } else { // if (ind <= 68)
           round256_58_76 (ind, x0, R256, &R256, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
           R128.w[1] = R256.w[1];
           R128.w[0] = R256.w[0];
         }
         // the rounded result has p34 = 34 digits
         e4 = e4 + x0 + incr_exp;

         res.w[1] = R128.w[1];
         res.w[0] = R128.w[0];

         // avoid a double rounding error
         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
             is_midpoint_lt_even) { // double rounding error upward
           // res = res - 1
           res.w[0]--;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
           // Note: a double rounding error upward is not possible; for this
           // the result after the first rounding would have to be 99...95
           // (35 digits in all), possibly followed by a number of zeros; this
           // not possible in Cases (2)-(6) or (15)-(17) which may get here
           // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
           if (res.w[1] == 0x0000314dc6448d93ull &&
             res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
             res.w[0] = 0x378d8e63ffffffffull;
             e4--;
           }
         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
           // res = res + 1
           res.w[0]++;
           if (res.w[0] == 0)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           // if this second rounding was exact the result may still be
           // inexact because of the first rounding
           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
             is_inexact_gt_midpoint = 1;
           }
           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
             is_inexact_lt_midpoint = 1;
           }
         } else if (is_midpoint_gt_even &&
         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
           // pulled up to a midpoint
           is_inexact_lt_midpoint = 1;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else if (is_midpoint_lt_even &&
         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
           // pulled down to a midpoint
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else {
           ;
         }
       }

       // determine tininess
       if (rnd_mode == ROUNDING_TO_NEAREST) {
         if (e4 < expmin) {
           is_tiny = 1; // for other rounding modes apply correction
         }
       } else {
         // for RM, RP, RZ, RA apply correction in order to determine tininess
         // but do not save the result; apply the correction to
         // (-1)^p_sign * res * 10^0
         P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
         P128.w[0] = res.w[0];
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     0, &P128, pfpsf);
         scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
         // the number of digits in the significand is p34 = 34
         if (e4 + scale < expmin) {
           is_tiny = 1;
         }
       }

       // the result rounded to the destination precision with unbounded exponent
       // is (-1)^p_sign * res * 10^e4
       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
       // res.w[0] unchanged;
       // Note: res is correct only if expmin <= e4 <= expmax
       ind = p34; // the number of decimal digits in the signifcand of res

       // at this point we have the result rounded with unbounded exponent in
       // res and we know its tininess:
       // res = (-1)^p_sign * significand * 10^e4,
       // where q (significand) = ind = p34
       // Note: res is correct only if expmin <= e4 <= expmax

       // check for overflow if RN
       if (rnd_mode == ROUNDING_TO_NEAREST
           && (ind + e4) > (p34 + expmax)) {
         res.w[1] = p_sign | 0x7800000000000000ull;
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         BID_SWAP128 (res);
         BID_RETURN (res)
       } // else not overflow or not RN, so continue

       // from this point on this is similar to the last part of the computation
       // for Cases (15), (16), (17)

       // if (e4 >= expmin) we have the result rounded with bounded exponent
       if (e4 < expmin) {
         x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
         // where the result rounded [at most] once is
         //   (-1)^p_sign * significand_res * 10^e4

         // avoid double rounding error
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;

         if (x0 > ind) {
           // nothing is left of res when moving the decimal point left x0 digits
           is_inexact_lt_midpoint = 1;
           res.w[1] = p_sign | 0x0000000000000000ull;
           res.w[0] = 0x0000000000000000ull;
           e4 = expmin;
         } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
           // this is <, =, or > 1/2 ulp
           // compare the ind-digit value in the significand of res with
           // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
           // less than, equal to, or greater than 1/2 ulp (significand of res)
           R128.w[1] = res.w[1] & MASK_COEFF;
           R128.w[0] = res.w[0];
           if (ind <= 19) {
             if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           } else { // if (ind <= 38)
             if (R128.w[1] < midpoint128[ind - 20].w[1] ||
                 (R128.w[1] == midpoint128[ind - 20].w[1] &&
                 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
                 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           }
           if (lt_half_ulp || eq_half_ulp) {
             // res = +0.0 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000000ull;
           } else { // if (gt_half_ulp)
             // res = +1 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000001ull;
           }
           res.w[1] = p_sign | res.w[1];
           e4 = expmin;
         } else { // if (1 <= x0 <= ind - 1 <= 33)
           // round the ind-digit result to ind - x0 digits

           if (ind <= 18) { // 2 <= ind <= 18
             round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
             res.w[1] = 0x0;
             res.w[0] = R64;
           } else if (ind <= 38) {
             P128.w[1] = res.w[1] & MASK_COEFF;
             P128.w[0] = res.w[0];
             round128_19_38 (ind, x0, P128, &res, &incr_exp,
         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
         		    &is_inexact_lt_midpoint,
         		    &is_inexact_gt_midpoint);
           }
           e4 = e4 + x0; // expmin
           // we want the exponent to be expmin, so if incr_exp = 1 then
           // multiply the rounded result by 10 - it will still fit in 113 bits
           if (incr_exp) {
             // 64 x 128 -> 128
             P128.w[1] = res.w[1] & MASK_COEFF;
             P128.w[0] = res.w[0];
             __mul_64x128_to_128 (res, ten2k64[1], P128);
           }
           res.w[1] =
             p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
         					     w[1] & MASK_COEFF);
           // avoid a double rounding error
           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
                 is_midpoint_lt_even) { // double rounding error upward
             // res = res - 1
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             // Note: a double rounding error upward is not possible; for this
             // the result after the first rounding would have to be 99...95
             // (35 digits in all), possibly followed by a number of zeros; this
             // not possible in this underflow case
             is_midpoint_lt_even = 0;
             is_inexact_lt_midpoint = 1;
           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
                 is_midpoint_gt_even) { // double rounding error downward
             // res = res + 1
             res.w[0]++;
             if (res.w[0] == 0)
               res.w[1]++;
             is_midpoint_gt_even = 0;
             is_inexact_gt_midpoint = 1;
           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	     !is_inexact_lt_midpoint
         	     && !is_inexact_gt_midpoint) {
             // if this second rounding was exact the result may still be
             // inexact because of the first rounding
             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
               is_inexact_gt_midpoint = 1;
             }
             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
               is_inexact_lt_midpoint = 1;
             }
           } else if (is_midpoint_gt_even &&
         	     (is_inexact_gt_midpoint0
         	      || is_midpoint_lt_even0)) {
             // pulled up to a midpoint
             is_inexact_lt_midpoint = 1;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else if (is_midpoint_lt_even &&
         	     (is_inexact_lt_midpoint0
         	      || is_midpoint_gt_even0)) {
             // pulled down to a midpoint
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 1;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else {
             ;
           }
         }
       }
       // res contains the correct result
       // apply correction if not rounding to nearest
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       if (is_midpoint_lt_even || is_midpoint_gt_even ||
           is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
         if (is_tiny)
           *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
         (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
         (delta + q3 <= p34 && p34 < q4)) { // Case (17)

       // calculate first the result rounded to the destination precision, with
       // unbounded exponent

       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
               rnd_mode, &is_midpoint_lt_even,
               &is_midpoint_gt_even, &is_inexact_lt_midpoint,
               &is_inexact_gt_midpoint, pfpsf, &res);
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else {
       ;
     }

   } // end if delta < 0

   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   BID_SWAP128 (res);
   BID_RETURN (res)

 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
   UINT128 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128_fma (UINT128 x, UINT128 y, UINT128 z
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even, is_midpoint_gt_even,
     is_inexact_lt_midpoint, is_inexact_gt_midpoint;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };

 #if DECIMAL_CALL_BY_REFERENCE
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x, &y, &z
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1, y1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, &y1, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y1,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px, y = *py;
   UINT128 z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1, y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, &y1, &z
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y1,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, py, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, py, pz
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 y1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, px, &y1, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y1,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 y = *py;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, px, &y1, pz
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y1,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, px, py, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }

 // Note: bid128qqq_fma is represented by bid128_fma

 // Note: bid64ddd_fma is represented by bid64_fma

 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 x = *px, y = *py;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 x1, y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, &x1, &y1, pz
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x1, y1, z
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 x = *px, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 x1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, &x1, py, &z1
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x1, y, z1
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 x = *px;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 x1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, &x1, py, pz
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x1, y, z
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 y1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, px, &y1, &z1
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x, y1, z1
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 y = *py;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, px, &y1, pz
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x, y1, z
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, px, py, &z1
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x, y, z1
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT128 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
     is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   int incr_exp;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
   UINT64 sign;
   UINT64 exp;
   int unbexp;
   UINT128 C;
   BID_UI64DOUBLE tmp;
   int nr_bits;
   int q, x0;
   int scale;
   int lt_half_ulp = 0, eq_half_ulp = 0;

   // Note: for rounding modes other than RN or RA, the result can be obtained
   // by rounding first to BID128 and then to BID64

   save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
   *pfpsf = 0;

 #if DECIMAL_CALL_BY_REFERENCE
   bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
         	  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
         	  &res, &x, &y, &z
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
         		&is_inexact_lt_midpoint0,
         		&is_inexact_gt_midpoint0, x, y,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif

   if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
       (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
       ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
       ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
 #if DECIMAL_CALL_BY_REFERENCE
     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #else
     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #endif
     // determine the unbiased exponent of the result
     unbexp = ((res1 >> 53) & 0x3ff) - 398;

     // if subnormal, res1  must have exp = -398
     // if tiny and inexact set underflow and inexact status flags
     if (!((res1 & MASK_NAN) == MASK_NAN) &&	// res1 not NaN
         (unbexp == -398)
         && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
         && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
             || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
       // set the inexact flag and the underflow flag
       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
     } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
                is_midpoint_lt_even0 || is_midpoint_gt_even0) {
       // set the inexact flag and the underflow flag
       *pfpsf |= INEXACT_EXCEPTION;
     }

     *pfpsf |= save_fpsf;
     BID_RETURN (res1);
   } // else continue, and use rounding to nearest to round to 16 digits

   // at this point the result is rounded to nearest (even or away) to 34 digits
   // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
   sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
   unbexp = (exp >> 49) - 6176;
   C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
   C.w[0] = res.w[LOW_128W];

   if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||	// result is zero
       (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
       // clear under/overflow
 #if DECIMAL_CALL_BY_REFERENCE
     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #else
     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #endif
     *pfpsf |= save_fpsf;
     BID_RETURN (res1);
   } // else continue

   // -398 - 34 <= unbexp <= 369 + 15
   if (rnd_mode == ROUNDING_TIES_AWAY) {
     // apply correction, if needed, to make the result rounded to nearest-even
     if (is_midpoint_gt_even) {
       // res = res - 1
       res1--; // res1 is now even
     } // else the result is already correctly rounded to nearest-even
   }
   // at this point the result is finite, non-zero canonical normal or subnormal,
   // and in most cases overflow or underflow will not occur

   // determine the number of digits q in the result
   // q = nr. of decimal digits in x
   // determine first the nr. of bits in x
   if (C.w[1] == 0) {
     if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
       // split the 64-bit value in two 32-bit halves to avoid rounding errors
       if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
         tmp.d = (double) (C.w[0] >> 32); // exact conversion
         nr_bits =
           33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       } else { // x < 2^32
         tmp.d = (double) (C.w[0]); // exact conversion
         nr_bits =
           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // if x < 2^53
       tmp.d = (double) C.w[0]; // exact conversion
       nr_bits =
         1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
     tmp.d = (double) C.w[1]; // exact conversion
     nr_bits =
       65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
   q = nr_digits[nr_bits - 1].digits;
   if (q == 0) {
     q = nr_digits[nr_bits - 1].digits1;
     if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
         (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
          C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
       q++;
   }
   // if q > 16, round to nearest even to 16 digits (but for underflow it may
   // have to be truncated even more)
   if (q > 16) {
     x0 = q - 16;
     if (q <= 18) {
       round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
         	    &is_midpoint_lt_even, &is_midpoint_gt_even,
         	    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
     } else { // 19 <= q <= 34
       round128_19_38 (q, x0, C, &res128, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
       res1 = res128.w[0]; // the result fits in 64 bits
     }
     unbexp = unbexp + x0;
     if (incr_exp)
       unbexp++;
     q = 16; // need to set in case denormalization is necessary
   } else {
     // the result does not require a second rounding (and it must have
     // been exact in the first rounding, since q <= 16)
     res1 = C.w[0];
   }

   // avoid a double rounding error
   if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
       is_midpoint_lt_even) { // double rounding error upward
     // res = res - 1
     res1--; // res1 becomes odd
     is_midpoint_lt_even = 0;
     is_inexact_lt_midpoint = 1;
     if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
       res1 = 0x002386f26fc0ffffull; // 10^16 - 1
       unbexp--;
     }
   } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
       is_midpoint_gt_even) { // double rounding error downward
     // res = res + 1
     res1++; // res1 becomes odd (so it cannot be 10^16)
     is_midpoint_gt_even = 0;
     is_inexact_gt_midpoint = 1;
   } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
     // if this second rounding was exact the result may still be
     // inexact because of the first rounding
     if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
       is_inexact_gt_midpoint = 1;
     }
     if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
       is_inexact_lt_midpoint = 1;
     }
   } else if (is_midpoint_gt_even &&
              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
     // pulled up to a midpoint
     is_inexact_lt_midpoint = 1;
     is_inexact_gt_midpoint = 0;
     is_midpoint_lt_even = 0;
     is_midpoint_gt_even = 0;
   } else if (is_midpoint_lt_even &&
              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
     // pulled down to a midpoint
     is_inexact_lt_midpoint = 0;
     is_inexact_gt_midpoint = 1;
     is_midpoint_lt_even = 0;
     is_midpoint_gt_even = 0;
   } else {
     ;
   }
   // this is the result rounded correctly to nearest even, with unbounded exp.

   // check for overflow
   if (q + unbexp > P16 + expmax16) {
     res1 = sign | 0x7800000000000000ull;
     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
     *pfpsf |= save_fpsf;
     BID_RETURN (res1)
   } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
     // not overflow; the result must be exact, and we can multiply res1 by
     // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
     scale = unbexp - expmax16;
     res1 = res1 * ten2k64[scale]; // res1 * 10^scale
     unbexp = expmax16; // unbexp - scale
   } else {
     ; // continue
   }

   // check for underflow
   if (q + unbexp < P16 + expmin16) {
     if (unbexp < expmin16) {
       // we must truncate more of res
       x0 = expmin16 - unbexp; // x0 >= 1
       is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
       is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
       is_midpoint_lt_even0 = is_midpoint_lt_even;
       is_midpoint_gt_even0 = is_midpoint_gt_even;
       is_inexact_lt_midpoint = 0;
       is_inexact_gt_midpoint = 0;
       is_midpoint_lt_even = 0;
       is_midpoint_gt_even = 0;
       // the number of decimal digits in res1 is q
       if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
         // 2 <= q <= 16, 1 <= x0 <= 15
         round64_2_18 (q, x0, res1, &res1, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         if (incr_exp) {
           // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
           res1 = ten2k64[q - x0];
         }
         unbexp = unbexp + x0; // expmin16
       } else if (x0 == q) {
         // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
         // determine relationship with 1/2 ulp
         // q <= 16
         if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
           lt_half_ulp = 1;
           is_inexact_lt_midpoint = 1;
         } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
           eq_half_ulp = 1;
           is_midpoint_gt_even = 1;
         } else { // > 1/2 ulp
           // gt_half_ulp = 1;
           is_inexact_gt_midpoint = 1;
         }
         if (lt_half_ulp || eq_half_ulp) {
           // res = +0.0 * 10^expmin16
           res1 = 0x0000000000000000ull;
         } else { // if (gt_half_ulp)
           // res = +1 * 10^expmin16
           res1 = 0x0000000000000001ull;
         }
         unbexp = expmin16;
       } else { // if (x0 > q)
         // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
         res1 = 0x0000000000000000ull;
         unbexp = expmin16;
         is_inexact_lt_midpoint = 1;
       }
       // avoid a double rounding error
       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
           is_midpoint_lt_even) { // double rounding error upward
         // res = res - 1
         res1--; // res1 becomes odd
         is_midpoint_lt_even = 0;
         is_inexact_lt_midpoint = 1;
       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
           is_midpoint_gt_even) { // double rounding error downward
         // res = res + 1
         res1++; // res1 becomes odd
         is_midpoint_gt_even = 0;
         is_inexact_gt_midpoint = 1;
       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         // if this rounding was exact the result may still be
         // inexact because of the previous roundings
         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
           is_inexact_gt_midpoint = 1;
         }
         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
           is_inexact_lt_midpoint = 1;
         }
       } else if (is_midpoint_gt_even &&
         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
         // pulled up to a midpoint
         is_inexact_lt_midpoint = 1;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else if (is_midpoint_lt_even &&
         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
         // pulled down to a midpoint
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 1;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else {
         ;
       }
     }
     // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
     // and the result is tiny and exact

     // check for inexact result
     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
         is_midpoint_lt_even || is_midpoint_gt_even ||
         is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
         is_midpoint_lt_even0 || is_midpoint_gt_even0) {
       // set the inexact flag and the underflow flag
       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
     }
   } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
              is_midpoint_lt_even || is_midpoint_gt_even) {
     *pfpsf |= INEXACT_EXCEPTION;
   }
   // this is the result rounded correctly to nearest, with bounded exponent

   if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
     // res = res + 1
     res1++; // res1 is now odd
   } // else the result is already correct

   // assemble the result
   if (res1 < 0x0020000000000000ull) { // res < 2^53
     res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
   } else { // res1 >= 2^53
     res1 = sign | MASK_STEERING_BITS |
       ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
   }
   *pfpsf |= save_fpsf;
   BID_RETURN (res1);
 }