libgcc/config/libbid/bid128_round_integral.c - gcc - Git at Google

 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.

 This file is part of GCC.

 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 3, or (at your option) any later
 version.

 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 Under Section 7 of GPL version 3, you are granted additional
 permissions described in the GCC Runtime Library Exception, version
 3.1, as published by the Free Software Foundation.

 You should have received a copy of the GNU General Public License and
 a copy of the GCC Runtime Library Exception along with this program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */

 #define BID_128RES

 #include "bid_internal.h"

 /*****************************************************************************
  *  BID128_round_integral_exact
  ****************************************************************************/

 /* Round the 128-bit BID decimal x to an integral value, using the rounding
    direction selected by rnd_mode, and report inexactness.

    Behaviour visible in this routine:
      - NaN: the quieted NaN is returned (non-canonical payload cleared);
        a signaling NaN additionally sets INVALID_EXCEPTION in *pfpsf.
      - Infinity: the canonical infinity of the same sign is returned.
      - Zero (including non-canonical encodings, which are treated as zero):
        a zero with the sign of x and exponent MAX(Q(x), 0) is returned.
      - Unbiased exponent >= 0: x is already an integer and is returned
        unchanged.
      - Otherwise the coefficient is scaled by 10^exp, rounded per rnd_mode,
        and returned with biased exponent 0x3040000000000000 (unbiased 0);
        INEXACT_EXCEPTION is set in *pfpsf when a nonzero fraction is lost.

    NOTE(review): rnd_mode and pfpsf are not declared in this block; they are
    presumably introduced by the BID128_FUNCTION_ARG1 macro - confirm against
    bid_internal.h.  The lookup tables used below (nr_digits, midpoint64,
    midpoint128, ten2mk128, shiftright128, maskhigh128, onehalf128) are also
    defined elsewhere.  */
 BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x)

      // sentinel value; it is only returned if rnd_mode matches none of the
      // cases of the final switch
      UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
      };
 UINT64 x_sign;
 UINT64 x_exp;
 int exp;			// unbiased exponent
   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
 UINT64 tmp64;
 BID_UI64DOUBLE tmp1;
 unsigned int x_nr_bits;
 int q, ind, shift;
 UINT128 C1;
 UINT256 fstar;
 UINT256 P256;

   // check for NaN or Infinity
 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
   // x is special
   if ((x.w[1] & MASK_NAN) == MASK_NAN) {	// x is NAN
     // if x = NaN, then res = Q (x)
     // check first for non-canonical NaN payload; if the payload is above
     // the limit encoded below, it is replaced by a zero payload
     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 	(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 	 (x.w[0] > 0x38c15b09ffffffffull))) {
       x.w[1] = x.w[1] & 0xffffc00000000000ull;
       x.w[0] = 0x0ull;
     }
     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (x)
       res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out also G[6]-G[16]
       res.w[0] = x.w[0];
     } else {	// x is QNaN
       // return x
       res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out G[6]-G[16]
       res.w[0] = x.w[0];
     }
     BID_RETURN (res)
   } else {	// x is not a NaN, so it must be infinity
     if ((x.w[1] & MASK_SIGN) == 0x0ull) {	// x is +inf
       // return +inf
       res.w[1] = 0x7800000000000000ull;
       res.w[0] = 0x0000000000000000ull;
     } else {	// x is -inf
       // return -inf
       res.w[1] = 0xf800000000000000ull;
       res.w[0] = 0x0000000000000000ull;
     }
     BID_RETURN (res);
   }
 }
   // unpack x
 x_sign = x.w[1] & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
 C1.w[1] = x.w[1] & MASK_COEFF;
 C1.w[0] = x.w[0];

   // check for non-canonical values (treated as zero)
 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {	// G0_G1=11
   // non-canonical
   x_exp = (x.w[1] << 2) & MASK_EXP;	// biased and shifted left 49 bits
   C1.w[1] = 0;	// significand high
   C1.w[0] = 0;	// significand low
 } else {	// G0_G1 != 11
   x_exp = x.w[1] & MASK_EXP;	// biased and shifted left 49 bits
   if (C1.w[1] > 0x0001ed09bead87c0ull ||
       (C1.w[1] == 0x0001ed09bead87c0ull
        && C1.w[0] > 0x378d8e63ffffffffull)) {
     // x is non-canonical if coefficient is larger than 10^34 -1
     C1.w[1] = 0;
     C1.w[0] = 0;
   } else {	// canonical
     ;
   }
 }

   // test for input equal to zero
 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
   // x is 0
   // return 0 preserving the sign bit and the preferred exponent
   // of MAX(Q(x), 0)
   // (0x1820ull << 49) == 0x3040000000000000ull, the biased encoding of
   // exponent 0 (0x1820 == 6176, the bias subtracted further below)
   if (x_exp <= (0x1820ull << 49)) {
     res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
   } else {
     res.w[1] = x_sign | x_exp;
   }
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // x is not special and is not zero

   // Fast path: the exponent alone shows that |x| is too small for the
   // coefficient to matter, so the result is decided by sign and rounding
   // mode only; the result is always inexact because x is non-zero
 switch (rnd_mode) {
 case ROUNDING_TO_NEAREST:
 case ROUNDING_TIES_AWAY:
   // if (exp <= -(p+1)) return 0.0
   if (x_exp <= 0x2ffa000000000000ull) {	// 0x2ffa000000000000ull == -35
     res.w[1] = x_sign | 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_DOWN:
   // if (exp <= -p) return -1.0 or +0.0
   if (x_exp <= 0x2ffc000000000000ull) {	// 0x2ffc000000000000ull == -34
     if (x_sign) {
       // if negative, return negative 1, because we know coefficient
       // is non-zero (would have been caught above)
       res.w[1] = 0xb040000000000000ull;
       res.w[0] = 0x0000000000000001ull;
     } else {
       // if positive, return positive 0, because we know coefficient is
       // non-zero (would have been caught above)
       res.w[1] = 0x3040000000000000ull;
       res.w[0] = 0x0000000000000000ull;
     }
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_UP:
   // if (exp <= -p) return -0.0 or +1.0
   if (x_exp <= 0x2ffc000000000000ull) {	// 0x2ffc000000000000ull == -34
     if (x_sign) {
       // if negative, return negative 0, because we know the coefficient
       // is non-zero (would have been caught above)
       res.w[1] = 0xb040000000000000ull;
       res.w[0] = 0x0000000000000000ull;
     } else {
       // if positive, return positive 1, because we know coefficient is
       // non-zero (would have been caught above)
       res.w[1] = 0x3040000000000000ull;
       res.w[0] = 0x0000000000000001ull;
     }
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_TO_ZERO:
   // if (exp <= -p) return -0.0 or +0.0
   if (x_exp <= 0x2ffc000000000000ull) {	// 0x2ffc000000000000ull == -34
     res.w[1] = x_sign | 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 }

   // q = nr. of decimal digits in x
   //  determine first the nr. of bits in x
   // The bit count is read from the exponent field of a double holding the
   // (exactly converted) value: bits 52..62 of tmp1.ui64, minus the 0x3ff bias
   // NOTE(review): this assumes tmp1.d and tmp1.ui64 overlay the same storage
 if (C1.w[1] == 0) {
   if (C1.w[0] >= 0x0020000000000000ull) {	// x >= 2^53
     // split the 64-bit value in two 32-bit halves to avoid rounding errors
     if (C1.w[0] >= 0x0000000100000000ull) {	// x >= 2^32
       tmp1.d = (double) (C1.w[0] >> 32);	// exact conversion
       x_nr_bits =
 	33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     } else {	// x < 2^32
       // not reachable from here: the enclosing test already guarantees
       // C1.w[0] >= 2^53
       tmp1.d = (double) (C1.w[0]);	// exact conversion
       x_nr_bits =
 	1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else {	// if x < 2^53
     tmp1.d = (double) C1.w[0];	// exact conversion
     x_nr_bits =
       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
 } else {	// C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   tmp1.d = (double) C1.w[1];	// exact conversion
   x_nr_bits =
     65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
 }

   // map the bit count to a decimal digit count; a 0 entry in .digits means
   // the bit count alone is ambiguous, so start from .digits1 and add one
   // if the coefficient reaches the 128-bit threshold stored in the table
 q = nr_digits[x_nr_bits - 1].digits;
 if (q == 0) {
   q = nr_digits[x_nr_bits - 1].digits1;
   if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
       (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
        C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
     q++;
 }
 exp = (x_exp >> 49) - 6176;	// remove the exponent bias
 if (exp >= 0) {	// -exp <= 0
   // the argument is an integer already
   res.w[1] = x.w[1];
   res.w[0] = x.w[0];
   BID_RETURN (res);
 }
   // exp < 0
 switch (rnd_mode) {
 case ROUNDING_TO_NEAREST:
   if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
     // need to shift right -exp digits from the coefficient; exp will be 0
     ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
     // chop off ind digits from the lower part of C1
     // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
     tmp64 = C1.w[0];
     if (ind <= 19) {
       C1.w[0] = C1.w[0] + midpoint64[ind - 1];
     } else {
       C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
       C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
     }
     // if carry-out from C1.w[0], increment C1.w[1]
     if (C1.w[0] < tmp64)
       C1.w[1]++;
     // calculate C* and f*
     // C* is actually floor(C*) in this case
     // C* and f* need shifting and masking, as shown by
     // shiftright128[] and maskhigh128[]
     // 1 <= x <= 34
     // kx = 10^(-x) = ten2mk128[ind - 1]
     // C* = (C1 + 1/2 * 10^x) * 10^(-x)
     // the approximation of 10^(-x) was rounded up to 118 bits
     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
     // determine the value of res and fstar

     // determine inexactness of the rounding of C*
     // if (0 < f* - 1/2 < 10^(-x)) then
     //   the result is exact
     // else // if (f* - 1/2 > T*) then
     //   the result is inexact
     // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]

     if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
       // redundant shift = shiftright128[ind - 1]; // shift = 0
       res.w[1] = P256.w[3];
       res.w[0] = P256.w[2];
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* < 10^(-x) <=> midpoint
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
       if ((res.w[0] & 0x0000000000000001ull) &&	// is result odd?
 	  ((fstar.w[1] < (ten2mk128[ind - 1].w[1]))
 	   || ((fstar.w[1] == ten2mk128[ind - 1].w[1])
 	       && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) {
 	// subtract 1 to make even
 	if (res.w[0]-- == 0) {
 	  res.w[1]--;
 	}
       }
       if (fstar.w[1] > 0x8000000000000000ull ||
 	  (fstar.w[1] == 0x8000000000000000ull
 	   && fstar.w[0] > 0x0ull)) {
 	// f* > 1/2 and the result may be exact
 	tmp64 = fstar.w[1] - 0x8000000000000000ull;	// f* - 1/2
 	if (tmp64 > ten2mk128[ind - 1].w[1] ||
 	    (tmp64 == ten2mk128[ind - 1].w[1] &&
 	     fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	  // set the inexact flag
 	  *pfpsf |= INEXACT_EXCEPTION;
 	}	// else the result is exact
       } else {	// the result is inexact; f2* <= 1/2
 	// set the inexact flag
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
       shift = shiftright128[ind - 1];	// 3 <= shift <= 63
       res.w[1] = (P256.w[3] >> shift);
       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* < 10^(-x) <=> midpoint
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((res.w[0] & 0x0000000000000001ull) &&	// is result odd?
 	  fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
 			      (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 			       fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
 	// subtract 1 to make even
 	if (res.w[0]-- == 0) {
 	  res.w[1]--;
 	}
       }
       if (fstar.w[2] > onehalf128[ind - 1] ||
 	  (fstar.w[2] == onehalf128[ind - 1]
 	   && (fstar.w[1] || fstar.w[0]))) {
 	// f2* > 1/2 and the result may be exact
 	// Calculate f2* - 1/2
 	tmp64 = fstar.w[2] - onehalf128[ind - 1];
 	if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	    (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	     fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	  // set the inexact flag
 	  *pfpsf |= INEXACT_EXCEPTION;
 	}	// else the result is exact
       } else {	// the result is inexact; f2* <= 1/2
 	// set the inexact flag
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     } else {	// 22 <= ind - 1 <= 33
       shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
       res.w[1] = 0;
       res.w[0] = P256.w[3] >> shift;
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* < 10^(-x) <=> midpoint
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((res.w[0] & 0x0000000000000001ull) &&	// is result odd?
 	  fstar.w[3] == 0 && fstar.w[2] == 0 &&
 	  (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
 	   (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	    fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
 	// subtract 1 to make even
 	if (res.w[0]-- == 0) {
 	  res.w[1]--;
 	}
       }
       if (fstar.w[3] > onehalf128[ind - 1] ||
 	  (fstar.w[3] == onehalf128[ind - 1] &&
 	   (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
 	// f2* > 1/2 and the result may be exact
 	// Calculate f2* - 1/2
 	tmp64 = fstar.w[3] - onehalf128[ind - 1];
 	if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	    || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 		&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	  // set the inexact flag
 	  *pfpsf |= INEXACT_EXCEPTION;
 	}	// else the result is exact
       } else {	// the result is inexact; f2* <= 1/2
 	// set the inexact flag
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     }
     // attach the sign and the biased encoding of exponent 0
     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
     BID_RETURN (res);
   } else {	// if ((q + exp) < 0) <=> q < -exp
     // the result is +0 or -0
     res.w[1] = x_sign | 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_TIES_AWAY:
   if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
     // need to shift right -exp digits from the coefficient; exp will be 0
     ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
     // chop off ind digits from the lower part of C1
     // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
     tmp64 = C1.w[0];
     if (ind <= 19) {
       C1.w[0] = C1.w[0] + midpoint64[ind - 1];
     } else {
       C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
       C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
     }
     // if carry-out from C1.w[0], increment C1.w[1]
     if (C1.w[0] < tmp64)
       C1.w[1]++;
     // calculate C* and f*
     // C* is actually floor(C*) in this case
     // C* and f* need shifting and masking, as shown by
     // shiftright128[] and maskhigh128[]
     // 1 <= x <= 34
     // kx = 10^(-x) = ten2mk128[ind - 1]
     // C* = (C1 + 1/2 * 10^x) * 10^(-x)
     // the approximation of 10^(-x) was rounded up to 118 bits
     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
     // Unlike the ROUNDING_TO_NEAREST case above, no parity correction is
     // applied for midpoints: adding 1/2 * 10^x and taking floor(C*)
     // already rounds ties away from zero
     // n = C* * 10^(e+x)

     // determine also the inexactness of the rounding of C*
     // if (0 < f* - 1/2 < 10^(-x)) then
     //   the result is exact
     // else // if (f* - 1/2 > T*) then
     //   the result is inexact
     // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
     // shift right C* by Ex-128 = shiftright128[ind]
     if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
       // redundant shift = shiftright128[ind - 1]; // shift = 0
       res.w[1] = P256.w[3];
       res.w[0] = P256.w[2];
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       if (fstar.w[1] > 0x8000000000000000ull ||
 	  (fstar.w[1] == 0x8000000000000000ull
 	   && fstar.w[0] > 0x0ull)) {
 	// f* > 1/2 and the result may be exact
 	tmp64 = fstar.w[1] - 0x8000000000000000ull;	// f* - 1/2
 	if ((tmp64 > ten2mk128[ind - 1].w[1] ||
 	     (tmp64 == ten2mk128[ind - 1].w[1] &&
 	      fstar.w[0] >= ten2mk128[ind - 1].w[0]))) {
 	  // set the inexact flag
 	  *pfpsf |= INEXACT_EXCEPTION;
 	}	// else the result is exact
       } else {	// the result is inexact; f2* <= 1/2
 	// set the inexact flag
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
       shift = shiftright128[ind - 1];	// 3 <= shift <= 63
       res.w[1] = (P256.w[3] >> shift);
       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       if (fstar.w[2] > onehalf128[ind - 1] ||
 	  (fstar.w[2] == onehalf128[ind - 1]
 	   && (fstar.w[1] || fstar.w[0]))) {
 	// f2* > 1/2 and the result may be exact
 	// Calculate f2* - 1/2
 	tmp64 = fstar.w[2] - onehalf128[ind - 1];
 	if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	    (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	     fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	  // set the inexact flag
 	  *pfpsf |= INEXACT_EXCEPTION;
 	}	// else the result is exact
       } else {	// the result is inexact; f2* <= 1/2
 	// set the inexact flag
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     } else {	// 22 <= ind - 1 <= 33
       shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
       res.w[1] = 0;
       res.w[0] = P256.w[3] >> shift;
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       if (fstar.w[3] > onehalf128[ind - 1] ||
 	  (fstar.w[3] == onehalf128[ind - 1] &&
 	   (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
 	// f2* > 1/2 and the result may be exact
 	// Calculate f2* - 1/2
 	tmp64 = fstar.w[3] - onehalf128[ind - 1];
 	if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	    || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 		&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	  // set the inexact flag
 	  *pfpsf |= INEXACT_EXCEPTION;
 	}	// else the result is exact
       } else {	// the result is inexact; f2* <= 1/2
 	// set the inexact flag
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     }
     // if the result was a midpoint, it was already rounded away from zero
     res.w[1] |= x_sign | 0x3040000000000000ull;
     BID_RETURN (res);
   } else {	// if ((q + exp) < 0) <=> q < -exp
     // the result is +0 or -0
     res.w[1] = x_sign | 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_DOWN:
   if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
     // need to shift right -exp digits from the coefficient; exp will be 0
     ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
     // (number of digits to be chopped off)
     // chop off ind digits from the lower part of C1
     // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
     // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
     // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
     // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
     // Hence, unlike the two round-to-nearest cases above, no midpoint
     // (1/2 * 10^x) is added to C1 before the multiplication
     // calculate C* and f*
     // C* is actually floor(C*) in this case
     // C* and f* need shifting and masking, as shown by
     // shiftright128[] and maskhigh128[]
     // 1 <= x <= 34
     // kx = 10^(-x) = ten2mk128[ind - 1]
     // C* = C1 * 10^(-x)
     // the approximation of 10^(-x) was rounded up to 118 bits
     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
     if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
       res.w[1] = P256.w[3];
       res.w[0] = P256.w[2];
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       // redundant fstar.w[1] = P256.w[1];
       // redundant fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((P256.w[1] > ten2mk128[ind - 1].w[1])
 	  || (P256.w[1] == ten2mk128[ind - 1].w[1]
 	      && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
 	*pfpsf |= INEXACT_EXCEPTION;
 	// if positive, the truncated value is already the correct result
 	if (x_sign) {	// if negative
 	  // add 1 to the magnitude, propagating the carry into res.w[1]
 	  if (++res.w[0] == 0) {
 	    res.w[1]++;
 	  }
 	}
       }
     } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
       shift = shiftright128[ind - 1];	// 3 <= shift <= 63
       res.w[1] = (P256.w[3] >> shift);
       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	  (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	   fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	*pfpsf |= INEXACT_EXCEPTION;
 	// if positive, the truncated value is already the correct result
 	if (x_sign) {	// if negative
 	  if (++res.w[0] == 0) {
 	    res.w[1]++;
 	  }
 	}
       }
     } else {	// 22 <= ind - 1 <= 33
       shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
       res.w[1] = 0;
       res.w[0] = P256.w[3] >> shift;
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[3] || fstar.w[2]
 	  || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	  || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 	      && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	*pfpsf |= INEXACT_EXCEPTION;
 	// if positive, the truncated value is already the correct result
 	if (x_sign) {	// if negative
 	  if (++res.w[0] == 0) {
 	    res.w[1]++;
 	  }
 	}
       }
     }
     // attach the sign and the biased encoding of exponent 0
     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
     BID_RETURN (res);
   } else {	// if exp < 0 and q + exp <= 0
     if (x_sign) {	// negative rounds down to -1.0
       res.w[1] = 0xb040000000000000ull;
       res.w[0] = 0x0000000000000001ull;
     } else {	// positive rounds down to +0.0
       res.w[1] = 0x3040000000000000ull;
       res.w[0] = 0x0000000000000000ull;
     }
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_UP:
   if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
     // need to shift right -exp digits from the coefficient; exp will be 0
     ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
     // (number of digits to be chopped off)
     // chop off ind digits from the lower part of C1
     // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
     // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
     // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
     // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
     // Hence, unlike the two round-to-nearest cases above, no midpoint
     // (1/2 * 10^x) is added to C1 before the multiplication
     // calculate C* and f*
     // C* is actually floor(C*) in this case
     // C* and f* need shifting and masking, as shown by
     // shiftright128[] and maskhigh128[]
     // 1 <= x <= 34
     // kx = 10^(-x) = ten2mk128[ind - 1]
     // C* = C1 * 10^(-x)
     // the approximation of 10^(-x) was rounded up to 118 bits
     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
     if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
       res.w[1] = P256.w[3];
       res.w[0] = P256.w[2];
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       // redundant fstar.w[1] = P256.w[1];
       // redundant fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((P256.w[1] > ten2mk128[ind - 1].w[1])
 	  || (P256.w[1] == ten2mk128[ind - 1].w[1]
 	      && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
 	*pfpsf |= INEXACT_EXCEPTION;
 	// if negative, the truncated value is already the correct result
 	if (!x_sign) {	// if positive
 	  // add 1 to the magnitude, propagating the carry into res.w[1]
 	  if (++res.w[0] == 0) {
 	    res.w[1]++;
 	  }
 	}
       }
     } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
       shift = shiftright128[ind - 1];	// 3 <= shift <= 63
       res.w[1] = (P256.w[3] >> shift);
       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	  (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	   fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	*pfpsf |= INEXACT_EXCEPTION;
 	// if negative, the truncated value is already the correct result
 	if (!x_sign) {	// if positive
 	  if (++res.w[0] == 0) {
 	    res.w[1]++;
 	  }
 	}
       }
     } else {	// 22 <= ind - 1 <= 33
       shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
       res.w[1] = 0;
       res.w[0] = P256.w[3] >> shift;
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[3] || fstar.w[2]
 	  || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	  || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 	      && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	*pfpsf |= INEXACT_EXCEPTION;
 	// if negative, the truncated value is already the correct result
 	if (!x_sign) {	// if positive
 	  if (++res.w[0] == 0) {
 	    res.w[1]++;
 	  }
 	}
       }
     }
     // attach the sign and the biased encoding of exponent 0
     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
     BID_RETURN (res);
   } else {	// if exp < 0 and q + exp <= 0
     if (x_sign) {	// negative rounds up to -0.0
       res.w[1] = 0xb040000000000000ull;
       res.w[0] = 0x0000000000000000ull;
     } else {	// positive rounds up to +1.0
       res.w[1] = 0x3040000000000000ull;
       res.w[0] = 0x0000000000000001ull;
     }
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 case ROUNDING_TO_ZERO:
   if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
     // need to shift right -exp digits from the coefficient; exp will be 0
     ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
     // (number of digits to be chopped off)
     // chop off ind digits from the lower part of C1
     // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
     // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
     // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
     // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
     // Hence, unlike the two round-to-nearest cases above, no midpoint
     // (1/2 * 10^x) is added to C1 before the multiplication
     // calculate C* and f*
     // C* is actually floor(C*) in this case
     // C* and f* need shifting and masking, as shown by
     // shiftright128[] and maskhigh128[]
     // 1 <= x <= 34
     // kx = 10^(-x) = ten2mk128[ind - 1]
     // C* = C1 * 10^(-x)
     // the approximation of 10^(-x) was rounded up to 118 bits
     __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
     if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
       res.w[1] = P256.w[3];
       res.w[0] = P256.w[2];
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       // redundant fstar.w[1] = P256.w[1];
       // redundant fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((P256.w[1] > ten2mk128[ind - 1].w[1])
 	  || (P256.w[1] == ten2mk128[ind - 1].w[1]
 	      && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
       shift = shiftright128[ind - 1];	// 3 <= shift <= 63
       res.w[1] = (P256.w[3] >> shift);
       res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	  (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	   fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     } else {	// 22 <= ind - 1 <= 33
       shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
       res.w[1] = 0;
       res.w[0] = P256.w[3] >> shift;
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[3] || fstar.w[2]
 	  || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	  || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 	      && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	*pfpsf |= INEXACT_EXCEPTION;
       }
     }
     // attach the sign and the biased encoding of exponent 0
     res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
     BID_RETURN (res);
   } else {	// if exp < 0 and q + exp <= 0 the result is +0 or -0
     res.w[1] = x_sign | 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *pfpsf |= INEXACT_EXCEPTION;
     BID_RETURN (res);
   }
   break;
 }

   // only reached when rnd_mode matched none of the cases above; res then
   // still holds its initial sentinel value
 BID_RETURN (res);
 }

 /*****************************************************************************
  *  BID128_round_integral_nearest_even
  ****************************************************************************/

 // bid128_round_integral_nearest_even: round the 128-bit BID value x to an
 // integral value in round-to-nearest mode, resolving exact midpoints toward
 // the even integer.
 //   - NaN: the result is the quieted NaN (a non-canonical payload is zeroed);
 //     a signaling NaN additionally sets INVALID_EXCEPTION in *pfpsf.
 //   - infinity: returned with its sign.
 //   - zero (non-canonical encodings are treated as zero): a signed zero
 //     whose exponent is MAX(Q(x), 0).
 //   - unbiased exponent >= 0: x is returned unchanged.
 //   - otherwise: the rounded integer, with the sign of x and the exponent
 //     field 0x3040000000000000 (unbiased exponent 0).
 // INVALID_EXCEPTION is the only status flag written by this function.
 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x)

      UINT128 res;
      UINT64 x_sign;
      UINT64 x_exp;
      int exp;			// unbiased exponent
   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
      UINT64 tmp64;		// copy of C1.w[0] used to detect a carry-out
      BID_UI64DOUBLE tmp1;	// used to read the exponent field of a double
      unsigned int x_nr_bits;	// number of significant bits in C1
      int q, ind, shift;	// q = nr. of decimal digits, ind = digits to drop
      UINT128 C1;
   // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
      UINT256 fstar;		// fractional part f* of the scaled product
      UINT256 P256;		// 256-bit product C1 * ten2mk128[ind - 1]

   // check for NaN or Infinity
 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
     // x is special
 if ((x.w[1] & MASK_NAN) == MASK_NAN) {	// x is NAN
   // if x = NaN, then res = Q (x)
   // check first for non-canonical NaN payload; if the payload exceeds
   // 0x0000314dc6448d93_38c15b09ffffffff it is replaced by zero
   if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
       (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
        (x.w[0] > 0x38c15b09ffffffffull))) {
     x.w[1] = x.w[1] & 0xffffc00000000000ull;
     x.w[0] = 0x0ull;
   }
   // both arms below build the same result; they differ only in that the
   // SNaN arm raises the invalid flag
   if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
     // set invalid flag
     *pfpsf |= INVALID_EXCEPTION;
     // return quiet (x)
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out also G[6]-G[16]
     res.w[0] = x.w[0];
   } else {	// x is QNaN
     // return x
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out G[6]-G[16]
     res.w[0] = x.w[0];
   }
   BID_RETURN (res)
 } else {	// x is not a NaN, so it must be infinity
   if ((x.w[1] & MASK_SIGN) == 0x0ull) {	// x is +inf
     // return +inf
     res.w[1] = 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {	// x is -inf
     // return -inf
     res.w[1] = 0xf800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
 }
   // unpack x
 x_sign = x.w[1] & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
 C1.w[1] = x.w[1] & MASK_COEFF;
 C1.w[0] = x.w[0];

   // check for non-canonical values (treated as zero)
 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {	// G0_G1=11
   // non-canonical
   x_exp = (x.w[1] << 2) & MASK_EXP;	// biased and shifted left 49 bits
   C1.w[1] = 0;	// significand high
   C1.w[0] = 0;	// significand low
 } else {	// G0_G1 != 11
   x_exp = x.w[1] & MASK_EXP;	// biased and shifted left 49 bits
   if (C1.w[1] > 0x0001ed09bead87c0ull ||
       (C1.w[1] == 0x0001ed09bead87c0ull
        && C1.w[0] > 0x378d8e63ffffffffull)) {
     // x is non-canonical if coefficient is larger than 10^34 -1
     C1.w[1] = 0;
     C1.w[0] = 0;
   } else {	// canonical
     ;
   }
 }

   // test for input equal to zero
 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
   // x is 0
   // return 0 preserving the sign bit and the preferred exponent
   // of MAX(Q(x), 0)
   // 0x1820 == 6176 is the exponent bias, so this tests "unbiased exp <= 0"
   if (x_exp <= (0x1820ull << 49)) {
     res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
   } else {
     res.w[1] = x_sign | x_exp;
   }
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // x is not special and is not zero

   // if (exp <= -(p+1)) return 0
 if (x_exp <= 0x2ffa000000000000ull) {	// 0x2ffa000000000000ull == -35
   res.w[1] = x_sign | 0x3040000000000000ull;
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // q = nr. of decimal digits in x
   //  determine first the nr. of bits in x
   // (the bit count is read from the exponent field, bits 62..52, of the
   // double obtained by converting the relevant part of C1)
 if (C1.w[1] == 0) {
   if (C1.w[0] >= 0x0020000000000000ull) {	// x >= 2^53
     // split the 64-bit value in two 32-bit halves to avoid rounding errors
     // (the test below is always true on this path because x >= 2^53 was
     // just established, so its else arm is never taken)
     if (C1.w[0] >= 0x0000000100000000ull) {	// x >= 2^32
       tmp1.d = (double) (C1.w[0] >> 32);	// exact conversion
       x_nr_bits =
 	33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     } else {	// x < 2^32
       tmp1.d = (double) (C1.w[0]);	// exact conversion
       x_nr_bits =
 	1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else {	// if x < 2^53
     tmp1.d = (double) C1.w[0];	// exact conversion
     x_nr_bits =
       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
 } else {	// C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   tmp1.d = (double) C1.w[1];	// exact conversion
   x_nr_bits =
     65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
 }

   // look up q from the bit count; a 0 entry means the bit count alone is
   // ambiguous, so start from digits1 and add one digit when C1 reaches the
   // 128-bit threshold (threshold_hi, threshold_lo)
 q = nr_digits[x_nr_bits - 1].digits;
 if (q == 0) {
   q = nr_digits[x_nr_bits - 1].digits1;
   if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi
       || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
 	  C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
     q++;
 }
 exp = (x_exp >> 49) - 6176;	// remove the exponent bias
 if (exp >= 0) {	// -exp <= 0
   // the argument is an integer already
   res.w[1] = x.w[1];
   res.w[0] = x.w[0];
   BID_RETURN (res);
 } else if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
   // need to shift right -exp digits from the coefficient; the exp will be 0
   ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
   // chop off ind digits from the lower part of C1
   // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
   tmp64 = C1.w[0];
   if (ind <= 19) {
     C1.w[0] = C1.w[0] + midpoint64[ind - 1];
   } else {
     C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
     C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
   }
   // the low word wrapped around => propagate the carry to the high word
   if (C1.w[0] < tmp64)
     C1.w[1]++;
   // calculate C* and f*
   // C* is actually floor(C*) in this case
   // C* and f* need shifting and masking, as shown by
   // shiftright128[] and maskhigh128[]
   // 1 <= x <= 34
   // kx = 10^(-x) = ten2mk128[ind - 1]
   // C* = (C1 + 1/2 * 10^x) * 10^(-x)
   // the approximation of 10^(-x) was rounded up to 118 bits
   __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
   // determine the value of res and fstar
   if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
     // redundant shift = shiftright128[ind - 1]; // shift = 0
     res.w[1] = P256.w[3];
     res.w[0] = P256.w[2];
     // redundant fstar.w[3] = 0;
     // redundant fstar.w[2] = 0;
     // redundant fstar.w[1] = P256.w[1];
     // redundant fstar.w[0] = P256.w[0];
     // fraction f* < 10^(-x) <=> midpoint
     // f* is in the right position to be compared with
     // 10^(-x) from ten2mk128[]
     // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
     if ((res.w[0] & 0x0000000000000001ull) &&	// is result odd?
 	((P256.w[1] < (ten2mk128[ind - 1].w[1]))
 	 || ((P256.w[1] == ten2mk128[ind - 1].w[1])
 	     && (P256.w[0] < ten2mk128[ind - 1].w[0])))) {
       // subtract 1 to make even; res.w[0] is odd here, hence nonzero, so
       // the borrow into res.w[1] below is never actually taken
       if (res.w[0]-- == 0) {
 	res.w[1]--;
       }
     }
   } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
     shift = shiftright128[ind - 1];	// 3 <= shift <= 63
     // C* = the upper 128 bits of P256 shifted right by 'shift' bits
     res.w[1] = (P256.w[3] >> shift);
     res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
     // redundant fstar.w[3] = 0;
     fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
     fstar.w[1] = P256.w[1];
     fstar.w[0] = P256.w[0];
     // fraction f* < 10^(-x) <=> midpoint
     // f* is in the right position to be compared with
     // 10^(-x) from ten2mk128[]
     if ((res.w[0] & 0x0000000000000001ull) &&	// is result odd?
 	fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
 			    (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 			     fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
       // subtract 1 to make even (res.w[0] is odd, so no borrow occurs)
       if (res.w[0]-- == 0) {
 	res.w[1]--;
       }
     }
   } else {	// 22 <= ind - 1 <= 33
     shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
     // C* fits in the low word: it is P256.w[3] shifted right by 'shift'
     res.w[1] = 0;
     res.w[0] = P256.w[3] >> shift;
     fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
     fstar.w[2] = P256.w[2];
     fstar.w[1] = P256.w[1];
     fstar.w[0] = P256.w[0];
     // fraction f* < 10^(-x) <=> midpoint
     // f* is in the right position to be compared with
     // 10^(-x) from ten2mk128[]
     if ((res.w[0] & 0x0000000000000001ull) &&	// is result odd?
 	fstar.w[3] == 0 && fstar.w[2] == 0
 	&& (fstar.w[1] < ten2mk128[ind - 1].w[1]
 	    || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 		&& fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
       // subtract 1 to make even (res.w[0] is odd, so no borrow occurs)
       if (res.w[0]-- == 0) {
 	res.w[1]--;
       }
     }
   }
   // attach the sign of x and the exponent field for unbiased exponent 0
   res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
   BID_RETURN (res);
 } else {	// if ((q + exp) < 0) <=> q < -exp
   // the result is +0 or -0
   res.w[1] = x_sign | 0x3040000000000000ull;
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
 }

 /*****************************************************************************
  *  BID128_round_integral_negative
  ****************************************************************************/

 // bid128_round_integral_negative: round the 128-bit BID value x to an
 // integral value, rounding toward negative infinity.
 //   - NaN: the result is the quieted NaN (a non-canonical payload is zeroed);
 //     a signaling NaN additionally sets INVALID_EXCEPTION in *pfpsf.
 //   - infinity: returned with its sign.
 //   - zero (non-canonical encodings are treated as zero): a signed zero
 //     whose exponent is MAX(Q(x), 0).
 //   - unbiased exponent >= 0: x is returned unchanged.
 //   - nonzero x with q + exp <= 0 (magnitude below 1): -1 if x is negative,
 //     +0 if x is positive.
 //   - otherwise: the coefficient is truncated and, for negative x only,
 //     its magnitude is incremented when discarded digits were nonzero.
 // INVALID_EXCEPTION is the only status flag written by this function.
 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x)

      UINT128 res;
      UINT64 x_sign;
      UINT64 x_exp;
      int exp;			// unbiased exponent
   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
   // (all are UINT64)
      BID_UI64DOUBLE tmp1;	// used to read the exponent field of a double
      unsigned int x_nr_bits;	// number of significant bits in C1
      int q, ind, shift;	// q = nr. of decimal digits, ind = digits to drop
      UINT128 C1;
   // UINT128 res is C* at first - represents up to 34 decimal digits ~
   // 113 bits
      UINT256 fstar;		// fractional part f* of the scaled product
      UINT256 P256;		// 256-bit product C1 * ten2mk128[ind - 1]

   // check for NaN or Infinity
 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
     // x is special
 if ((x.w[1] & MASK_NAN) == MASK_NAN) {	// x is NAN
   // if x = NaN, then res = Q (x)
   // check first for non-canonical NaN payload; if the payload exceeds
   // 0x0000314dc6448d93_38c15b09ffffffff it is replaced by zero
   if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
       (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
        (x.w[0] > 0x38c15b09ffffffffull))) {
     x.w[1] = x.w[1] & 0xffffc00000000000ull;
     x.w[0] = 0x0ull;
   }
   // both arms below build the same result; they differ only in that the
   // SNaN arm raises the invalid flag
   if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
     // set invalid flag
     *pfpsf |= INVALID_EXCEPTION;
     // return quiet (x)
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out also G[6]-G[16]
     res.w[0] = x.w[0];
   } else {	// x is QNaN
     // return x
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out G[6]-G[16]
     res.w[0] = x.w[0];
   }
   BID_RETURN (res)
 } else {	// x is not a NaN, so it must be infinity
   if ((x.w[1] & MASK_SIGN) == 0x0ull) {	// x is +inf
     // return +inf
     res.w[1] = 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {	// x is -inf
     // return -inf
     res.w[1] = 0xf800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
 }
   // unpack x
 x_sign = x.w[1] & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
 C1.w[1] = x.w[1] & MASK_COEFF;
 C1.w[0] = x.w[0];

   // check for non-canonical values (treated as zero)
 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {	// G0_G1=11
   // non-canonical
   x_exp = (x.w[1] << 2) & MASK_EXP;	// biased and shifted left 49 bits
   C1.w[1] = 0;	// significand high
   C1.w[0] = 0;	// significand low
 } else {	// G0_G1 != 11
   x_exp = x.w[1] & MASK_EXP;	// biased and shifted left 49 bits
   if (C1.w[1] > 0x0001ed09bead87c0ull ||
       (C1.w[1] == 0x0001ed09bead87c0ull
        && C1.w[0] > 0x378d8e63ffffffffull)) {
     // x is non-canonical if coefficient is larger than 10^34 -1
     C1.w[1] = 0;
     C1.w[0] = 0;
   } else {	// canonical
     ;
   }
 }

   // test for input equal to zero
 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
   // x is 0
   // return 0 preserving the sign bit and the preferred exponent
   // of MAX(Q(x), 0)
   // 0x1820 == 6176 is the exponent bias, so this tests "unbiased exp <= 0"
   if (x_exp <= (0x1820ull << 49)) {
     res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
   } else {
     res.w[1] = x_sign | x_exp;
   }
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // x is not special and is not zero

   // if (exp <= -p) return -1.0 or +0.0
 if (x_exp <= 0x2ffc000000000000ull) {	// 0x2ffc000000000000ull == -34
   if (x_sign) {
     // if negative, return negative 1, because we know the coefficient
     // is non-zero (would have been caught above)
     res.w[1] = 0xb040000000000000ull;
     res.w[0] = 0x0000000000000001ull;
   } else {
     // if positive, return positive 0, because we know coefficient is
     // non-zero (would have been caught above)
     res.w[1] = 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
   // q = nr. of decimal digits in x
   // determine first the nr. of bits in x
   // (the bit count is read from the exponent field, bits 62..52, of the
   // double obtained by converting the relevant part of C1)
 if (C1.w[1] == 0) {
   if (C1.w[0] >= 0x0020000000000000ull) {	// x >= 2^53
     // split the 64-bit value in two 32-bit halves to avoid rounding errors
     // (the test below is always true on this path because x >= 2^53 was
     // just established, so its else arm is never taken)
     if (C1.w[0] >= 0x0000000100000000ull) {	// x >= 2^32
       tmp1.d = (double) (C1.w[0] >> 32);	// exact conversion
       x_nr_bits =
 	33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     } else {	// x < 2^32
       tmp1.d = (double) (C1.w[0]);	// exact conversion
       x_nr_bits =
 	1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else {	// if x < 2^53
     tmp1.d = (double) C1.w[0];	// exact conversion
     x_nr_bits =
       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
 } else {	// C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   tmp1.d = (double) C1.w[1];	// exact conversion
   x_nr_bits =
     65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
 }

   // look up q from the bit count; a 0 entry means the bit count alone is
   // ambiguous, so start from digits1 and add one digit when C1 reaches the
   // 128-bit threshold (threshold_hi, threshold_lo)
 q = nr_digits[x_nr_bits - 1].digits;
 if (q == 0) {
   q = nr_digits[x_nr_bits - 1].digits1;
   if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
       (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
        C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
     q++;
 }
 exp = (x_exp >> 49) - 6176;	// remove the exponent bias
 if (exp >= 0) {	// -exp <= 0
   // the argument is an integer already
   res.w[1] = x.w[1];
   res.w[0] = x.w[0];
   BID_RETURN (res);
 } else if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
   // need to shift right -exp digits from the coefficient; the exp will be 0
   ind = -exp;	// 1 <= ind < q; ind is a synonym for 'x'
   // (number of digits to be chopped off)
   // chop off ind digits from the lower part of C1
   // for rounding toward negative infinity we truncate, then add 1 to the
   // magnitude if x is negative and the discarded fraction is nonzero;
   // unlike the round-to-nearest variants, no 1/2 * 10^x midpoint is added
   // to C1 before the multiplication
   // calculate C* and f*
   // C* is actually floor(C*) in this case
   // C* and f* need shifting and masking, as shown by
   // shiftright128[] and maskhigh128[]
   // kx = 10^(-x) = ten2mk128[ind - 1]
   // C* = C1 * 10^(-x)
   // the approximation of 10^(-x) was rounded up to 118 bits
   __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
   if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
     res.w[1] = P256.w[3];
     res.w[0] = P256.w[2];
     // if positive, the truncated value is already the correct result
     if (x_sign) {	// if negative
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       // redundant fstar.w[1] = P256.w[1];
       // redundant fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((P256.w[1] > ten2mk128[ind - 1].w[1])
 	  || (P256.w[1] == ten2mk128[ind - 1].w[1]
 	      && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
 	// inexact and negative: add 1 to the magnitude, carrying into
 	// the high word when the low word wraps to 0
 	if (++res.w[0] == 0) {
 	  res.w[1]++;
 	}
       }
     }
   } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
     shift = shiftright128[ind - 1];	// 3 <= shift <= 63
     // C* = the upper 128 bits of P256 shifted right by 'shift' bits
     res.w[1] = (P256.w[3] >> shift);
     res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
     // if positive, the truncated value is already the correct result
     if (x_sign) {	// if negative
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	  (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	   fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	// inexact and negative: add 1 to the magnitude (with carry)
 	if (++res.w[0] == 0) {
 	  res.w[1]++;
 	}
       }
     }
   } else {	// 22 <= ind - 1 <= 33
     shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
     // C* fits in the low word: it is P256.w[3] shifted right by 'shift'
     res.w[1] = 0;
     res.w[0] = P256.w[3] >> shift;
     // if positive, the truncated value is already the correct result
     if (x_sign) {	// if negative
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[3] || fstar.w[2]
 	  || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	  || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 	      && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	// inexact and negative: add 1 to the magnitude (with carry)
 	if (++res.w[0] == 0) {
 	  res.w[1]++;
 	}
       }
     }
   }
   // attach the sign of x and the exponent field for unbiased exponent 0
   res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
   BID_RETURN (res);
 } else {	// if exp < 0 and q + exp <= 0
   if (x_sign) {	// negative rounds down to -1.0
     res.w[1] = 0xb040000000000000ull;
     res.w[0] = 0x0000000000000001ull;
   } else {	// positive rounds down to +0.0
     res.w[1] = 0x3040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
 }

 /*****************************************************************************
  *  BID128_round_integral_positive
  ****************************************************************************/

 // bid128_round_integral_positive: round the 128-bit BID value x to an
 // integral value, rounding toward positive infinity.
 //   - NaN: the result is the quieted NaN (a non-canonical payload is zeroed);
 //     a signaling NaN additionally sets INVALID_EXCEPTION in *pfpsf.
 //   - infinity: returned with its sign.
 //   - zero (non-canonical encodings are treated as zero): a signed zero
 //     whose exponent is MAX(Q(x), 0).
 //   - unbiased exponent >= 0: x is returned unchanged.
 //   - nonzero x with q + exp <= 0 (magnitude below 1): -0 if x is negative,
 //     +1 if x is positive.
 //   - otherwise: the coefficient is truncated and, for positive x only,
 //     incremented when discarded digits were nonzero.
 // INVALID_EXCEPTION is the only status flag written by this function.
 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x)

      UINT128 res;
      UINT64 x_sign;
      UINT64 x_exp;
      int exp;			// unbiased exponent
   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
   // (all are UINT64)
      BID_UI64DOUBLE tmp1;	// used to read the exponent field of a double
      unsigned int x_nr_bits;	// number of significant bits in C1
      int q, ind, shift;	// q = nr. of decimal digits, ind = digits to drop
      UINT128 C1;
   // UINT128 res is C* at first - represents up to 34 decimal digits ~
   // 113 bits
      UINT256 fstar;		// fractional part f* of the scaled product
      UINT256 P256;		// 256-bit product C1 * ten2mk128[ind - 1]

   // check for NaN or Infinity
 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
     // x is special
 if ((x.w[1] & MASK_NAN) == MASK_NAN) {	// x is NAN
   // if x = NaN, then res = Q (x)
   // check first for non-canonical NaN payload; if the payload exceeds
   // 0x0000314dc6448d93_38c15b09ffffffff it is replaced by zero
   if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
       (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
        (x.w[0] > 0x38c15b09ffffffffull))) {
     x.w[1] = x.w[1] & 0xffffc00000000000ull;
     x.w[0] = 0x0ull;
   }
   // both arms below build the same result; they differ only in that the
   // SNaN arm raises the invalid flag
   if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
     // set invalid flag
     *pfpsf |= INVALID_EXCEPTION;
     // return quiet (x)
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out also G[6]-G[16]
     res.w[0] = x.w[0];
   } else {	// x is QNaN
     // return x
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out G[6]-G[16]
     res.w[0] = x.w[0];
   }
   BID_RETURN (res)
 } else {	// x is not a NaN, so it must be infinity
   if ((x.w[1] & MASK_SIGN) == 0x0ull) {	// x is +inf
     // return +inf
     res.w[1] = 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {	// x is -inf
     // return -inf
     res.w[1] = 0xf800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
 }
   // unpack x
 x_sign = x.w[1] & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
 C1.w[1] = x.w[1] & MASK_COEFF;
 C1.w[0] = x.w[0];

   // check for non-canonical values (treated as zero)
 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {	// G0_G1=11
   // non-canonical
   x_exp = (x.w[1] << 2) & MASK_EXP;	// biased and shifted left 49 bits
   C1.w[1] = 0;	// significand high
   C1.w[0] = 0;	// significand low
 } else {	// G0_G1 != 11
   x_exp = x.w[1] & MASK_EXP;	// biased and shifted left 49 bits
   if (C1.w[1] > 0x0001ed09bead87c0ull ||
       (C1.w[1] == 0x0001ed09bead87c0ull
        && C1.w[0] > 0x378d8e63ffffffffull)) {
     // x is non-canonical if coefficient is larger than 10^34 -1
     C1.w[1] = 0;
     C1.w[0] = 0;
   } else {	// canonical
     ;
   }
 }

   // test for input equal to zero
 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
   // x is 0
   // return 0 preserving the sign bit and the preferred exponent
   // of MAX(Q(x), 0)
   // 0x1820 == 6176 is the exponent bias, so this tests "unbiased exp <= 0"
   if (x_exp <= (0x1820ull << 49)) {
     res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
   } else {
     res.w[1] = x_sign | x_exp;
   }
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // x is not special and is not zero

   // if (exp <= -p) return -0.0 or +1.0
 if (x_exp <= 0x2ffc000000000000ull) {	// 0x2ffc000000000000ull == -34
   if (x_sign) {
     // if negative, return negative 0, because we know the coefficient
     // is non-zero (would have been caught above)
     res.w[1] = 0xb040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {
     // if positive, return positive 1, because we know coefficient is
     // non-zero (would have been caught above)
     res.w[1] = 0x3040000000000000ull;
     res.w[0] = 0x0000000000000001ull;
   }
   BID_RETURN (res);
 }
   // q = nr. of decimal digits in x
   // determine first the nr. of bits in x
   // (the bit count is read from the exponent field, bits 62..52, of the
   // double obtained by converting the relevant part of C1)
 if (C1.w[1] == 0) {
   if (C1.w[0] >= 0x0020000000000000ull) {	// x >= 2^53
     // split 64-bit value in two 32-bit halves to avoid rounding errors
     // (the test below is always true on this path because x >= 2^53 was
     // just established, so its else arm is never taken)
     if (C1.w[0] >= 0x0000000100000000ull) {	// x >= 2^32
       tmp1.d = (double) (C1.w[0] >> 32);	// exact conversion
       x_nr_bits =
 	33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     } else {	// x < 2^32
       tmp1.d = (double) (C1.w[0]);	// exact conversion
       x_nr_bits =
 	1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else {	// if x < 2^53
     tmp1.d = (double) C1.w[0];	// exact conversion
     x_nr_bits =
       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
 } else {	// C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   tmp1.d = (double) C1.w[1];	// exact conversion
   x_nr_bits =
     65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
 }

   // look up q from the bit count; a 0 entry means the bit count alone is
   // ambiguous, so start from digits1 and add one digit when C1 reaches the
   // 128-bit threshold (threshold_hi, threshold_lo)
 q = nr_digits[x_nr_bits - 1].digits;
 if (q == 0) {
   q = nr_digits[x_nr_bits - 1].digits1;
   if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
       (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
        C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
     q++;
 }
 exp = (x_exp >> 49) - 6176;	// remove the exponent bias
 if (exp >= 0) {	// -exp <= 0
   // the argument is an integer already
   res.w[1] = x.w[1];
   res.w[0] = x.w[0];
   BID_RETURN (res);
 } else if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
   // need to shift right -exp digits from the coefficient; exp will be 0
   ind = -exp;	// 1 <= ind < q; ind is a synonym for 'x'
   // (number of digits to be chopped off)
   // chop off ind digits from the lower part of C1
   // for rounding toward positive infinity we truncate, then add 1 if x is
   // positive and the discarded fraction is nonzero; unlike the
   // round-to-nearest variants, no 1/2 * 10^x midpoint is added to C1
   // before the multiplication
   // calculate C* and f*
   // C* is actually floor(C*) in this case
   // C* and f* need shifting and masking, as shown by
   // shiftright128[] and maskhigh128[]
   // kx = 10^(-x) = ten2mk128[ind - 1]
   // C* = C1 * 10^(-x)
   // the approximation of 10^(-x) was rounded up to 118 bits
   __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
   if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
     res.w[1] = P256.w[3];
     res.w[0] = P256.w[2];
     // if negative, the truncated value is already the correct result
     if (!x_sign) {	// if positive
       // redundant fstar.w[3] = 0;
       // redundant fstar.w[2] = 0;
       // redundant fstar.w[1] = P256.w[1];
       // redundant fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if ((P256.w[1] > ten2mk128[ind - 1].w[1])
 	  || (P256.w[1] == ten2mk128[ind - 1].w[1]
 	      && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
 	// inexact and positive: add 1, carrying into the high word
 	// when the low word wraps to 0
 	if (++res.w[0] == 0) {
 	  res.w[1]++;
 	}
       }
     }
   } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
     shift = shiftright128[ind - 1];	// 3 <= shift <= 63
     // C* = the upper 128 bits of P256 shifted right by 'shift' bits
     res.w[1] = (P256.w[3] >> shift);
     res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
     // if negative, the truncated value is already the correct result
     if (!x_sign) {	// if positive
       // redundant fstar.w[3] = 0;
       fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
 	  (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
 	   fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	// inexact and positive: add 1 (with carry)
 	if (++res.w[0] == 0) {
 	  res.w[1]++;
 	}
       }
     }
   } else {	// 22 <= ind - 1 <= 33
     shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
     // C* fits in the low word: it is P256.w[3] shifted right by 'shift'
     res.w[1] = 0;
     res.w[0] = P256.w[3] >> shift;
     // if negative, the truncated value is already the correct result
     if (!x_sign) {	// if positive
       fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
       fstar.w[2] = P256.w[2];
       fstar.w[1] = P256.w[1];
       fstar.w[0] = P256.w[0];
       // fraction f* > 10^(-x) <=> inexact
       // f* is in the right position to be compared with
       // 10^(-x) from ten2mk128[]
       if (fstar.w[3] || fstar.w[2]
 	  || fstar.w[1] > ten2mk128[ind - 1].w[1]
 	  || (fstar.w[1] == ten2mk128[ind - 1].w[1]
 	      && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
 	// inexact and positive: add 1 (with carry)
 	if (++res.w[0] == 0) {
 	  res.w[1]++;
 	}
       }
     }
   }
   // attach the sign of x and the exponent field for unbiased exponent 0
   res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
   BID_RETURN (res);
 } else {	// if exp < 0 and q + exp <= 0
   if (x_sign) {	// negative rounds up to -0.0
     res.w[1] = 0xb040000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {	// positive rounds up to +1.0
     res.w[1] = 0x3040000000000000ull;
     res.w[0] = 0x0000000000000001ull;
   }
   BID_RETURN (res);
 }
 }

 /*****************************************************************************
  *  BID128_round_integral_zero
  ****************************************************************************/

 // bid128_round_integral_zero: round the 128-bit BID value x to an integral
 // value, rounding toward zero (truncation).
 //   - NaN: the result is the quieted NaN (a non-canonical payload is zeroed);
 //     a signaling NaN additionally sets INVALID_EXCEPTION in *pfpsf.
 //   - infinity: returned with its sign.
 //   - zero (non-canonical encodings are treated as zero): a signed zero
 //     whose exponent is MAX(Q(x), 0).
 //   - unbiased exponent >= 0: x is returned unchanged.
 //   - nonzero x with q + exp <= 0 (magnitude below 1): zero with the sign
 //     of x.
 //   - otherwise: the coefficient with its last -exp digits chopped off.
 // INVALID_EXCEPTION is the only status flag written by this function.
 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x)

      UINT128 res;
      UINT64 x_sign;
      UINT64 x_exp;
      int exp;			// unbiased exponent
   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
   // (all are UINT64)
      BID_UI64DOUBLE tmp1;	// used to read the exponent field of a double
      unsigned int x_nr_bits;	// number of significant bits in C1
      int q, ind, shift;	// q = nr. of decimal digits, ind = digits to drop
      UINT128 C1;
   // UINT128 res is C* at first - represents up to 34 decimal digits ~
   // 113 bits
      UINT256 P256;		// 256-bit product C1 * ten2mk128[ind - 1]

   // check for NaN or Infinity
 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
     // x is special
 if ((x.w[1] & MASK_NAN) == MASK_NAN) {	// x is NAN
   // if x = NaN, then res = Q (x)
   // check first for non-canonical NaN payload; if the payload exceeds
   // 0x0000314dc6448d93_38c15b09ffffffff it is replaced by zero
   if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
       (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
        (x.w[0] > 0x38c15b09ffffffffull))) {
     x.w[1] = x.w[1] & 0xffffc00000000000ull;
     x.w[0] = 0x0ull;
   }
   // both arms below build the same result; they differ only in that the
   // SNaN arm raises the invalid flag
   if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
     // set invalid flag
     *pfpsf |= INVALID_EXCEPTION;
     // return quiet (x)
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out also G[6]-G[16]
     res.w[0] = x.w[0];
   } else {	// x is QNaN
     // return x
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out G[6]-G[16]
     res.w[0] = x.w[0];
   }
   BID_RETURN (res)
 } else {	// x is not a NaN, so it must be infinity
   if ((x.w[1] & MASK_SIGN) == 0x0ull) {	// x is +inf
     // return +inf
     res.w[1] = 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {	// x is -inf
     // return -inf
     res.w[1] = 0xf800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
 }
   // unpack x
 x_sign = x.w[1] & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
 C1.w[1] = x.w[1] & MASK_COEFF;
 C1.w[0] = x.w[0];

   // check for non-canonical values (treated as zero)
 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {	// G0_G1=11
   // non-canonical
   x_exp = (x.w[1] << 2) & MASK_EXP;	// biased and shifted left 49 bits
   C1.w[1] = 0;	// significand high
   C1.w[0] = 0;	// significand low
 } else {	// G0_G1 != 11
   x_exp = x.w[1] & MASK_EXP;	// biased and shifted left 49 bits
   if (C1.w[1] > 0x0001ed09bead87c0ull ||
       (C1.w[1] == 0x0001ed09bead87c0ull
        && C1.w[0] > 0x378d8e63ffffffffull)) {
     // x is non-canonical if coefficient is larger than 10^34 -1
     C1.w[1] = 0;
     C1.w[0] = 0;
   } else {	// canonical
     ;
   }
 }

   // test for input equal to zero
 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
   // x is 0
   // return 0 preserving the sign bit and the preferred exponent
   // of MAX(Q(x), 0)
   // 0x1820 == 6176 is the exponent bias, so this tests "unbiased exp <= 0"
   if (x_exp <= (0x1820ull << 49)) {
     res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
   } else {
     res.w[1] = x_sign | x_exp;
   }
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // x is not special and is not zero

   // if (exp <= -p) return -0.0 or +0.0
 if (x_exp <= 0x2ffc000000000000ull) {	// 0x2ffc000000000000ull == -34
   res.w[1] = x_sign | 0x3040000000000000ull;
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // q = nr. of decimal digits in x
   // determine first the nr. of bits in x
   // (the bit count is read from the exponent field, bits 62..52, of the
   // double obtained by converting the relevant part of C1)
 if (C1.w[1] == 0) {
   if (C1.w[0] >= 0x0020000000000000ull) {	// x >= 2^53
     // split the 64-bit value in two 32-bit halves to avoid rounding errors
     // (the test below is always true on this path because x >= 2^53 was
     // just established, so its else arm is never taken)
     if (C1.w[0] >= 0x0000000100000000ull) {	// x >= 2^32
       tmp1.d = (double) (C1.w[0] >> 32);	// exact conversion
       x_nr_bits =
 	33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     } else {	// x < 2^32
       tmp1.d = (double) (C1.w[0]);	// exact conversion
       x_nr_bits =
 	1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else {	// if x < 2^53
     tmp1.d = (double) C1.w[0];	// exact conversion
     x_nr_bits =
       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
 } else {	// C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   tmp1.d = (double) C1.w[1];	// exact conversion
   x_nr_bits =
     65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
 }

   // look up q from the bit count; a 0 entry means the bit count alone is
   // ambiguous, so start from digits1 and add one digit when C1 reaches the
   // 128-bit threshold (threshold_hi, threshold_lo)
 q = nr_digits[x_nr_bits - 1].digits;
 if (q == 0) {
   q = nr_digits[x_nr_bits - 1].digits1;
   if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
       (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
        C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
     q++;
 }
 exp = (x_exp >> 49) - 6176;	// remove the exponent bias
 if (exp >= 0) {	// -exp <= 0
   // the argument is an integer already
   res.w[1] = x.w[1];
   res.w[0] = x.w[0];
   BID_RETURN (res);
 } else if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
   // need to shift right -exp digits from the coefficient; the exp will be 0
   ind = -exp;	// 1 <= ind < q; ind is a synonym for 'x'
   // (number of digits to be chopped off)
   // chop off ind digits from the lower part of C1
   // for rounding toward zero the truncated quotient is the result: no
   // 1/2 * 10^x midpoint is added to C1 before the multiplication and no
   // correction based on the discarded fraction is applied afterwards
   // calculate C*
   // C* is actually floor(C*) in this case
   // C* needs shifting, as shown by shiftright128[]
   // kx = 10^(-x) = ten2mk128[ind - 1]
   // C* = C1 * 10^(-x)
   // the approximation of 10^(-x) was rounded up to 118 bits
   __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
   if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
     res.w[1] = P256.w[3];
     res.w[0] = P256.w[2];
   } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
     shift = shiftright128[ind - 1];	// 3 <= shift <= 63
     // C* = the upper 128 bits of P256 shifted right by 'shift' bits
     res.w[1] = (P256.w[3] >> shift);
     res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
   } else {	// 22 <= ind - 1 <= 33
     shift = shiftright128[ind - 1] - 64;	// 2 <= shift <= 38
     // C* fits in the low word: it is P256.w[3] shifted right by 'shift'
     res.w[1] = 0;
     res.w[0] = P256.w[3] >> shift;
   }
   // attach the sign of x and the exponent field for unbiased exponent 0
   res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
   BID_RETURN (res);
 } else {	// if exp < 0 and q + exp <= 0 the result is +0 or -0
   res.w[1] = x_sign | 0x3040000000000000ull;
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
 }

 /*****************************************************************************
  *  BID128_round_integral_nearest_away
  ****************************************************************************/

 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x)

      UINT128 res;
      UINT64 x_sign;
      UINT64 x_exp;
      int exp;			// unbiased exponent
   // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
   // (all are UINT64)
      UINT64 tmp64;
      BID_UI64DOUBLE tmp1;
      unsigned int x_nr_bits;
      int q, ind, shift;
      UINT128 C1;
   // UINT128 res is C* at first - represents up to 34 decimal digits ~
   // 113 bits
   // UINT256 fstar;
      UINT256 P256;

   // check for NaN or Infinity
 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
     // x is special
 if ((x.w[1] & MASK_NAN) == MASK_NAN) {	// x is NAN
   // if x = NaN, then res = Q (x)
   // check first for non-canonical NaN payload
   if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
       (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
        (x.w[0] > 0x38c15b09ffffffffull))) {
     x.w[1] = x.w[1] & 0xffffc00000000000ull;
     x.w[0] = 0x0ull;
   }
   if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
     // set invalid flag
     *pfpsf |= INVALID_EXCEPTION;
     // return quiet (x)
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out also G[6]-G[16]
     res.w[0] = x.w[0];
   } else {	// x is QNaN
     // return x
     res.w[1] = x.w[1] & 0xfc003fffffffffffull;	// clear out G[6]-G[16]
     res.w[0] = x.w[0];
   }
   BID_RETURN (res)
 } else {	// x is not a NaN, so it must be infinity
   if ((x.w[1] & MASK_SIGN) == 0x0ull) {	// x is +inf
     // return +inf
     res.w[1] = 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   } else {	// x is -inf
     // return -inf
     res.w[1] = 0xf800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
   }
   BID_RETURN (res);
 }
 }
   // unpack x
 x_sign = x.w[1] & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
 C1.w[1] = x.w[1] & MASK_COEFF;
 C1.w[0] = x.w[0];

   // check for non-canonical values (treated as zero)
 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {	// G0_G1=11
   // non-canonical
   x_exp = (x.w[1] << 2) & MASK_EXP;	// biased and shifted left 49 bits
   C1.w[1] = 0;	// significand high
   C1.w[0] = 0;	// significand low
 } else {	// G0_G1 != 11
   x_exp = x.w[1] & MASK_EXP;	// biased and shifted left 49 bits
   if (C1.w[1] > 0x0001ed09bead87c0ull ||
       (C1.w[1] == 0x0001ed09bead87c0ull
        && C1.w[0] > 0x378d8e63ffffffffull)) {
     // x is non-canonical if coefficient is larger than 10^34 -1
     C1.w[1] = 0;
     C1.w[0] = 0;
   } else {	// canonical
     ;
   }
 }

   // test for input equal to zero
 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
   // x is 0
   // return 0 preserving the sign bit and the preferred exponent
   // of MAX(Q(x), 0)
   if (x_exp <= (0x1820ull << 49)) {
     res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
   } else {
     res.w[1] = x_sign | x_exp;
   }
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // x is not special and is not zero

   // if (exp <= -(p+1)) return 0.0
 if (x_exp <= 0x2ffa000000000000ull) {	// 0x2ffa000000000000ull == -35
   res.w[1] = x_sign | 0x3040000000000000ull;
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
   // q = nr. of decimal digits in x
   //  determine first the nr. of bits in x
 if (C1.w[1] == 0) {
   if (C1.w[0] >= 0x0020000000000000ull) {	// x >= 2^53
     // split the 64-bit value in two 32-bit halves to avoid rounding errors
     if (C1.w[0] >= 0x0000000100000000ull) {	// x >= 2^32
       tmp1.d = (double) (C1.w[0] >> 32);	// exact conversion
       x_nr_bits =
 	33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     } else {	// x < 2^32
       tmp1.d = (double) (C1.w[0]);	// exact conversion
       x_nr_bits =
 	1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else {	// if x < 2^53
     tmp1.d = (double) C1.w[0];	// exact conversion
     x_nr_bits =
       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
 } else {	// C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   tmp1.d = (double) C1.w[1];	// exact conversion
   x_nr_bits =
     65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
 }

 q = nr_digits[x_nr_bits - 1].digits;
 if (q == 0) {
   q = nr_digits[x_nr_bits - 1].digits1;
   if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
       (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
        C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
     q++;
 }
 exp = (x_exp >> 49) - 6176;
 if (exp >= 0) {	// -exp <= 0
   // the argument is an integer already
   res.w[1] = x.w[1];
   res.w[0] = x.w[0];
   BID_RETURN (res);
 } else if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
   // need to shift right -exp digits from the coefficient; the exp will be 0
   ind = -exp;	// 1 <= ind <= 34; ind is a synonym for 'x'
   // chop off ind digits from the lower part of C1
   // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
   tmp64 = C1.w[0];
   if (ind <= 19) {
     C1.w[0] = C1.w[0] + midpoint64[ind - 1];
   } else {
     C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
     C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
   }
   if (C1.w[0] < tmp64)
     C1.w[1]++;
   // calculate C* and f*
   // C* is actually floor(C*) in this case
   // C* and f* need shifting and masking, as shown by
   // shiftright128[] and maskhigh128[]
   // 1 <= x <= 34
   // kx = 10^(-x) = ten2mk128[ind - 1]
   // C* = (C1 + 1/2 * 10^x) * 10^(-x)
   // the approximation of 10^(-x) was rounded up to 118 bits
   __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
   // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
   // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
   // if (0 < f* < 10^(-x)) then the result is a midpoint
   //   if floor(C*) is even then C* = floor(C*) - logical right
   //       shift; C* has p decimal digits, correct by Prop. 1)
   //   else if floor(C*) is odd C* = floor(C*)-1 (logical right
   //       shift; C* has p decimal digits, correct by Pr. 1)
   // else
   //   C* = floor(C*) (logical right shift; C has p decimal digits,
   //       correct by Property 1)
   // n = C* * 10^(e+x)

   // shift right C* by Ex-128 = shiftright128[ind]
   if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
     res.w[1] = P256.w[3];
     res.w[0] = P256.w[2];
   } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
     shift = shiftright128[ind - 1];	// 3 <= shift <= 63
     res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
     res.w[1] = (P256.w[3] >> shift);
   } else {	// 22 <= ind - 1 <= 33
     shift = shiftright128[ind - 1];	// 2 <= shift <= 38
     res.w[1] = 0;
     res.w[0] = (P256.w[3] >> (shift - 64));	// 2 <= shift - 64 <= 38
   }
   // if the result was a midpoint, it was already rounded away from zero
   res.w[1] |= x_sign | 0x3040000000000000ull;
   BID_RETURN (res);
 } else {	// if ((q + exp) < 0) <=> q < -exp
   // the result is +0 or -0
   res.w[1] = x_sign | 0x3040000000000000ull;
   res.w[0] = 0x0000000000000000ull;
   BID_RETURN (res);
 }
 }